Why does Keras not take my data from CSV - csv

I am following the tutorial about importing CSV data into TensorFlow.
I followed every step.
However, it does not work out.
It says that one of my features is not in the features dictionary.
If so, how can I put it into the dictionary?
The link to my Colab
The code is written below and the error information is at the end.
import functools
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
train_file_path = "/content/Productivity training.csv"
test_file_path = "/content/Productivity Testing.csv"
LABEL_COLUMN = 'Productivity'
def get_dataset(file_path):
    dataset = tf.data.experimental.make_csv_dataset(
        file_path,
        batch_size=12,
        label_name=LABEL_COLUMN,
        na_value="?",
        num_epochs=1,
        ignore_errors=True)
    return dataset
raw_train_data = get_dataset(train_file_path)
raw_test_data = get_dataset(test_file_path)
examples, labels = next(iter(raw_train_data))  # first batch
print("EXAMPLES: \n", examples, "\n")
print("LABELS: \n", labels)
CATEGORIES = {
    'over working hours': ['0', '2'],
    'experience': ['0.5', '0.75', '1'],
    'absent': ['0.5', '0.25', '0']
}
categorical_columns = []
for feature, vocab in CATEGORIES.items():
    cat_col = tf.feature_column.categorical_column_with_vocabulary_list(
        key=feature, vocabulary_list=vocab)
    categorical_columns.append(tf.feature_column.indicator_column(cat_col))
def process_continuous_data(mean, data):
    # normalize the data
    data = tf.cast(data, tf.float32) * 1/(2*mean)
    return tf.reshape(data, [-1, 1])
MEANS = {
    'Weekday': 4,
    'highest': 9.2540,
    'lowest': 3.47,
    'Weather': 2.63,
    'Wind speed': 2.31
}
numerical_columns = []
for feature in MEANS.keys():
    num_col = tf.feature_column.numeric_column(feature, normalizer_fn=functools.partial(process_continuous_data, MEANS[feature]))
    numerical_columns.append(num_col)
preprocessing_layer = tf.keras.layers.DenseFeatures(categorical_columns+numerical_columns)
model = tf.keras.Sequential([
    preprocessing_layer,
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])
train_data = raw_train_data
test_data = raw_test_data
model.fit(train_data, epochs=20)
Here is the error info:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
975 except Exception as e: # pylint:disable=broad-except
976 if hasattr(e, "ag_error_metadata"):
--> 977 raise e.ag_error_metadata.to_exception(e)
978 else:
979 raise
ValueError: in user code:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:754 train_step
y_pred = self(x, training=True)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py:1012 __call__
outputs = call_fn(inputs, *args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/sequential.py:389 call
outputs = layer(inputs, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py:1012 __call__
outputs = call_fn(inputs, *args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/feature_column/dense_features.py:169 call **
self._state_manager)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/feature_column/feature_column_v2.py:2592 get_dense_tensor
return transformation_cache.get(self, state_manager)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/feature_column/feature_column_v2.py:2355 get
transformed = column.transform_feature(self, state_manager)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/feature_column/feature_column_v2.py:2564 transform_feature
input_tensor = transformation_cache.get(self.key, state_manager)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/feature_column/feature_column_v2.py:2339 get
raise ValueError('Feature {} is not in features dictionary.'.format(key))
ValueError: Feature Weather is not in features dictionary.
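A hedged diagnostic note (my own check, not part of the tutorial): this error usually means that a key passed to a feature column does not exactly match one of the CSV headers, so comparing the keys of one batch against the configured names makes any mismatch (case, trailing spaces, a missing column) visible.

# Hypothetical check, reusing raw_train_data, CATEGORIES and MEANS from above.
examples, labels = next(iter(raw_train_data))
print(sorted(examples.keys()))                                 # names actually read from the CSV
print(sorted(list(CATEGORIES.keys()) + list(MEANS.keys())))    # names the feature columns expect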

Related

What is wrong with my neural net model with LSTM for regression problem that it doesn't return the model as output?

So, the question is this:
What am I doing wrong when defining the neural net architecture? Look at the sections "Define the neural network model" and "Define the learning rate scheduler and train the model".
Details:
I have written the code for this, where revenue_data shape is (1749, 2), weather_data shape is (86990, 10), X_train shape is ([69010, 14]), y_train is ([69010]), X_val is ([17253, 14]), and y_val is ([17253]). I have done the preprocessing, scaling, outlier removal, and data splitting as here:
# Convert date and time columns to datetime format
revenue_data['Date'] = pd.to_datetime(revenue_data['Date'], format='%Y%m%d')
weather_data['dt'] = pd.to_datetime(weather_data['dt'], format='%Y%m%d')
weather_data['time'] = pd.to_datetime(weather_data['time'], format='%H:%M:%S')

# Convert wind and condition columns to embeddings
wind_embeddings = nn.Embedding(len(weather_data['wind'].unique()), 5)
weather_data['wind_code'] = weather_data['wind'].astype('category').cat.codes
wind_vectors = wind_embeddings(torch.tensor(weather_data['wind_code'].values, dtype=torch.long))
weather_data['wind_x'] = wind_vectors[:, 0].detach().numpy()
weather_data['wind_y'] = wind_vectors[:, 1].detach().numpy()
weather_data['wind_z'] = wind_vectors[:, 2].detach().numpy()
weather_data['wind_t'] = wind_vectors[:, 3].detach().numpy()
weather_data['wind_u'] = wind_vectors[:, 4].detach().numpy()
condition_embeddings = nn.Embedding(len(weather_data['condition'].unique()), 3)
weather_data['condition_code'] = weather_data['condition'].astype('category').cat.codes
condition_vectors = condition_embeddings(torch.tensor(weather_data['condition_code'].values, dtype=torch.long))
weather_data['condition_x'] = condition_vectors[:, 0].detach().numpy()
weather_data['condition_y'] = condition_vectors[:, 1].detach().numpy()
weather_data['condition_z'] = condition_vectors[:, 2].detach().numpy()

# Group the weather data by date and hour and calculate the mean for each date and hour
weather_data = weather_data.groupby(['dt', 'time']).mean()
weather_data = weather_data.reset_index()
weather_data['Date'] = weather_data['dt']
weather_data.drop(['dt', 'time', 'wind_code', 'condition_code'], axis=1, inplace=True)

# Merge the revenue and weather data on the 'Date' column and drop 'Date'
merged_data = pd.merge(revenue_data, weather_data, on='Date')
merged_data.drop('Date', axis=1, inplace=True)
merged_data.head()

# Scale the data
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(merged_data)

# Split the data into input and target sets
X = scaled_data[:, 1:]
y = scaled_data[:, 0]

from scipy.stats import zscore

# Calculate z-scores for each feature | Remove outliers that have a z-score bigger than 3
z_scores = zscore(X)

# Identify rows where any feature has a z-score > 3
mask = (z_scores > 3).any(axis=1)

# Remove rows with high z-scores from the x and y
features = X[~mask, :]
target = y[~mask]

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert the data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)
but I am struggling to realise what is wrong with the neural net architecture I have defined:
Define the neural network model:
class RevenuePredictor(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=14, hidden_size=32, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(32, 16)
        self.fc2 = nn.Linear(16, 1)

    def forward(self, x, lengths):
        print('x shape:', x.shape)
        # Get the lengths of the input sequences
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        lengths = lengths.to(device)
        lengths = lengths.cpu()
        print('lengths shape:', lengths.shape)
        # Sort the input sequences by length
        sorted_lengths, sorted_idx = torch.sort(lengths, descending=True)
        sorted_x = x[sorted_idx]
        # Pack the sorted input sequences
        packed_x = nn.utils.rnn.pack_padded_sequence(sorted_x, sorted_lengths, batch_first=True)
        # Convert the packed sequence to a tensor with two dimensions
        x_data, batch_sizes = nn.utils.rnn.pad_packed_sequence(packed_x, batch_first=True)
        # Convert the packed sequence to a tensor with two dimensions
        x_data, batch_sizes = x.data, x.batch_sizes
        seq_len = batch_sizes[0]
        batch_size = len(batch_sizes)
        x = x_data.new_zeros((batch_size, seq_len, 14))
        s = 0
        for i, l in enumerate(batch_sizes):
            x[i, :l] = x_data[s:(s+l)]
            s += l
        # Pass the packed input sequences through the LSTM
        lstm_output, (h, c) = self.lstm(packed_x)
        # Unpack the LSTM output sequences
        unpacked_output, _ = nn.utils.rnn.pad_packed_sequence(lstm_output, batch_first=True)
        # Re-sort the output sequences to their original order
        unsorted_idx = sorted_idx.sort(0)
        output = unpacked_output[unsorted_idx]
        # Pass the output sequences through the fully connected layers
        output = nn.functional.relu(self.fc1(output[:, -1, :]))
        output = self.fc2(output)
        return output
Then I create the model:
model = RevenuePredictor()
followed by the loss and metrics:
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
metrics = {
    'mse': MeanSquaredError(),
    'mae': MeanAbsoluteError(),
    'r2': R2Score(),
}
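The training loop below reads batches from train_loader and val_loader, which are not defined in the snippet shown; a minimal sketch of how they could be built from the tensors prepared earlier (the batch size is an assumption, not taken from the question):

# Hypothetical loader construction; batch_size=64 is an assumption.
from torch.utils.data import TensorDataset, DataLoader

train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=64, shuffle=False)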
Define the learning rate scheduler and train the model:
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True)
best_val_loss = np.inf
for epoch in range(num_epochs):
    # Set the model to training mode
    model.train()
    train_loss = 0.0
    num_batches = 0
    for X_train, y_train in train_loader:
        lengths = torch.ones(X_train.shape[0], dtype=torch.long)
        optimizer.zero_grad()
        output = model(X_train, lengths)
        loss = loss_fn(output, y_train)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        num_batches += 1
    val_loss = 0.0
    for X_val, y_val in val_loader:
        lengths = torch.ones(X_val.shape[0], dtype=torch.long)
        output = model(X_val, lengths)
        loss = loss_fn(output, y_val)
        val_loss += loss.item()
    scheduler.step(val_loss)
    val_loss /= len(val_loader)
    val_mse = metrics['mse'].compute()
    val_mae = metrics['mae'].compute()
    val_r2 = metrics['r2'].compute()
    for metric in metrics.values():
        metric.reset()
    if (epoch+1) % 100 == 0:
        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}, MSE: {:.4f}, MAE: {:.4f}, R2: {:.4f}'
              .format(epoch+1, num_epochs, train_loss/num_batches, val_loss, val_mse, val_mae, val_r2))
I get this error, which I think is because of something being wrong in defining the neural network model:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3326, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-164-e20b93c25048>", line 3, in <module>
output = model(X_train, lengths)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "<ipython-input-163-43b2ef5c15db>", line 38, in forward
lstm_output, (h, c) = self.lstm(packed_x)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/rnn.py", line 772, in forward
self.check_forward_args(input, hx, batch_sizes)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/rnn.py", line 697, in check_forward_args
self.check_input(input, batch_sizes)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/rnn.py", line 206, in check_input
raise RuntimeError(
RuntimeError: input must have 2 dimensions, got 1
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 2040, in showtraceback
stb = value._render_traceback_()
AttributeError: 'RuntimeError' object has no attribute '_render_traceback_'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/IPython/core/ultratb.py", line 1101, in get_records
return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
File "/usr/local/lib/python3.8/dist-packages/IPython/core/ultratb.py", line 319, in wrapped
return f(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/IPython/core/ultratb.py", line 353, in _fixed_getinnerframes
records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))
File "/usr/lib/python3.8/inspect.py", line 1515, in getinnerframes
frameinfo = (tb.tb_frame,) + getframeinfo(tb, context)
File "/usr/lib/python3.8/inspect.py", line 1473, in getframeinfo
filename = getsourcefile(frame) or getfile(frame)
File "/usr/lib/python3.8/inspect.py", line 708, in getsourcefile
if getattr(getmodule(object, filename), '__loader__', None) is not None:
File "/usr/lib/python3.8/inspect.py", line 737, in getmodule
file = getabsfile(object, _filename)
File "/usr/lib/python3.8/inspect.py", line 721, in getabsfile
return os.path.normcase(os.path.abspath(_filename))
File "/usr/lib/python3.8/posixpath.py", line 379, in abspath
cwd = os.getcwd()
FileNotFoundError: [Errno 2] No such file or directory
---------------------------------------------------------------------------
I tried converting the packed sequence to a tensor with two dimensions in a different way:
x_data, batch_sizes = x.data, x.batch_sizes
seq_len = batch_sizes[0]
batch_size = len(batch_sizes)
x = x_data.new_zeros((batch_size, seq_len, 14))
s = 0
for i, l in enumerate(batch_sizes):
    x[i, :l] = x_data[s:(s+l)]
    s += l
It didn't work.
Then I tried reshaping x to have three dimensions like:
batch_size, seq_len, input_size = x.shape
That didn't work either, and finally I tried unsqueeze(-1) on the output after I defined the model, like:
model = RevenuePredictor()
output = model(X_train, lengths).unsqueeze(-1)
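For what it is worth, a minimal sketch of a direction that seems consistent with the error (an assumption on my part, not the original code): since every sample here is a flat vector of 14 features, the batch can be given an explicit sequence dimension of length 1 before the LSTM, which avoids packing entirely.

# Hypothetical simplification, assuming each row is a single time step (seq_len = 1).
import torch
import torch.nn as nn

class SimpleRevenuePredictor(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=14, hidden_size=32, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(32, 16)
        self.fc2 = nn.Linear(16, 1)

    def forward(self, x):
        x = x.unsqueeze(1)                        # (batch, 14) -> (batch, 1, 14)
        lstm_output, _ = self.lstm(x)             # (batch, 1, 32)
        out = nn.functional.relu(self.fc1(lstm_output[:, -1, :]))
        return self.fc2(out)                      # (batch, 1)

print(SimpleRevenuePredictor()(torch.randn(8, 14)).shape)  # torch.Size([8, 1])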

Mask equals None when using TimeDistributed in TensorFlow 2

I have a model with an encoding layer. I want to apply TimeDistributed to the encoding layer in my model. But when I run the model with TimeDistributed, it gets stuck at the encoding layer in mask = tf.cast(mask, dtype=self.dtype), which means my mask is always None when using the TimeDistributed layer. How can I apply the TimeDistributed layer to my encoding layer?
The encoding layer code:
class Encoding(Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        self.encoding = self.add_weight(shape=(input_shape[-2], input_shape[-1]),
                                        trainable=True,
                                        initializer=tf.initializers.Ones(),
                                        constraint=None,
                                        dtype=self.dtype, name='encoding')
        super().build(input_shape)

    def call(self, inputs, mask=None):
        mask = tf.cast(mask, dtype=self.dtype)
        mask = tf.expand_dims(mask, axis=-1)
        return tf.reduce_sum(mask * self.encoding * inputs, axis=-2)

    def compute_mask(self, inputs, mask=None):
        if mask is None:
            return None
        return tf.reduce_any(mask, axis=-1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])
If I build the model with the encoding in it like this:
encoding = Encoding(name='encoded')
story_encoded = encoding(story_embedded)
query_encoded = encoding(query_embedded)
It can run without error. But if I apply TimeDistributed to my encoding:
encoding = Encoding(name='encoded')
story_encoded = TimeDistributed(encoding, name='story_encoding')(story_embedded)
query_encoded = TimeDistributed(encoding, name='query_encoding')(query_embedded)
It raises an error like this:
ValueError Traceback (most recent call last)
<ipython-input-15-6c1bbfdcab4e> in <module>()
42 shuffle=True,
43 callbacks=callbacks,
---> 44 verbose=1
45 )
46
1 frames
/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in autograph_handler(*args, **kwargs)
1145 except Exception as e: # pylint:disable=broad-except
1146 if hasattr(e, "ag_error_metadata"):
-> 1147 raise e.ag_error_metadata.to_exception(e)
1148 else:
1149 raise
ValueError: in user code:
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function *
return step_function(self, iterator)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1010, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1000, in run_step **
outputs = model.train_step(data)
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 859, in train_step
y_pred = self(x, training=True)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
ValueError: Exception encountered when calling layer "encoded" (type Encoding).
in user code:
File "<ipython-input-4-9d7376af8f80>", line 26, in call *
mask = tf.cast(mask, dtype=self.dtype)
ValueError: None values not supported.
Call arguments received:
• inputs=tf.Tensor(shape=(30, 3, 80), dtype=float32)
• mask=None
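One direction that might help (an assumption about the intended behaviour, not a confirmed fix for how TimeDistributed propagates masks): make the layer tolerate a missing mask by treating None as an all-ones mask, so the weighted sum still works.

# Hypothetical variant of Encoding.call from the class above; assumes a missing
# mask should simply count every position along the summed axis.
def call(self, inputs, mask=None):
    if mask is None:
        mask = tf.ones(tf.shape(inputs)[:-1], dtype=self.dtype)   # fallback all-ones mask
    else:
        mask = tf.cast(mask, dtype=self.dtype)
    mask = tf.expand_dims(mask, axis=-1)
    return tf.reduce_sum(mask * self.encoding * inputs, axis=-2)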

Pytorch: RuntimeError: mat1 dim 1 must match mat2 dim 0 using 2D CNN model with pretrained vgg_16

I am working on cardiac CT data (axial, sagittal, coronal). I am using the pre-trained model vgg_16 but got the following error. According to this error, my dimensions are not correct, but according to my code, I wrote things correctly. I have already tried to correct my code but got the same error. Can somebody guide me regarding this? Below are the error and the code.
Traceback (most recent call last):
  File "ct_pretrained.py", line 199, in <module>
    loss, metric = train(model, train_loader, optimizer)
  File "ct_pretrained.py", line 57, in train
    output = model(axial, sagittal, coronal, emr)
  File "/root/miniconda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/data/heart_ct/torch/models/test.py", line 38, in forward
    axial_feature = self.axial_model(axial)
  File "/root/miniconda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/root/miniconda/lib/python3.8/site-packages/torchvision/models/vgg.py", line 46, in forward
    x = self.classifier(x)
  File "/root/miniconda/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/root/miniconda/lib/python3.8/site-packages/torch/nn/modules/linear.py", line 93, in forward
    return F.linear(input, self.weight, self.bias)
  File "/root/miniconda/lib/python3.8/site-packages/torch/nn/functional.py", line 1690, in linear
    ret = torch.addmm(bias, input, weight.t())
RuntimeError: mat1 dim 1 must match mat2 dim 0
The code is:
import torch
import torch.nn as nn
from torchvision import models

__all__ = ['VGG']

class VGG(nn.Module):
    def __init__(self, is_emr=False, mode='sum'):
        super().__init__()
        self.is_emr = is_emr
        self.mode = mode
        in_dim = 45

        self.axial_model = models.vgg16(pretrained=True)
        out_channels = self.axial_model.features[0].out_channels
        self.axial_model.features[0] = nn.Conv2d(1, out_channels, kernel_size=7, stride=1, padding=0, bias=False)
        self.axial_model.features[3] = nn.MaxPool2d(1)
        num_ftrs = self.axial_model.classifier.in_features  # error in this line of code
        self.axial_model.classifier = nn.Linear(num_ftrs, 15)

        self.sa_co_model = models.vgg16(pretrained=True)
        self.sa_co_model.features[0] = nn.Conv2d(1, out_channels, kernel_size=7, stride=1, padding=(3,0), bias=False)
        self.sa_co_model.features[3] = nn.MaxPool2d(1)
        self.sa_co_model.classifier = nn.Linear(num_ftrs, 15)

        if self.is_emr:
            self.emr_model = EMRModel()
            if self.mode == 'concat': in_dim = 90

        self.classifier = Classifier(in_dim)

    def forward(self, axial, sagittal, coronal, emr):
        axial = axial[:,:,:-3,:-3]
        sagittal = sagittal[:,:,:,:-3]
        coronal = coronal[:,:,:,:-3]

        axial_feature = self.axial_model(axial)
        sagittal_feature = self.sa_co_model(sagittal)
        coronal_feature = self.sa_co_model(coronal)
        out = torch.cat([axial_feature, sagittal_feature, coronal_feature], dim=1)

        if self.is_emr:
            emr_feature = self.emr_model(emr)
            if self.mode == 'concat':
                out = torch.cat([out, emr_feature], dim=1)
            elif self.mode == 'sum':
                out += emr_feature

        out = self.classifier(out)
        return out
As the error suggests, there's a mismatch between the dimension of the features computed by your backbones and the dimensions of the classification layer, self.classifier.
Try to inspect (in debug mode, or using print) the shape and dimensions of out and what the classifier layer's in_dim is.
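For example, a minimal sketch of that kind of inspection (assuming model is an instance of the VGG class above and axial, sagittal, coronal are one batch of inputs):

# Hypothetical debugging snippet: print the concatenated feature shape that
# reaches self.classifier and compare it with the classifier's expected in_dim.
with torch.no_grad():
    axial_feature = model.axial_model(axial[:, :, :-3, :-3])
    sagittal_feature = model.sa_co_model(sagittal[:, :, :, :-3])
    coronal_feature = model.sa_co_model(coronal[:, :, :, :-3])
    out = torch.cat([axial_feature, sagittal_feature, coronal_feature], dim=1)
    print('concatenated feature shape:', out.shape)   # last dim must match in_dim
    print('classifier module:', model.classifier)     # shows the expected input size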

'Net' object has no attribute 'parameters'

I am fairly new to machine learning. I learned to write this code from YouTube tutorials, but I keep getting this error:
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_bundle/pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "/Users/aniket/Desktop/DeepLearning/PythonLearningPyCharm/CatVsDogs.py", line 109, in <module>
optimizer = optim.Adam(net.parameters(), lr=0.001) # tweaks the weights from what I understand
AttributeError: 'Net' object has no attribute 'parameters'
This is the Net class:
class Net():
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 5)
        self.to_linear = None
        x = torch.randn(50, 50).view(-1, 1, 50, 50)
        self.Conv2d_Linear_Link(x)
        self.fc1 = nn.Linear(self.to_linear, 512)
        self.fc2 = nn.Linear(512, 2)

    def Conv2d_Linear_Link(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        if self.to_linear is None:
            self.to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]
        return x

    def forward(self, x):
        x = self.Conv2d_Linear_Link(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.softmax(x, dim=1)
and this is the train function:
def train():
    for epoch in range(epochs):
        for i in tqdm(range(0, len(X_train), batch)):
            batch_x = train_X[i:i + batch].view(-1, 1, 50, 50)
            batch_y = train_y[i:i + batch]
            net.zero_grad()  # I don't understand why we do this, but we don't want the probabilities adding up
            output = net(batch_x)
            loss = loss_function(output, batch_y)
            loss.backward()
            optimizer.step()
        print(loss)
and the optimizer, loss function, and data:
optimizer = optim.Adam(net.parameters(), lr=0.001) # tweaks the weights from what I understand
loss_function = nn.MSELoss() # gives the loss
You're not subclassing nn.Module. It should look like this:
class Net(nn.Module):
def __init__(self):
super().__init__()
This allows your network to inherit all the properties of the nn.Module class, such as the parameters attribute.
You may also have a spelling problem; check which parameters Net actually has.
You need to import optim from torch
from torch import optim
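Putting both suggestions together, a minimal sketch (with the other layers from the question omitted for brevity) of how the fix makes parameters() available:

# Hypothetical check that the fix works: subclass nn.Module and confirm that
# .parameters() now exists for the optimizer.
import torch.nn as nn
from torch import optim

class Net(nn.Module):                      # subclassing nn.Module provides .parameters()
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)   # remaining layers as in the question

net = Net()
optimizer = optim.Adam(net.parameters(), lr=0.001)    # no AttributeError any more
print(sum(p.numel() for p in net.parameters()))       # count of trainable weights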

Does IOError: [Errno 2] No such file or directory: mean the file hasn't been written?

I'm using Tweepy for the first time. I'm currently getting this error:
---------------------------------------------------------------------------
IOError Traceback (most recent call last)
<ipython-input-11-cdd7ebe0c00f> in <module>()
----> 1 data_json = io.open('raw_tweets.json', mode='r', encoding='utf-8').read() #reads in the JSON file
2 data_python = json.loads(data_json)
3
4 csv_out = io.open('tweets_out_utf8.csv', mode='w', encoding='utf-8') #opens csv file
IOError: [Errno 2] No such file or directory: 'raw_tweets.json'
I have a feeling that the code I've got isn't working. For example, print(status) doesn't print anything. Also, I see no saved CSV or JSON file in the directory.
I'm a newbie so any help/documentation you can offer would be great!
import time
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import os
import json
import csv
import io
from pymongo import MongoClient

ckey = 'blah'
consumer_secret = 'blah'
access_token_key = 'blah'
access_token_secret = 'blah'

#start_time = time.time() #grabs the system time
keyword_list = ['keyword'] #track list

#Listener Class Override
class listener(StreamListener):
    def __init__(self, start_time, time_limit=60):
        self.time = start_time
        self.limit = time_limit
        self.tweet_data = []

    def on_data(self, data):
        saveFile = io.open('raw_tweets.json', 'a', encoding='utf-8')
        while (time.time() - self.time) < self.limit:
            try:
                self.tweet_data.append(data)
                return True
            except BaseException, e:
                print 'failed ondata,', str(e)
                time.sleep(5)
                pass
        saveFile = io.open('raw_tweets.json', 'w', encoding='utf-8')
        saveFile.write(u'[\n')
        saveFile.write(','.join(self.tweet_data))
        saveFile.write(u'\n]')
        saveFile.close()
        exit()

    def on_error(self, status):
        print status

class listener(StreamListener):
    def __init__(self, start_time, time_limit=10):
        self.time = start_time
        self.limit = time_limit

    def on_data(self, data):
        while (time.time() - self.time) < self.limit:
            print(data)
            try:
                client = MongoClient('blah', 27017)
                db = client['blah']
                collection = db['blah']
                tweet = json.loads(data)
                collection.insert(tweet)
                return True
            except BaseException as e:
                print('failed ondata,')
                print(str(e))
                time.sleep(5)
                pass
        exit()

    def on_error(self, status):
        print(status)

data_json = io.open('raw_tweets.json', mode='r', encoding='utf-8').read() #reads in the JSON file
data_python = json.loads(data_json)
csv_out = io.open('tweets_out_utf8.csv', mode='w', encoding='utf-8') #opens csv file
UPDATED: it creates the file, but the file is empty.
import tweepy
import datetime

auth = tweepy.OAuthHandler('xxx', 'xxx')
auth.set_access_token('xxx', 'xxx')

class listener(tweepy.StreamListener):
    def __init__(self, timeout, file_name, *args, **kwargs):
        super(listener, self).__init__(*args, **kwargs)
        self.start_time = None
        self.timeout = timeout
        self.file_name = file_name
        self.tweet_data = []

    def on_data(self, data):
        if self.start_time is None:
            self.start_time = datetime.datetime.now()
        while (datetime.datetime.now() - self.start_time).seconds < self.timeout:
            with open(self.file_name, 'a') as data_file:
                data_file.write('\n')
                data_file.write(data)

    def on_error(self, status):
        print status

l = listener(60, 'stack_raw_tweets.json')
mstream = tweepy.Stream(auth=auth, listener=l)
mstream.filter(track=['python'], async=True)
You are not creating a Stream for the listener. The second-to-last line of the code below does that; after that, you have to start the Stream, which is the last line. I must warn you that storing this in MongoDB is the right thing to do, as the file I am storing it in seems to grow easily to several GB. Also, the file is not exactly JSON: each line in the file is a JSON object, so you must tweak it to your needs.
import tweepy
import datetime

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

class listener(tweepy.StreamListener):
    def __init__(self, timeout, file_name, *args, **kwargs):
        super(listener, self).__init__(*args, **kwargs)
        self.start_time = None
        self.timeout = timeout
        self.file_name = file_name
        self.tweet_data = []

    def on_data(self, data):
        if self.start_time is None:
            self.start_time = datetime.datetime.now()
        while (datetime.datetime.now() - self.start_time).seconds < self.timeout:
            with open(self.file_name, 'a') as data_file:
                data_file.write('\n')
                data_file.write(data)

    def on_error(self, status):
        print status

l = listener(60, 'raw_tweets.json')
mstream = tweepy.Stream(auth=auth, listener=l)
mstream.filter(track=['python'], async=True)
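Following on from the note that each line in the file is a separate JSON object, a minimal sketch (assuming the raw_tweets.json produced above) of how it could be read back:

# Hypothetical reader for the newline-delimited file written by the listener above.
import io
import json

tweets = []
with io.open('raw_tweets.json', mode='r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if line:                            # skip the blank separator lines
            tweets.append(json.loads(line))
print(len(tweets), 'tweets loaded')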