So, the question is this:
What am I doing wrong when defining the neural net architecture? See the sections "Define the neural network model" and "Define the learning rate scheduler and train the model" below.
Details:
I have written the code below, where revenue_data has shape (1749, 2) and weather_data has shape (86990, 10); X_train has shape [69010, 14], y_train [69010], X_val [17253, 14] and y_val [17253]. I have done the preprocessing, scaling, outlier removal and data splitting as follows:
# Convert date and time columns to datetime format
revenue_data['Date'] = pd.to_datetime(revenue_data['Date'], format='%Y%m%d')
weather_data['dt'] = pd.to_datetime(weather_data['dt'], format='%Y%m%d')
weather_data['time'] = pd.to_datetime(weather_data['time'], format='%H:%M:%S')
# Convert wind and condition columns to embeddings
wind_embeddings = nn.Embedding(len(weather_data['wind'].unique()), 5)
weather_data['wind_code'] = weather_data['wind'].astype('category').cat.codes
wind_vectors = wind_embeddings(torch.tensor(weather_data['wind_code'].values, dtype=torch.long))
weather_data['wind_x'] = wind_vectors[:, 0].detach().numpy()
weather_data['wind_y'] = wind_vectors[:, 1].detach().numpy()
weather_data['wind_z'] = wind_vectors[:, 2].detach().numpy()
weather_data['wind_t'] = wind_vectors[:, 3].detach().numpy()
weather_data['wind_u'] = wind_vectors[:, 4].detach().numpy()
condition_embeddings = nn.Embedding(len(weather_data['condition'].unique()), 3)
weather_data['condition_code'] = weather_data['condition'].astype('category').cat.codes
condition_vectors = condition_embeddings(torch.tensor(weather_data['condition_code'].values, dtype=torch.long))
weather_data['condition_x'] = condition_vectors[:, 0].detach().numpy()
weather_data['condition_y'] = condition_vectors[:, 1].detach().numpy()
weather_data['condition_z'] = condition_vectors[:, 2].detach().numpy()
# Group the weather data by date and hour and calculate the mean for each date and hour
weather_data = weather_data.groupby(['dt', 'time']).mean()
weather_data = weather_data.reset_index()
weather_data['Date'] = weather_data['dt']
weather_data.drop(['dt', 'time', 'wind_code', 'condition_code'], axis=1, inplace=True)
# Merge the revenue and weather data on the 'Date' column and drop 'Date'
merged_data = pd.merge(revenue_data, weather_data, on='Date')
merged_data.drop('Date', axis=1, inplace=True)
merged_data.head()
# Scale the data
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(merged_data)
# Split the data into input and target sets
X = scaled_data[:, 1:]
y = scaled_data[:, 0]
from scipy.stats import zscore
# Calculate z-scores for each feature and remove outliers with a z-score greater than 3
z_scores = zscore(X)
# Identify rows where any feature has a z-score > 3
mask = (z_scores > 3).any(axis=1)
# Remove rows with high z-scores from X and y
features = X[~mask, :]
target = y[~mask]
# Split the (outlier-filtered) data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(features, target, test_size=0.2, random_state=42)
# Convert the data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)
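The train_loader and val_loader used in the training loop further down never appear in this snippet; presumably they wrap these tensors. A minimal sketch of how they could be built (the batch size is my assumption):

from torch.utils.data import TensorDataset, DataLoader

# Wrap the tensors so each loader yields (X, y) batches
train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=32, shuffle=False)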
but I am struggling to work out what is wrong with the neural net architecture as defined:
Define the neural network model
class RevenuePredictor(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=14, hidden_size=32, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(32, 16)
        self.fc2 = nn.Linear(16, 1)

    def forward(self, x, lengths):
        print('x shape:', x.shape)
        # Get the lengths of the input sequences
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        lengths = lengths.to(device)
        lengths = lengths.cpu()
        print('lengths shape:', lengths.shape)
        # Sort the input sequences by length
        sorted_lengths, sorted_idx = torch.sort(lengths, descending=True)
        sorted_x = x[sorted_idx]
        # Pack the sorted input sequences
        packed_x = nn.utils.rnn.pack_padded_sequence(sorted_x, sorted_lengths, batch_first=True)
        # Convert the packed sequence to a tensor with two dimensions
        x_data, batch_sizes = nn.utils.rnn.pad_packed_sequence(packed_x, batch_first=True)
        # Convert the packed sequence to a tensor with two dimensions (second attempt)
        x_data, batch_sizes = x.data, x.batch_sizes
        seq_len = batch_sizes[0]
        batch_size = len(batch_sizes)
        x = x_data.new_zeros((batch_size, seq_len, 14))
        s = 0
        for i, l in enumerate(batch_sizes):
            x[i, :l] = x_data[s:(s+l)]
            s += l
        # Pass the packed input sequences through the LSTM
        lstm_output, (h, c) = self.lstm(packed_x)
        # Unpack the LSTM output sequences
        unpacked_output, _ = nn.utils.rnn.pad_packed_sequence(lstm_output, batch_first=True)
        # Re-sort the output sequences to their original order
        unsorted_idx = sorted_idx.sort(0)
        output = unpacked_output[unsorted_idx]
        # Pass the output sequences through the fully connected layers
        output = nn.functional.relu(self.fc1(output[:, -1, :]))
        output = self.fc2(output)
        return output
Then I create the model:
model = RevenuePredictor()
followed by the loss, optimizer and metrics:
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
metrics = {
'mse': MeanSquaredError(),
'mae': MeanAbsoluteError(),
'r2': R2Score(),
}
Define the learning rate scheduler and train the model
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True)
best_val_loss = np.inf
for epoch in range(num_epochs):
    # Set the model to training mode
    model.train()
    train_loss = 0.0
    num_batches = 0
    for X_train, y_train in train_loader:
        lengths = torch.ones(X_train.shape[0], dtype=torch.long)
        optimizer.zero_grad()
        output = model(X_train, lengths)
        loss = loss_fn(output, y_train)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        num_batches += 1
    val_loss = 0.0
    for X_val, y_val in val_loader:
        lengths = torch.ones(X_val.shape[0], dtype=torch.long)
        output = model(X_val, lengths)
        loss = loss_fn(output, y_val)
        val_loss += loss.item()
    scheduler.step(val_loss)
    val_loss /= len(val_loader)
    val_mse = metrics['mse'].compute()
    val_mae = metrics['mae'].compute()
    val_r2 = metrics['r2'].compute()
    for metric in metrics.values():
        metric.reset()
    if (epoch+1) % 100 == 0:
        print('Epoch [{}/{}], Train Loss: {:.4f}, Val Loss: {:.4f}, MSE: {:.4f}, MAE: {:.4f}, R2: {:.4f}'
              .format(epoch+1, num_epochs, train_loss/num_batches, val_loss, val_mse, val_mae, val_r2))
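(Note: the metric objects above are only ever compute()-d and reset(), never fed any predictions. Assuming they are torchmetrics-style objects, the validation loop would also need an update step, roughly:

    # inside the validation loop, after computing `output` for a batch:
    for metric in metrics.values():
        metric.update(output.squeeze(), y_val)
)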
I get this error, which I think is because of something being wrong in defining the neural network model:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 3326, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-164-e20b93c25048>", line 3, in <module>
output = model(X_train, lengths)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "<ipython-input-163-43b2ef5c15db>", line 38, in forward
lstm_output, (h, c) = self.lstm(packed_x)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/rnn.py", line 772, in forward
self.check_forward_args(input, hx, batch_sizes)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/rnn.py", line 697, in check_forward_args
self.check_input(input, batch_sizes)
File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/rnn.py", line 206, in check_input
raise RuntimeError(
RuntimeError: input must have 2 dimensions, got 1
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/IPython/core/interactiveshell.py", line 2040, in showtraceback
stb = value._render_traceback_()
AttributeError: 'RuntimeError' object has no attribute '_render_traceback_'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/IPython/core/ultratb.py", line 1101, in get_records
return _fixed_getinnerframes(etb, number_of_lines_of_context, tb_offset)
File "/usr/local/lib/python3.8/dist-packages/IPython/core/ultratb.py", line 319, in wrapped
return f(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/IPython/core/ultratb.py", line 353, in _fixed_getinnerframes
records = fix_frame_records_filenames(inspect.getinnerframes(etb, context))
File "/usr/lib/python3.8/inspect.py", line 1515, in getinnerframes
frameinfo = (tb.tb_frame,) + getframeinfo(tb, context)
File "/usr/lib/python3.8/inspect.py", line 1473, in getframeinfo
filename = getsourcefile(frame) or getfile(frame)
File "/usr/lib/python3.8/inspect.py", line 708, in getsourcefile
if getattr(getmodule(object, filename), '__loader__', None) is not None:
File "/usr/lib/python3.8/inspect.py", line 737, in getmodule
file = getabsfile(object, _filename)
File "/usr/lib/python3.8/inspect.py", line 721, in getabsfile
return os.path.normcase(os.path.abspath(_filename))
File "/usr/lib/python3.8/posixpath.py", line 379, in abspath
cwd = os.getcwd()
FileNotFoundError: [Errno 2] No such file or directory
---------------------------------------------------------------------------
I tried converting the packed sequence to a tensor with two dimensions in a different way:
x_data, batch_sizes = x.data, x.batch_sizes
seq_len = batch_sizes[0]
batch_size = len(batch_sizes)
x = x_data.new_zeros((batch_size, seq_len, 14))
s = 0
for i, l in enumerate(batch_sizes):
    x[i, :l] = x_data[s:(s+l)]
    s += l
That didn't work.
Then I tried reshaping x to have three dimensions:
batch_size, seq_len, input_size = x.shape
That didn't work either, and finally I tried unsqueeze(-1) on the output after defining the model:
model = RevenuePredictor()
output = model(X_train, lengths).unsqueeze(-1)
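For what it's worth, one plausible reading of the traceback: the batches are 2-D with shape (batch, 14), so pack_padded_sequence treats dimension 1 as the sequence axis and hands the LSTM 1-D timesteps, hence "input must have 2 dimensions, got 1". A minimal sketch of a forward pass that treats each row as a length-1 sequence and skips packing altogether (an assumption, not a confirmed fix):

import torch
import torch.nn as nn

class RevenuePredictor(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=14, hidden_size=32, num_layers=1, batch_first=True)
        self.fc1 = nn.Linear(32, 16)
        self.fc2 = nn.Linear(16, 1)

    def forward(self, x, lengths=None):
        x = x.unsqueeze(1)                  # (batch, 14) -> (batch, seq_len=1, 14)
        lstm_output, (h, c) = self.lstm(x)  # lstm_output: (batch, 1, 32)
        out = nn.functional.relu(self.fc1(lstm_output[:, -1, :]))
        return self.fc2(out).squeeze(-1)    # (batch,) to match y_train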
I have an issue with multiprocessing in PyTorch on AWS SageMaker. Essentially, my GAN model runs fine with single processing (num_workers = 0) on AWS SageMaker, but produces an error with multiprocessing (num_workers > 0). Oddly, the model runs fine with multiprocessing in my local environment, but not on AWS SageMaker, which may or may not be due to the different OS.
Traceback (most recent call last):
File "main.py", line 371, in <module>
g_scaler=g_scaler, d_scaler=d_scaler, runtime_log_folder=runtime_log_folder, runtime_log_file_name=runtime_log_file_name)
File "main.py", line 78, in train_fn
for idx, (x, y) in enumerate(loop):
File "/opt/conda/lib/python3.6/site-packages/tqdm/std.py", line 1171, in __iter__
for obj in iterable:
File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 525, in __next__
(data, worker_id) = self._next_data()
File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 1252, in _next_data
return (self._process_data(data), w_id)
File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 1299, in _process_data
data.reraise()
File "/opt/conda/lib/python3.6/site-packages/torch/_utils.py", line 429, in reraise
raise self.exc_type(msg)
File "/opt/conda/lib/python3.6/site-packages/botocore/exceptions.py", line 84, in __init__
super(HTTPClientError, self).__init__(**kwargs)
File "/opt/conda/lib/python3.6/site-packages/botocore/exceptions.py", line 40, in __init__
msg = self.fmt.format(**kwargs)
KeyError: 'error'
---------------------------------------------------------------------------
UnexpectedStatusException Traceback (most recent call last)
<ipython-input-1-81655136a841> in <module>
58 py_version='py3')
59
---> 60 pytorch_estimator.fit({'train': Runtime.dataset_path}, job_name=Runtime.job_name)
61
62 #print(pytorch_estimator.latest_job_tensorboard_artifacts_path())
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/sagemaker/estimator.py in fit(self, inputs, wait, logs, job_name, experiment_config)
955 self.jobs.append(self.latest_training_job)
956 if wait:
--> 957 self.latest_training_job.wait(logs=logs)
958
959 def _compilation_job_name(self):
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/sagemaker/estimator.py in wait(self, logs)
1954 # If logs are requested, call logs_for_jobs.
1955 if logs != "None":
-> 1956 self.sagemaker_session.logs_for_job(self.job_name, wait=True, log_type=logs)
1957 else:
1958 self.sagemaker_session.wait_for_job(self.job_name)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/sagemaker/session.py in logs_for_job(self, job_name, wait, poll, log_type)
3751
3752 if wait:
-> 3753 self._check_job_status(job_name, description, "TrainingJobStatus")
3754 if dot:
3755 print()
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/sagemaker/session.py in _check_job_status(self, job, desc, status_key_name)
3304 ),
3305 allowed_statuses=["Completed", "Stopped"],
-> 3306 actual_status=status,
3307 )
3308
UnexpectedStatusException: Error for Training job 2022-06-03-05-16-49-pix2pix-U12239-2022-05-09-14-39-18-training: Failed. Reason: AlgorithmError: ExecuteUserScriptError:
Command "/opt/conda/bin/python3.6 main.py --runtime_var dataset_name=U12239-2022-05-09-14-39-18,job_name=2022-06-03-05-16-49-pix2pix-U12239-2022-05-09-14-39-18-training,model_name=pix2pix"
0%| | 0/248 [00:00<?, ?it/s]
0%| | 1/248 [00:30<2:07:28, 30.97s/it]
0%| | 1/248 [00:30<2:07:28, 30.97s/it]
Traceback (most recent call last):
File "main.py", line 371, in <module>
g_scaler=g_scaler, d_scaler=d_scaler, runtime_log_folder=runtime_log_folder, runtime_log_file_name=runtime_log_file_name)
File "main.py", line 78, in train_fn
for idx, (x, y) in enumerate(loop):
File "/opt/conda/lib/python3.6/site-packages/tqdm/std.py", line 1171, in __iter__
for obj in iterable:
File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 525, in __next__
(data, worker_id) = self._next_data()
File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 1252, in _next_data
return (self
This is the code that produces the error
def train_fn(disc, gen, loader, opt_disc, opt_gen, l1, bce, g_scaler, d_scaler, runtime_log_folder, runtime_log_file_name):
    total_output = ''
    loop = tqdm(loader, leave=True)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Loop")
    print(loop)
    print("Length loop")
    print(len(loop))
    for idx, (x, y) in enumerate(loop):  # <-- error happens here
        print("Loop index")
        print(idx)
        print("Loop item")
        print(x, y)
        x = x.to(device)
        y = y.to(device)

        # train discriminator
        with torch.cuda.amp.autocast():
            y_fake = gen(x)
            D_real = disc(x, y)
            D_fake = disc(x, y_fake.detach())
            # use detach to avoid breaking the computational graph when calling optimizer.step on the discriminator
            # alternatively, call loss.backward(retain_graph=True)
            D_real_loss = bce(D_real, torch.ones_like(D_real))
            D_fake_loss = bce(D_fake, torch.ones_like(D_fake))
            D_loss = (D_real_loss + D_fake_loss) / 2
            # log tensorboard
        disc.zero_grad()
        d_scaler.scale(D_loss).backward()
        d_scaler.step(opt_disc)
        d_scaler.update()

        # train generator
        with torch.cuda.amp.autocast():
            D_fake = disc(x, y_fake)
            # compute fake loss
            # trick the discriminator into believing these are real, hence torch.ones_like(D_fake)
            G_fake_loss = bce(D_fake, torch.ones_like(D_fake))
            # compute L1 loss
            L1 = l1(y_fake, y) * args.l1_lambda
            G_loss = G_fake_loss + L1
            # log tensorboard
        opt_gen.zero_grad()
        g_scaler.scale(G_loss).backward()
        g_scaler.step(opt_gen)
        g_scaler.update()

        # print epoch, generator loss, discriminator loss
        print(f'[Epoch {epoch}/{args.num_epochs} (b: {idx})] [D loss: {D_loss}, D real loss: {D_real_loss}, D fake loss: {D_fake_loss}] [G loss: ##{G_loss}, G fake loss: {G_fake_loss}, L1 loss: {L1}]')
        output = f'[Epoch {epoch}/{args.num_epochs} (b: {idx})] [D loss: {D_loss}, D real loss: {D_real_loss}, D fake loss: {D_fake_loss}] [G loss: ##{G_loss}, G fake loss: {G_fake_loss}, L1 loss: {L1}]\n'
        total_output += output

    runtime_log = get_json_file_from_s3(runtime_log_folder, runtime_log_file_name)
    runtime_log += total_output
    upload_json_file_to_s3(runtime_log_folder, runtime_log_file_name, json.dumps(runtime_log))
def __getitem__(self, index):
    print("Index ", index)
    pair_key = self.list_files[index]
    print("Pair key ", pair_key)
    pair = Boto.s3_client.list_objects(Bucket=Boto.bucket_name, Prefix=pair_key, Delimiter='/')

    input_image_key = pair.get('Contents')[1].get('Key')
    input_image_path = f's3://{Boto.bucket_name}/{input_image_key}'
    print("Input image path ", input_image_path)
    input_image_s3_source = get_file_from_filepath(input_image_path)
    input_image = np.array(Image.open(input_image_s3_source))

    target_image_key = pair.get('Contents')[0].get('Key')
    target_image_path = f's3://{Boto.bucket_name}/{target_image_key}'
    print("Target image path ", target_image_path)
    target_image_s3_source = get_file_from_filepath(target_image_path)
    target_image = np.array(Image.open(target_image_s3_source))

    # get input image and target image by applying augmentations to both images
    augmentations = config.both_transform(image=input_image, image0=target_image)
    input_image, target_image = augmentations['image'], augmentations['image0']
    input_image = config.transform_only_input(image=input_image)['image']
    target_image = config.transform_only_mask(image=target_image)['image']
    print("Input image size ", input_image.size())
    print("Target image size ", target_image.size())
    return input_image, target_image
Here is a summary of the failure points traced from the logs:
i) 2022-06-03-05-00-04-pix2pix-U12239-2022-05-09-14-39-18-training
No index shown
[Epoch 0/100 (b: 0)]
ii) 2022-06-03-05-16-49-pix2pix-U12239-2022-05-09-14-39-18-training
Index 160
[Epoch 0/100 (b: 0)]
iii) 2022-06-03-05-44-46-pix2pix-U12239-2022-05-09-14-39-18-training
Index 160
[Epoch 0/100 (b: 0)]
iv) 2022-06-03-06-08-33-pix2pix-U12239-2022-05-09-14-39-18-training
Index 160
[Epoch 1/100 (b: 0)]
v) 2022-06-15-02-49-20-pix2pix-U12239-2022-05-09-14-39-18-training
Index 160
Pair key datasets/training-data/testing/2022-05-09-14-39-18/match-raws-finals/U12239/P423712/Pair_71/
[Epoch 0/100 (b: 0)
vi) 2022-06-15-02-59-43-pix2pix-U12239-2022-05-09-14-39-18-training
Index 64
Pair key datasets/training-data/testing/2022-05-09-14-39-18/match-raws-finals/U12239/P425642/Pair_27/
[Epoch 0/100 (b: 247)]
vii) 2022-06-15-04-49-33-pix2pix-U12239-2022-05-09-14-39-18-training
Index 64
Pair key datasets/training-data/testing/2022-05-09-14-39-18/match-raws-finals/U12239/P415414/Pair_124/
No specific epoch
Essentially it seems to fail either at the start of an epoch (batch 0) or at the end of an epoch (batch 247), and only with multiprocessing. Does anyone have suggestions on how to fix it?
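One hedged hypothesis: __getitem__ calls the shared Boto.s3_client, and boto3 clients are not fork-safe, so a client created in the parent process can misbehave inside forked DataLoader workers (which would explain why num_workers=0 works). A sketch of one common workaround is to give each worker its own client via worker_init_fn (Boto is the helper class from the question; batch size and worker count are placeholders):

import boto3
import torch

def worker_init_fn(worker_id):
    # create a fresh boto3 session/client inside each forked worker
    # instead of sharing the parent process's client across forks
    Boto.s3_client = boto3.session.Session().client('s3')

loader = torch.utils.data.DataLoader(dataset, batch_size=16, shuffle=True,
                                     num_workers=4, worker_init_fn=worker_init_fn)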
Hi everyone!
I am working on an object detection project using the VGG network on the PASCAL VOC dataset. I wrote a custom dataset loader for PASCAL VOC and coded the network from scratch (both are similar to PyTorch's torchvision implementations).
Currently, I'm getting the following error:
Traceback (most recent call last):
File "/home/khushi/Documents/deep-learning/benchmarking-deep-neural-networks/vgg/main.py", line 59, in <module>
main()
File "/home/khushi/Documents/deep-learning/benchmarking-deep-neural-networks/vgg/main.py", line 55, in main
train(data, model, num_epochs, criteria, optimizer)
File "/home/khushi/Documents/deep-learning/benchmarking-deep-neural-networks/vgg/main.py", line 21, in train
outputs = model(image)
File "/home/khushi/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/khushi/Documents/deep-learning/benchmarking-deep-neural-networks/vgg/vgg_torch.py", line 39, in forward
x = self.features(x)
File "/home/khushi/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/khushi/.local/lib/python3.9/site-packages/torch/nn/modules/container.py", line 141, in forward
input = module(input)
File "/home/khushi/.local/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/khushi/.local/lib/python3.9/site-packages/torch/nn/modules/conv.py", line 446, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/home/khushi/.local/lib/python3.9/site-packages/torch/nn/modules/conv.py", line 442, in _conv_forward
return F.conv2d(input, weight, bias, self.stride,
TypeError: conv2d() received an invalid combination of arguments - got (Image, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
didn't match because some of the arguments have invalid types: (Image, Parameter, Parameter, tuple, tuple, tuple, int)
* (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
didn't match because some of the arguments have invalid types: (Image, Parameter, Parameter, tuple, tuple, tuple, int)
The code I am implementing:
import vgg_torch
import voc_loader
import time
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
num_epochs = 5
learning_rate = 0.01
# https://github.com/khushi-411/tutorials/pytorch
def train(data, model, num_epochs, criteria, optimizer):
    steps = len(data)
    for epochs in range(num_epochs):
        for i, (image, target) in enumerate(data):
            # forward pass
            # https://stackoverflow.com/questions/57237381
            #outputs = model(image[None, ...])
            outputs = model(image)
            loss = criteria(outputs, target)

            # backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i+1) % 100 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epochs+1, num_epochs, i+1, steps, loss.item()))

def main():
    # give absolute path to dataset
    # https://stackoverflow.com/questions/56741108
    data = voc_loader.VOCDetection('/home/khushi/Documents/deep-learning/datasets/pascal-voc/')
    """,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
    )
    """

    # Load model: vgg11, vgg11_bn, vgg13, vgg13_bn, vgg16, vgg16_bn, vgg19, vgg19_bn
    model = vgg_torch.vgg11()
    print(model)

    # Loss function and optimizer
    criteria = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # training
    start = time.time()
    train(data, model, num_epochs, criteria, optimizer)
    print("Total time taken to train: ", time.time() - start)

if __name__ == "__main__":
    main()
Dependencies
PyTorch: 1.10.0+cu102
OS: Manjaro Distro
RAM: 16GB
Will anyone please help me resolve this error? Thanks!
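For reference, the traceback shows conv2d() receiving an Image: the dataset yields raw PIL images because the transform that would convert them to tensors is commented out. A sketch of re-enabling it, assuming voc_loader.VOCDetection accepts a torchvision-style transform argument (a single ToTensor() is enough):

transform = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
data = voc_loader.VOCDetection('/home/khushi/Documents/deep-learning/datasets/pascal-voc/',
                               transform=transform)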
gdd.forward(x) call error, but why?
This code uses im2col to implement the convolution layer:
Traceback (most recent call last):
File "E:/PycharmProjects/untitled2/kk.py", line 61, in <module>
gdd.forward(x)
File "E:/PycharmProjects/untitled2/kk.py", line 46, in forward
FN,C,FH,FW=self.W.shape
ValueError: not enough values to unpack (expected 4, got 2)
import numpy as np

class Convolution:
    # convolution kernel size
    def __init__(self, W, b, stride=1, pad=0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        out_h = int(1 + (H + 2*self.pad - FH) / self.stride)
        out_w = int(1 + (W + 2*self.pad - FW) / self.stride)

e = np.array([[2,0,1],[0,1,2],[1,0,2]])
x = np.array([[1,2,3,0],[0,1,2,3],[3,0,1,2],[2,3,0,1]])
gdd = Convolution(e, 3, 1, 0)
gdd.forward(x)
"not enough values to unpack" means that there are only 2 values to unpack, but you are expecting 4:
FN,C,FH,FW=self.W.shape
Just get rid of 2 of them and you are good to go :)
BTW I'm assuming you speak Chinese? I speak Chinese too; if anything is unclear, you can ask in Chinese.
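Alternatively, if the goal is to keep the 4-D forward(), reshape the arrays into the (N, C, H, W) layout it expects; a sketch under the assumption of one 3x3 kernel on a single-channel 4x4 input:

e = np.array([[2,0,1],[0,1,2],[1,0,2]]).reshape(1, 1, 3, 3)                   # (FN, C, FH, FW)
x = np.array([[1,2,3,0],[0,1,2,3],[3,0,1,2],[2,3,0,1]]).reshape(1, 1, 4, 4)   # (N, C, H, W)
gdd = Convolution(e, 3, 1, 0)
gdd.forward(x)  # now unpacks cleanly; out_h = out_w = 2 with stride 1, no padding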
I am trying to create a CNN with data augmentation in PyTorch to classify dogs and cats. The issue I am having is that when I try to load my dataset and enumerate through it, I keep getting this error:
Traceback (most recent call last):
File "<ipython-input-55-6337e0536bae>", line 75, in <module>
for i, (inputs, labels) in enumerate(trainloader):
File "/usr/local/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 188, in __next__
batch = self.collate_fn([self.dataset[i] for i in indices])
File "/usr/local/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 188, in <listcomp>
batch = self.collate_fn([self.dataset[i] for i in indices])
File "/usr/local/lib/python3.6/site-packages/torchvision/datasets/folder.py", line 124, in __getitem__
img = self.transform(img)
File "/usr/local/lib/python3.6/site-packages/torchvision/transforms/transforms.py", line 42, in __call__
img = t(img)
File "/usr/local/lib/python3.6/site-packages/torchvision/transforms/transforms.py", line 147, in __call__
return F.resize(img, self.size, self.interpolation)
File "/usr/local/lib/python3.6/site-packages/torchvision/transforms/functional.py", line 197, in resize
return img.resize((ow, oh), interpolation)
File "/usr/local/lib/python3.6/site-packages/PIL/Image.py", line 1724, in resize
raise ValueError("unknown resampling filter")
ValueError: unknown resampling filter
and I really don't know what's wrong with my code. I have provided the code below:
# Creating the CNN
# Importing the libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import torchvision
from torchvision import transforms
#Creating the CNN Model
class CNN(nn.Module):
    def __init__(self, nb_outputs):
        super(CNN, self).__init__() #activates the inheritance and allows the use of all the tools in nn.Module
        #making the 2 convolutional layers that will be used in the convolutional neural network
        self.convolution1 = nn.Conv2d(in_channels = 1, out_channels = 32, kernel_size = 5) #kernel_size -> the dimension of the feature detector, e.g. kernel_size = 5 => feature detector of size 5x5
        self.convolution2 = nn.Conv2d(in_channels = 32, out_channels = 64, kernel_size = 2)
        #making 2 full connections: one to connect the inputs of the ANN to the hidden layer and another to connect the hidden layer to the outputs of the ANN
        self.fc1 = nn.Linear(in_features = self.count_neurons((1, 64, 64)), out_features = 40)
        self.fc2 = nn.Linear(in_features = 40, out_features = nb_outputs)

    def count_neurons(self, image_dim):
        x = Variable(torch.rand(1, *image_dim)) #a fake image used to compute the number of neurons
        #the * unpacks the elements of the tuple image_dim as a list of arguments
        #since x will be going into the network it is wrapped in a torch Variable
        x = F.relu(F.max_pool2d(self.convolution1(x), 3, 2)) #apply the convolution to x, then max pooling, then activate the neurons in the pooling layer
        x = F.relu(F.max_pool2d(self.convolution2(x), 3, 2)) #propagate the signals to the second convolutional layer
        #flatten x to obtain the number of neurons in the flattening layer
        return x.data.view(1, -1).size(1) #flatten x into one long vector; its size is the number of neurons fed into the ANN
        #x need not be a real image: the flattened size depends only on the input dimensions

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.convolution1(x), 3, 2))
        x = F.relu(F.max_pool2d(self.convolution2(x), 3, 2))
        #flattening layer of the CNN
        x = x.view(x.size(0), -1)
        #x is now the input to the ANN
        x = F.relu(self.fc1(x)) #propagate the signals from the flattening layer to the fully connected layer and break the linearity with relu
        x = F.sigmoid(self.fc2(x))
        #x is now the output neurons of the ANN
        return x
train_tf = transforms.Compose([transforms.RandomHorizontalFlip(),
transforms.Resize(64,64),
transforms.RandomRotation(20),
transforms.RandomGrayscale(.2),
transforms.ToTensor()])
test_tf = transforms.Compose([transforms.Resize(64,64),
transforms.ToTensor()])
training_set = torchvision.datasets.ImageFolder(root = './dataset/training_set',
transform = train_tf)
test_set = torchvision.datasets.ImageFolder(root = './dataset/test_set',
transform = transforms.Compose([transforms.Resize(64,64),
transforms.ToTensor()]) )
trainloader = torch.utils.data.DataLoader(training_set, batch_size=32,
shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(test_set, batch_size= 32,
shuffle=False, num_workers=0)
#training the model
cnn = CNN(1)
cnn.train()
loss = nn.BCELoss()
optimizer = optim.Adam(cnn.parameters(), lr = 0.001) #the optimizer => Adam optimizer
nb_epochs = 25
for epoch in range(nb_epochs):
    train_loss = 0.0
    train_acc = 0.0
    total = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = Variable(inputs), Variable(labels)
        cnn.zero_grad()
        outputs = cnn(inputs)
        loss_error = loss(outputs, labels)
        loss_error.backward()
        optimizer.step()
        _, pred = torch.max(outputs.data, 1)
        total += labels.size(0)
        train_loss += loss_error.data[0]
        train_acc += (pred == labels).sum()
    train_loss = train_loss/len(trainloader)
    train_acc = train_acc/total
    print('Epoch: %d, loss: %.4f, accuracy: %.4f' %(epoch+1, train_loss, train_acc))
The folder arrangement for the code is /dataset/training_set, and inside the training_set folder are two more folders, one for all the cat images and the other for all the dog images. Each image is named either dog.xxxx.jpg or cat.xxxx.jpg, where xxxx represents a number, so the first cat image is cat.1.jpg, up to cat.4000.jpg. The test_set folder follows the same format. There are 8000 training images and 2000 test images. If anyone can point out my error I would greatly appreciate it.
Thank you
Try to set the desired size in transforms.Resize as a tuple:
transforms.Resize((64, 64))
PIL is using the second argument (in your case 64) as the interpolation method.
Also, every transform goes inside the brackets in torchvision.transforms.Compose([put every transform in these brackets]). This will not give the error.
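Putting both answers together, a corrected version of the question's pipeline would look like this (sizes copied from the question):

train_tf = transforms.Compose([transforms.RandomHorizontalFlip(),
                               transforms.Resize((64, 64)),  # tuple: (height, width)
                               transforms.RandomRotation(20),
                               transforms.RandomGrayscale(.2),
                               transforms.ToTensor()])
test_tf = transforms.Compose([transforms.Resize((64, 64)),
                              transforms.ToTensor()])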