PyTorch: simple recurrent neural network for image classification

I am building a simple recurrent neural network architecture for CIFAR10 image classification. I do not want to use the pre-defined RNN class in PyTorch because I am implementing it from scratch according to the figure. I am getting a runtime error saying the input tensors are not all on the same device. I am not sure whether my code is right or wrong. Also, is there a simple way to write the FC layer without hard-coding the shape parameters?
Figure
Code
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.input_to_hidden = nn.Linear(in_features=input_size + hidden_size, out_features=output_size)
        self.input_to_output = nn.Linear(in_features=input_size + hidden_size, out_features=output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, input_tensor):
        combined = torch.cat((input_tensor, torch.zeros(input_tensor.size(0))), 1)
        hidden = self.input_to_hidden(combined)
        output = self.input_to_output(combined)
        output = self.softmax(output)
        return output, hidden
Traceback
Traceback (most recent call last):
File "/media/cvpr/CM_1/tutorials/rnn.py", line 81, in <module>
outputs = model(images)
File "/home/cvpr/anaconda3/envs/tutorials/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/media/cvpr/CM_1/tutorials/rnn.py", line 33, in forward
combined = torch.cat((input_tensor, torch.zeros(input_tensor.size(0))), 1)
RuntimeError: All input tensors must be on the same device. Received cuda:0 and cpu

You need to make sure the tensors are on the same device (CPU/GPU) before you concatenate them.
You can add a device parameter to your class and use it:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_classes, device='cuda'):
        super(RNN, self).__init__()
        self.device = device
        self.hidden_size = hidden_size
        self.input_to_hidden = nn.Linear(in_features=input_size + hidden_size, out_features=output_size)
        self.input_to_output = nn.Linear(in_features=input_size + hidden_size, out_features=output_size)
        self.softmax = nn.LogSoftmax(dim=1)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, input_tensor):
        # Create the initial hidden state on the same device as the input, with shape
        # (batch_size, hidden_size) so that the cat along dim=1 works.
        hidden_zeros = torch.zeros(input_tensor.size(0), self.hidden_size, device=self.device)
        combined = torch.cat((input_tensor.to(self.device), hidden_zeros), 1)
        hidden = self.input_to_hidden(combined)
        output = self.input_to_output(combined)
        output = self.softmax(output)
        return output, hidden
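Alternatively, you can drop the explicit device argument and build the hidden state directly from the input with Tensor.new_zeros, which inherits the input's device and dtype. A minimal sketch of that idea (note it also sets input_to_hidden's out_features to hidden_size so the produced hidden state has the size the recurrence expects, which differs from the code above):

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.input_to_hidden = nn.Linear(input_size + hidden_size, hidden_size)
        self.input_to_output = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input_tensor, hidden=None):
        if hidden is None:
            # new_zeros creates the initial hidden state on the same device/dtype as the input
            hidden = input_tensor.new_zeros(input_tensor.size(0), self.hidden_size)
        combined = torch.cat((input_tensor, hidden), dim=1)
        hidden = self.input_to_hidden(combined)
        output = self.softmax(self.input_to_output(combined))
        return output, hidden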

Related

Cannot properly load PyTorch Lightning model from checkpoint

I have trained a PyTorch Lightning model of the following class:
class LSTMClassifier(pl.LightningModule):
    def __init__(self, n_features, hidden_size, batch_size, num_layers, dropout, learning_rate):
        super(LSTMClassifier, self).__init__()
        self.save_hyperparameters()
        # Params
        self.n_features = n_features
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.learning_rate = learning_rate
        # Architecture Baseline
        self.lstm = nn.LSTM(input_size=n_features,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            dropout=dropout,
                            batch_first=True)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(hidden_size, 2)
        self.sigmoid = nn.Sigmoid()
It gives me a test set accuracy of 0.76 when I call the trainer.test() function directly after training:
# Init PyTorch model
model = LSTMClassifier(
    n_features=p['n_features'],
    hidden_size=p['hidden_size'],
    batch_size=p['batch_size'],
    num_layers=p['num_layers'],
    dropout=p['dropout'],
    learning_rate=p['learning_rate']
)

model_checkpoint = ModelCheckpoint(
    filename='[PATH.ckpt]'
)

# Trainer GPU
trainer = Trainer(max_epochs=p['max_epochs'], callbacks=[model_checkpoint], gpus=int(GPU))
trainer.fit(model, dm)
trainer.test(model, test_dataloaders=dm.test_dataloader())
However, when I load the checkpoint at a later time with the exact same dataloader, it gives me an accuracy of 0.48:
model_checkpoint = ModelCheckpoint(
    filename='LSTM-batch-{batch_size}-epoch-{max_epochs}-hidden-{hidden_size}-layers-{'
             'num_layers}-dropout-{dropout}-lr-{learning_rate}'.format(**p)
)

# Trainer GPU
trainer = Trainer(max_epochs=p['max_epochs'], callbacks=[model_checkpoint], gpus=int(GPU))
model = LSTMClassifier.load_from_checkpoint([PATH TO CHECKPOINT])
model.eval()
trainer.test(model, test_dataloaders=dm.test_dataloader())
I suspect the model does not load correctly, but I cannot figure out what to do differently. Any ideas?
Using PyTorch Lightning 1.4.4
It turns out that trainer.test(model, test_dataloaders=dm.test_dataloader()) was the issue. Once I replaced it with trainer.test(model, datamodule=dm), as per the updated documentation, it works.
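For reference, the change amounts to handing the trainer the datamodule instead of the raw dataloader (dm being the same LightningDataModule used during training):

# old call: passes the test dataloader directly
trainer.test(model, test_dataloaders=dm.test_dataloader())

# new call, per the updated Lightning documentation: lets the trainer drive the datamodule
trainer.test(model, datamodule=dm)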

input.size(-1) must be equal to input_size. Expected 763, got 1

I am trying to train my model with a batch size of 50. However, I am getting this error:
input.size(-1) must be equal to input_size. Expected 763, got 1
My code is:
for epoch in range(1, n_epochs + 1):
    for i, (x_batch, y_batch) in enumerate(trn_dl):
        # model.to(device)
        # model.train()
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        # sched.step()
        print('shape of the input batch')
        print(x_batch.shape)
        opt.zero_grad()
        x_batch = torch.unsqueeze(x_batch, 2)
        print(x_batch.shape)
        print(x_batch)
        out = model(x_batch)  # here I am getting the error
        y_batch = torch.unsqueeze(y_batch, 0)
        print('NOW')
        print(y_batch.dtype)
        y_batch = y_batch.to(torch.float32)
        out = out.to(torch.float32)
        out = torch.transpose(out, 1, 0)
        loss = loss_function(out, torch.max(y_batch, 1)[1])
        # (out, y_batch)
        # targets = targets.to(torch.float32)
        loss.backward()
        opt.step()
My model is:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size, output_size)
        self.hidden_cell = (torch.zeros(1, 1, self.hidden_size),
                            torch.zeros(1, 1, self.hidden_size))

    def forward(self, input_seq):
        h0 = torch.zeros(1, input_seq.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(1, input_seq.size(0), self.hidden_size).to(device)
        lstm_out, _ = self.lstm(input_seq, (h0, c0))
        lstm_out = self.fc(lstm_out[:, -1, :])
        predictions = self.Linear(lstm_out.view(len(input_seq), -1))
        print("predictions", predictions)
        return predictions[-1]
Could anyone please look into it and help me.
By the looks of it, you are trying to pick the last step of the LSTM's output: lstm_out[:, -1, :]. However, by default with nn.RNNs the batch axis is second, not first: (sequence_length, batch_size, features). So you end up picking the last batch element, not the last sequence step. You might want to use batch_first=True when initializing your nn.LSTM:
Something like:
self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
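For illustration, a small sketch of how the output axes change once batch_first=True is set (the sizes here are arbitrary, not taken from the question):

lstm = nn.LSTM(input_size=10, hidden_size=20, batch_first=True)
x = torch.randn(4, 7, 10)     # (batch_size=4, sequence_length=7, features=10)
out, _ = lstm(x)              # out: (4, 7, 20), batch first, time second
last_step = out[:, -1, :]     # (4, 20): the last time step of every sequence, as intended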

Implement a Network in Network CNN model using pytorch-lightning

I am trying to implement a NiN (Network in Network) model, basically trying to replicate the code from d2l. Here is my code.
import pandas as pd
import torch
from torch import nn
import torchmetrics
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
from torchvision.datasets import FashionMNIST
import wandb
from pytorch_lightning.loggers import WandbLogger

wandb.login()

## class definition
class Lightning_nin(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.accuracy = torchmetrics.Accuracy(top_k=1)
        self.model = nn.Sequential(
            self.nin_block(1, 96, kernel_size=11, strides=4, padding=0),
            nn.MaxPool2d(3, stride=2),
            self.nin_block(96, 256, kernel_size=5, strides=1, padding=2),
            nn.MaxPool2d(3, stride=2),
            self.nin_block(256, 384, kernel_size=3, strides=1, padding=1),
            nn.MaxPool2d(3, stride=2), nn.Dropout(0.5),
            # There are 10 label classes
            self.nin_block(384, 10, kernel_size=3, strides=1, padding=1),
            nn.AdaptiveAvgPool2d((1, 1)),
            # Transform the four-dimensional output into two-dimensional output with a
            # shape of (batch size, 10)
            nn.Flatten())
        for layer in self.model:
            if type(layer) == nn.Linear or type(layer) == nn.Conv2d:
                nn.init.xavier_uniform_(layer.weight)

    def nin_block(self, in_channels, out_channels, kernel_size, strides, padding):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, strides, padding),
            nn.ReLU(), nn.Conv2d(out_channels, out_channels, kernel_size=1),
            nn.ReLU(), nn.Conv2d(out_channels, out_channels, kernel_size=1),
            nn.ReLU())

    def forward(self, x):
        x = self.model(x)
        return x

    def loss_fn(self, logits, y):
        loss = nn.CrossEntropyLoss()
        return loss(logits, y)

    def training_step(self, train_batch, batch_idx):
        X, y = train_batch
        logits = self.forward(X)
        loss = self.loss_fn(logits, y)
        self.log('train_loss', loss)
        m = nn.Softmax(dim=1)
        output = m(logits)
        self.log('train_acc', self.accuracy(output, y))
        return loss

    def validation_step(self, val_batch, batch_idx):
        X, y = val_batch
        logits = self.forward(X)
        loss = self.loss_fn(logits, y)
        self.log('test_loss', loss)
        m = nn.Softmax(dim=1)
        output = m(logits)
        self.log('test_acc', self.accuracy(output, y))

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.model.parameters(), lr=0.1)
        return optimizer


class Light_DataModule(pl.LightningDataModule):
    def __init__(self, resize=None):
        super().__init__()
        if resize:
            self.resize = resize

    def setup(self, stage):
        # transforms for images
        trans = [transforms.ToTensor()]
        if self.resize:
            trans.insert(0, transforms.Resize(self.resize))
        trans = transforms.Compose(trans)
        # prepare transforms standard to MNIST
        self.mnist_train = FashionMNIST(root="../data", train=True, download=True, transform=trans)
        self.mnist_test = FashionMNIST(root="../data", train=False, download=True, transform=trans)

    def train_dataloader(self):
        return DataLoader(self.mnist_train, batch_size=128, shuffle=True, num_workers=4)

    def val_dataloader(self):
        return DataLoader(self.mnist_test, batch_size=128, num_workers=4)


## Train model
data_module = Light_DataModule(resize=224)
wandb_logger = WandbLogger(project="d2l", name='NIN')
model = Lightning_nin()
trainer = pl.Trainer(logger=wandb_logger, max_epochs=4, gpus=1, progress_bar_refresh_rate=1)
trainer.fit(model, data_module)
wandb.finish()
After running the code I am only getting an accuracy of 0.1. I have been able to implement other CNNs (like VGG) using the same template, so I am not sure where I am going wrong. The accuracy should be close to 0.9 after 10 epochs.
The kernel_size and strides are very big for an image size of 224. They will drastically reduce the information that is passed on to subsequent layers. Try reducing them. Also, VGG was a very carefully designed architecture.
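One way to check this is to push a dummy 224x224 batch through the convolutional stages and print the spatial size after each one; a small diagnostic sketch reusing the block parameters from the question:

import torch
from torch import nn

def nin_block(in_channels, out_channels, kernel_size, strides, padding):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size, strides, padding), nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=1), nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=1), nn.ReLU())

stages = nn.Sequential(
    nin_block(1, 96, kernel_size=11, strides=4, padding=0), nn.MaxPool2d(3, stride=2),
    nin_block(96, 256, kernel_size=5, strides=1, padding=2), nn.MaxPool2d(3, stride=2),
    nin_block(256, 384, kernel_size=3, strides=1, padding=1), nn.MaxPool2d(3, stride=2))

x = torch.randn(1, 1, 224, 224)  # one grayscale image after Resize(224)
for stage in stages:
    x = stage(x)
    print(stage.__class__.__name__, tuple(x.shape))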

'Net' object has no attribute 'parameters'

I am fairly new to machine learning. I learned to write this code from YouTube tutorials, but I keep getting this error:
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_bundle/pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "/Users/aniket/Desktop/DeepLearning/PythonLearningPyCharm/CatVsDogs.py", line 109, in <module>
optimizer = optim.Adam(net.parameters(), lr=0.001) # tweaks the weights from what I understand
AttributeError: 'Net' object has no attribute 'parameters'
This is the Net class:
class Net():
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.conv2 = nn.Conv2d(32, 64, 5)
        self.conv3 = nn.Conv2d(64, 128, 5)
        self.to_linear = None
        x = torch.randn(50, 50).view(-1, 1, 50, 50)
        self.Conv2d_Linear_Link(x)
        self.fc1 = nn.Linear(self.to_linear, 512)
        self.fc2 = nn.Linear(512, 2)

    def Conv2d_Linear_Link(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        if self.to_linear is None:
            self.to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]
        return x

    def forward(self, x):
        x = self.Conv2d_Linear_Link(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.softmax(x, dim=1)
And this is the train function:
def train():
    for epoch in range(epochs):
        for i in tqdm(range(0, len(X_train), batch)):
            batch_x = train_X[i:i + batch].view(-1, 1, 50, 50)
            batch_y = train_y[i:i + batch]
            net.zero_grad()  # I don't understand why we do this, but we don't want the probabilities adding up
            output = net(batch_x)
            loss = loss_function(output, batch_y)
            loss.backward()
            optimizer.step()
        print(loss)
And the optimizer and loss function:
optimizer = optim.Adam(net.parameters(), lr=0.001) # tweaks the weights from what I understand
loss_function = nn.MSELoss() # gives the loss
You're not subclassing nn.Module. It should look like this:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
This allows your network to inherit all the functionality of the nn.Module class, such as the parameters() method.
It may also just be a spelling problem; check which parameters your Net class actually has.
You need to import optim from torch
from torch import optim
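Putting the fix together, a minimal sketch of the corrected skeleton (the single conv layer and the flattened size of 32 * 23 * 23 for 50x50 inputs are just for illustration), after which net.parameters() is available to the optimizer:

import torch
from torch import nn, optim
import torch.nn.functional as F

class Net(nn.Module):  # subclassing nn.Module is what provides parameters()
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 5)
        self.fc1 = nn.Linear(32 * 23 * 23, 2)  # 50x50 -> conv(5) -> 46x46 -> maxpool(2) -> 23x23

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = x.view(x.size(0), -1)
        return self.fc1(x)

net = Net()
optimizer = optim.Adam(net.parameters(), lr=0.001)  # now works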

Passing word2vec embedding to a custom LSTM pytorch model

I have a set of input sentences. I am using the pretrained word2vec model from gensim to get the embedding of the input sentences. I want to pass these embeddings as input to a custom PyTorch LSTM model:
hidden_size = 32
num_layers = 1
num_classes = 2

class customModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(customModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bilstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=False, bidirectional=True)
        self.fcl = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)

        # Forward propagate LSTM
        out, hidden = self.bilstm(x, (h0, c0))
        fw_bilstm = out[-1, :, :self.hidden_size]
        bk_bilstm = out[0, :, :self.hidden_size]
        concat_fw_bw = torch.cat((fw_bilstm, bk_bilstm), dim=1)
        fc = self.fcl(concat_fw_bw)
        x = F.softmax(F.relu(fc))
        return x
Now I initialize the model object.
model = customModel(300, hidden_size, num_layers, num_classes)
Get embedding for the input sentences
sentences = [['my', 'name', 'is', 'nad'], ['i', 'love', 'nlp', 'proc']]
embedding = create_embedding(sentences)
embedding_torch = torch.FloatTensor(embedding)
Now I want to pass these embeddings to the model to get the prediction
for item in embedding_torch:
    item = item.view((1, item.size()[0], item.size()[1]))
    for epoch in range(1):
        tag_scores = model(item)
        print(tag_scores)
This throws me a runtime error:
RuntimeError: Expected hidden[0] size (2, 4, 32), got (2, 1, 32)
I am not sure why this is happening. My understanding is that the h0 = torch.zeros(self.num_layers*2, x.size(0), self.hidden_size).to(device) line is calculating the hidden dimension properly.
What am I missing? Please suggest.
The backbone of your model is nn.LSTM, which expects inputs with size [sequence_length, batch_size, embedding_size]. On the other hand, the inputs you are providing the model have size [1, sequence_length, embedding_size]. What I would do is create the nn.LSTM as:
# With batch_first=True
self.bilstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
That way, the model would expect the inputs to be of size [batch_size, sequence_length, embedding_size]. Then, instead of going through each element in the batch separately, do:
tag_scores = model(embedding_torch)
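Note that with batch_first=True the time axis becomes dimension 1, so the time-step slices in forward would change accordingly; also, for a bidirectional nn.LSTM the backward direction occupies the second half of the feature dimension. A sketch of what that indexing might look like:

# out: (batch_size, sequence_length, 2 * hidden_size) when batch_first=True
fw_bilstm = out[:, -1, :self.hidden_size]   # forward direction, last time step
bk_bilstm = out[:, 0, self.hidden_size:]    # backward direction, first time step
concat_fw_bw = torch.cat((fw_bilstm, bk_bilstm), dim=1)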