Can I figure out whether there is a miss correspondence between my loss function and accuracy? - deep-learning

I think my loss does not match the accuracy
Training accuracy of 0.037.
Training loss of 0.35.
I am using CrossEntropyLoss and multiclassAccuracy with 397 classes
This is my model
class ResNetLightningModule(pl.LightningModule):
def __init__(self, num_labels=397, lr=1.0e-03):
super().__init__()
self.save_hyperparameters()
self.loss_function = nn.CrossEntropyLoss()
self.model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")
self.model.classifier = nn.Sequential(nn.Dropout(0.5), nn.Flatten(start_dim=1, end_dim=-1), nn.Linear(2048, num_labels))
self.accuracy_top_1 = MulticlassAccuracy(num_classes=self.hparams.num_labels, top_k=1)
self.accuracy_top_5 = MulticlassAccuracy(num_classes=self.hparams.num_labels, top_k=5)
def forward(self, X):
logits = self.model(X).logits
return logits
def training_step(self, batch, batch_idx):
X, y = batch
logits = self(X)
loss = self.loss_function(logits, y)
self.log("train_loss", loss, on_epoch=True)
acc1 = self.accuracy_top_1(logits, y)
self.log("train_accuracy_1", acc1, on_epoch=True, prog_bar=True)
return loss

Related

LSTM for time series forecasting

I have created a LSTM model in Pytorch which looks like this:
LSTMNet
Now I want to build another LSTM model (NewLSTMNet) on top of it (LSTMNet) by freezing the fc1 layer. I used:
model.fc1.weight.requires_grad = False
model.fc1.bias.requires_grad = False
and then I changed fc2 layer with a linear layer with input features = 40 and output features = 40.
So far I did:
class NewLSTMNet(nn.Module):
def __init__(self, model, input_size, hidden_size, num_layers):
super(NewLSTMNet, self).__init__()
self.model = model
self.input_size = input_size
self.hidden_size = hidden_size
self.num_layers = num_layers
self.model.fc2 = nn.Linear(40, 40)
# self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
self.fc3 = nn.Sequential(
# nn.ReLU(),
nn.Linear (40 , 128),
nn.ReLU(),
nn.Linear(128, 40),
nn.ReLU(),
nn.Linear(40,1),
nn.ReLU(),
)
def forward(self,x):
# input = self.model(x)
# h0 = Variable(torch.zeros(self.num_layers, input.size(0), self.hidden_size))
# c0 = Variable(torch.zeros(self.num_layers, input.size(0), self.hidden_size))
# _, (h_out,_) = self.lstm(input, (h0,c0))
# h_out = h_out.view(-1, self.hidden_size)
# print(h_out.shape)
# out = self.fc3(out)
out = self.model(x)
out = self.fc3(out)
return out
Now my new LSTM model looks like this:
NewLSTMNet
My training loop looks like this:
for epoch in range(EPOCHS):
model.train()
output = model(X_train)
train_loss = criterion(output, y_train)
optimizer.zero_grad()
train_loss.backward()
optimizer.step()
with torch.no_grad():
model.eval()
output_val = model(X_valid)
valid_loss = criterion(output_val, y_valid)
if valid_loss <= valid_loss_min:
torch.save(model.state_dict(), './state_dict_new.pt')
print(
f'Epoch {epoch + 0:01}: Validation loss decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Saving model ...')
valid_loss_min = valid_loss
early_stopping_counter = 0 # reset counter if validation loss decreases
else:
print(f'Epoch {epoch + 0:01}: Validation loss did not decrease')
early_stopping_counter += 1
if early_stopping_counter > early_stopping_patience:
print('Early stopped at epoch :', epoch)
break
print(f'\t Train_Loss: {train_loss:.4f} Val_Loss: {valid_loss:.4f} BEST VAL Loss: {valid_loss_min:.4f}\n')
Now the model is working fine. But I want to create a LSTM layer in the NewLSTMNet model. I already tried to add a LSTM layer but I was expecting a vector in the output but I am getting a matrix in the output of the prediction. So there is shape mismatch!
How should I modify my code? Any help is appreciated. Thanks in advance!

Resolve overfitting in a convolutional network

I've written a snippet to classify Omniglot images. I calculate the training and validation losses in each epoch, where the latter is computed using images that were not seen by the network before. The two plots are as below:
Since the training loss decreases while the validation loss increases, I have concluded that my model overfits. I've tried several suggestions (e.g. here) to overcome this, including:
Increasing the size of the training set.
shuffling the data.
Adding dropout layers (up to p=0.9).
Using smaller model.
Altering the architecture.
Changing the learning rate.
Reducing the batch size.
Adding weight decay.
However, the validation loss still increases. I wonder if there are any other suggestions to improve this behavior or if this is not overfitting, but the problem is something else. Below is the snippet used in this question.
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn, optim
from torch.utils.data import DataLoader
class MyModel(nn.Module):
def __init__(self):
super(MyModel, self).__init__()
dim_out = 964
# -- embedding params
self.cn1 = nn.Conv2d(1, 16, 7)
self.cn2 = nn.Conv2d(16, 32, 4)
self.cn3 = nn.Conv2d(32, 64, 3)
self.pool = nn.MaxPool2d(2)
self.bn1 = nn.BatchNorm2d(16)
self.bn2 = nn.BatchNorm2d(32)
self.bn3 = nn.BatchNorm2d(64)
# -- prediction params
self.fc1 = nn.Linear(256, 170)
self.fc2 = nn.Linear(170, 50)
self.fc3 = nn.Linear(50, dim_out)
# -- non-linearity
self.relu = nn.ReLU()
self.Beta = 10
self.sopl = nn.Softplus(beta=self.Beta)
def forward(self, x):
y1 = self.pool(self.bn1(self.relu(self.cn1(x))))
y2 = self.pool(self.bn2(self.relu(self.cn2(y1))))
y3 = self.relu(self.bn3(self.cn3(y2)))
y3 = y3.view(y3.size(0), -1)
y5 = self.sopl(self.fc1(y3))
y6 = self.sopl(self.fc2(y5))
return self.fc3(y6)
class Train:
def __init__(self):
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# -- data
dim = 28
batch_size = 400
my_transforms = transforms.Compose([transforms.Resize((dim, dim)), transforms.ToTensor()])
trainset = torchvision.datasets.Omniglot(root="./data/omniglot_train/", download=False, transform=my_transforms)
validset = torchvision.datasets.Omniglot(root="./data/omniglot_train/", background=False, download=False,
transform=my_transforms)
self.TrainDataset = DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
self.ValidDataset = DataLoader(dataset=validset, batch_size=len(validset), shuffle=False)
self.N_train = len(trainset)
self.N_valid = len(validset)
# -- model
self.model = MyModel().to(self.device)
# -- train
self.epochs = 3000
self.loss = nn.CrossEntropyLoss()
self.optimizer = optim.Adam(self.model.parameters(), lr=1e-3)
def train_epoch(self):
self.model.train()
train_loss = 0
for batch_idx, data_batch in enumerate(self.TrainDataset):
# -- predict
predict = self.model(data_batch[0].to(self.device))
# -- loss
loss = self.loss(predict, data_batch[1].to(self.device))
# -- optimize
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
train_loss += loss.item()
return train_loss/(batch_idx+1)
def valid_epoch(self):
with torch.no_grad():
self.model.eval()
for data_batch in self.ValidDataset:
# -- predict
predict = self.model(data_batch[0].to(self.device))
# -- loss
loss = self.loss(predict, data_batch[1].to(self.device))
return loss.item()
def __call__(self):
for epoch in range(self.epochs):
train_loss = self.train_epoch()
valid_loss = self.valid_epoch()
print('Epoch {}: Training loss = {:.5f}, Validation loss = {:.5f}.'.format(epoch, train_loss, valid_loss))
torch.save(self.model.state_dict(), './model_stat.pth')
if __name__ == '__main__':
my_train = Train()
my_train()
If your train accuracy is good but testing (data not used in training) accuracy is bad then you have an overfitting problem. I had the same problem with a CNN model. You can use two methods to overcome overfitting. First is early stopping for your train and second is regularization. Check the below example:
# L2 regularizers for layers
model = keras.Sequential([
keras.layers.InputLayer(input_shape=(32, 32)),
keras.layers.Reshape(target_shape=(32, 32, 1)),
keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation=tf.nn.relu, use_bias=True , kernel_regularizer =tf.keras.regularizers.l2( l=0.01)),
keras.layers.MaxPooling2D(pool_size=(2, 2)),
keras.layers.Flatten(),
keras.layers.Dense(10, activation = 'softmax', use_bias=True)
])
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(),
metrics=['accuracy'])
#Early Stopping
history = model.fit(X_train, Y_train,
validation_data=(X_dev, Y_dev),
epochs=4000,
callbacks=EarlyStopping(monitor='val_loss'))
Do not forget to import for early stopping.
from tensorflow.keras.callbacks import EarlyStopping

ValueError: Expected input batch_size (59) to match target batch_size (1)

I'm trying to build a semantic segmentation model with pytorch. However, I encounter this error and do not know how to fix it.
This is the model:
class SegmentationNN(pl.LightningModule):
def __init__(self, num_classes=23, hparams=None):
super().__init__()
self.hparams = hparams
self.model=models.alexnet(pretrained=True).features
self.conv=nn.Conv2d(256, 3, kernel_size=1)
self.upsample = nn.Upsample(size=(240,240))
def forward(self, x):
print('Input:', x.shape)
x = self.model(x)
print('After Alexnet convs:', x.shape)
x = self.conv(x)
print('After 1-conv:', x.shape)
x = self.upsample(x)
print('After upsampling:', x.shape)
return x
def training_step(self, batch, batch_idx):
images, targets = batch
# targets = targets.view(targets.size(0), -1)
out = self.forward(images)
loss_func = nn.CrossEntropyLoss(ignore_index=-1, reduction='mean')
loss = loss_func(out, targets.unsqueeze(0))
tensorboard_logs = {'loss': loss}
return {'loss': loss, 'log':tensorboard_logs}
def validation_step(self, batch, batch_idx):
images, targets = batch
# targets = targets.view(targets.size(0), -1)
out = self.forward(images)
loss_func = nn.CrossEntropyLoss(ignore_index=-1, reduction='mean')
loss = loss_func(out, targets.unsqueeze(0))
tensorboard_logs = {'loss': loss}
return {'loss': loss, 'log':tensorboard_logs}
def configure_optimizers(self):
optim = torch.optim.Adam(self.parameters(), lr=self.hparams['learning_rate'])
return optim
And this is the training and fit:
train_dataloader = DataLoader(train_data, batch_size=hparams['batch_size'])
val_dataloader = DataLoader(val_data, batch_size=hparams['batch_size'])
trainer = pl.Trainer(
max_epochs=50,
gpus=1 if torch.cuda.is_available() else None
)
pass
trainer.fit(model, train_dataloader, val_dataloader)
These are the sizes of the tensor after each layer:
Input: torch.Size([59, 3, 240, 240])
After Alexnet convs: torch.Size([59, 256, 6, 6])
After 1-conv: torch.Size([59, 3, 6, 6])
After upsampling: torch.Size([59, 3, 240, 240])
I am pretty a beginner with Pytorch and Pytorch Lightning so every advice would be apprreciated!
Can you delete the unsqueeze(0) part here : loss = loss_func(out, targets.unsqueeze(0))

Pytorch Fully Connected Feed Forward Network for a regression problem - Gives same result for all inputs

I build a neural network model in Pytorch for a simple regression problem (w1x1+w2x2+w3x3 = y) where I generated 2000 records for training data with random values for x1,x2,x3 and W1=4, W2=6, W3=2. I created a test dataset of 20 records with just values for x1,x2,x3 and I was hoping to get the result for But, the model returns same value for all 20 input rows. I don't know where the issue is. Below is the code snippet.
inputs = df[['x1', 'x2', 'x3']]
target = df['y']
inputs = torch.tensor(inputs.values).float()
target = torch.tensor(target.values).float()
test_data = torch.tensor(test_data.values).float()
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self):
super(Net,self).__init__()
hidden1 = 10
hidden2 = 15
self.fc1 = nn.Linear(3,hidden1)
self.fc2 = nn.Linear(hidden1,hidden2)
self.fc3 = nn.Linear(hidden2,1)
def forward(self,x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
#instantiate the model
model = Net()
print(model)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),lr=0.01)
model.train()
#epochs
epochs = 500
for x in range(epochs):
#initialize the training loss to 0
train_loss = 0
#clear out gradients
optimizer.zero_grad()
#calculate the output
output = model(inputs)
#calculate loss
loss = criterion(output,target)
#backpropagate
loss.backward()
#update parameters
optimizer.step()
if ((x%5)==0):
print('Training Loss after epoch {:2d} is {:2.6f}'.format(x,loss))
#set the model in evaluation mode
model.eval()
#Test the model on unseen data
test_output = model(test_data)
print(test_output)

LSTM layer returns nan when fed by its own output in PyTorch

I’m trying to generate time-series data with an LSTM and a Mixture Density Network as described in https://arxiv.org/pdf/1308.0850.pdf
Here is a link to my implementation: https://github.com/NeoVand/MDNLSTM
The repository contains a toy dataset to train the network.
On training, the LSTM layer returns nan for its hidden state after one iteration. A similar issue is reported here.
For your convenience, here is the code:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import numpy.random as npr
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
ts = torch.load('LDS_Toy_Data.pt')
def detach(states):
return [state.detach() for state in states]
class MDNLSTM(nn.Module):
def __init__(self, d_obs, d_lat=2, n_gaussians=2, n_layers=1):
super(MDNLSTM, self).__init__()
self.d_obs = d_obs
self.d_lat = d_lat
self.n_gaussians = n_gaussians
self.n_layers = n_layers
self.lstm = nn.LSTM(d_obs, d_lat, n_layers, batch_first=True)
self.fcPi = nn.Linear(d_lat, n_gaussians*d_obs)
self.fcMu = nn.Linear(d_lat, n_gaussians*d_obs)
self.fcSigma = nn.Linear(d_lat, n_gaussians*d_obs)
def get_mixture_coef(self, y):
time_steps = y.size(1)
pi, mu, sigma = self.fcPi(y), self.fcMu(y), self.fcSigma(y)
pi = pi.view(-1, time_steps, self.n_gaussians, self.d_obs)
mu = mu.view(-1, time_steps, self.n_gaussians, self.d_obs)
sigma = sigma.view(-1, time_steps, self.n_gaussians, self.d_obs)
pi = F.softmax(pi, 2)
sigma = torch.exp(sigma)
return pi, mu, sigma
def forward(self, x, h):
y, (h, c) = self.lstm(x, h)
#print(h)
pi, mu, sigma = self.get_mixture_coef(y)
return (pi, mu, sigma), (h, c)
def init_hidden(self, bsz):
return (torch.zeros(self.n_layers, bsz, self.d_lat).to(device),
torch.zeros(self.n_layers, bsz, self.d_lat).to(device))
def mdn_loss_fn(y, pi, mu, sigma):
m = torch.distributions.Normal(loc=mu, scale=sigma)
loss = torch.exp(m.log_prob(y))
loss = torch.sum(loss * pi, dim=2)
loss = -torch.log(loss)
return loss.mean()
def criterion(y, pi, mu, sigma):
y = y.unsqueeze(2)
return mdn_loss_fn(y, pi, mu, sigma)
DOBS = 10
DLAT = 2
INSTS = 100
seqlen = 30
epochs = 200
mdnlstm = MDNLSTM(DOBS, DLAT).to(device)
optimizer = torch.optim.Adam(mdnlstm.parameters())
z = torch.from_numpy(ts[:INSTS,:,:]).float().to(device)
# hiddens=[]
# Train the model
for epoch in range(epochs):
# Set initial hidden and cell states
hidden = mdnlstm.init_hidden(INSTS)
for i in range(0, z.size(1) - seqlen, seqlen):
# Get mini-batch inputs and targets
inputs = z[:, i:i+seqlen, :]
targets = z[:, (i+1):(i+1)+seqlen, :]
hidden = detach(hidden)
# hiddens.append(hidden)
(pi, mu, sigma), hidden = mdnlstm(inputs, hidden)
loss = criterion(targets, pi, mu, sigma)
mdnlstm.zero_grad()
loss.backward()
optimizer.step()
if epoch % 100 == 0:
print ('Epoch [{}/{}], Loss: {:.4f}'
.format(epoch, epochs, loss.item()))
I would appreciate any help on this.
The issue was caused by the log-sum-exp operation not being done in a stable way. Here is an implementation of a weighted log-sum-exp trick that I used and could fix the problem:
def weighted_logsumexp(x,w, dim=None, keepdim=False):
if dim is None:
x, dim = x.view(-1), 0
xm, _ = torch.max(x, dim, keepdim=True)
x = torch.where(
# to prevent nasty nan's
(xm == float('inf')) | (xm == float('-inf')),
xm,
xm + torch.log(torch.sum(torch.exp(x - xm)*w, dim, keepdim=True)))
return x if keepdim else x.squeeze(dim)
and using that implemented the stable loss function:
def mdn_loss_stable(y,pi,mu,sigma):
m = torch.distributions.Normal(loc=mu, scale=sigma)
m_lp_y = m.log_prob(y)
loss = -weighted_logsumexp(m_lp_y,pi,dim=2)
return loss.mean()
This worked like a charm. In general, the problem is that torch won't report under-flows.