pytorch save and load model - deep-learning

Is there any difference between the original model and a model that has been saved and then loaded?
Before training, I simply saved the model and then loaded it back, because I wanted to know whether anything changes during saving and loading.
Here's my code.
Just a model for testing:
class test_model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=50, kernel_size=3, stride=1, padding=1, bias=False)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        self.bn1 = nn.BatchNorm2d(50)
        self.conv2_ = nn.Conv2d(in_channels=50, out_channels=10, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=50, kernel_size=3, stride=1, padding=1, bias=False)
        self.maxpool2 = nn.MaxPool2d(2, 2)
        self.bn2 = nn.BatchNorm2d(50)
        self.conv3_ = nn.Conv2d(in_channels=50, out_channels=10, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv3 = nn.Conv2d(in_channels=10, out_channels=50, kernel_size=3, stride=1, padding=1, bias=False)
        self.maxpool3 = nn.MaxPool2d(2, 2)
        self.bn3 = nn.BatchNorm2d(50)
        self.conv4_ = nn.Conv2d(in_channels=50, out_channels=20, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv4 = nn.Conv2d(in_channels=20, out_channels=100, kernel_size=3, stride=1, padding=1, bias=False)
        self.maxpool4 = nn.MaxPool2d(2, 2)
        self.bn4 = nn.BatchNorm2d(100)
        self.conv5_ = nn.Conv2d(in_channels=100, out_channels=10, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv5 = nn.Conv2d(in_channels=10, out_channels=100, kernel_size=3, stride=1, padding=1, bias=False)
        self.maxpool5 = nn.MaxPool2d(2, 2)
        self.bn5 = nn.BatchNorm2d(100)
        self.fc = nn.Sequential(
            Flatten(),
            nn.Linear(100 * 7 * 7, 100),
            nn.ReLU(),
            nn.Linear(100, 100))

    def forward(self, inputs):
        feature_map1 = self.conv1(inputs)
        feature_map1 = self.maxpool1(feature_map1)
        feature_map1 = self.bn1(feature_map1)
        feature_map2 = self.conv2_(feature_map1)
        feature_map2 = self.conv2(feature_map2)
        feature_map2 = self.maxpool2(feature_map2)
        feature_map2 = self.bn2(feature_map2)
        feature_map3 = self.conv3_(feature_map2)
        feature_map3 = self.conv3(feature_map3)
        feature_map3 = self.maxpool3(feature_map3)
        feature_map3 = self.bn3(feature_map3)
        feature_map4 = self.conv4_(feature_map3)
        feature_map4 = self.conv4(feature_map4)
        feature_map4 = self.maxpool4(feature_map4)
        feature_map4 = self.bn4(feature_map4)
        feature_map5 = self.conv5_(feature_map4)
        feature_map5 = self.conv5(feature_map5)
        feature_map5 = self.maxpool5(feature_map5)
        feature_map5 = self.bn5(feature_map5)
        output = self.fc(feature_map5)
        return output
Then:
model_cpu = test_model()
Save and load:
torch.save(model_cpu, '/home/mskang/hyeokjong/model_cpu.pt')
model_load = torch.load('/home/mskang/hyeokjong/model_cpu.pt')
And:
model_load == model_cpu
------------------------------------
False
However,
print(model_load)
print(model_cpu)
look the same.
Furthermore,
I also trained both models (model_load and model_cpu) and the results look the same too.
So I think those two models are the same, and they should be the same.
But why False?

They wrap the same underlying model but are two different Python objects. That is why __eq__ returns False when you evaluate model_load == model_cpu: nn.Module does not define __eq__, so the default object-identity comparison is used. You can see model_load and model_cpu as two copies of the same nn.Module.
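If you want to check that the weights themselves survived the round trip, compare the state_dicts tensor by tensor instead of comparing the module objects. A minimal sketch, assuming the model_cpu and model_load objects from the question (and that torch is already imported):

same = all(
    torch.equal(t_saved, t_loaded)
    for t_saved, t_loaded in zip(model_cpu.state_dict().values(),
                                 model_load.state_dict().values())
)
print(same)  # True: every parameter and buffer tensor matches exactly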

Related

training and validation losses decreasing slowly

I have implemented a 2D-CNN model followed by a GRU layer.
class CNN2D(nn.Module):
    def __init__(self, img_x=88, img_y=88, fc_hidden1=512, fc_hidden2=512, drop_p=0.3, CNN_embed_dim=512, num_classes=9):
        super(CNN2D, self).__init__()
        self.img_x = img_x
        self.img_y = img_y
        self.CNN_embed_dim = CNN_embed_dim
        self.ch1, self.ch2, self.ch3, self.ch4 = 8, 16, 32, 64
        self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3)
        self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2)
        self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0)
        self.conv1_outshape = conv2D_output_size((self.img_x, self.img_y), self.pd1, self.k1, self.s1)  # Conv1 output shape
        self.conv2_outshape = conv2D_output_size(self.conv1_outshape, self.pd2, self.k2, self.s2)
        self.conv3_outshape = conv2D_output_size(self.conv2_outshape, self.pd3, self.k3, self.s3)
        self.conv4_outshape = conv2D_output_size(self.conv3_outshape, self.pd4, self.k4, self.s4)
        # fully connected layer hidden nodes
        self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2
        self.drop_p = drop_p
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=self.ch1, kernel_size=self.k1, stride=self.s1, padding=self.pd1),
            nn.BatchNorm2d(self.ch1, momentum=0.01),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=self.ch1, out_channels=self.ch2, kernel_size=self.k2, stride=self.s2, padding=self.pd2),
            nn.BatchNorm2d(self.ch2, momentum=0.01),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=self.ch2, out_channels=self.ch3, kernel_size=self.k3, stride=self.s3, padding=self.pd3),
            nn.BatchNorm2d(self.ch3, momentum=0.01),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=self.ch3, out_channels=self.ch4, kernel_size=self.k4, stride=self.s4, padding=self.pd4),
            nn.BatchNorm2d(self.ch4, momentum=0.01),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )
        self.drop = nn.Dropout2d(self.drop_p)
        self.pool = nn.MaxPool2d(2)
        # self.fc1 = nn.Linear(self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], self.fc_hidden1)  # fully connected layer, output k classes
        # self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2)
        self.fc3 = nn.Linear(self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], self.CNN_embed_dim)  # output = CNN embedding latent variables
        self.num_classes = num_classes
        self.gru = nn.GRU(
            input_size=self.CNN_embed_dim,
            hidden_size=256,
            num_layers=1,
            batch_first=True,  # (batch, time_step, input_size)
        )
        # self.gfc1 = nn.Linear(256, 128)
        self.gfc2 = nn.Linear(256, self.num_classes)

    def forward(self, x_3d):
        cnn_embed_seq = []
        for t in range(x_3d.size(2)):
            # CNNs
            x = self.conv1(x_3d[:, :, t, :, :])
            x = self.conv2(x)
            x = self.conv3(x)
            x = self.conv4(x)
            x = x.reshape(x.size(0), -1)
            # self.fc1 / self.fc2 are commented out in __init__, so they are skipped here as well
            # x = F.relu(self.fc1(x))
            # x = F.dropout(x, p=self.drop_p, training=self.training)
            # x = F.relu(self.fc2(x))
            x = F.dropout(x, p=self.drop_p, training=self.training)
            x = self.fc3(x)
            cnn_embed_seq.append(x)
        cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0).transpose_(0, 1)
        RNN_out, _ = self.gru(cnn_embed_seq, None)
        x = RNN_out[:, -1, :]
        x = F.relu(x)
        x = F.dropout(x, p=self.drop_p, training=self.training)  # NEW UPDATE
        x = self.gfc2(x)
        return x
The inputs are videos of shape [batch, channels, frames, height, width].
I used the Adam optimizer with lr=1e-5, weight_decay=5e-5, amsgrad=True, and cross-entropy loss.
The training and validation losses are decreasing slowly and the model is not converging.
What should I change?
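For reference, a minimal sketch of the training setup described above (the batch size, clip length, and random tensors are made-up placeholders; the CNN2D class and its conv2D_output_size helper are assumed to be defined as in the snippet):

import torch
import torch.nn as nn

model = CNN2D(img_x=88, img_y=88, num_classes=9)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5, weight_decay=5e-5, amsgrad=True)
criterion = nn.CrossEntropyLoss()

clips = torch.randn(4, 1, 16, 88, 88)   # [batch, channels, frames, height, width]
labels = torch.randint(0, 9, (4,))      # one class index per clip
logits = model(clips)                   # shape [4, num_classes]
loss = criterion(logits, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()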

RuntimeError: Expected 4-dimensional input for 4-dimensional weight [1024, 64, 3, 3], but got input of size [32, 10] instead

This line works fine:
self.conv = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
Then I introduced ResNet18 in its place:
self.conv = ResNet18()
**ResNet Class**
'''ResNet in PyTorch.

For Pre-activation ResNet, see 'preact_resnet.py'.

Reference:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385
'''
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, self.expansion *
                               planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(self.expansion*planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2])


def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])


def ResNet50():
    return ResNet(Bottleneck, [3, 4, 6, 3])


def ResNet101():
    return ResNet(Bottleneck, [3, 4, 23, 3])


def ResNet152():
    return ResNet(Bottleneck, [3, 8, 36, 3])


def test():
    net = ResNet18()
    y = net(torch.randn(1, 3, 3, 64))
    print(y.size())

# test()
The problem is that ResNet18 (like most other classification networks) is not a fully convolutional network. The last layer of the net is an nn.Linear layer that operates on flattened features, so the network outputs a 2-D tensor of class scores rather than a 4-D feature map, and you therefore cannot simply replace a convolutional block/layer with a whole ResNet.
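To see where the shapes in the error message come from, here is a small sketch (it assumes the ResNet18 defined above and a CIFAR-sized 32x32 input; the batch size of 32 is taken from the error message):

import torch

net = ResNet18()
out = net(torch.randn(32, 3, 32, 32))
print(out.shape)  # torch.Size([32, 10]) -- a 2-D tensor of class scores,
                  # not the 4-D feature map the following Conv2d expects

If you only want the convolutional trunk, one option for this particular implementation is nn.Sequential(*list(net.children())[:-1]), which drops the final nn.Linear (the functional ReLU and avg-pool in forward are not modules, so they disappear as well); its output has 512 channels, so the layer that consumes it would still need to be adapted.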

U-Net with Pre-Trained ResNet throws dimension error must match

I have an RGB image and a segmentation mask, both of dimensions 900x600 (width, height).
My U-Net code is the following. I do not really want to resize the output too much; it is fine if it is resized without losing much of the aspect ratio.
import torch
import torch.nn as nn
from torchvision import models


def convrelu(in_channels, out_channels, kernel, padding):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel, padding=padding),
        nn.ReLU(inplace=True)
    )


class ResNetUNet(nn.Module):
    def __init__(self, n_class=1):
        super().__init__()

        self.base_model = models.resnet18(pretrained=True)
        self.base_layers = list(self.base_model.children())

        self.layer0 = nn.Sequential(*self.base_layers[:3])  # size=(N, 64, x.H/2, x.W/2)
        self.layer0_1x1 = convrelu(64, 64, 1, 0)
        self.layer1 = nn.Sequential(*self.base_layers[3:5])  # size=(N, 64, x.H/4, x.W/4)
        self.layer1_1x1 = convrelu(64, 64, 1, 0)
        self.layer2 = self.base_layers[5]  # size=(N, 128, x.H/8, x.W/8)
        self.layer2_1x1 = convrelu(128, 128, 1, 0)
        self.layer3 = self.base_layers[6]  # size=(N, 256, x.H/16, x.W/16)
        self.layer3_1x1 = convrelu(256, 256, 1, 0)
        self.layer4 = self.base_layers[7]  # size=(N, 512, x.H/32, x.W/32)
        self.layer4_1x1 = convrelu(512, 512, 1, 0)

        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        self.conv_up3 = convrelu(256 + 512, 512, 3, 1)
        self.conv_up2 = convrelu(128 + 512, 256, 3, 1)
        self.conv_up1 = convrelu(64 + 256, 256, 3, 1)
        self.conv_up0 = convrelu(64 + 256, 128, 3, 1)

        self.conv_original_size0 = convrelu(3, 64, 3, 1)
        self.conv_original_size1 = convrelu(64, 64, 3, 1)
        self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1)

        self.conv_last = nn.Conv2d(64, n_class, 1)

    def forward(self, input):
        x_original = self.conv_original_size0(input)
        x_original = self.conv_original_size1(x_original)

        layer0 = self.layer0(input)
        layer1 = self.layer1(layer0)
        layer2 = self.layer2(layer1)
        layer3 = self.layer3(layer2)
        layer4 = self.layer4(layer3)

        layer4 = self.layer4_1x1(layer4)
        x = self.upsample(layer4)
        layer3 = self.layer3_1x1(layer3)
        x = torch.cat([x, layer3], dim=1)
        x = self.conv_up3(x)

        x = self.upsample(x)
        layer2 = self.layer2_1x1(layer2)
        x = torch.cat([x, layer2], dim=1)
        x = self.conv_up2(x)

        x = self.upsample(x)
        layer1 = self.layer1_1x1(layer1)
        x = torch.cat([x, layer1], dim=1)
        x = self.conv_up1(x)

        x = self.upsample(x)
        layer0 = self.layer0_1x1(layer0)
        x = torch.cat([x, layer0], dim=1)
        x = self.conv_up0(x)

        x = self.upsample(x)
        x = torch.cat([x, x_original], dim=1)
        x = self.conv_original_size2(x)

        out = self.conv_last(x)
        return out
For this command:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNetUNet()
model = model.to(device)
# check keras-like model summary using torchsummary
from torchsummary import summary
summary(model, input_size=(3, 600, 900))
it throws the error:
54 x = self.upsample(layer4)
55 layer3 = self.layer3_1x1(layer3)
---> 56 x = torch.cat([x, layer3], dim=1)
57 x = self.conv_up3(x)
58
RuntimeError: Sizes of tensors must match except in dimension 3. Got 57 and 58
Not sure what to do here. Could someone help me figure out how to solve this?
Try this. You just need to match tensor shapes before torch.cat.
import torch
import torch.nn as nn
from torchvision import models
import torch.nn.functional as F


def match_shapes(x, y):
    if x.shape[-2:] != y.shape[-2:]:
        x = F.interpolate(x, y.shape[-2:], mode='nearest')
    return x


def convrelu(in_channels, out_channels, kernel, padding):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel, padding=padding),
        nn.ReLU(inplace=True)
    )


class ResNetUNet(nn.Module):
    def __init__(self, n_class=1):
        super().__init__()

        self.base_model = models.resnet18(pretrained=True)
        self.base_layers = list(self.base_model.children())

        self.layer0 = nn.Sequential(*self.base_layers[:3])  # size=(N, 64, x.H/2, x.W/2)
        self.layer0_1x1 = convrelu(64, 64, 1, 0)
        self.layer1 = nn.Sequential(*self.base_layers[3:5])  # size=(N, 64, x.H/4, x.W/4)
        self.layer1_1x1 = convrelu(64, 64, 1, 0)
        self.layer2 = self.base_layers[5]  # size=(N, 128, x.H/8, x.W/8)
        self.layer2_1x1 = convrelu(128, 128, 1, 0)
        self.layer3 = self.base_layers[6]  # size=(N, 256, x.H/16, x.W/16)
        self.layer3_1x1 = convrelu(256, 256, 1, 0)
        self.layer4 = self.base_layers[7]  # size=(N, 512, x.H/32, x.W/32)
        self.layer4_1x1 = convrelu(512, 512, 1, 0)

        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        self.conv_up3 = convrelu(256 + 512, 512, 3, 1)
        self.conv_up2 = convrelu(128 + 512, 256, 3, 1)
        self.conv_up1 = convrelu(64 + 256, 256, 3, 1)
        self.conv_up0 = convrelu(64 + 256, 128, 3, 1)

        self.conv_original_size0 = convrelu(3, 64, 3, 1)
        self.conv_original_size1 = convrelu(64, 64, 3, 1)
        self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1)

        self.conv_last = nn.Conv2d(64, n_class, 1)

    def forward(self, input):
        x_original = self.conv_original_size0(input)
        x_original = self.conv_original_size1(x_original)

        layer0 = self.layer0(input)
        layer1 = self.layer1(layer0)
        layer2 = self.layer2(layer1)
        layer3 = self.layer3(layer2)
        layer4 = self.layer4(layer3)

        layer4 = self.layer4_1x1(layer4)
        x = self.upsample(layer4)
        layer3 = self.layer3_1x1(layer3)
        x = match_shapes(x, layer3)
        x = torch.cat([x, layer3], dim=1)
        x = self.conv_up3(x)

        x = self.upsample(x)
        layer2 = self.layer2_1x1(layer2)
        x = match_shapes(x, layer2)
        x = torch.cat([x, layer2], dim=1)
        x = self.conv_up2(x)

        x = self.upsample(x)
        layer1 = self.layer1_1x1(layer1)
        x = match_shapes(x, layer1)
        x = torch.cat([x, layer1], dim=1)
        x = self.conv_up1(x)

        x = self.upsample(x)
        layer0 = self.layer0_1x1(layer0)
        x = torch.cat([x, layer0], dim=1)
        x = self.conv_up0(x)

        x = self.upsample(x)
        x = torch.cat([x, x_original], dim=1)
        x = self.conv_original_size2(x)

        out = self.conv_last(x)
        return out


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNetUNet()
model = model.to(device)

# check keras-like model summary using torchsummary
from torchsummary import summary
summary(model, input_size=(3, 600, 900))
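Why this fixes the error: with a 600x900 input the encoder produces odd spatial sizes (for example 38x57 at layer3), while the 2x bilinear upsampling of the 19x29 layer4 map comes back as 38x58, one pixel too wide, which is exactly the "57 and 58" in the traceback. match_shapes snaps the upsampled tensor onto the skip connection's spatial size before concatenating. A small standalone illustration (the sizes are the ones from this example; the channel counts just mirror the model):

import torch
import torch.nn.functional as F

x = torch.randn(1, 512, 38, 58)     # layer4 after the 2x bilinear upsample
skip = torch.randn(1, 256, 38, 57)  # layer3 skip connection (odd width)
x = F.interpolate(x, skip.shape[-2:], mode='nearest')  # what match_shapes does
print(torch.cat([x, skip], dim=1).shape)  # torch.Size([1, 768, 38, 57])

An alternative is to pad or resize the input so that its height and width are multiples of 32, in which case every skip connection already lines up.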

Got 512 channels instead of 64 - what should I change in my Autoencoder?

ndf = 128
z_size = 512

# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
    def __init__(self):
        super(MyNetwork, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(6, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(16, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(16, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(6, 3, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))
        self.fc1 = nn.Linear(ndf*4*8*16, z_size)
        self.fc2 = nn.Linear(ndf*4*8*16, z_size)
        self.d1 = nn.Linear(z_size, ndf*4*8*8)
        self.z_size = z_size
        self.d_max = ndf * 4

    def encode(self, x):
        x = self.encoder(x)
        x = x.view(x.shape[0], -1)
        mu = self.fc1(x)
        log_var = self.fc2(x)
        return mu, log_var

    def decode(self, x):
        x = x.view(x.shape[0], self.z_size)
        x = self.d1(x)
        x = x.view(x.shape[0], self.d_max, 8, 8)
        x = self.decoder(x)
        return x

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)

    def forward(self, x):
        mu, log_var = self.encode(x)
        mu = mu.squeeze()
        log_var = log_var.squeeze()
        z = self.reparameterize(mu, log_var)
        return self.decode(z.view(-1, self.z_size, 1, 1)), mu, log_var
I have adapted this code from a tutorial, and I'm currently getting the error 'Given transposed=1, weight of size 64 16 3 3, expected input[16, 512, 8, 8] to have 64 channels, but got 512 channels instead' when attempting to run my autoencoder.
Could someone please explain how I should further adapt this code? I am using the CIFAR10 dataset with a batch size of 16.
I skimmed through your code and found that the input and output dimensions were not consistent.
Assuming both the input and output shapes are 32x32x3, I reformatted this code:
ndf = 128
z_size = 512

# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
    def __init__(self):
        super(MyNetwork, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(6, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(16, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(16, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(6, 3, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))
        self.fc1 = nn.Linear(ndf*4*8*16, z_size)
        self.fc2 = nn.Linear(ndf*4*8*16, z_size)
        self.d1 = nn.Linear(z_size, ndf*4*8*16)
        self.z_size = z_size
        self.d_max = ndf * 4

    def encode(self, x):
        x = self.encoder(x)
        x = x.view(x.shape[0], -1)
        mu = self.fc1(x)
        log_var = self.fc2(x)
        return mu, log_var

    def decode(self, x):
        x = x.view(x.shape[0], self.z_size)
        x = self.d1(x)
        x = x.view(x.shape[0], 64, 32, 32)
        x = self.decoder(x)
        return x

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)

    def forward(self, x):
        mu, log_var = self.encode(x)
        mu = mu.squeeze()
        log_var = log_var.squeeze()
        z = self.reparameterize(mu, log_var)
        return self.decode(z.view(-1, self.z_size, 1, 1)), mu, log_var
Hope this code works :)
The input to the decoder (x = self.decoder(x) in the decode method, called from forward) should have 64 channels, as defined by nn.ConvTranspose2d(64, 16, kernel_size=3, stride=1, padding=1).
In order to do so, you can make the following changes:
Change self.d1 = nn.Linear(z_size, ndf*4*8*8) to self.d1 = nn.Linear(z_size, ndf*4*8*16).
Change x = x.view(x.shape[0], self.d_max, 8, 8) in the decode method to x = x.view(x.shape[0], 64, 32, 32).
Use print statements to analyze the shape of the tensor at the different layers:
ndf = 128
z_size = 512

# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
    def __init__(self):
        super(MyNetwork, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(6, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(16, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(16, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(6, 3, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True))
        self.fc1 = nn.Linear(ndf*4*8*16, z_size)
        self.fc2 = nn.Linear(ndf*4*8*16, z_size)
        self.d1 = nn.Linear(z_size, ndf*4*8*16)
        self.z_size = z_size
        self.d_max = ndf * 4

    def encode(self, x):
        print('encoder')
        print(x.shape)
        x = self.encoder(x)
        print(x.shape)
        x = x.view(x.shape[0], -1)
        print(x.shape)
        mu = self.fc1(x)
        print(mu.shape)
        log_var = self.fc2(x)
        print(log_var.shape)
        return mu, log_var

    def decode(self, x):
        print('decoder')
        print(x.shape)
        x = x.view(x.shape[0], self.z_size)
        print(x.shape)
        x = self.d1(x)
        print(x.shape)
        x = x.view(x.shape[0], 64, 32, 32)
        print(x.shape)
        x = self.decoder(x)
        print(x.shape)
        return x

    def reparameterize(self, mu, log_var):
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return eps.mul(std).add_(mu)

    def forward(self, x):
        mu, log_var = self.encode(x)
        mu = mu.squeeze()
        log_var = log_var.squeeze()
        z = self.reparameterize(mu, log_var)
        return self.decode(z.view(-1, self.z_size, 1, 1)), mu, log_var
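A quick way to confirm the fix (a minimal sketch, assuming the corrected MyNetwork above together with import torch and import torch.nn as nn): run one CIFAR-10-sized batch through the model and check the shapes.

model = MyNetwork()
batch = torch.randn(16, 3, 32, 32)           # a batch of 16 CIFAR-10-sized images
recon, mu, log_var = model(batch)
print(recon.shape, mu.shape, log_var.shape)
# expected: torch.Size([16, 3, 32, 32]) torch.Size([16, 512]) torch.Size([16, 512])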

PyTorch runtime error : invalid argument 0: Sizes of tensors must match except in dimension 1

I have a PyTorch model and I'm trying to test it by performing a forward pass. Here is the code:
class ResBlock(nn.Module):
    def __init__(self, inplanes, planes, stride=1):
        super(ResBlock, self).__init__()
        self.conv1x1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=1, bias=False)
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        # batch normalization
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.stride = stride

    def forward(self, x):
        residual = self.conv1x1(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # adding the skip connection
        out += residual
        out = self.relu(out)
        return out


class ResUnet(nn.Module):
    def __init__(self, in_shape, num_classes):
        super(ResUnet, self).__init__()
        in_channels, height, width = in_shape
        # self.L1 = IncResBlock(in_channels, 64)
        self.e1 = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=4, stride=2, padding=1),
            ResBlock(64, 64))
        self.e2 = nn.Sequential(
            nn.LeakyReLU(0.2,),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            ResBlock(128, 128))
        self.e2add = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128))
        self.e3 = nn.Sequential(
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2,),
            nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            ResBlock(256, 256))
        self.e4 = nn.Sequential(
            nn.LeakyReLU(0.2,),
            nn.Conv2d(256, 512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            ResBlock(512, 512))
        self.e4add = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512))
        self.e5 = nn.Sequential(
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2,),
            nn.Conv2d(512, 512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            ResBlock(512, 512))
        self.e6 = nn.Sequential(
            nn.LeakyReLU(0.2,),
            nn.Conv2d(512, 512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            ResBlock(512, 512))
        self.e6add = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512))
        self.e7 = nn.Sequential(
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2,),
            nn.Conv2d(512, 512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            ResBlock(512, 512))
        self.e8 = nn.Sequential(
            nn.LeakyReLU(0.2,),
            nn.Conv2d(512, 512, kernel_size=4, stride=2, padding=1))
            # nn.BatchNorm2d(512))
        self.d1 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(512, 512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.Dropout(p=0.5),
            ResBlock(512, 512))
        self.d2 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(1024, 512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.Dropout(p=0.5),
            ResBlock(512, 512))
        self.d3 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(1024, 512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            nn.Dropout(p=0.5),
            ResBlock(512, 512))
        self.d4 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(1024, 512, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(512),
            ResBlock(512, 512))
        self.d5 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(1024, 256, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            ResBlock(256, 256))
        self.d6 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(512, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            ResBlock(128, 128))
        self.d7 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(256, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            ResBlock(64, 64))
        self.d8 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1))
            # nn.BatchNorm2d(64),
            # nn.ReLU())
        self.out_l = nn.Sequential(
            nn.Conv2d(64, num_classes, kernel_size=1, stride=1))
            # nn.ReLU())

    def forward(self, x):
        # Image Encoder
        #### Encoder #####
        en1 = self.e1(x)
        en2 = self.e2(en1)
        en2add = self.e2add(en2)
        en3 = self.e3(en2add)
        en4 = self.e4(en3)
        en4add = self.e4add(en4)
        en5 = self.e5(en4add)
        en6 = self.e6(en5)
        en6add = self.e6add(en6)
        en7 = self.e7(en6add)
        en8 = self.e8(en7)
        #### Decoder ####
        de1_ = self.d1(en8)
        de1 = torch.cat([en7, de1_], 1)
        de2_ = self.d2(de1)
        de2 = torch.cat([en6add, de2_], 1)
        de3_ = self.d3(de2)
        de3 = torch.cat([en5, de3_], 1)
        de4_ = self.d4(de3)
        de4 = torch.cat([en4add, de4_], 1)
        de5_ = self.d5(de4)
        de5 = torch.cat([en3, de5_], 1)
        de6_ = self.d6(de5)
        de6 = torch.cat([en2add, de6_], 1)
        de7_ = self.d7(de6)
        de7 = torch.cat([en1, de7_], 1)
        de8 = self.d8(de7)
        out_l_mask = self.out_l(de8)
        return out_l_mask
Here is how I attempt to test it:
modl = ResUnet((1,512,512), 1)
x = torch.rand(1, 1, 512, 512)
modl(x)
This works fine, as it does for any size that is a multiple of 64.
If I try:
modl = ResUnet((1,320,320), 1)
x = torch.rand(1, 1, 320, 320)
modl(x)
It throws an error
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-46-4ddc821c365b> in <module>
----> 1 modl(x)
~/.conda/envs/torch0.4/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
--> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)
<ipython-input-36-f9eeefa3c0b8> in forward(self, x)
221 de2_ = self.d2(de1)
222 #print de2_.size()
--> 223 de2 = torch.cat([en6add,de2_],1)
224 #print de2.size()
225
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 1. Got 5 and 4 in dimension 2 at /opt/conda/conda-bld/pytorch_1535491974311/work/aten/src/TH/generic/THTensorMath.cpp:3616
I figure the problem is caused by the input size not being a power of 2, but I am not sure how to rectify it for the given input dimensions (320, 320).
This issue arises from a mismatch in size between the variables in the downsampling (encoder) path and the upsampling (decoder) path. Your code is huge and difficult to understand, but by inserting print statements we can check that
en6add is of size [1, 512, 5, 5]
en7 is [1, 512, 2, 2]
en8 is [1, 512, 1, 1]
and then the upsampling goes in powers of two:
de1_ is [1, 512, 2, 2]
de1 is [1, 1024, 2, 2]
de2_ is [1, 512, 4, 4]
at which point you try to concatenate it with en6add, so apparently the code creating de2_ is not "upsampling enough". My strong guess is that you need to pay attention to the output_padding parameter of nn.ConvTranspose2d and possibly set it to 1 in a couple of places. I would try and fix this error for you, but that example is so far from being minimal that I can't wrap my head around the whole of it.
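To make the output_padding suggestion concrete, here is a small standalone sketch (the 5x5 size is the en6add size from above; the channel counts just mirror the model): a stride-2 convolution maps 5 -> 2, but the matching ConvTranspose2d maps 2 -> 4 unless output_padding=1 is added, which recovers the odd size 5.

import torch
import torch.nn as nn

x = torch.randn(1, 512, 5, 5)
down = nn.Conv2d(512, 512, kernel_size=4, stride=2, padding=1)           # 5 -> 2
up = nn.ConvTranspose2d(512, 512, kernel_size=4, stride=2, padding=1)    # 2 -> 4
up_fixed = nn.ConvTranspose2d(512, 512, kernel_size=4, stride=2, padding=1,
                              output_padding=1)                          # 2 -> 5

y = down(x)
print(up(y).shape)        # torch.Size([1, 512, 4, 4]) -- cannot be concatenated with x
print(up_fixed(y).shape)  # torch.Size([1, 512, 5, 5]) -- matches the encoder feature map

In this model only the decoder stages whose encoder counterpart has an odd spatial size would need such a fix; a 512x512 input stays even through all eight stride-2 stages, which is why it runs without changes.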