U-Net with pre-trained ResNet throws "Sizes of tensors must match" dimension error

I have an RGB image and a segmentation mask, both of dimensions 900x600 (width x height).
My U-Net code is the following. I do not really want to resize the input too much; it is fine if the output is slightly resized, as long as it does not lose much of the aspect ratio.
import torch
import torch.nn as nn
from torchvision import models

def convrelu(in_channels, out_channels, kernel, padding):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel, padding=padding),
        nn.ReLU(inplace=True)
    )

class ResNetUNet(nn.Module):
    def __init__(self, n_class=1):
        super().__init__()
        self.base_model = models.resnet18(pretrained=True)
        self.base_layers = list(self.base_model.children())

        self.layer0 = nn.Sequential(*self.base_layers[:3])   # size=(N, 64, x.H/2, x.W/2)
        self.layer0_1x1 = convrelu(64, 64, 1, 0)
        self.layer1 = nn.Sequential(*self.base_layers[3:5])  # size=(N, 64, x.H/4, x.W/4)
        self.layer1_1x1 = convrelu(64, 64, 1, 0)
        self.layer2 = self.base_layers[5]                    # size=(N, 128, x.H/8, x.W/8)
        self.layer2_1x1 = convrelu(128, 128, 1, 0)
        self.layer3 = self.base_layers[6]                    # size=(N, 256, x.H/16, x.W/16)
        self.layer3_1x1 = convrelu(256, 256, 1, 0)
        self.layer4 = self.base_layers[7]                    # size=(N, 512, x.H/32, x.W/32)
        self.layer4_1x1 = convrelu(512, 512, 1, 0)

        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        self.conv_up3 = convrelu(256 + 512, 512, 3, 1)
        self.conv_up2 = convrelu(128 + 512, 256, 3, 1)
        self.conv_up1 = convrelu(64 + 256, 256, 3, 1)
        self.conv_up0 = convrelu(64 + 256, 128, 3, 1)

        self.conv_original_size0 = convrelu(3, 64, 3, 1)
        self.conv_original_size1 = convrelu(64, 64, 3, 1)
        self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1)

        self.conv_last = nn.Conv2d(64, n_class, 1)

    def forward(self, input):
        x_original = self.conv_original_size0(input)
        x_original = self.conv_original_size1(x_original)

        layer0 = self.layer0(input)
        layer1 = self.layer1(layer0)
        layer2 = self.layer2(layer1)
        layer3 = self.layer3(layer2)
        layer4 = self.layer4(layer3)

        layer4 = self.layer4_1x1(layer4)
        x = self.upsample(layer4)
        layer3 = self.layer3_1x1(layer3)
        x = torch.cat([x, layer3], dim=1)
        x = self.conv_up3(x)

        x = self.upsample(x)
        layer2 = self.layer2_1x1(layer2)
        x = torch.cat([x, layer2], dim=1)
        x = self.conv_up2(x)

        x = self.upsample(x)
        layer1 = self.layer1_1x1(layer1)
        x = torch.cat([x, layer1], dim=1)
        x = self.conv_up1(x)

        x = self.upsample(x)
        layer0 = self.layer0_1x1(layer0)
        x = torch.cat([x, layer0], dim=1)
        x = self.conv_up0(x)

        x = self.upsample(x)
        x = torch.cat([x, x_original], dim=1)
        x = self.conv_original_size2(x)

        out = self.conv_last(x)
        return out
For this command:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNetUNet()
model = model.to(device)
# check keras-like model summary using torchsummary
from torchsummary import summary
summary(model, input_size=(3, 600, 900))
it throws the error:
54 x = self.upsample(layer4)
55 layer3 = self.layer3_1x1(layer3)
---> 56 x = torch.cat([x, layer3], dim=1)
57 x = self.conv_up3(x)
58
RuntimeError: Sizes of tensors must match except in dimension 3. Got 57 and 58
Not sure what to do here. Could someone help me solve this?

Try this. You just need to match the tensor spatial shapes before each torch.cat.
import torch
import torch.nn as nn
from torchvision import models
import torch.nn.functional as F

def match_shapes(x, y):
    # Resize x to y's spatial size before concatenation. With odd intermediate
    # sizes (e.g. 57 vs 58 here), the upsampled tensor can end up one pixel
    # larger than the skip connection it is concatenated with.
    if x.shape[-2:] != y.shape[-2:]:
        x = F.interpolate(x, y.shape[-2:], mode='nearest')
    return x

def convrelu(in_channels, out_channels, kernel, padding):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel, padding=padding),
        nn.ReLU(inplace=True)
    )
class ResNetUNet(nn.Module):
    def __init__(self, n_class=1):
        super().__init__()
        # on newer torchvision, pretrained=True is deprecated in favor of
        # weights=models.ResNet18_Weights.DEFAULT
        self.base_model = models.resnet18(pretrained=True)
        self.base_layers = list(self.base_model.children())

        self.layer0 = nn.Sequential(*self.base_layers[:3])   # size=(N, 64, x.H/2, x.W/2)
        self.layer0_1x1 = convrelu(64, 64, 1, 0)
        self.layer1 = nn.Sequential(*self.base_layers[3:5])  # size=(N, 64, x.H/4, x.W/4)
        self.layer1_1x1 = convrelu(64, 64, 1, 0)
        self.layer2 = self.base_layers[5]                    # size=(N, 128, x.H/8, x.W/8)
        self.layer2_1x1 = convrelu(128, 128, 1, 0)
        self.layer3 = self.base_layers[6]                    # size=(N, 256, x.H/16, x.W/16)
        self.layer3_1x1 = convrelu(256, 256, 1, 0)
        self.layer4 = self.base_layers[7]                    # size=(N, 512, x.H/32, x.W/32)
        self.layer4_1x1 = convrelu(512, 512, 1, 0)

        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        self.conv_up3 = convrelu(256 + 512, 512, 3, 1)
        self.conv_up2 = convrelu(128 + 512, 256, 3, 1)
        self.conv_up1 = convrelu(64 + 256, 256, 3, 1)
        self.conv_up0 = convrelu(64 + 256, 128, 3, 1)

        self.conv_original_size0 = convrelu(3, 64, 3, 1)
        self.conv_original_size1 = convrelu(64, 64, 3, 1)
        self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1)

        self.conv_last = nn.Conv2d(64, n_class, 1)
    def forward(self, input):
        x_original = self.conv_original_size0(input)
        x_original = self.conv_original_size1(x_original)

        layer0 = self.layer0(input)
        layer1 = self.layer1(layer0)
        layer2 = self.layer2(layer1)
        layer3 = self.layer3(layer2)
        layer4 = self.layer4(layer3)

        layer4 = self.layer4_1x1(layer4)
        x = self.upsample(layer4)
        layer3 = self.layer3_1x1(layer3)
        x = match_shapes(x, layer3)
        x = torch.cat([x, layer3], dim=1)
        x = self.conv_up3(x)

        x = self.upsample(x)
        layer2 = self.layer2_1x1(layer2)
        x = match_shapes(x, layer2)
        x = torch.cat([x, layer2], dim=1)
        x = self.conv_up2(x)

        x = self.upsample(x)
        layer1 = self.layer1_1x1(layer1)
        x = match_shapes(x, layer1)
        x = torch.cat([x, layer1], dim=1)
        x = self.conv_up1(x)

        x = self.upsample(x)
        layer0 = self.layer0_1x1(layer0)
        x = match_shapes(x, layer0)  # a no-op for 600x900 input, but keeps other sizes safe
        x = torch.cat([x, layer0], dim=1)
        x = self.conv_up0(x)

        x = self.upsample(x)
        x = match_shapes(x, x_original)  # likewise guards odd input sizes
        x = torch.cat([x, x_original], dim=1)
        x = self.conv_original_size2(x)

        out = self.conv_last(x)
        return out
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNetUNet()
model = model.to(device)
# check keras-like model summary using torchsummary
from torchsummary import summary
summary(model, input_size=(3, 600, 900))
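As a quick sanity check that the forward pass now works end to end, you can push one dummy batch through the model (a sketch; random input, no training, and the expected shape assumes the default n_class=1):

x = torch.randn(1, 3, 600, 900).to(device)
with torch.no_grad():
    y = model(x)
print(y.shape)  # expect torch.Size([1, 1, 600, 900])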

Related

Training and validation losses decreasing slowly

I have implemented a 2D CNN model followed by a GRU layer, as shown below.
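The snippet references a conv2D_output_size helper and some imports that are not shown in the post; a common definition (an assumption on my part, using the standard convolution output-size formula, so that the class below is runnable) is:

import torch
import torch.nn as nn
import torch.nn.functional as F

def conv2D_output_size(img_size, padding, kernel_size, stride):
    # floor((W - K + 2P) / S) + 1, applied per spatial dimension
    return (
        (img_size[0] + 2 * padding[0] - kernel_size[0]) // stride[0] + 1,
        (img_size[1] + 2 * padding[1] - kernel_size[1]) // stride[1] + 1,
    )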
class CNN2D(nn.Module):
    def __init__(self, img_x=88, img_y=88, fc_hidden1=512, fc_hidden2=512,
                 drop_p=0.3, CNN_embed_dim=512, num_classes=9):
        super(CNN2D, self).__init__()
        self.img_x = img_x
        self.img_y = img_y
        self.CNN_embed_dim = CNN_embed_dim
        self.ch1, self.ch2, self.ch3, self.ch4 = 8, 16, 32, 64
        self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3)
        self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2)
        self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0)
        self.conv1_outshape = conv2D_output_size((self.img_x, self.img_y), self.pd1, self.k1, self.s1)  # Conv1 output shape
        self.conv2_outshape = conv2D_output_size(self.conv1_outshape, self.pd2, self.k2, self.s2)
        self.conv3_outshape = conv2D_output_size(self.conv2_outshape, self.pd3, self.k3, self.s3)
        self.conv4_outshape = conv2D_output_size(self.conv3_outshape, self.pd4, self.k4, self.s4)
        # fully connected layer hidden nodes
        self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2
        self.drop_p = drop_p
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=self.ch1, kernel_size=self.k1, stride=self.s1, padding=self.pd1),
            nn.BatchNorm2d(self.ch1, momentum=0.01),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=self.ch1, out_channels=self.ch2, kernel_size=self.k2, stride=self.s2, padding=self.pd2),
            nn.BatchNorm2d(self.ch2, momentum=0.01),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=self.ch2, out_channels=self.ch3, kernel_size=self.k3, stride=self.s3, padding=self.pd3),
            nn.BatchNorm2d(self.ch3, momentum=0.01),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=self.ch3, out_channels=self.ch4, kernel_size=self.k4, stride=self.s4, padding=self.pd4),
            nn.BatchNorm2d(self.ch4, momentum=0.01),
            nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=2),
        )
        self.drop = nn.Dropout2d(self.drop_p)
        self.pool = nn.MaxPool2d(2)
        # self.fc1 = nn.Linear(self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], self.fc_hidden1)
        # self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2)
        self.fc3 = nn.Linear(self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], self.CNN_embed_dim)  # output = CNN embedding latent variables
        self.num_classes = num_classes
        self.gru = nn.GRU(
            input_size=self.CNN_embed_dim,
            hidden_size=256,
            num_layers=1,
            batch_first=True,  # (batch, time_step, input_size)
        )
        # self.gfc1 = nn.Linear(256, 128)
        self.gfc2 = nn.Linear(256, self.num_classes)

    def forward(self, x_3d):
        cnn_embed_seq = []
        for t in range(x_3d.size(2)):
            # CNNs
            x = self.conv1(x_3d[:, :, t, :, :])
            x = self.conv2(x)
            x = self.conv3(x)
            x = self.conv4(x)
            x = x.reshape(x.size(0), -1)
            # fc1/fc2 are commented out in __init__, so go straight to fc3
            x = self.fc3(x)
            cnn_embed_seq.append(x)
        cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0).transpose_(0, 1)
        RNN_out, _ = self.gru(cnn_embed_seq, None)
        x = RNN_out[:, -1, :]
        x = F.relu(x)
        x = F.dropout(x, p=self.drop_p, training=self.training)
        x = self.gfc2(x)
        return x
Inputs are videos of shape [batch, channels, frames, height, width].
I used the Adam optimizer with lr=1e-5, weight_decay=5e-5, amsgrad=True, and cross-entropy loss.
The training and validation losses are decreasing slowly and the model is not converging.
What should I change?

Keras Tuner on autoencoder - add condition: first hidden layer units greater than or equal to next hidden layer units

I want to use Keras Tuner to tune an autoencoder's hyperparameters.
It is a symmetric AE with two layers. I want the number of units in the first layer to always be greater than or equal to the number of units in the second layer, but I don't know how to implement that with Keras Tuner. If someone can help, it would be great. Thank you in advance.
class DAE(tf.keras.Model):
    '''
    A DAE model
    '''
    def __init__(self, hp, **kwargs):
        '''
        DAE instantiation
        args:
            hp : Tuner
            input_dim : input dimension
        return:
            None
        '''
        super(DAE, self).__init__(**kwargs)
        input_dim = 15
        latent_dim = hp.Choice("latent_space", [2, 4, 8])
        units_0 = hp.Choice("units_0", [8, 16, 32, 64])
        units_1 = hp.Choice("units_1", [8, 16, 32, 64])
        for i in [8, 16, 32, 64]:
            with hp.conditional_scope("units_0", [i]):
                if units_0 == i:
                    ......?  # units_1 should be <= i
        dropout = hp.Choice("dropout_rate", [0.1, 0.2, 0.3, 0.4, 0.5])
        inputs = tf.keras.Input(shape=(input_dim,))
        x = layers.Dense(units_0, activation="relu")(inputs)
        x = layers.Dropout(dropout)(x)
        x = layers.Dense(units_1, activation="relu")(x)
        x = layers.Dropout(dropout)(x)
        z = layers.Dense(latent_dim)(x)
        self.encoder = tf.keras.Model(inputs, z, name="encoder")
        inputs = tf.keras.Input(shape=(latent_dim,))
        x = layers.Dense(units_1, activation="relu")(inputs)
        x = layers.Dropout(dropout)(x)
        x = layers.Dense(units_0, activation="relu")(x)
        x = layers.Dropout(dropout)(x)
        outputs = layers.Dense(input_dim, activation="linear")(x)
        self.decoder = tf.keras.Model(inputs, outputs, name="decoder")
See my code above; it's a denoising autoencoder class.
I found the solution: we need to create a different units_1 hyperparameter for each units_0 value.
class DAE(tf.keras.Model):
    '''
    A DAE model
    '''
    def __init__(self, hp, training=None, **kwargs):
        '''
        DAE instantiation
        args:
            hp : Tuner
            input_dim : input dimension
        return:
            None
        '''
        super(DAE, self).__init__(**kwargs)
        self.input_dim = 15
        l_units = [16, 32, 64, 128]
        latent_dim = hp.Choice("latent_space", [2, 4, 8])
        units_0 = hp.Choice("units_0", l_units)
        dropout_0 = hp.Choice("dropout_rate_0", [0.1, 0.2, 0.3, 0.4, 0.5])
        dropout_1 = hp.Choice("dropout_rate_1", [0.1, 0.2, 0.3, 0.4, 0.5])
        units_1 = None
        for i in l_units:
            name = "units_1_%d" % i  # generates a unique name for each hp.Int object
            with hp.conditional_scope("units_0", [i]):
                if units_0 == i:
                    # capture the active hyperparameter in a plain variable;
                    # writing through locals() inside a method is unreliable
                    units_1 = hp.Int(name, min_value=8, max_value=i, step=2, sampling="log")
        inputs = tf.keras.Input(shape=(self.input_dim,))
        x = layers.Dense(units_0, activation="relu")(inputs)
        x = layers.Dropout(dropout_0)(x, training=training)
        x = layers.Dense(units_1, activation="relu")(x)
        x = layers.Dropout(dropout_1)(x, training=training)
        z = layers.Dense(latent_dim)(x)
        self.encoder = tf.keras.Model(inputs, z, name="encoder")
        inputs = tf.keras.Input(shape=(latent_dim,))
        x = layers.Dense(units_1, activation="relu")(inputs)
        x = layers.Dropout(dropout_1)(x, training=training)
        x = layers.Dense(units_0, activation="relu")(x)
        x = layers.Dropout(dropout_0)(x, training=training)
        outputs = layers.Dense(self.input_dim, activation="linear")(x)
        self.decoder = tf.keras.Model(inputs, outputs, name="decoder")
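Note that the class as posted only builds self.encoder and self.decoder; to actually run a search it also needs a call() method. A minimal sketch of wiring this into Keras Tuner (the call() wrapper, optimizer, loss, and trial budget here are assumptions, not part of the original post):

import tensorflow as tf
import keras_tuner as kt

class TunableDAE(DAE):
    def call(self, inputs, training=None):
        # reconstruct: encode, then decode
        z = self.encoder(inputs, training=training)
        return self.decoder(z, training=training)

def build_model(hp):
    model = TunableDAE(hp)
    model.compile(optimizer="adam", loss="mse")
    return model

tuner = kt.RandomSearch(build_model, objective="val_loss", max_trials=10)
# tuner.search(x_train, x_train, validation_data=(x_val, x_val), epochs=20)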

Autoencoder hits invisible wall

I have a problem. I am currently trying to train an autoencoder on stock data. The data has been MinMax scaled.
The model seems to have an issue reconstructing values lower than 0.15 and higher than 0.8. It looks like there is an invisible barrier; I am not sure what to call this. How would you describe it?
(Plot: the model's output vs. the original data.)
My model uses the tanh activation function and consists of linear layers. The latent_dim is the same size as the input layer; I planned to reduce it later down the line:
class SparseEncoder(nn.Module):
    def __init__(self, input_shape: int, latent_dims, dtype=torch.float64):
        super().__init__()
        self.linear1 = nn.Linear(input_shape, 512, dtype=dtype)
        self.linear2 = nn.Linear(512, 256, dtype=dtype)
        self.linear3 = nn.Linear(256, 128, dtype=dtype)
        self.linear4 = nn.Linear(128, 64, dtype=dtype)
        self.linear5 = nn.Linear(64, 32, dtype=dtype)
        self.linear6 = nn.Linear(32, 16, dtype=dtype)
        self.linear7 = nn.Linear(16, 8, dtype=dtype)
        self.linear8 = nn.Linear(8, latent_dims, dtype=dtype)

    def forward(self, x):
        # y = torch.flatten(x, start_dim=1)
        z = torch.tanh(self.linear1(x))
        z = torch.tanh(self.linear2(z))
        z = torch.tanh(self.linear3(z))
        z = torch.tanh(self.linear4(z))
        z = torch.tanh(self.linear5(z))
        z = torch.tanh(self.linear6(z))
        z = torch.tanh(self.linear7(z))
        z = torch.tanh(self.linear8(z))
        return z

class SparseDecoder(nn.Module):
    def __init__(self, input_shape: int, latent_dims, dtype=torch.float64):
        super().__init__()
        self.linear1 = nn.Linear(latent_dims, 8, dtype=dtype)
        self.linear2 = nn.Linear(8, 16, dtype=dtype)
        self.linear3 = nn.Linear(16, 32, dtype=dtype)
        self.linear4 = nn.Linear(32, 64, dtype=dtype)
        self.linear5 = nn.Linear(64, 128, dtype=dtype)
        self.linear6 = nn.Linear(128, 256, dtype=dtype)
        self.linear7 = nn.Linear(256, 512, dtype=dtype)
        self.linear8 = nn.Linear(512, input_shape, dtype=dtype)

    def forward(self, x):
        # y = torch.flatten(x, start_dim=1)
        z = torch.tanh(self.linear1(x))
        z = torch.tanh(self.linear2(z))
        z = torch.tanh(self.linear3(z))
        z = torch.tanh(self.linear4(z))
        z = torch.tanh(self.linear5(z))
        z = torch.tanh(self.linear6(z))
        z = torch.tanh(self.linear7(z))
        z = torch.tanh(self.linear8(z))
        return z
Do you have any hints that could help me?
I tried to autoencode stock market data and expected the output to match the input 1:1, since the latent space is still the same size.

Trying to understand why my custom ResNet50 gives worse performance than transfer learning (without weights)?

I am doing a deep learning project (binary classification) with a ResNet on a small dataset (896 total images). I have tried several models, and ResNet gives me the best performance, even though the model suffers from exploding gradients with the SGD optimizer (Adam converges faster but fluctuates much more). (Code source)
But the model performs better when I try ResNet50 via transfer learning without pre-trained weights (weights=None); a sketch of that baseline appears after the training code below.
To my understanding, both models should perform similarly, but due to my limited coding experience I cannot work out why I get different results.
def identity_block(input_tensor, kernel_size, filters, stage, block):
    filters1, filters2, filters3 = filters
    bn_axis = 3
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    x = layers.Conv2D(filters1, (1, 1),
                      kernel_initializer='he_normal',
                      name=conv_name_base + '2a')(input_tensor)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(filters2, kernel_size,
                      padding='same',
                      kernel_initializer='he_normal',
                      name=conv_name_base + '2b')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(filters3, (1, 1),
                      kernel_initializer='he_normal',
                      name=conv_name_base + '2c')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
    x = layers.add([x, input_tensor])
    x = layers.Activation('relu')(x)
    return x

def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(2, 2)):
    filters1, filters2, filters3 = filters
    bn_axis = 3
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    x = layers.Conv2D(filters1, (1, 1), strides=strides,
                      kernel_initializer='he_normal',
                      name=conv_name_base + '2a')(input_tensor)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(filters2, kernel_size, padding='same',
                      kernel_initializer='he_normal',
                      name=conv_name_base + '2b')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(filters3, (1, 1),
                      kernel_initializer='he_normal',
                      name=conv_name_base + '2c')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
    shortcut = layers.Conv2D(filters3, (1, 1), strides=strides,
                             kernel_initializer='he_normal',
                             name=conv_name_base + '1')(input_tensor)
    shortcut = layers.BatchNormalization(
        axis=bn_axis, name=bn_name_base + '1')(shortcut)
    x = layers.add([x, shortcut])
    x = layers.Activation('relu')(x)
    return x

def ResNet50(input_shape, classes):
    bn_axis = 3
    img_input = Input(input_shape)
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
    x = layers.Conv2D(64, (7, 7),
                      strides=(2, 2),
                      padding='valid',
                      kernel_initializer='he_normal',
                      name='conv1')(x)  # was applied to img_input, which silently skipped conv1_pad
    x = layers.BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    x = layers.Dense(classes, activation='sigmoid', name='fc')(x)
    # Create model.
    model = models.Model(inputs=img_input, outputs=x, name='resnet50')
    return model
model_resnet = ResNet50(input_shape=(256, 256, 3), classes=1)  # channels-last, to match bn_axis=3; was (3, 256, 256)
# compile model
model_resnet.compile(loss='binary_crossentropy',
                     optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                     metrics=['accuracy', 'Recall', 'Precision'])
# make directory for logs (model_name is defined elsewhere)
logdir = os.path.join('logs', model_name)
# os.mkdir(logdir)
from math import floor
N_FOLDS = 5
INIT_LR = 1e-4  # 0.001
T_BS = 16
V_BS = 16
decay_rate = 0.95
decay_step = 1
# early stopping (PATIENCE is defined elsewhere)
cp = EarlyStopping(monitor='val_loss', mode='min', verbose=2, patience=PATIENCE, restore_best_weights=True)
mc = ModelCheckpoint(model_name, monitor='val_loss', mode='min', verbose=2, save_best_only=True)
tsb = TensorBoard(log_dir=logdir)
lrs = LearningRateScheduler(lambda epoch: INIT_LR * pow(decay_rate, floor(epoch / decay_step)))
# training
start = timer()
# Fit the model
history_resnet = model_resnet.fit(train_g,  # was train_g1, presumably a typo
                                  epochs=1000,
                                  steps_per_epoch=len(train_g),
                                  validation_data=val_g,
                                  validation_steps=len(val_g),
                                  callbacks=[cp, mc, tsb])
end = timer()
elapsed = end - start
print('Total Time Elapsed: ', int(elapsed // 60), ' minutes ', round(elapsed % 60), ' seconds')
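For comparison, the transfer-learning baseline mentioned above (keras.applications ResNet50 with weights=None) can be assembled roughly like this (a sketch; the pooling choice and the sigmoid head are assumptions):

base = tf.keras.applications.ResNet50(weights=None, include_top=False,
                                      input_shape=(256, 256, 3), pooling='avg')
out = layers.Dense(1, activation='sigmoid', name='fc')(base.output)
baseline = models.Model(inputs=base.input, outputs=out, name='resnet50_baseline')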

PyTorch save and load model

Is there any difference between the original model and a saved-then-loaded model?
Before training, I saved the model and then loaded it again, because I wanted to know whether anything changes during saving and loading.
Here's my code.
Just a model for testing:
class test_model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=50, kernel_size=3, stride=1, padding=1, bias=False)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        self.bn1 = nn.BatchNorm2d(50)
        self.conv2_ = nn.Conv2d(in_channels=50, out_channels=10, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=50, kernel_size=3, stride=1, padding=1, bias=False)
        self.maxpool2 = nn.MaxPool2d(2, 2)
        self.bn2 = nn.BatchNorm2d(50)
        self.conv3_ = nn.Conv2d(in_channels=50, out_channels=10, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv3 = nn.Conv2d(in_channels=10, out_channels=50, kernel_size=3, stride=1, padding=1, bias=False)
        self.maxpool3 = nn.MaxPool2d(2, 2)
        self.bn3 = nn.BatchNorm2d(50)
        self.conv4_ = nn.Conv2d(in_channels=50, out_channels=20, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv4 = nn.Conv2d(in_channels=20, out_channels=100, kernel_size=3, stride=1, padding=1, bias=False)
        self.maxpool4 = nn.MaxPool2d(2, 2)
        self.bn4 = nn.BatchNorm2d(100)
        self.conv5_ = nn.Conv2d(in_channels=100, out_channels=10, kernel_size=1, stride=1, padding=0, bias=False)
        self.conv5 = nn.Conv2d(in_channels=10, out_channels=100, kernel_size=3, stride=1, padding=1, bias=False)
        self.maxpool5 = nn.MaxPool2d(2, 2)
        self.bn5 = nn.BatchNorm2d(100)
        self.fc = nn.Sequential(nn.Flatten(),  # was a bare Flatten(); nn.Flatten is the built-in layer
                                nn.Linear(100 * 7 * 7, 100),
                                nn.ReLU(),
                                nn.Linear(100, 100))

    def forward(self, inputs):
        feature_map1 = self.conv1(inputs)
        feature_map1 = self.maxpool1(feature_map1)
        feature_map1 = self.bn1(feature_map1)
        feature_map2 = self.conv2_(feature_map1)
        feature_map2 = self.conv2(feature_map2)
        feature_map2 = self.maxpool2(feature_map2)
        feature_map2 = self.bn2(feature_map2)
        feature_map3 = self.conv3_(feature_map2)
        feature_map3 = self.conv3(feature_map3)
        feature_map3 = self.maxpool3(feature_map3)
        feature_map3 = self.bn3(feature_map3)
        feature_map4 = self.conv4_(feature_map3)
        feature_map4 = self.conv4(feature_map4)
        feature_map4 = self.maxpool4(feature_map4)
        feature_map4 = self.bn4(feature_map4)
        feature_map5 = self.conv5_(feature_map4)
        feature_map5 = self.conv5(feature_map5)
        feature_map5 = self.maxpool5(feature_map5)
        feature_map5 = self.bn5(feature_map5)
        output = self.fc(feature_map5)
        return output
Then:
model_cpu = test_model()
Save and load:
torch.save(model_cpu, '/home/mskang/hyeokjong/model_cpu.pt')
model_load = torch.load('/home/mskang/hyeokjong/model_cpu.pt')
And:
model_load == model_cpu
------------------------------------
False
However,
print(model_load)
print(model_cpu)
look the same. Furthermore, I also trained both models (model_load, model_cpu) and the results look the same too.
So I think those two models are the same and should be the same.
But why False?
They have the same underlying parameters but are different Python objects, which is why model_load == model_cpu returns False: nn.Module does not define __eq__, so Python falls back to the default identity comparison. You can see model_load and model_cpu as two copies of the same nn.Module.
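If you want to verify that the weights really survived the round trip, compare the state dicts instead of the module objects (a quick sketch):

sd1, sd2 = model_cpu.state_dict(), model_load.state_dict()
same = sd1.keys() == sd2.keys() and all(torch.equal(sd1[k], sd2[k]) for k in sd1)
print(same)  # True: every parameter and buffer matches exactly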