Error in validation accuracy of multiview model - deep-learning

I created a multi-view model to classify the composition and BIRADS category of breast cancer. The training is taking place, but here is the problem: the validation success of the model is very low and stable. Why do you think this error is caused? Python notebook codes are below.
Thank you.
enter image description here
train_generatorRCC = tf.keras.preprocessing.image_dataset_from_directory(training_dirRCC,
image_size=(512,512),
shuffle=False,
label_mode='categorical',
batch_size = 16)
validation_generatorRCC = tf.keras.preprocessing.image_dataset_from_directory(validation_dirRCC,
image_size=(512,512),
shuffle=False,
label_mode='categorical',
batch_size= 16)
train_generatorLCC = tf.keras.preprocessing.image_dataset_from_directory(training_dirLCC,
image_size=(512,512),
shuffle=False,
label_mode='categorical',
batch_size = 16)
validation_generatorLCC = tf.keras.preprocessing.image_dataset_from_directory(validation_dirLCC,
image_size=(512,512),
shuffle=False,
label_mode='categorical',
batch_size= 16)
train_generatorLMLO = tf.keras.preprocessing.image_dataset_from_directory(training_dirLMLO,
image_size=(512,512),
shuffle=False,
label_mode='categorical',
batch_size = 16)
validation_generatorLMLO = tf.keras.preprocessing.image_dataset_from_directory(validation_dirLMLO,
image_size=(512,512),
shuffle=False,
label_mode='categorical',
batch_size= 16)
train_generatorRMLO = tf.keras.preprocessing.image_dataset_from_directory(training_dirRMLO,
image_size=(512,512),
shuffle=False,
label_mode='categorical',
batch_size = 16)
validation_generatorRMLO = tf.keras.preprocessing.image_dataset_from_directory(validation_dirRMLO,
image_size=(512,512),
shuffle=False,
label_mode='categorical',
batch_size= 16)
def VGG16(input_layer):
X = Conv2D(filters=64,kernel_size=(3,3),padding="same", activation="relu")(input_layer)
X = (Conv2D(filters=64,kernel_size=(3,3),padding="same", activation="relu"))(X)
X = (MaxPooling2D(pool_size=(2,2),strides=(2,2)))(X)
X = (Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))(X)
X = (Conv2D(filters=128, kernel_size=(3,3), padding="same", activation="relu"))(X)
X = (MaxPooling2D(pool_size=(2,2),strides=(2,2)))(X)
X = (Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))(X)
X = (Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))(X)
X = (Conv2D(filters=256, kernel_size=(3,3), padding="same", activation="relu"))(X)
X = (MaxPooling2D(pool_size=(2,2),strides=(2,2)))(X)
X = (Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))(X)
X = (Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))(X)
X = (Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))(X)
X = (MaxPooling2D(pool_size=(2,2),strides=(2,2)))(X)
X = (Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))(X)
X = (Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))(X)
X = (Conv2D(filters=512, kernel_size=(3,3), padding="same", activation="relu"))(X)
X = (MaxPooling2D(pool_size=(2,2),strides=(2,2)))(X)
return X
input_layer1 = Input(shape = (256, 256, 3))
input_layer2 = Input(shape = (256, 256, 3))
input_layer3 = Input(shape = (256, 256, 3))
input_layer4 = Input(shape = (256, 256, 3))
def Multi_VGG16():
vgg1 = VGG16(input_layer1)
vgg2 = VGG16(input_layer2)
vgg3 = VGG16(input_layer3)
vgg4 = VGG16(input_layer4)
mergedVGG = keras.layers.concatenate([vgg1, vgg2, vgg3, vgg4])
X = Flatten()(mergedVGG)
X = (Dense(units=4096,activation="relu"))(X)
X = (Dense(units=4096,activation="relu"))(X)
X = Dense(4, activation = 'softmax')(X)
model = Model([input_layer1, input_layer2, input_layer3, input_layer4], [X], name = 'VGG16')
model.compile(loss = "categorical_crossentropy",
optimizer = 'sgd',
metrics = [tf.keras.metrics.Precision(),
tf.keras.metrics.Recall(),
# tf.keras.metrics.TruePositives(),
# tf.keras.metrics.TrueNegatives(),
# tf.keras.metrics.FalsePositives(),
# tf.keras.metrics.FalseNegatives()
])
return model
model = Multi_VGG16()
def resize_data1(images, classes):
return (tf.image.resize(images, (256, 256)),
classes)
def resize_data2(images, classes):
return (tf.image.resize(images, (256, 256)),
classes)
def resize_data3(images, classes):
return (tf.image.resize(images, (256, 256)),
classes)
def resize_data4(images, classes):
return (tf.image.resize(images, (256, 256)),
classes)
def post_zip_process(example1, example2, example3, example4):
return (example1[0], example2[0], example3[0], example4[0]), example1[1]
RCC_train_dataset = train_generatorRCC.map(resize_data1)
LCC_train_dataset = train_generatorLCC.map(resize_data2)
RMLO_train_dataset = train_generatorRMLO.map(resize_data3)
LMLO_train_dataset = train_generatorLMLO.map(resize_data4)
train_dataset_zip = tf.data.Dataset.zip((RCC_train_dataset, LCC_train_dataset, RMLO_train_dataset,LMLO_train_dataset))
train_dataset_zip = train_dataset_zip.map(post_zip_process)
RCC_valid_dataset = validation_generatorRCC.map(resize_data1)
LCC_valid_dataset = validation_generatorLCC.map(resize_data2)
RMLO_valid_dataset = validation_generatorRMLO.map(resize_data3)
LMLO_valid_dataset = validation_generatorLMLO.map(resize_data4)
valid_dataset_zip = tf.data.Dataset.zip((RCC_valid_dataset, LCC_valid_dataset, RMLO_valid_dataset,LMLO_valid_dataset))
valid_dataset_zip = valid_dataset_zip.map(post_zip_process)
history = model.fit( train_dataset_zip,
epochs = 10,
validation_data = valid_dataset_zip,
shuffle = False
)
I tried most methods I could think of and did extensive research but couldn't find any solution.

Related

training and validation losses decreasing slowly

i have implemented 2DCNN model followed by GRU layer
class CNN2D(nn.Module):
def __init__(self, img_x=88, img_y=88, fc_hidden1=512, fc_hidden2=512, drop_p=0.3, CNN_embed_dim=512,num_classes=9):
super(CNN2D, self).__init__()
self.img_x = img_x
self.img_y = img_y
self.CNN_embed_dim = CNN_embed_dim
self.ch1, self.ch2, self.ch3, self.ch4 = 8, 16, 32, 64
self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3)
self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2)
self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0)
self.conv1_outshape = conv2D_output_size((self.img_x, self.img_y), self.pd1, self.k1, self.s1) # Conv1 output shape
self.conv2_outshape = conv2D_output_size(self.conv1_outshape, self.pd2, self.k2, self.s2)
self.conv3_outshape = conv2D_output_size(self.conv2_outshape, self.pd3, self.k3, self.s3)
self.conv4_outshape = conv2D_output_size(self.conv3_outshape, self.pd4, self.k4, self.s4)
# fully connected layer hidden nodes
self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2
self.drop_p = drop_p
self.conv1 = nn.Sequential(
nn.Conv2d(in_channels=1, out_channels=self.ch1, kernel_size=self.k1, stride=self.s1, padding=self.pd1),
nn.BatchNorm2d(self.ch1, momentum=0.01),
nn.ReLU(inplace=True),
# nn.MaxPool2d(kernel_size=2),
)
self.conv2 = nn.Sequential(
nn.Conv2d(in_channels=self.ch1, out_channels=self.ch2, kernel_size=self.k2, stride=self.s2, padding=self.pd2),
nn.BatchNorm2d(self.ch2, momentum=0.01),
nn.ReLU(inplace=True),
# nn.MaxPool2d(kernel_size=2),
)
self.conv3 = nn.Sequential(
nn.Conv2d(in_channels=self.ch2, out_channels=self.ch3, kernel_size=self.k3, stride=self.s3, padding=self.pd3),
nn.BatchNorm2d(self.ch3, momentum=0.01),
nn.ReLU(inplace=True),
# nn.MaxPool2d(kernel_size=2),
)
self.conv4 = nn.Sequential(
nn.Conv2d(in_channels=self.ch3, out_channels=self.ch4, kernel_size=self.k4, stride=self.s4, padding=self.pd4),
nn.BatchNorm2d(self.ch4, momentum=0.01),
nn.ReLU(inplace=True),
# nn.MaxPool2d(kernel_size=2),
)
self.drop = nn.Dropout2d(self.drop_p)
self.pool = nn.MaxPool2d(2)
#self.fc1 = nn.Linear(self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], self.fc_hidden1) # fully connected layer, output k classes
#self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2)
self.fc3 = nn.Linear(self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], self.CNN_embed_dim) # output = CNN embedding latent variables
self.num_classes = num_classes
self.gru = nn.GRU(
input_size=self.CNN_embed_dim,
hidden_size=256,
num_layers=1,
batch_first=True,(batch, time_step, input_size)
)
#self.gfc1 = nn.Linear(256, 128)
self.gfc2 = nn.Linear(256, self.num_classes)
def forward(self, x_3d):
cnn_embed_seq = []
for t in range(x_3d.size(2)):
# CNNs
x = self.conv1(x_3d[:, :, t, :, :])
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = x.reshape(x.size(0), -1)
x = F.relu(self.fc1(x))
x = F.dropout(x, p=self.drop_p, training=self.training)
x = F.relu(self.fc2(x))
x = F.dropout(x, p=self.drop_p, training=self.training)
x = self.fc3(x)
cnn_embed_seq.append(x)
cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0).transpose_(0, 1)
RNN_out, _ = self.gru(cnn_embed_seq, None)
x = RNN_out[:, -1, :]
x = F.relu(x)
x = F.dropout(x, p=self.drop_p, training=self.training) NEW UPDATE
x = self.gfc2(x)
return x
inputs are videos of shape [batch,channels,frames,height,width]
i used adam optimizer with lr=1e-5 ,weight_decay=5e-5 ,amsgrad=True and cross entropy loss
training and validation losses are decreasing slowly and model is not converging
what should i change ?

why my multi-output regression using pytorch only optimize one output?

I want to predict three outputs, the model is as follows. the features of input is 9, output is 3.
class DNN(nn.Module):
def __init__(self, n_features):
self.n_features = n_features
super(DNN, self).__init__()
self.inlayer1 = nn.Linear(self.n_features, 16)
self.layer2 = nn.Linear(16, 32)
self.layer3 = nn.Linear(32, 64)
self.layer4 = nn.Linear(64, 128)
self.layer5 = nn.Linear(128, 256)
self.layer6 = nn.Linear(256, 256)
self.layer7 = nn.Linear(256, 128)
self.layer8 = nn.Linear(128, 64)
self.layer9 = nn.Linear(64, 32)
self.layer10 = nn.Linear(32, 16)
self.outlayer = nn.Linear(16, 3)
def forward(self, x):
x = F.elu(self.inlayer1(x))
x = F.elu(self.layer2(x))
x = F.elu(self.layer3(x))
x = F.elu(self.layer4(x))
x = F.elu(self.layer5(x))
x = F.elu(self.layer6(x))
x = F.elu(self.layer7(x))
x = F.elu(self.layer8(x))
x = F.elu(self.layer9(x))
x = F.elu(self.layer10(x))
out = self.outlayer(x)
return out
The train code
def train(net, train_features, train_labels, test_features, test_labels,
num_epochs, learning_rate, weight_decay, batch_size):
train_ls, test_ls = [], []
train_iter = d2l.load_array((train_features, train_labels), batch_size)
optimizer = torch.optim.Adam(net.parameters(),
lr = learning_rate,
weight_decay = weight_decay)
for epoch in range(num_epochs):
for X, y in train_iter:
optimizer.zero_grad()
out = net(X) ##out.shape is (100 samples, 3 labels)
loss = MSEloss(out, y)
loss.backward()
optimizer.step()
train_ls.append(MSEloss(net(train_features), train_labels).item())
if test_labels is not None:
test_ls.append(MSEloss(net(test_features), test_labels).item())
return train_ls, test_ls
after running the model, the below result is incorrect, but i don't know where is the bug? It seems that only the first col label is right. Should i change my method of calculating loss?
the below is the result.
the R2 and MSE metrics for three outputs
I tried to calculate the three outputs(out1, out2, out3) separately by change the number of output neurons to 1, then calculate the weighted loss, but it didn't work, even all three outputs are not close to the real label.

U-Net with Pre-Trained ResNet throws dimension error must match

I have an RGB image of mask for Segmentation of dimensions 900x600 (width, height)
My U-Net code is the ff. I do not really want to resize the output too much it is fine if it is resized without losing much of the aspect ratio.
import torch
import torch.nn as nn
from torchvision import models
def convrelu(in_channels, out_channels, kernel, padding):
return nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel, padding=padding),
nn.ReLU(inplace=True)
)
class ResNetUNet(nn.Module):
def __init__(self, n_class=1):
super().__init__()
self.base_model = models.resnet18(pretrained=True)
self.base_layers = list(self.base_model.children())
self.layer0 = nn.Sequential(*self.base_layers[:3]) # size=(N, 64, x.H/2, x.W/2)
self.layer0_1x1 = convrelu(64, 64, 1, 0)
self.layer1 = nn.Sequential(*self.base_layers[3:5]) # size=(N, 64, x.H/4, x.W/4)
self.layer1_1x1 = convrelu(64, 64, 1, 0)
self.layer2 = self.base_layers[5] # size=(N, 128, x.H/8, x.W/8)
self.layer2_1x1 = convrelu(128, 128, 1, 0)
self.layer3 = self.base_layers[6] # size=(N, 256, x.H/16, x.W/16)
self.layer3_1x1 = convrelu(256, 256, 1, 0)
self.layer4 = self.base_layers[7] # size=(N, 512, x.H/32, x.W/32)
self.layer4_1x1 = convrelu(512, 512, 1, 0)
self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
self.conv_up3 = convrelu(256 + 512, 512, 3, 1)
self.conv_up2 = convrelu(128 + 512, 256, 3, 1)
self.conv_up1 = convrelu(64 + 256, 256, 3, 1)
self.conv_up0 = convrelu(64 + 256, 128, 3, 1)
self.conv_original_size0 = convrelu(3, 64, 3, 1)
self.conv_original_size1 = convrelu(64, 64, 3, 1)
self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1)
self.conv_last = nn.Conv2d(64, n_class, 1)
def forward(self, input):
x_original = self.conv_original_size0(input)
x_original = self.conv_original_size1(x_original)
layer0 = self.layer0(input)
layer1 = self.layer1(layer0)
layer2 = self.layer2(layer1)
layer3 = self.layer3(layer2)
layer4 = self.layer4(layer3)
layer4 = self.layer4_1x1(layer4)
x = self.upsample(layer4)
layer3 = self.layer3_1x1(layer3)
x = torch.cat([x, layer3], dim=1)
x = self.conv_up3(x)
x = self.upsample(x)
layer2 = self.layer2_1x1(layer2)
x = torch.cat([x, layer2], dim=1)
x = self.conv_up2(x)
x = self.upsample(x)
layer1 = self.layer1_1x1(layer1)
x = torch.cat([x, layer1], dim=1)
x = self.conv_up1(x)
x = self.upsample(x)
layer0 = self.layer0_1x1(layer0)
x = torch.cat([x, layer0], dim=1)
x = self.conv_up0(x)
x = self.upsample(x)
x = torch.cat([x, x_original], dim=1)
x = self.conv_original_size2(x)
out = self.conv_last(x)
return out
for this command
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNetUNet()
model = model.to(device)
# check keras-like model summary using torchsummary
from torchsummary import summary
summary(model, input_size=(3, 600, 900))
it throws the error:
54 x = self.upsample(layer4)
55 layer3 = self.layer3_1x1(layer3)
---> 56 x = torch.cat([x, layer3], dim=1)
57 x = self.conv_up3(x)
58
RuntimeError: Sizes of tensors must match except in dimension 3. Got 57 and 58
Not sure what to do here. Could someone help me how to solve this?
Try this. You just need to match tensor shapes before torch.cat.
import torch
import torch.nn as nn
from torchvision import models
import torch.nn.functional as F
def match_shapes(x, y):
if x.shape[-2:] != y.shape[-2:]:
x = F.interpolate(x, y.shape[-2:], mode='nearest')
return x
def convrelu(in_channels, out_channels, kernel, padding):
return nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel, padding=padding),
nn.ReLU(inplace=True)
)
class ResNetUNet(nn.Module):
def __init__(self, n_class=1):
super().__init__()
self.base_model = models.resnet18(pretrained=True)
self.base_layers = list(self.base_model.children())
self.layer0 = nn.Sequential(*self.base_layers[:3]) # size=(N, 64, x.H/2, x.W/2)
self.layer0_1x1 = convrelu(64, 64, 1, 0)
self.layer1 = nn.Sequential(*self.base_layers[3:5]) # size=(N, 64, x.H/4, x.W/4)
self.layer1_1x1 = convrelu(64, 64, 1, 0)
self.layer2 = self.base_layers[5] # size=(N, 128, x.H/8, x.W/8)
self.layer2_1x1 = convrelu(128, 128, 1, 0)
self.layer3 = self.base_layers[6] # size=(N, 256, x.H/16, x.W/16)
self.layer3_1x1 = convrelu(256, 256, 1, 0)
self.layer4 = self.base_layers[7] # size=(N, 512, x.H/32, x.W/32)
self.layer4_1x1 = convrelu(512, 512, 1, 0)
self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
self.conv_up3 = convrelu(256 + 512, 512, 3, 1)
self.conv_up2 = convrelu(128 + 512, 256, 3, 1)
self.conv_up1 = convrelu(64 + 256, 256, 3, 1)
self.conv_up0 = convrelu(64 + 256, 128, 3, 1)
self.conv_original_size0 = convrelu(3, 64, 3, 1)
self.conv_original_size1 = convrelu(64, 64, 3, 1)
self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1)
self.conv_last = nn.Conv2d(64, n_class, 1)
def forward(self, input):
x_original = self.conv_original_size0(input)
x_original = self.conv_original_size1(x_original)
layer0 = self.layer0(input)
layer1 = self.layer1(layer0)
layer2 = self.layer2(layer1)
layer3 = self.layer3(layer2)
layer4 = self.layer4(layer3)
layer4 = self.layer4_1x1(layer4)
x = self.upsample(layer4)
layer3 = self.layer3_1x1(layer3)
x = match_shapes(x, layer3)
x = torch.cat([x, layer3], dim=1)
x = self.conv_up3(x)
x = self.upsample(x)
layer2 = self.layer2_1x1(layer2)
x = match_shapes(x, layer2)
x = torch.cat([x, layer2], dim=1)
x = self.conv_up2(x)
x = self.upsample(x)
layer1 = self.layer1_1x1(layer1)
x = match_shapes(x, layer1)
x = torch.cat([x, layer1], dim=1)
x = self.conv_up1(x)
x = self.upsample(x)
layer0 = self.layer0_1x1(layer0)
x = torch.cat([x, layer0], dim=1)
x = self.conv_up0(x)
x = self.upsample(x)
x = torch.cat([x, x_original], dim=1)
x = self.conv_original_size2(x)
out = self.conv_last(x)
return out
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNetUNet()
model = model.to(device)
# check keras-like model summary using torchsummary
from torchsummary import summary
summary(model, input_size=(3, 600, 900))

Got 512 channels instead of 64 - what should I change in my Autoencoder?

ndf = 128
z_size = 512
# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
def __init__(self):
super(MyNetwork, self).__init__()
self.encoder = nn.Sequential(
nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=1),
nn.ReLU(True),
nn.Conv2d(6,16,kernel_size=3, stride=1, padding=1),
nn.ReLU(True),
nn.Conv2d(16,64,kernel_size=3, stride=1, padding=1),
nn.ReLU(True))
self.decoder = nn.Sequential(
nn.ConvTranspose2d(64,16,kernel_size=3, stride=1, padding=1),
nn.ReLU(True),
nn.ConvTranspose2d(16,6,kernel_size=3, stride=1, padding=1),
nn.ReLU(True),
nn.ConvTranspose2d(6,3,kernel_size=3, stride=1, padding=1),
nn.ReLU(True))
self.fc1 = nn.Linear(ndf*4*8*16,z_size)
self.fc2 = nn.Linear(ndf*4*8*16,z_size)
self.d1 = nn.Linear(z_size, ndf*4*8*8)
self.z_size = z_size
self.d_max = ndf *4
def encode(self, x):
x = self.encoder(x)
x = x.view(x.shape[0], -1)
mu = self.fc1(x)
log_var = self.fc2(x)
return mu, log_var
def decode(self,x):
x = x.view(x.shape[0], self.z_size)
x = self.d1(x)
x = x.view(x.shape[0], self.d_max, 8,8)
x = self.decoder(x)
return x
def reparameterize(self, mu, log_var):
std = torch.exp(0.5 * log_var)
eps = torch.randn_like(std)
return eps.mul(std).add_(mu)
def forward(self, x):
mu, log_var = self.encode(x)
mu = mu.squeeze()
log_var = log_var.squeeze()
z = self.reparameterize(mu, log_var)
return self.decode(z.view(-1, self.z_size, 1, 1)), mu, log_var
I have adapted code from a tutorial and I'm currently getting the error 'Given transposed=1, weight of size 64 16 3 3, expected input[16, 512, 8, 8] to have 64 channels, but got 512 channels instead' when attempting to run my auto-encoder.
Could someone please explain how I should further adapt this code, using the CIFAR10 dataset, which a batch size of 16.
Skimmed through your code and found that input and output dimension was not proper.
Assuming both the input and output array shape as 32x32x3 I formatted this code.
ndf = 128
z_size = 512
# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
def __init__(self):
super(MyNetwork, self).__init__()
self.encoder = nn.Sequential(
nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=1),
nn.ReLU(True),
nn.Conv2d(6,16,kernel_size=3, stride=1, padding=1),
nn.ReLU(True),
nn.Conv2d(16,64,kernel_size=3, stride=1, padding=1),
nn.ReLU(True))
self.decoder = nn.Sequential(
nn.ConvTranspose2d(64,16,kernel_size=3, stride=1, padding=1),
nn.ReLU(True),
nn.ConvTranspose2d(16,6,kernel_size=3, stride=1, padding=1),
nn.ReLU(True),
nn.ConvTranspose2d(6,3,kernel_size=3, stride=1, padding=1),
nn.ReLU(True))
self.fc1 = nn.Linear(ndf*4*8*16,z_size)
self.fc2 = nn.Linear(ndf*4*8*16,z_size)
self.d1 = nn.Linear(z_size, ndf*4*8*16)
self.z_size = z_size
self.d_max = ndf *4
def encode(self, x):
x = self.encoder(x)
x = x.view(x.shape[0], -1)
mu = self.fc1(x)
log_var = self.fc2(x)
return mu, log_var
def decode(self,x):
x = x.view(x.shape[0], self.z_size)
x = self.d1(x)
x = x.view(x.shape[0], 64, 32, 32)
x = self.decoder(x)
return x
def reparameterize(self, mu, log_var):
std = torch.exp(0.5 * log_var)
eps = torch.randn_like(std)
return eps.mul(std).add_(mu)
def forward(self, x):
mu, log_var = self.encode(x)
mu = mu.squeeze()
log_var = log_var.squeeze()
z = self.reparameterize(mu, log_var)
return self.decode(z.view(-1, self.z_size, 1, 1)), mu, log_var
Hope this code works :)
The input to the decoder (x = self.decoder(x) in forward function) should have 64 channels as defined by nn.ConvTranspose2d(64,16,kernel_size=3, stride=1, padding=1).
In order to do so, you can do the following change:
Change self.d1 = nn.Linear(z_size, ndf*4*8*8) to self.d1 = nn.Linear(z_size, ndf*4*8*16).
Change from x = x.view(x.shape[0], self.d_max, 8,8) in decoder method to x = x.view(x.shape[0], 64, 32, 32)
Use the print statements to analyze the shape of input tensor at different layers:
ndf = 128
z_size = 512
# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
def __init__(self):
super(MyNetwork, self).__init__()
self.encoder = nn.Sequential(
nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=1),
nn.ReLU(True),
nn.Conv2d(6,16,kernel_size=3, stride=1, padding=1),
nn.ReLU(True),
nn.Conv2d(16,64,kernel_size=3, stride=1, padding=1),
nn.ReLU(True))
self.decoder = nn.Sequential(
nn.ConvTranspose2d(64,16,kernel_size=3, stride=1, padding=1),
nn.ReLU(True),
nn.ConvTranspose2d(16,6,kernel_size=3, stride=1, padding=1),
nn.ReLU(True),
nn.ConvTranspose2d(6,3,kernel_size=3, stride=1, padding=1),
nn.ReLU(True))
self.fc1 = nn.Linear(ndf*4*8*16,z_size)
self.fc2 = nn.Linear(ndf*4*8*16,z_size)
self.d1 = nn.Linear(z_size, ndf*4*8*16)
self.z_size = z_size
self.d_max = ndf *4
def encode(self, x):
print('encoder')
print(x.shape)
x = self.encoder(x)
print(x.shape)
x = x.view(x.shape[0], -1)
print(x.shape)
mu = self.fc1(x)
print(mu.shape)
log_var = self.fc2(x)
print(log_var.shape)
return mu, log_var
def decode(self,x):
print('decoder')
print(x.shape)
x = x.view(x.shape[0], self.z_size)
print(x.shape)
x = self.d1(x)
print(x.shape)
x = x.view(x.shape[0], 64, 32, 32)
print(x.shape)
x = self.decoder(x)
print(x.shape)
return x
def reparameterize(self, mu, log_var):
std = torch.exp(0.5 * log_var)
eps = torch.randn_like(std)
return eps.mul(std).add_(mu)
def forward(self, x):
mu, log_var = self.encode(x)
mu = mu.squeeze()
log_var = log_var.squeeze()
z = self.reparameterize(mu, log_var)
return self.decode(z.view(-1, self.z_size, 1, 1)), mu, log_var

Pytorch-Optimzer doesn't update parameters

I made my custom model, AlexNetQIL (Alexnet with QIL layer)
'QIL' means quantization intervals learning
I trained my model and loss value didn't decrease at all and I found out parameters in my model were not updated at all because of QIL layer I added
I attached my codes AlexNetQil and qil
please someone let me know what's the problem in my codes
AlexNetQIL
import torch
import torch.nn as nn
from qil import *
class AlexNetQIL(nn.Module):
#def __init__(self, num_classes=1000): for imagenet
def __init__(self, num_classes=10): # for cifar-10
super(AlexNetQIL, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1)
self.bn1 = nn.BatchNorm2d(64)
self.relu2 = nn.ReLU(inplace=True)
self.maxpool1 = nn.MaxPool2d(kernel_size=2)
self.qil2 = Qil()
self.conv2 = nn.Conv2d(64, 192, kernel_size=3, padding=1)
self.bn2 = nn.BatchNorm2d(192)
self.relu2 = nn.ReLU(inplace=True)
self.maxpool2 = nn.MaxPool2d(kernel_size=2)
self.qil3 = Qil()
self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
self.bn3 = nn.BatchNorm2d(384)
self.relu3 = nn.ReLU(inplace=True)
self.qil4 = Qil()
self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
self.bn4 = nn.BatchNorm2d(256)
self.relu4 = nn.ReLU(inplace=True)
self.qil5 = Qil()
self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
self.bn5 = nn.BatchNorm2d(256)
self.relu5 = nn.ReLU(inplace=True)
self.maxpool5 = nn.MaxPool2d(kernel_size=2)
self.classifier = nn.Sequential(
nn.Linear(256 * 2 * 2, 4096),
nn.ReLU(inplace=True),
nn.Linear(4096, 4096),
nn.ReLU(inplace=True),
nn.Linear(4096, num_classes),
)
def forward(self,x,inference = False):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu2(x)
x = self.maxpool1(x)
x,self.conv2.weight = self.qil2(x,self.conv2.weight,inference ) # if I remove this line, No problem
x = self.conv2(x)
x = self.bn2(x)
x = self.relu2(x)
x = self.maxpool2(x)
x,self.conv3.weight = self.qil3(x,self.conv3.weight,inference ) # if I remove this line, No problem
x = self.conv3(x)
x = self.bn3(x)
x = self.relu3(x)
x,self.conv4.weight = self.qil4(x,self.conv4.weight,inference ) # if I remove this line, No problem
x = self.conv4(x)
x = self.bn4(x)
x = self.relu4(x)
x,self.conv5.weight = self.qil5(x,self.conv5.weight,inference ) # if I remove this line, No problem
x = self.conv5(x)
x = self.bn5(x)
x = self.relu5(x)
x = self.maxpool5(x)
x = x.view(x.size(0),256 * 2 * 2)
x = self.classifier(x)
return x
QIL
forward
quantize weights and input activation with 2 steps
transformer(params) -> discretizer(params)
import torch
import torch.nn as nn
import numpy as np
import copy
#Qil (Quantize intervals learning)
class Qil(nn.Module):
discretization_level = 32
def __init__(self):
super(Qil,self).__init__()
self.cw = nn.Parameter(torch.rand(1)) # I have to train this interval parameter
self.dw = nn.Parameter(torch.rand(1)) # I have to train this interval parameter
self.cx = nn.Parameter(torch.rand(1)) # I have to train this interval parameter
self.dx = nn.Parameter(torch.rand(1)) # I have to train this interval parameter
self.gamma = nn.Parameter(torch.tensor(1.0)) # I have to train this transformer parameter
self.a = Qil.discretization_level
def forward(self,x,weights,Inference = False):
if not Inference:
weights = self.transfomer_weights(weights)
weights = self.discretizer(weights)
x = self.transfomer_activation(x)
x = self.discretizer(x)
return torch.nn.Parameter(x), torch.nn.Parameter(weights)
def transfomer_weights(self,weights):
device = weights.device
aw,bw = (0.5 / self.dw) , (-0.5*self.cw / self.dw + 0.5)
weights = torch.where( abs(weights) < self.cw - self.dw,
torch.tensor(0.).to(device),weights)
weights = torch.where( abs(weights) > self.cw + self.dw,
weights.sign(), weights)
weights = torch.where( (abs(weights) >= self.cw - self.dw) & (abs(weights) <= self.cw + self.dw),
(aw*abs(weights) + bw)**self.gamma * weights.sign() , weights)
return weights
def transfomer_activation(self,x):
device = x.device
ax,bx = (0.5 / self.dx) , (-0.5*self.cx / self.dx + 0.5)
x = torch.where(x < self.cx - self.dx,
torch.tensor(0.).to(device),x)
x = torch.where(x > self.cx + self.dx,
torch.tensor(1.0).to(device),x)
x = torch.where( (abs(x) >= self.cx - self.dx) & (abs(x) <= self.cx + self.dx),
ax*abs(x) + bx, x)
return x
def discretizer(self,tensor):
q_D = pow(2, Qil.discretization_level)
tensor = torch.round(tensor * q_D) / q_D
return tensor