DenseNet, Sizes of tensors must match - deep-learning

Would you know how I can adapt this code so that the tensor sizes match? I get this error: x = torch.cat([x1,x2],1) RuntimeError: Sizes of tensors must match except in dimension 0. Got 32 and 1 (The offending index is 0).
My images are size 416x416.
Thank you in advance for your help,
import torch
import torch.nn as nn
from torchvision import models

num_classes = 20

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Inception v3 branch: freeze all children except the last 5
        self.inc = models.inception_v3(pretrained=True)
        self.inc.aux_logits = False
        for child in list(self.inc.children())[:-5]:
            for param in child.parameters():
                param.requires_grad = False
        # replace the classifier with an identity so the 2048-dim features pass through
        self.inc.fc = nn.Sequential()
        # DenseNet-121 branch: freeze all children except the last 6
        self.dens121 = models.densenet121(pretrained=True)
        for child in list(self.dens121.children())[:-6]:
            for param in child.parameters():
                param.requires_grad = False
        # drop DenseNet's classifier, keep the convolutional feature extractor
        self.dens121 = nn.Sequential(*list(self.dens121.children())[:-1])
        self.SiLU = nn.SiLU()
        self.linear = nn.Linear(4096, num_classes)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        x1 = self.SiLU(self.dens121(x))
        x1 = x1.view(-1, 2048)
        x2 = self.inc(x).view(-1, 2048)
        x = torch.cat([x1, x2], 1)
        return self.linear(self.dropout(x))

The shapes of the two tensors are very different and that's why the torch.cat() fails. I tried to run your code with the following example:
def forward(self, x):
    x1 = self.SiLU(self.dens121(x))
    x1 = x1.view(-1, 2048)
    x2 = self.inc(x).view(-1, 2048)
    print(x1.shape, x2.shape)
    x = torch.cat([x1, x2], dim=1)
    return self.linear(self.dropout(x))
Here's the driver code:
inputs = torch.randn(2, 3, 416, 416)
model = Net()
outputs = model(inputs)
The shapes of x1 and x2 are as follows:
torch.Size([169, 2048]) torch.Size([2, 2048])
Either your DenseNet should output the same shape as the output of Inceptionv3 or vice-versa. The output from DenseNet is of shape torch.Size([2, 1024, 13, 13]) and the output from Inceptionv3 is of shape torch.Size([2, 2048]).
EDIT
Add this line to the __init__ method:
self.conv_reshape = nn.Conv2d(1024, 2048, kernel_size=13, stride=1)
Add these lines to your forward():
x1 = self.SiLU(self.dens121(x))
out = self.conv_reshape(x1)
x1 = out.view(-1, out.size(1))
x2 = self.inc(x).view(-1, 2048)
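The 13x13 kernel collapses the DenseNet feature map from (N, 1024, 13, 13) to (N, 2048, 1, 1), so both branches end up 2048-dimensional and the concatenation matches nn.Linear(4096, num_classes). As an alternative, here is a minimal runnable sketch (my own variant, not from the answer above) that aligns the branches with global average pooling plus a linear projection instead of a 13x13 convolution:
import torch
import torch.nn as nn

pool = nn.AdaptiveAvgPool2d(1)   # (N, 1024, 13, 13) -> (N, 1024, 1, 1)
expand = nn.Linear(1024, 2048)   # widen to match Inception's 2048 features

feat = torch.randn(2, 1024, 13, 13)       # DenseNet output shape from the question
x1 = expand(pool(feat).flatten(1))        # (2, 2048)
x2 = torch.randn(2, 2048)                 # stand-in for the Inception branch
print(torch.cat([x1, x2], dim=1).shape)   # torch.Size([2, 4096])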

Related

Temporal sequence feature extraction CNN, batches with different dimensions

I am using a CNN to extract features from temporal data of different lengths. I am using pad_sequence to pad the data in a batch. However, since the max length varies from batch to batch, the padded sequence length differs by batch. This creates errors when I flatten the data for the FC layer (the dimension of the flattened vector changes). I am currently handling this with an adaptive average pooling layer before the FC layers. As this is a global averaging, it fixes the output dimension for the FC layer. However, I am not sure if this is the correct thing to do.
Code is:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

## pad tensors
def pad_collate(batch):
    sequences = [item[0] for item in batch]
    lengths = [len(seq) for seq in sequences]
    padded_sequences = pad_sequence(sequences, batch_first=True, padding_value=0)
    return padded_sequences, lengths

## Create dataloader
trainData = Sequence(root=path)
trainDataLoader = DataLoader(trainData, batch_size=BATCH_SIZE, collate_fn=pad_collate)

## CNN model
class FeatureExtractor(nn.Module):
    def __init__(self, block, layers):
        super(FeatureExtractor, self).__init__()
        self.inplanes = 6
        ## 1st CONV layers
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=2, padding=4)
        self.bn1 = nn.BatchNorm2d(6)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        ## residual blocks
        self.layer0 = self._make_layer(block, 12, layers[0], stride=1)
        self.layer1 = self._make_layer(block, 24, layers[1], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((5, 5))  ##### MY CURRENT SOLUTION #####
        self.fc = nn.Linear(600, 128)

    def _make_layer(self, block, planes, blocks, stride):
        downsample = None
        if stride != 1 or self.inplanes != planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes))
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        ## first conv
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        ## conv blocks
        x = self.layer0(x)
        x = self.layer1(x)
        ## FC layer
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        output = self.fc(x)
        return output
Any other comments are also welcome (I am self-taught).
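A minimal sketch (shapes assumed, not taken from the question) of why the adaptive pooling fixes the FC input size: AdaptiveAvgPool2d((5, 5)) always emits 5x5 per channel, so the flattened vector is 24 * 5 * 5 = 600 no matter how long the padded sequence is.
import torch
import torch.nn as nn

pool = nn.AdaptiveAvgPool2d((5, 5))
for h in (40, 64, 100):               # three hypothetical padded lengths
    x = torch.randn(4, 24, h, 32)     # (batch, channels, padded length, features)
    y = torch.flatten(pool(x), 1)
    print(y.shape)                    # always torch.Size([4, 600])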

ValueError: Expected input batch_size (59) to match target batch_size (1)

I'm trying to build a semantic segmentation model with PyTorch. However, I encounter this error and do not know how to fix it.
This is the model:
import torch
import torch.nn as nn
import pytorch_lightning as pl
from torchvision import models

class SegmentationNN(pl.LightningModule):
    def __init__(self, num_classes=23, hparams=None):
        super().__init__()
        self.hparams = hparams
        self.model = models.alexnet(pretrained=True).features
        self.conv = nn.Conv2d(256, 3, kernel_size=1)
        self.upsample = nn.Upsample(size=(240, 240))

    def forward(self, x):
        print('Input:', x.shape)
        x = self.model(x)
        print('After Alexnet convs:', x.shape)
        x = self.conv(x)
        print('After 1-conv:', x.shape)
        x = self.upsample(x)
        print('After upsampling:', x.shape)
        return x

    def training_step(self, batch, batch_idx):
        images, targets = batch
        # targets = targets.view(targets.size(0), -1)
        out = self.forward(images)
        loss_func = nn.CrossEntropyLoss(ignore_index=-1, reduction='mean')
        loss = loss_func(out, targets.unsqueeze(0))
        tensorboard_logs = {'loss': loss}
        return {'loss': loss, 'log': tensorboard_logs}

    def validation_step(self, batch, batch_idx):
        images, targets = batch
        # targets = targets.view(targets.size(0), -1)
        out = self.forward(images)
        loss_func = nn.CrossEntropyLoss(ignore_index=-1, reduction='mean')
        loss = loss_func(out, targets.unsqueeze(0))
        tensorboard_logs = {'loss': loss}
        return {'loss': loss, 'log': tensorboard_logs}

    def configure_optimizers(self):
        optim = torch.optim.Adam(self.parameters(), lr=self.hparams['learning_rate'])
        return optim
And this is the training and fit:
train_dataloader = DataLoader(train_data, batch_size=hparams['batch_size'])
val_dataloader = DataLoader(val_data, batch_size=hparams['batch_size'])
trainer = pl.Trainer(
    max_epochs=50,
    gpus=1 if torch.cuda.is_available() else None
)
trainer.fit(model, train_dataloader, val_dataloader)
These are the sizes of the tensors after each layer:
Input: torch.Size([59, 3, 240, 240])
After Alexnet convs: torch.Size([59, 256, 6, 6])
After 1-conv: torch.Size([59, 3, 6, 6])
After upsampling: torch.Size([59, 3, 240, 240])
I am pretty much a beginner with PyTorch and PyTorch Lightning, so any advice would be appreciated!
Can you delete the unsqueeze(0) part here: loss = loss_func(out, targets.unsqueeze(0))?
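A minimal sketch of the corrected step, assuming targets come out of the DataLoader with shape (N, 240, 240): the unsqueeze(0) was wrapping the batch of 59 target maps into a single-element batch, which is exactly the (59) vs (1) mismatch in the error.
def training_step(self, batch, batch_idx):
    images, targets = batch             # images: (N, 3, 240, 240)
    out = self.forward(images)          # logits: (N, C, 240, 240)
    loss_func = nn.CrossEntropyLoss(ignore_index=-1, reduction='mean')
    loss = loss_func(out, targets)      # targets: (N, 240, 240) class indices, no unsqueeze
    return {'loss': loss, 'log': {'loss': loss}}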

Neural Network cannot overfit even one sample

I am using a neural network for a regression task.
My input is a grayscale image of size 100x70x1.
The gray area has a single unique value, 60.
The input goes through a preprocessing layer, which multiplies every pixel value by 1./255.
My output is just three floating-point numbers: [0.87077969, 0.98989031, 0.98888382]
I used a ResNet152 model, as shown below:
import tensorflow as tf

class Bottleneck(tf.keras.Model):
    expansion = 4

    def __init__(self, in_channels, out_channels, strides=1):
        super(Bottleneck, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(out_channels, 1, 1, use_bias=False)
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(out_channels, 3, strides, padding="same", use_bias=False)
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.conv3 = tf.keras.layers.Conv2D(out_channels * self.expansion, 1, 1, use_bias=False)
        self.bn3 = tf.keras.layers.BatchNormalization()
        if strides != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = tf.keras.Sequential([
                tf.keras.layers.Conv2D(self.expansion * out_channels, kernel_size=1,
                                       strides=strides, use_bias=False),
                tf.keras.layers.BatchNormalization()]
            )
        else:
            self.shortcut = lambda x, _: x

    def call(self, x, training=False):
        out = tf.nn.elu(self.bn1(self.conv1(x), training))
        out = tf.nn.elu(self.bn2(self.conv2(out), training))
        out = self.bn3(self.conv3(out), training)
        out += self.shortcut(x, training)
        return tf.nn.elu(out)

class ResNet(tf.keras.Model):
    def __init__(self, block, num_blocks):
        super(ResNet, self).__init__()
        self.in_channels = 64
        self.conv1 = tf.keras.layers.Conv2D(64, 7, 2, padding="same", use_bias=False)  # 60x60
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides=2, padding='same')  # 30x30
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.avg_pool2d = tf.keras.layers.GlobalAveragePooling2D()
        self.flatten = tf.keras.layers.Flatten()

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion
        return tf.keras.Sequential(layers)

    def call(self, x, training=False):
        out = self.pool1(tf.nn.elu(self.bn1(self.conv1(x), training)))
        out = self.layer1(out, training=training)
        out = self.layer2(out, training=training)
        out = self.layer3(out, training=training)
        out = self.layer4(out, training=training)
        # For classification
        out = self.flatten(out)
        # out = tf.keras.layers.Reshape((out.shape[-1],))(out)
        # out = self.linear(out)
        return out

    def model(self):
        x = tf.keras.layers.Input(shape=(100, 70, 1))
        return tf.keras.Model(inputs=[x], outputs=self.call(x))

def ResNet152():
    return ResNet(Bottleneck, [3, 8, 36, 3])
I used ELU as the activation function and replaced the GlobalAveragePooling layer at the end of the ResNet with a Flatten layer.
Before the output I stacked two Dense layers (2048 units and 3 units) on top of the ResNet model.
For training I used the Adam optimizer with an initial learning rate of 1e-4, which is reduced by a factor of 10 whenever the val_loss has not decreased for 3 epochs.
The loss is just the MSE.
After early stopping, with the learning rate down to 1e-8, the MSE loss is still very high: 8.6225.
The prediction is [2.92318237, 5.53124916, 3.00686643], which is far from the ground truth: [0.87077969, 0.98989031, 0.98888382].
I don't understand why such a deep network cannot overfit even a single sample.
Is the reason that my input image carries too little information? Could someone help me?
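For reference, a minimal single-sample overfit test along the lines the question describes (a sketch under assumed head layers and preprocessing, not the asker's exact training script):
import numpy as np
import tensorflow as tf

# one constant-gray sample, rescaled by 1./255 as in the question's preprocessing layer
x = np.full((1, 100, 70, 1), 60, dtype=np.float32) / 255.0
y = np.array([[0.87077969, 0.98989031, 0.98888382]], dtype=np.float32)

# backbone from above plus the two Dense layers the question describes
model = tf.keras.Sequential([
    ResNet152(),
    tf.keras.layers.Dense(2048, activation='elu'),
    tf.keras.layers.Dense(3),
])
model.compile(optimizer=tf.keras.optimizers.Adam(1e-3), loss='mse')
model.fit(x, y, epochs=200, verbose=0)
# note: BatchNormalization statistics computed on a single sample can make
# fit-time and predict-time behavior diverge, which is worth ruling out here
print(model.predict(x))  # a model that can truly overfit should land close to y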

PyTorch CNN test result is stuck

TRAINING CODE
if os.path.isfile(PATH):
    print("checkpoint training '{}' ...".format(PATH))
    checkpoint = torch.load(PATH)
    start_epoch = checkpoint['epoch']
    start_i = checkpoint['i']
    net.load_state_dict(checkpoint['state_dict'])
    print("=> loaded checkpoint '{}' (trained for {} epochs, {} i)".format(
        PATH, checkpoint['epoch'], checkpoint['i']))
else:
    print('new training')

for epoch in range(num_epochs):  # loop over the dataset multiple times
    running_loss = 0.0
    for i in range(len(train_folder_list2)):
        # get the inputs; data is a list of [inputs, labels]
        # net.train()
        inputs, labels = train_input[i], train_list[i]
        inputs = torch.as_tensor(inputs).cuda()
        inputs = inputs.transpose(1, 3)
        labels = torch.as_tensor(labels).cuda()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)

        # zero the parameter gradients
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 1:
            save_checkpoint({
                'epoch': start_epoch + epoch + 1,
                'i': start_i + i + 1,
                'state_dict': net.state_dict(),
            })
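One hedged aside (not a diagnosis of the stuck outputs): the checkpoint above stores only the model weights, so resuming restarts the optimizer state from scratch. A sketch of saving and restoring it too, using the asker's save_checkpoint helper:
save_checkpoint({
    'epoch': start_epoch + epoch + 1,
    'i': start_i + i + 1,
    'state_dict': net.state_dict(),
    'optimizer': optimizer.state_dict(),
})
# and when resuming:
# optimizer.load_state_dict(checkpoint['optimizer'])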
TEST CODE
PATH = './checkpoint.pth'
model = Net().cuda()
if os.path.isfile(PATH):
    print('checkpoint check!')
    checkpoint = torch.load(PATH)
    model.load_state_dict(checkpoint['state_dict'])
model.eval()

for k in range(len(train_folder_list2)):
    inputs = train_input[k]
    inputs = torch.as_tensor(inputs).cuda()
    inputs = inputs.transpose(1, 3)
    outputs = model(inputs)
    result = outputs.cpu().detach().numpy()
This is the code to find the edges of the image.
If I run the training code, train the model, and then test with the test code, it doesn't seem to find any edges in the image: the predicted edges come out in the same place no matter which image I put in.
ADD:
CNN CODE
Here is the CNN code for additional information. The input data was put into lists, with the images and labels kept separate.
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(293904, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 18)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 293904)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = x.view(18)
        return x
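One hedged observation on the forward pass above: x.view(18) only works when the batch size is 1, and it drops the batch dimension entirely. A sketch of the batch-preserving variant (my rewrite, not a confirmed fix for the stuck predictions):
def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = x.view(x.size(0), -1)   # (N, 293904): keep the batch dimension
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return self.fc3(x)          # (N, 18) instead of a flat (18,) vector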

Custom max_pool layer: ValueError: The channel dimension of the inputs should be defined. Found `None`

I am working with TensorFlow 2 and I am trying to implement max unpooling with indices, in order to implement SegNet.
When I run it I get the error in the title. I define MaxUnpooling2D as a function and then call it in the model. I suppose the problem is caused by the fact that updates and mask have shape (None, H, W, ch).
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Conv2D, BatchNormalization, LeakyReLU, Input
from tensorflow.keras.models import Model

def MaxUnpooling2D(updates, mask):
    size = 2
    mask = tf.cast(mask, 'int32')
    input_shape = tf.shape(updates, out_type='int32')
    # calculate the new (unpooled) shape
    output_shape = (
        input_shape[0],
        input_shape[1] * size,
        input_shape[2] * size,
        input_shape[3])
    # calculate indices for batch, height, width and feature maps
    one_like_mask = tf.ones_like(mask, dtype='int32')
    batch_shape = tf.concat(
        [[input_shape[0]], [1], [1], [1]],
        axis=0)
    batch_range = tf.reshape(
        tf.range(output_shape[0], dtype='int32'),
        shape=batch_shape)
    b = one_like_mask * batch_range
    y = mask // (output_shape[2] * output_shape[3])
    x = (mask // output_shape[3]) % output_shape[2]
    feature_range = tf.range(output_shape[3], dtype='int32')
    f = one_like_mask * feature_range
    # flatten the update values and pair each with its (b, y, x, f) target index
    updates_size = tf.size(updates)
    indices = K.transpose(K.reshape(
        tf.stack([b, y, x, f]),
        [4, updates_size]))
    values = tf.reshape(updates, [updates_size])
    return tf.scatter_nd(indices, values, output_shape)
def segnet_conv(
        inputs,
        kernel_size=3,
        kernel_initializer='glorot_uniform',
        batch_norm=False,
        **kwargs):
    conv1 = Conv2D(
        filters=64,
        kernel_size=kernel_size,
        padding='same',
        activation=None,
        kernel_initializer=kernel_initializer,
        name='conv_1'
    )(inputs)
    if batch_norm:
        conv1 = BatchNormalization(name='bn_1')(conv1)
    conv1 = LeakyReLU(alpha=0.3, name='activation_1')(conv1)
    conv1 = Conv2D(
        filters=64,
        kernel_size=kernel_size,
        padding='same',
        activation=None,
        kernel_initializer=kernel_initializer,
        name='conv_2'
    )(conv1)
    if batch_norm:
        conv1 = BatchNormalization(name='bn_2')(conv1)
    conv1 = LeakyReLU(alpha=0.3, name='activation_2')(conv1)
    pool1, mask1 = tf.nn.max_pool_with_argmax(
        input=conv1,
        ksize=2,
        strides=2,
        padding='SAME'
    )

def segnet_deconv(
        pool1,
        mask1,
        kernel_size=3,
        kernel_initializer='glorot_uniform',
        batch_norm=False,
        **kwargs):
    dec = MaxUnpooling2D(pool5, mask5)
    dec = Conv2D(
        filters=512,
        kernel_size=kernel_size,
        padding='same',
        activation=None,
        kernel_initializer=kernel_initializer,
        name='upconv_13'
    )(dec)

def classifier(
        dec,
        ch_out=2,
        kernel_size=3,
        final_activation=None,
        batch_norm=False,
        **kwargs):
    dec = Conv2D(
        filters=64,
        kernel_size=kernel_size,
        activation='relu',
        padding='same',
        name='dec_out1'
    )(dec)
# tf.function
def segnet(
        inputs,
        ch_out=2,
        kernel_size=3,
        kernel_initializer='glorot_uniform',
        final_activation=None,
        batch_norm=False,
        **kwargs):
    pool5, mask1, mask2, mask3, mask4, mask5 = segnet_conv(
        inputs,
        kernel_size=3,
        kernel_initializer='glorot_uniform',
        batch_norm=False
    )
    dec = segnet_deconv(
        pool5,
        mask1,
        mask2,
        mask3,
        mask4,
        mask5,
        kernel_size=kernel_size,
        kernel_initializer=kernel_initializer,
        batch_norm=batch_norm
    )
    output = classifier(
        dec,
        ch_out=2,
        kernel_size=3,
        final_activation=None,
        batch_norm=batch_norm
    )
    return output

inputs = Input(shape=(*params['image_size'], params['num_channels']), name='input')
outputs = segnet(inputs, n_labels=2, kernel=3, pool_size=(2, 2), output_mode=None)
# we define our U-Net to output logits
model = Model(inputs, outputs)
Can you please help me with this problem?
I have solved the problem. In case someone needs it, here is the code for MaxUnpooling2D:
def MaxUnpooling2D(pool, ind, output_shape, batch_size, name=None):
    """
    Unpooling layer after max_pool_with_argmax.
    Args:
        pool: max pooled output tensor
        ind: argmax indices
        ksize: ksize is the same as for the pool
    Return:
        unpool: unpooling tensor
    :param batch_size:
    """
    with tf.compat.v1.variable_scope(name):
        pool_ = tf.reshape(pool, [-1])
        batch_range = tf.reshape(tf.range(batch_size, dtype=ind.dtype), [tf.shape(pool)[0], 1, 1, 1])
        b = tf.ones_like(ind) * batch_range
        b = tf.reshape(b, [-1, 1])
        ind_ = tf.reshape(ind, [-1, 1])
        ind_ = tf.concat([b, ind_], 1)
        ret = tf.scatter_nd(ind_, pool_, shape=[batch_size, output_shape[1] * output_shape[2] * output_shape[3]])
        # The reason we use tf.scatter_nd: with tf.sparse_tensor_to_dense the gradient is None,
        # which cuts off the network, whereas with tf.scatter_nd the gradients for all the
        # trainable variables are tensors instead of None.
        # tf.scatter_nd creates a new tensor by applying the sparse UPDATES (the pooled values)
        # to individual slices of a zero tensor of the given shape (the flat output shape)
        # according to the indices (ind_). If we used the original code, the only change needed
        # would be replacing tf.sparse_tensor_to_dense(sparse_tensor) with
        # tf.sparse_add(tf.zeros(output_shape), sparse_tensor), which also gives the gradients!
        ret = tf.reshape(ret, [tf.shape(pool)[0], output_shape[1], output_shape[2], output_shape[3]])
        return ret
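A hedged usage sketch of the solved version (the shapes and the unpool_demo name are mine, not from the post): pair it with tf.nn.max_pool_with_argmax and pass the pre-pooling shape together with a static batch size.
import tensorflow as tf

x = tf.random.normal((4, 8, 8, 3))
pooled, argmax = tf.nn.max_pool_with_argmax(x, ksize=2, strides=2, padding='SAME')
unpooled = MaxUnpooling2D(pooled, argmax,
                          output_shape=(4, 8, 8, 3),  # shape before pooling
                          batch_size=4,
                          name='unpool_demo')
print(unpooled.shape)  # (4, 8, 8, 3): max values scattered back to their argmax positions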