Generating 28x28 image using DCGAN - deep-learning

I have a python test code which predefines
nz=10, ngf= 64
def test_Generator_shapes():
nz = 10
netG = Generator(nz, ngf=64, nc=1)
batch_size = 32
noise = torch.randn(batch_size, nz, 1, 1)
out = netG(noise, verbose=True)
assert out.shape == torch.Size([batch_size, 1, 28, 28]), f"Bad shape of out: out.shape={out.shape}"
print('Success')
test_Generator_shapes()
Now I need to reset the hidden layers and other parameters to be able to output imamges of size 28x28,
i.e.- torch.Size([batch_size, 1, 28, 28])
Please can someone suggest what changes I should do in the following code so as to be able to generate images of 28x28 instead of 64x64 presently
class Generator(nn.Module):
def __init__(self, nz=10, ngf=28, nc=1, ndf=28):
"""GAN generator.
Args:
nz: Number of elements in the latent code.
ngf: Base size (number of channels) of the generator layers.
nc: Number of channels in the generated images.
"""
ngf=28
super(Generator, self).__init__()
self.ngpu = 0
self.main = nn.Sequential(
# input is Z, going into a convolution
nn.ConvTranspose2d( nz, ngf * 8, 4, 1, 0, bias=False),
nn.BatchNorm2d(ngf * 8),
nn.ReLU(True),
# state size. (ngf*8) x 4 x 4
nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf * 4),
nn.ReLU(True),
# state size. (ngf*4) x 8 x 8
nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf * 2),
nn.ReLU(True),
# state size. (ngf*2) x 16 x 16
nn.ConvTranspose2d( ngf * 2, ngf, 4, 2, 1, bias=False),
nn.BatchNorm2d(ngf),
nn.ReLU(True),
# state size. (ngf) x 32 x 32
nn.ConvTranspose2d( ngf, nc, 4, 2, 1, bias=False),
nn.Tanh()
# state size. (nc) x 64 x 64
)
# YOUR CODE HERE
#raise NotImplementedError()
def forward(self, z, verbose=False):
"""Generate images by transforming the given noise tensor.
Args:
z of shape (batch_size, nz, 1, 1): Tensor of noise samples. We use the last two singleton dimensions
so that we can feed z to the generator without reshaping.
verbose (bool): Whether to print intermediate shapes (True) or not (False).
Returns:
out of shape (batch_size, nc, 28, 28): Generated images.
"""
# YOUR CODE HERE
x = self.main(z)
print (x.size())
return x
#raise NotImplementedError()

Related

how to best visualize CNN architecture? (experience using "PlotNeuralNet")

I'm writing a thesis and want to present a visualisation of the CNN architecture used for the analysis (written in PyTorch). I came across this cool repository PlotNeuralNet with examples for how to generate LaTeX code for drawing neural networks for reports and presentation. However, I'm having trouble finding out how to exactly define my particular architecture.
Here is an example of how one would define an architecture.
import sys
sys.path.append('../')
from pycore.tikzeng import *
# define your arch
arch = \[
to_head( '..' ),
to_cor(),
to_begin(),
to_Conv("conv1", 512, 64, offset="(0,0,0)", to="(0,0,0)", height=64, depth=64, width=2 ),
to_Pool("pool1", offset="(0,0,0)", to="(conv1-east)"),
to_Conv("conv2", 128, 64, offset="(1,0,0)", to="(pool1-east)", height=32, depth=32, width=2 ),
to_connection( "pool1", "conv2"),
to_Pool("pool2", offset="(0,0,0)", to="(conv2-east)", height=28, depth=28, width=1),
to_SoftMax("soft1", 10 ,"(3,0,0)", "(pool1-east)", caption="SOFT" ),
to_connection("pool2", "soft1"),
to_Sum("sum1", offset="(1.5,0,0)", to="(soft1-east)", radius=2.5, opacity=0.6),
to_connection("soft1", "sum1"),
to_end()
\]
def main():
namefile = str(sys.argv[0]).split('.')[0]
to_generate(arch, namefile + '.tex' )
if __name__ == '__main__':
main()
However, looking at the different available blocks available in pycore module, I'm still not able to use the tool. Documentation for usage is not really that elaborate, so I was hoping someone here would find it trivial to define the architecture below. Else, any good ways to
class Net20(nn.Module):
""" CNN for 20-day Image
This particular model should have:
- 3 blocks
- 64 layers in first block, multiply by 2 each subsequent block
- filter size (5,3)
- vertical stride = 3 (but only in first layer)
- vertical dilation = 2 (but only in first layer)
- Leaky Relu activation function
- max pooling (2,1) at the end of each block
"""
def __init__(self):
super().__init__()
self.layer1 = nn.Sequential(
Conv2dSame(1, 64, kernel_size=(5,3), stride=(3,1), dilation=(2,1)),
nn.BatchNorm2d(64),
nn.LeakyReLU(negative_slope=0.01, inplace=True),
nn.MaxPool2d((2, 1), ceil_mode=True)
)
self.layer2 = nn.Sequential(
Conv2dSame(64, 128, kernel_size=(5,3)),
nn.BatchNorm2d(128),
nn.LeakyReLU(negative_slope=0.01, inplace=True),
nn.MaxPool2d((2, 1), ceil_mode=True)
)
self.layer3 = nn.Sequential(
Conv2dSame(128, 256, kernel_size=(5,3)),
nn.BatchNorm2d(256),
nn.LeakyReLU(negative_slope=0.01, inplace=True),
nn.MaxPool2d((2, 1), ceil_mode=True)
)
self.fc1 = nn.Sequential(
nn.Dropout(p=0.5),
nn.Linear(46080, 1),
)
def forward(self, x):
x = x.reshape(-1,1,64,60)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = x.reshape(-1,46080)
x = self.fc1(x)
return x
You can try model.summary() or keras.utils.plot_model. You may want to check: https://machinelearningmastery.com/visualize-deep-learning-neural-network-model-keras/

PyTorch: How to calculate output size of the CNN?

I went through this PyTorch CNN implementation available here: https://machinelearningknowledge.ai/pytorch-conv2d-explained-with-examples/
I am unable to understand how they replace the '?' with some value. What is the formula for calculating the CNN layer output?
This is essential to be calculated in PyTorch; not so in Tensorflow - Keras. If there is any other blog that explains this well, please drop it in the comments.
# Implementation of CNN/ConvNet Model
class CNN(torch.nn.Module):
def __init__(self):
super(CNN, self).__init__()
# L1 ImgIn shape=(?, 28, 28, 1)
# Conv -> (?, 28, 28, 32)
# Pool -> (?, 14, 14, 32)
self.layer1 = torch.nn.Sequential(
torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
torch.nn.ReLU(),
torch.nn.MaxPool2d(kernel_size=2, stride=2),
torch.nn.Dropout(p=1 - keep_prob))
# L2 ImgIn shape=(?, 14, 14, 32)
# Conv ->(?, 14, 14, 64)
# Pool ->(?, 7, 7, 64)
self.layer2 = torch.nn.Sequential(
torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
torch.nn.ReLU(),
torch.nn.MaxPool2d(kernel_size=2, stride=2),
torch.nn.Dropout(p=1 - keep_prob))
# L3 ImgIn shape=(?, 7, 7, 64)
# Conv ->(?, 7, 7, 128)
# Pool ->(?, 4, 4, 128)
self.layer3 = torch.nn.Sequential(
torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
torch.nn.ReLU(),
torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1),
torch.nn.Dropout(p=1 - keep_prob))
# L4 FC 4x4x128 inputs -> 625 outputs
self.fc1 = torch.nn.Linear(4 * 4 * 128, 625, bias=True)
torch.nn.init.xavier_uniform(self.fc1.weight)
self.layer4 = torch.nn.Sequential(
self.fc1,
torch.nn.ReLU(),
torch.nn.Dropout(p=1 - keep_prob))
# L5 Final FC 625 inputs -> 10 outputs
self.fc2 = torch.nn.Linear(625, 10, bias=True)
torch.nn.init.xavier_uniform_(self.fc2.weight) # initialize parameters
def forward(self, x):
out = self.layer1(x)
out = self.layer2(out)
out = self.layer3(out)
out = out.view(out.size(0), -1) # Flatten them for FC
out = self.fc1(out)
out = self.fc2(out)
return out
#instantiate CNN model
model = CNN()
model
Thanks!
I assume you calculation is wrong because:
Pytorch support images in format C * H * W (e.g. 3x32x32 not 32x32x3)
First dimension always batch dimension and must be omitted in calculation because, all nn.Modules handle it by default
So if you want calculate input size for first Linear layer, you can use this trick:
conv = nn.Sequential(self.layer1,self.layer2, self.layer3, nn.Flatten())
out = conv(torch.randn(1,im_height,im_width).unsqueeze(0))
# fc_layer_in_channels = out.shape[1]
self.fc1 = torch.nn.Linear(out.shape[1], 625, bias=True)
but only if you know im_height,im_width
The best practice is use torch.nn.AdaptiveAvgPool2d.
With this layer you always can get output of fixed spatial size.

Inputting 2D array in to conv2D layer

I'm building a DQN that takes a 24x10 array of 0,1,2 (representing a tetris board) and a int 0-5 (representing the current playable tetramino)
I flatten my array and convert it to a Tensor before inputting it to my convolutional layers but this is the error I keep on getting
Expected 4-dimensional input for 4-dimensional weight [16, 3, 240, 240], but got 1-dimensional input of size [240] instead
I've tried reducing the Kernel size and stride as well as not flattening the array but neither has worked.
For reference this is my DQN
class DQN(nn.Module):
def __init__(self):
super(DQN, self).__init__()
self.conv1_board = nn.Conv2d(3, 16, kernel_size=240, stride=1) #3 input channels for 0,1,2 . kernel_size 240 for length of tensor
self.conv2_board = nn.Conv2d(16, 32, kernel_size=240, stride=1)
self.conv3_board = nn.Conv2d(32, 6, kernel_size=240, stride=1)
self.conv1_piece = nn.Conv2d(6, 16, kernel_size=240, stride=1) #in channels 6 as 6 possible values
self.conv2_piece = nn.Conv2d(16, 6, kernel_size=240, stride=1)
self.fc1 = nn.Linear(1, 32)
self.fc2 = nn.Linear(32, 6)
self.flatten = nn.Flatten()
def flt_totns(self, arr):
flt = []
for l in arr:
flt.extend(l)
return torch.FloatTensor(flt)
def forward(self, states): #inputs to conv layers should be Tensors not list. convert list => tensor
board, piece = states
board = self.flt_totns(board)
embed_board = flatten(self.conv3_board(self.conv2_board(self.conv1_board(board))))
embed_piece = flatten(self.conv2_piece(self.conv1_piece(piece)))
embed_joined = torch.cat([embed_board, embed_piece])
return self.fc2(self.fc1(embed_joined))
I'm very new to CNNs in pytorch so I'm sure a lot of my reasoning is faulty. For example I'm still not sure how Kernel size exactly relates to the shape of your input, or if input channels still applies to array inputs. Buy any help would be greatly appreciated.

Pytorch model running out of memory on both CPU and GPU, can’t figure out what I’m doing wrong

Trying to implement a simple multi-label image classifier using Pytorch Lightning. Here's the model definition:
import torch
from torch import nn
# creates network class
class Net(pl.LightningModule):
def __init__(self):
super().__init__()
# defines conv layers
self.conv_layer_b1 = nn.Sequential(
nn.Conv2d(in_channels=3, out_channels=32,
kernel_size=3, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2),
nn.Flatten(),
)
# passes dummy x matrix to find the input size of the fc layer
x = torch.randn(1, 3, 800, 600)
self._to_linear = None
self.forward(x)
# defines fc layer
self.fc_layer = nn.Sequential(
nn.Linear(in_features=self._to_linear,
out_features=256),
nn.ReLU(),
nn.Linear(256, 5),
)
# defines accuracy metric
self.accuracy = pl.metrics.Accuracy()
self.confusion_matrix = pl.metrics.ConfusionMatrix(num_classes=5)
def forward(self, x):
x = self.conv_layer_b1(x)
if self._to_linear is None:
# does not run fc layer if input size is not determined yet
self._to_linear = x.shape[1]
else:
x = self.fc_layer(x)
return x
def cross_entropy_loss(self, logits, y):
criterion = nn.CrossEntropyLoss()
return criterion(logits, y)
def training_step(self, train_batch, batch_idx):
x, y = train_batch
logits = self.forward(x)
train_loss = self.cross_entropy_loss(logits, y)
train_acc = self.accuracy(logits, y)
train_cm = self.confusion_matrix(logits, y)
self.log('train_loss', train_loss)
self.log('train_acc', train_acc)
self.log('train_cm', train_cm)
return train_loss
def validation_step(self, val_batch, batch_idx):
x, y = val_batch
logits = self.forward(x)
val_loss = self.cross_entropy_loss(logits, y)
val_acc = self.accuracy(logits, y)
return {'val_loss': val_loss, 'val_acc': val_acc}
def validation_epoch_end(self, outputs):
avg_val_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
avg_val_acc = torch.stack([x['val_acc'] for x in outputs]).mean()
self.log("val_loss", avg_val_loss)
self.log("val_acc", avg_val_acc)
def configure_optimizers(self):
optimizer = torch.optim.Adam(self.parameters(), lr=0.0008)
return optimizer
The issue is probably not the machine since I'm using a cloud instance with 60 GBs of RAM and 12 GBs of VRAM. Whenever I run this model even for a single epoch, I get an out of memory error. On the CPU it looks like this:
RuntimeError: [enforce fail at CPUAllocator.cpp:64] . DefaultCPUAllocator: can't allocate memory: you tried to allocate 1966080000 bytes. Error code 12 (Cannot allocate memory)
and on the GPU it looks like this:
RuntimeError: CUDA out of memory. Tried to allocate 7.32 GiB (GPU 0; 11.17 GiB total capacity; 4.00 KiB already allocated; 2.56 GiB free; 2.00 MiB reserved in total by PyTorch)
Clearing the cache and reducing the batch size did not work. I'm a novice so clearly something here is exploding but I can't tell what. Any help would be appreciated.
Thank you!
Indeed, it's not a machine issue; the model itself is simply unreasonably big. Typically, if you take a look at common CNN models, the fc layers occur near the end, after the inputs already pass through quite a few convolutional blocks (and have their spatial resolutions reduced).
Assuming inputs are of shape (batch, 3, 800, 600), while passing the conv_layer_b1 layer, the feature map shape would be (batch, 32, 400, 300) after the MaxPool operation. After flattening, the inputs become (batch, 32 * 400 * 300), ie, (batch, 3840000).
The immediately following fc_layer thus contains nn.Linear(3840000, 256), which is simply absurd. This single linear layer contains ~983 million trainable parameters! For reference, popular image classification CNNs roughly have 3 to 30 million parameters on average, with larger variants reaching 60 to 80 million. Few ever really cross the 100 million mark.
You can count your model params with this:
def count_params(model):
return sum(map(lambda p: p.data.numel(), model.parameters()))
My advice: 800 x 600 is really a massive input size. Reduce it to something like 400 x 300, if possible. Furthermore, add several convolutional blocks similar to conv_layer_b1, before the FC layer. For example:
def get_conv_block(C_in, C_out):
return nn.Sequential(
nn.Conv2d(in_channels=C_in, out_channels=C_out,
kernel_size=3, padding=1),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2)
)
class Net(pl.LightningModule):
def __init__(self):
super().__init__()
# defines conv layers
self.conv_layer_b1 = get_conv_block(3, 16)
self.conv_layer_b2 = get_conv_block(16, 32)
self.conv_layer_b3 = get_conv_block(32, 64)
self.conv_layer_b4 = get_conv_block(64, 128)
self.conv_layer_b5 = get_conv_block(128, 256)
# passes dummy x matrix to find the input size of the fc layer
x = torch.randn(1, 3, 800, 600)
self._to_linear = None
self.forward(x)
# defines fc layer
self.fc_layer = nn.Sequential(
nn.Flatten(),
nn.Linear(in_features=self._to_linear,
out_features=256),
nn.ReLU(),
nn.Linear(256, 5)
)
# defines accuracy metric
self.accuracy = pl.metrics.Accuracy()
self.confusion_matrix = pl.metrics.ConfusionMatrix(num_classes=5)
def forward(self, x):
x = self.conv_layer_b1(x)
x = self.conv_layer_b2(x)
x = self.conv_layer_b3(x)
x = self.conv_layer_b4(x)
x = self.conv_layer_b5(x)
if self._to_linear is None:
# does not run fc layer if input size is not determined yet
self._to_linear = nn.Flatten()(x).shape[1]
else:
x = self.fc_layer(x)
return x
Here, because more conv-relu-pool layers are applied, the input is reduced to a feature map of a much smaller shape, (batch, 256, 25, 18), and the overall number of trainable parameters would be reduced to about ~30 million parameters.

Concatenating branches(of different dimensions) of googlenet on Pytorch

is there any way I can concatenate branches of different dimensions of a small inception(googlenet)?
For example, at a 32 x 32 x 3 image (torch.Size([1, 3, 32, 32])), it will pass through an inception module with the following branches:
a convolution with (32 channels, 1 x 1 filters)
another convolution with (32 channels, 3 x3 filters)
merge (concatenate along the channel dimension)
Inception Module
The issue however is that the torch sizes as a result of the two convolutions are different.
(32 channels, 1 x 1 filters) -> [1, 32, 30, 30]
(32 channels, 3 x3 filters) -> [1, 32, 28, 28]
How can I concatenate the two branches?
Should I add padding? I tried ZeroPad2d. It’s not working.
For your reference:
class Inception(nn.Module):
def __init__(self, in_channel, ch1, ch3):
super(Inception, self).__init__()
self.branch1 = nn.Sequential(
ConvBlock(in_channel, ch1, kernel_size = 1,stride=1, padding=0)
)
self.branch2 = nn.Sequential(
ConvBlock(in_channel, ch3, kernel_size = 3,stride=1, padding=0)
)
def forward(self, x):
branch1 = self.branch1(x)
branch2 = self.branch2(x)
return torch.cat([branch1, branch2], 1) *(error here)*
You need to match your kernel size and padding.
For kernel_size=1 no padding is needed, but for kernel_size=3 you need padding to be 1:
self.branch2 = nn.Sequential(
ConvBlock(in_channel, ch3, kernel_size=3,stride=1, padding=1)
)
See this nice tutorial for more details.