Autoencoder hits invisible wall - deep-learning

I have a problem. I am currently trying to train an autoencoder on stock data. The data has been MinMax-scaled.
The model seems to have trouble reproducing values lower than 0.15 and higher than 0.8. It looks as if there is an invisible barrier. I am not sure what this is called. What would you call it?
This is the model's output vs. the original: Output vs Original
My model uses the tanh activation function and consists of linear layers. For now, latent_dims is the same as the input size; I plan to reduce it later on:
class SparseEncoder(nn.Module):
    """Fully connected encoder mapping an input vector to a latent code.

    The network is a stack of eight ``nn.Linear`` layers
    (input_shape -> 512 -> 256 -> 128 -> 64 -> 32 -> 16 -> 8 -> latent_dims),
    each followed by ``tanh``, so every latent value lies in (-1, 1).

    Args:
        input_shape: number of input features.
        latent_dims: size of the latent code.
        dtype: dtype used for every layer's parameters.
    """

    def __init__(self, input_shape: int, latent_dims, dtype=torch.float64):
        super().__init__()
        widths = (input_shape, 512, 256, 128, 64, 32, 16, 8, latent_dims)
        # Register the layers as linear1 .. linear8, in order, so attribute
        # names and state_dict keys are the usual ones.
        for idx, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"linear{idx}", nn.Linear(fan_in, fan_out, dtype=dtype))

    def forward(self, x):
        """Return the tanh-activated latent code for ``x``."""
        z = x
        for idx in range(1, 9):
            z = torch.tanh(getattr(self, f"linear{idx}")(z))
        return z
class SparseDecoder(nn.Module):
    """Fully connected decoder mapping a latent code back to input space.

    Mirror image of the encoder: latent_dims -> 8 -> 16 -> 32 -> 64 -> 128
    -> 256 -> 512 -> input_shape. Every layer, including the last one, is
    followed by ``tanh``, so the reconstruction is bounded to (-1, 1).

    Args:
        input_shape: number of features to reconstruct.
        latent_dims: size of the latent code fed to the decoder.
        dtype: dtype used for every layer's parameters.
    """

    def __init__(self, input_shape: int, latent_dims, dtype=torch.float64):
        super().__init__()

        def dense(n_in, n_out):
            return nn.Linear(n_in, n_out, dtype=dtype)

        self.linear1 = dense(latent_dims, 8)
        self.linear2 = dense(8, 16)
        self.linear3 = dense(16, 32)
        self.linear4 = dense(32, 64)
        self.linear5 = dense(64, 128)
        self.linear6 = dense(128, 256)
        self.linear7 = dense(256, 512)
        self.linear8 = dense(512, input_shape)

    def forward(self, x):
        """Return the tanh-activated reconstruction for latent code ``x``."""
        stack = (
            self.linear1, self.linear2, self.linear3, self.linear4,
            self.linear5, self.linear6, self.linear7, self.linear8,
        )
        z = x
        for layer in stack:
            z = torch.tanh(layer(z))
        return z
Do you have any hints that could help me?
I tried to autoencode stock market data and expected the output to match the input 1:1, since the latent space still has the same size as the input.

Related

Why does my multi-output regression using PyTorch only optimize one output?

I want to predict three outputs; the model is as follows. The input has 9 features and the output has 3.
class DNN(nn.Module):
    """Multi-layer perceptron with ten ELU hidden layers and three outputs.

    Layer widths: n_features -> 16 -> 32 -> 64 -> 128 -> 256 -> 256 -> 128
    -> 64 -> 32 -> 16 -> 3. The output layer has no activation.

    Args:
        n_features: number of input features.
    """

    def __init__(self, n_features):
        super(DNN, self).__init__()
        self.n_features = n_features
        self.inlayer1 = nn.Linear(n_features, 16)
        self.layer2 = nn.Linear(16, 32)
        self.layer3 = nn.Linear(32, 64)
        self.layer4 = nn.Linear(64, 128)
        self.layer5 = nn.Linear(128, 256)
        self.layer6 = nn.Linear(256, 256)
        self.layer7 = nn.Linear(256, 128)
        self.layer8 = nn.Linear(128, 64)
        self.layer9 = nn.Linear(64, 32)
        self.layer10 = nn.Linear(32, 16)
        self.outlayer = nn.Linear(16, 3)

    def forward(self, x):
        """Apply the ELU hidden stack, then the linear output layer."""
        hidden_stack = (
            self.inlayer1, self.layer2, self.layer3, self.layer4, self.layer5,
            self.layer6, self.layer7, self.layer8, self.layer9, self.layer10,
        )
        for layer in hidden_stack:
            x = F.elu(layer(x))
        return self.outlayer(x)
The train code
def train(net, train_features, train_labels, test_features, test_labels,
          num_epochs, learning_rate, weight_decay, batch_size):
    """Train ``net`` with Adam and record the MSE loss after every epoch.

    Args:
        net: model to optimize.
        train_features, train_labels: full training set.
        test_features, test_labels: full test set; the test loss is only
            recorded when ``test_labels`` is not None.
        num_epochs: number of passes over the training data.
        learning_rate, weight_decay: Adam hyperparameters.
        batch_size: mini-batch size passed to ``d2l.load_array``.

    Returns:
        ``(train_ls, test_ls)``: per-epoch losses as Python floats.
    """
    train_ls, test_ls = [], []
    train_iter = d2l.load_array((train_features, train_labels), batch_size)
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=learning_rate,
                                 weight_decay=weight_decay)
    for _ in range(num_epochs):
        for X, y in train_iter:
            optimizer.zero_grad()
            out = net(X)  # author's note: out.shape is (100 samples, 3 labels)
            loss = MSEloss(out, y)
            loss.backward()
            optimizer.step()
        # The per-epoch metrics are never back-propagated, so compute them
        # without autograd to avoid building a graph over the whole dataset.
        with torch.no_grad():
            train_ls.append(MSEloss(net(train_features), train_labels).item())
            if test_labels is not None:
                test_ls.append(MSEloss(net(test_features), test_labels).item())
    return train_ls, test_ls
After running the model, the result below is incorrect, but I don't know where the bug is. It seems that only the first label column is predicted correctly. Should I change the way I calculate the loss?
The result is shown below.
The R2 and MSE metrics for the three outputs
I also tried predicting the three outputs (out1, out2, out3) separately by changing the number of output neurons to 1 and then calculating a weighted loss, but it didn't work: none of the three outputs came close to the real labels.

Keras Tuner on autoencoder - Add condition : first hidden layer units greater than or equal next hidden layer units

I want to use Keras Tuner to tune the hyperparameters of an autoencoder.
It is a symmetric AE with two hidden layers. I want the number of units in the first layer to always be greater than or equal to the number of units in the second layer, but I don't know how to implement this with Keras Tuner. If someone can help, that would be great. Thank you in advance.
class DAE(tf.keras.Model):
    '''
    A DAE model
    '''
    def __init__(self, hp, **kwargs):
        '''
        DAE instantiation
        args :
            hp : Tuner hyperparameters object (Choice / conditional_scope are used)
            **kwargs : forwarded to tf.keras.Model
        return:
            None

        The input dimension is fixed to 15 inside the constructor.
        '''
        super(DAE, self).__init__(**kwargs)
        input_dim = 15
        latent_dim = hp.Choice("latent_space", [2, 4, 8])
        units_0 = hp.Choice("units_0", [8, 16, 32, 64])
        units_1 = hp.Choice("units_1", [8, 16, 32, 64])
        for i in [8, 16, 32, 64]:
            with hp.conditional_scope("units_0", [i]):
                if units_0 == i:
                    # TODO: open question - units_1 should be <= i here, but
                    # it is currently sampled independently of units_0.
                    pass
        dropout = hp.Choice("dropout_rate", [0.1, 0.2, 0.3, 0.4, 0.5])

        # Encoder: input -> units_0 -> units_1 -> latent_dim
        inputs = tf.keras.Input(shape=(input_dim,))
        x = layers.Dense(units_0, activation="relu")(inputs)
        x = layers.Dropout(dropout)(x)
        x = layers.Dense(units_1, activation="relu")(x)
        x = layers.Dropout(dropout)(x)
        z = layers.Dense(latent_dim)(x)
        self.encoder = tf.keras.Model(inputs, z, name="encoder")

        # Decoder mirrors the encoder: latent_dim -> units_1 -> units_0 -> input
        inputs = tf.keras.Input(shape=(latent_dim,))
        x = layers.Dense(units_1, activation="relu")(inputs)
        x = layers.Dropout(dropout)(x)
        x = layers.Dense(units_0, activation="relu")(x)
        x = layers.Dropout(dropout)(x)
        outputs = layers.Dense(input_dim, activation="linear")(x)
        self.decoder = tf.keras.Model(inputs, outputs, name="decoder")
See my code above. It is a denoising autoencoder class.
I found the solution: we need to create a separate units_1 hyperparameter for each value of units_0.
class DAE(tf.keras.Model):
    '''
    A DAE model
    '''
    def __init__(self, hp, training=None, **kwargs):
        '''
        DAE instantiation
        args :
            hp : Tuner hyperparameters object
            training : forwarded to every Dropout call
            **kwargs : forwarded to tf.keras.Model
        return:
            None

        The input dimension is fixed to 15 (stored as self.input_dim).
        '''
        super(DAE, self).__init__(**kwargs)
        self.input_dim = 15
        l_units = [16, 32, 64, 128]
        latent_dim = hp.Choice("latent_space", [2, 4, 8])
        units_0 = hp.Choice("units_0", l_units)
        dropout_0 = hp.Choice("dropout_rate_0", [0.1, 0.2, 0.3, 0.4, 0.5])
        dropout_1 = hp.Choice("dropout_rate_1", [0.1, 0.2, 0.3, 0.4, 0.5])

        # One conditional hyperparameter per units_0 value, each capped at that
        # value, so the second layer is never wider than the first. The result
        # is kept in an ordinary local: writing through locals() is not
        # guaranteed to persist, and the loop variable ends on the last
        # candidate regardless of which branch was taken.
        units_1 = None
        for i in l_units:
            name = "units_1_%d" % i  # unique name for each hp.Int object
            with hp.conditional_scope("units_0", [i]):
                if units_0 == i:
                    units_1 = hp.Int(name, min_value=8, max_value=i,
                                     step=2, sampling="log")

        # Encoder: input -> units_0 -> units_1 -> latent_dim
        inputs = tf.keras.Input(shape=(self.input_dim,))
        x = layers.Dense(units_0, activation="relu")(inputs)
        x = layers.Dropout(dropout_0)(x, training=training)
        x = layers.Dense(units_1, activation="relu")(x)
        x = layers.Dropout(dropout_1)(x, training=training)
        z = layers.Dense(latent_dim)(x)
        self.encoder = tf.keras.Model(inputs, z, name="encoder")

        # Decoder mirrors the encoder: latent_dim -> units_1 -> units_0 -> input
        inputs = tf.keras.Input(shape=(latent_dim,))
        x = layers.Dense(units_1, activation="relu")(inputs)
        x = layers.Dropout(dropout_1)(x, training=training)
        x = layers.Dense(units_0, activation="relu")(x)
        x = layers.Dropout(dropout_0)(x, training=training)
        outputs = layers.Dense(self.input_dim, activation="linear")(x)
        self.decoder = tf.keras.Model(inputs, outputs, name="decoder")

U-Net with Pre-Trained ResNet throws dimension error must match

I have an RGB mask image for segmentation with dimensions 900x600 (width, height).
My U-Net code is as follows. I would rather not resize the output much; some resizing is fine as long as the aspect ratio is roughly preserved.
import torch
import torch.nn as nn
from torchvision import models
def convrelu(in_channels, out_channels, kernel, padding):
    """Return a ``Conv2d`` followed by an in-place ``ReLU`` as one module."""
    conv = nn.Conv2d(in_channels, out_channels, kernel, padding=padding)
    activation = nn.ReLU(inplace=True)
    return nn.Sequential(conv, activation)
class ResNetUNet(nn.Module):
    """U-Net style decoder on top of a pretrained ResNet-18 encoder.

    NOTE: every decoder stage doubles the spatial size and concatenates the
    result with the matching encoder feature map without any resizing, so
    ``torch.cat`` raises when the two sizes differ (this is the 57-vs-58
    error reported for the 600x900 input).

    Args:
        n_class: number of output channels of the final 1x1 convolution.
    """

    def __init__(self, n_class=1):
        super().__init__()

        self.base_model = models.resnet18(pretrained=True)
        self.base_layers = list(self.base_model.children())
        stages = self.base_layers

        # Encoder stages taken from ResNet-18, each paired with a 1x1 conv
        # that is applied to the skip connection.
        self.layer0 = nn.Sequential(*stages[:3])  # size=(N, 64, x.H/2, x.W/2)
        self.layer0_1x1 = convrelu(64, 64, 1, 0)
        self.layer1 = nn.Sequential(*stages[3:5])  # size=(N, 64, x.H/4, x.W/4)
        self.layer1_1x1 = convrelu(64, 64, 1, 0)
        self.layer2 = stages[5]  # size=(N, 128, x.H/8, x.W/8)
        self.layer2_1x1 = convrelu(128, 128, 1, 0)
        self.layer3 = stages[6]  # size=(N, 256, x.H/16, x.W/16)
        self.layer3_1x1 = convrelu(256, 256, 1, 0)
        self.layer4 = stages[7]  # size=(N, 512, x.H/32, x.W/32)
        self.layer4_1x1 = convrelu(512, 512, 1, 0)

        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Fusion convs applied after each upsample + concat.
        self.conv_up3 = convrelu(256 + 512, 512, 3, 1)
        self.conv_up2 = convrelu(128 + 512, 256, 3, 1)
        self.conv_up1 = convrelu(64 + 256, 256, 3, 1)
        self.conv_up0 = convrelu(64 + 256, 128, 3, 1)

        # Full-resolution branch operating directly on the input image.
        self.conv_original_size0 = convrelu(3, 64, 3, 1)
        self.conv_original_size1 = convrelu(64, 64, 3, 1)
        self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1)

        self.conv_last = nn.Conv2d(64, n_class, 1)

    def forward(self, input):
        """Return the ``n_class``-channel output map for ``input``."""
        x_original = self.conv_original_size1(self.conv_original_size0(input))

        layer0 = self.layer0(input)
        layer1 = self.layer1(layer0)
        layer2 = self.layer2(layer1)
        layer3 = self.layer3(layer2)
        layer4 = self.layer4(layer3)

        x = self.layer4_1x1(layer4)
        # (encoder feature, 1x1 conv for the skip, fusion conv), deepest first.
        decoder_stages = (
            (layer3, self.layer3_1x1, self.conv_up3),
            (layer2, self.layer2_1x1, self.conv_up2),
            (layer1, self.layer1_1x1, self.conv_up1),
            (layer0, self.layer0_1x1, self.conv_up0),
        )
        for skip, lateral, fuse in decoder_stages:
            x = self.upsample(x)
            x = torch.cat([x, lateral(skip)], dim=1)
            x = fuse(x)

        x = self.upsample(x)
        x = torch.cat([x, x_original], dim=1)
        x = self.conv_original_size2(x)
        return self.conv_last(x)
for this command
# check keras-like model summary using torchsummary
from torchsummary import summary

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNetUNet().to(device)
summary(model, input_size=(3, 600, 900))
it throws the error:
54 x = self.upsample(layer4)
55 layer3 = self.layer3_1x1(layer3)
---> 56 x = torch.cat([x, layer3], dim=1)
57 x = self.conv_up3(x)
58
RuntimeError: Sizes of tensors must match except in dimension 3. Got 57 and 58
I am not sure what to do here. Could someone help me solve this?
Try this. You just need to match the tensor shapes before torch.cat.
import torch
import torch.nn as nn
from torchvision import models
import torch.nn.functional as F
def match_shapes(x, y):
    """Resize ``x`` so its last two dimensions equal those of ``y``.

    ``x`` is returned untouched when the sizes already agree; otherwise it is
    resampled with nearest-neighbour interpolation.
    """
    target = y.shape[-2:]
    if x.shape[-2:] == target:
        return x
    return F.interpolate(x, target, mode='nearest')
def convrelu(in_channels, out_channels, kernel, padding):
    """Build a two-layer block: ``Conv2d`` then in-place ``ReLU``."""
    layers = [
        nn.Conv2d(in_channels, out_channels, kernel, padding=padding),
        nn.ReLU(inplace=True),
    ]
    return nn.Sequential(*layers)
class ResNetUNet(nn.Module):
    """U-Net style decoder on top of a pretrained ResNet-18 encoder.

    Before every skip concatenation the upsampled decoder tensor is resized
    with ``match_shapes`` to the spatial size of the encoder feature map, so
    the two tensors passed to ``torch.cat`` always agree in height and width.

    Args:
        n_class: number of output channels of the final 1x1 convolution.
    """

    def __init__(self, n_class=1):
        super().__init__()

        self.base_model = models.resnet18(pretrained=True)
        self.base_layers = list(self.base_model.children())

        # Encoder stages taken from ResNet-18, each paired with a 1x1 conv
        # that is applied to the skip connection.
        self.layer0 = nn.Sequential(*self.base_layers[:3])  # size=(N, 64, x.H/2, x.W/2)
        self.layer0_1x1 = convrelu(64, 64, 1, 0)
        self.layer1 = nn.Sequential(*self.base_layers[3:5])  # size=(N, 64, x.H/4, x.W/4)
        self.layer1_1x1 = convrelu(64, 64, 1, 0)
        self.layer2 = self.base_layers[5]  # size=(N, 128, x.H/8, x.W/8)
        self.layer2_1x1 = convrelu(128, 128, 1, 0)
        self.layer3 = self.base_layers[6]  # size=(N, 256, x.H/16, x.W/16)
        self.layer3_1x1 = convrelu(256, 256, 1, 0)
        self.layer4 = self.base_layers[7]  # size=(N, 512, x.H/32, x.W/32)
        self.layer4_1x1 = convrelu(512, 512, 1, 0)

        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Fusion convs applied after each upsample + concat.
        self.conv_up3 = convrelu(256 + 512, 512, 3, 1)
        self.conv_up2 = convrelu(128 + 512, 256, 3, 1)
        self.conv_up1 = convrelu(64 + 256, 256, 3, 1)
        self.conv_up0 = convrelu(64 + 256, 128, 3, 1)

        # Full-resolution branch operating directly on the input image.
        self.conv_original_size0 = convrelu(3, 64, 3, 1)
        self.conv_original_size1 = convrelu(64, 64, 3, 1)
        self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1)

        self.conv_last = nn.Conv2d(64, n_class, 1)

    def forward(self, input):
        """Return the ``n_class``-channel output map for ``input``."""
        x_original = self.conv_original_size0(input)
        x_original = self.conv_original_size1(x_original)

        layer0 = self.layer0(input)
        layer1 = self.layer1(layer0)
        layer2 = self.layer2(layer1)
        layer3 = self.layer3(layer2)
        layer4 = self.layer4(layer3)

        layer4 = self.layer4_1x1(layer4)
        x = self.upsample(layer4)
        layer3 = self.layer3_1x1(layer3)
        x = match_shapes(x, layer3)
        x = torch.cat([x, layer3], dim=1)
        x = self.conv_up3(x)

        x = self.upsample(x)
        layer2 = self.layer2_1x1(layer2)
        x = match_shapes(x, layer2)
        x = torch.cat([x, layer2], dim=1)
        x = self.conv_up2(x)

        x = self.upsample(x)
        layer1 = self.layer1_1x1(layer1)
        x = match_shapes(x, layer1)
        x = torch.cat([x, layer1], dim=1)
        x = self.conv_up1(x)

        x = self.upsample(x)
        layer0 = self.layer0_1x1(layer0)
        # Odd intermediate sizes can also appear at the two shallowest joins,
        # so resize here as well (no-op when the sizes already agree).
        x = match_shapes(x, layer0)
        x = torch.cat([x, layer0], dim=1)
        x = self.conv_up0(x)

        x = self.upsample(x)
        x = match_shapes(x, x_original)
        x = torch.cat([x, x_original], dim=1)
        x = self.conv_original_size2(x)

        out = self.conv_last(x)
        return out
# check keras-like model summary using torchsummary
from torchsummary import summary

# Pick the GPU when available, then build the model directly on that device.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
model = ResNetUNet()
model = model.to(device)
summary(model, input_size=(3, 600, 900))

Got 512 channels instead of 64 - what should I change in my Autoencoder?

ndf = 128
z_size = 512


# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
    """Convolutional autoencoder with a VAE-style latent bottleneck.

    NOTE: ``decode`` reshapes the output of ``d1`` to ``d_max`` (= 512)
    channels, while the first decoder layer is declared with 64 input
    channels; this is the channel mismatch described in the error message
    quoted after the code.
    """

    def __init__(self):
        super(MyNetwork, self).__init__()

        # Three 3x3 stride-1 convs: 3 -> 6 -> 16 -> 64 channels.
        enc_layers = []
        for c_in, c_out in ((3, 6), (6, 16), (16, 64)):
            enc_layers.append(nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1))
            enc_layers.append(nn.ReLU(True))
        self.encoder = nn.Sequential(*enc_layers)

        # Mirror of the encoder with transposed convs: 64 -> 16 -> 6 -> 3.
        dec_layers = []
        for c_in, c_out in ((64, 16), (16, 6), (6, 3)):
            dec_layers.append(nn.ConvTranspose2d(c_in, c_out, kernel_size=3, stride=1, padding=1))
            dec_layers.append(nn.ReLU(True))
        self.decoder = nn.Sequential(*dec_layers)

        flat_features = ndf * 4 * 8 * 16
        self.fc1 = nn.Linear(flat_features, z_size)
        self.fc2 = nn.Linear(flat_features, z_size)
        self.d1 = nn.Linear(z_size, ndf * 4 * 8 * 8)
        self.z_size = z_size
        self.d_max = ndf * 4

    def encode(self, x):
        """Return ``(mu, log_var)`` computed from the flattened encoder output."""
        features = self.encoder(x)
        flat = features.view(features.shape[0], -1)
        return self.fc1(flat), self.fc2(flat)

    def decode(self, x):
        """Project a latent batch through ``d1`` and run the conv decoder."""
        latent = x.view(x.shape[0], self.z_size)
        expanded = self.d1(latent)
        grid = expanded.view(expanded.shape[0], self.d_max, 8, 8)
        return self.decoder(grid)

    def reparameterize(self, mu, log_var):
        """Sample ``mu + eps * std`` with ``eps`` drawn from N(0, 1)."""
        std = torch.exp(0.5 * log_var)
        noise = torch.randn_like(std)
        sample = noise.mul(std)
        sample.add_(mu)
        return sample

    def forward(self, x):
        """Return ``(reconstruction, mu, log_var)`` for the batch ``x``."""
        mu, log_var = self.encode(x)
        mu = mu.squeeze()
        log_var = log_var.squeeze()
        z = self.reparameterize(mu, log_var)
        reconstruction = self.decode(z.view(-1, self.z_size, 1, 1))
        return reconstruction, mu, log_var
I have adapted code from a tutorial and I'm currently getting the error 'Given transposed=1, weight of size 64 16 3 3, expected input[16, 512, 8, 8] to have 64 channels, but got 512 channels instead' when attempting to run my autoencoder.
Could someone please explain how I should further adapt this code to work with the CIFAR10 dataset and a batch size of 16?
I skimmed through your code and found that the input and output dimensions were not consistent.
Assuming both the input and the output have shape 32x32x3, I adjusted the code as follows.
ndf = 128
z_size = 512


# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
    """Convolutional autoencoder with a VAE-style latent bottleneck.

    The latent vector is expanded by ``d1`` to ``ndf*4*8*16`` values and
    reshaped to (64, 32, 32), which matches the 64 input channels of the
    first transposed convolution in the decoder.
    """

    def __init__(self):
        super(MyNetwork, self).__init__()
        conv_kw = dict(kernel_size=3, stride=1, padding=1)

        self.encoder = nn.Sequential(
            nn.Conv2d(3, 6, **conv_kw),
            nn.ReLU(True),
            nn.Conv2d(6, 16, **conv_kw),
            nn.ReLU(True),
            nn.Conv2d(16, 64, **conv_kw),
            nn.ReLU(True),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 16, **conv_kw),
            nn.ReLU(True),
            nn.ConvTranspose2d(16, 6, **conv_kw),
            nn.ReLU(True),
            nn.ConvTranspose2d(6, 3, **conv_kw),
            nn.ReLU(True),
        )

        flat_features = ndf * 4 * 8 * 16
        self.fc1 = nn.Linear(flat_features, z_size)
        self.fc2 = nn.Linear(flat_features, z_size)
        self.d1 = nn.Linear(z_size, flat_features)
        self.z_size = z_size
        self.d_max = ndf * 4

    def encode(self, x):
        """Return ``(mu, log_var)`` computed from the flattened encoder output."""
        encoded = self.encoder(x)
        encoded = encoded.view(encoded.shape[0], -1)
        mu = self.fc1(encoded)
        log_var = self.fc2(encoded)
        return mu, log_var

    def decode(self, x):
        """Expand a latent batch to (64, 32, 32) and run the conv decoder."""
        batch = x.shape[0]
        hidden = self.d1(x.view(batch, self.z_size))
        hidden = hidden.view(hidden.shape[0], 64, 32, 32)
        return self.decoder(hidden)

    def reparameterize(self, mu, log_var):
        """Sample ``mu + eps * std`` with ``eps`` drawn from N(0, 1)."""
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        scaled = eps.mul(std)
        return scaled.add_(mu)

    def forward(self, x):
        """Return ``(reconstruction, mu, log_var)`` for the batch ``x``."""
        mu, log_var = self.encode(x)
        mu, log_var = mu.squeeze(), log_var.squeeze()
        z = self.reparameterize(mu, log_var)
        z = z.view(-1, self.z_size, 1, 1)
        return self.decode(z), mu, log_var
Hope this code works :)
The input to the decoder (x = self.decoder(x) in the decode method) should have 64 channels, as defined by nn.ConvTranspose2d(64,16,kernel_size=3, stride=1, padding=1).
To achieve this, make the following changes:
Change self.d1 = nn.Linear(z_size, ndf*4*8*8) to self.d1 = nn.Linear(z_size, ndf*4*8*16).
Change x = x.view(x.shape[0], self.d_max, 8,8) in the decode method to x = x.view(x.shape[0], 64, 32, 32).
Use the print statements to analyze the shape of input tensor at different layers:
ndf = 128
z_size = 512


# define the model (a simple autoencoder)
class MyNetwork(nn.Module):
    """Convolutional autoencoder with a VAE-style latent bottleneck.

    ``encode`` and ``decode`` print the tensor shape after every step so the
    shapes can be followed through the network.
    """

    def __init__(self):
        super(MyNetwork, self).__init__()

        encoder_layers = [
            nn.Conv2d(3, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(6, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(16, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.ConvTranspose2d(64, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(16, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(6, 3, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

        n_flat = ndf * 4 * 8 * 16
        self.fc1 = nn.Linear(n_flat, z_size)
        self.fc2 = nn.Linear(n_flat, z_size)
        self.d1 = nn.Linear(z_size, n_flat)
        self.z_size = z_size
        self.d_max = ndf * 4

    def encode(self, x):
        """Return ``(mu, log_var)``, printing each intermediate shape."""
        print('encoder')
        print(x.shape)
        features = self.encoder(x)
        print(features.shape)
        flat = features.view(features.shape[0], -1)
        print(flat.shape)
        mu = self.fc1(flat)
        print(mu.shape)
        log_var = self.fc2(flat)
        print(log_var.shape)
        return mu, log_var

    def decode(self, x):
        """Decode a latent batch, printing each intermediate shape."""
        print('decoder')
        print(x.shape)
        latent = x.view(x.shape[0], self.z_size)
        print(latent.shape)
        expanded = self.d1(latent)
        print(expanded.shape)
        grid = expanded.view(expanded.shape[0], 64, 32, 32)
        print(grid.shape)
        result = self.decoder(grid)
        print(result.shape)
        return result

    def reparameterize(self, mu, log_var):
        """Sample ``mu + eps * std`` with ``eps`` drawn from N(0, 1)."""
        std = torch.exp(0.5 * log_var)
        draw = torch.randn_like(std)
        draw = draw.mul(std)
        draw.add_(mu)
        return draw

    def forward(self, x):
        """Return ``(reconstruction, mu, log_var)`` for the batch ``x``."""
        mu, log_var = self.encode(x)
        mu = mu.squeeze()
        log_var = log_var.squeeze()
        z = self.reparameterize(mu, log_var)
        decoded = self.decode(z.view(-1, self.z_size, 1, 1))
        return decoded, mu, log_var

PyTorch runtime error : invalid argument 0: Sizes of tensors must match except in dimension 1

I have a PyTorch model and I'm trying to test it by performing a forward pass. Here is the code:
class ResBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a 1x1 shortcut.

    Args:
        inplanes: number of input channels.
        planes: number of output channels.
        stride: spatial stride of the block. It is applied once on the main
            path (first 3x3 conv) and once on the 1x1 shortcut, so both
            branches produce the same spatial size for any stride.
    """

    def __init__(self, inplanes, planes, stride=1):
        super(ResBlock, self).__init__()
        # The shortcut must downsample by the same factor as the main path,
        # otherwise the addition in forward() fails for stride != 1.
        self.conv1x1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        # batch normalization
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU()
        # Only the first conv strides; a second strided conv would shrink the
        # main path by stride**2 while the shortcut shrinks by stride.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        residual = self.conv1x1(x)
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # adding the skip connection
        out += residual
        out = self.relu(out)
        return out
class ResUnet (nn.Module):
    """U-Net style encoder/decoder built from ResBlock units.

    The encoder stages e1..e8 each contain one kernel-4, stride-2 convolution;
    the decoder stages d1..d8 each contain one kernel-4, stride-2 transposed
    convolution. Decoder outputs are concatenated channel-wise with encoder
    features (see forward), which is why d2..d8 take twice as many input
    channels as the previous decoder stage emits.
    """
    def __init__(self, in_shape, num_classes):
        """Build the network.

        in_shape is unpacked as (in_channels, height, width); only
        in_channels is used below. num_classes is the number of output
        channels of the final 1x1 convolution.
        """
        super(ResUnet, self).__init__()
        in_channels, height, width = in_shape
        #
        #self.L1 = IncResBlock(in_channels,64)
        # ---- Encoder ----
        self.e1 = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=4, stride=2,padding=1),
            ResBlock(64,64))
        self.e2 = nn.Sequential(
            nn.LeakyReLU(0.2,),
            nn.Conv2d(64, 128, kernel_size=4, stride=2,padding=1),
            nn.BatchNorm2d(128),
            ResBlock(128,128))
        #
        # e2add / e4add / e6add: stride-1 3x3 conv + BN, spatial size unchanged
        self.e2add = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1,padding=1),
            nn.BatchNorm2d(128))
        #
        ##
        self.e3 = nn.Sequential(
            nn.LeakyReLU(0.2,inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1,padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2,),
            nn.Conv2d(128,256, kernel_size=4, stride=2,padding=1),
            nn.BatchNorm2d(256),
            ResBlock(256,256))
        self.e4 = nn.Sequential(
            nn.LeakyReLU(0.2,),
            nn.Conv2d(256,512, kernel_size=4, stride=2,padding=1),
            nn.BatchNorm2d(512),
            ResBlock(512,512))
        #
        self.e4add = nn.Sequential(
            nn.Conv2d(512,512, kernel_size=3, stride=1,padding=1),
            nn.BatchNorm2d(512))
        #
        self.e5 = nn.Sequential(
            nn.LeakyReLU(0.2,inplace=True),
            nn.Conv2d(512,512, kernel_size=3, stride=1,padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2,),
            nn.Conv2d(512,512, kernel_size=4, stride=2,padding=1),
            nn.BatchNorm2d(512),
            ResBlock(512,512))
        #
        #
        self.e6 = nn.Sequential(
            nn.LeakyReLU(0.2,),
            nn.Conv2d(512,512, kernel_size=4, stride=2,padding=1),
            nn.BatchNorm2d(512),
            ResBlock(512,512))
        #
        self.e6add = nn.Sequential(
            nn.Conv2d(512,512, kernel_size=3, stride=1,padding=1),
            nn.BatchNorm2d(512))
        #
        self.e7 = nn.Sequential(
            nn.LeakyReLU(0.2,inplace=True),
            nn.Conv2d(512,512, kernel_size=3, stride=1,padding=1),
            nn.BatchNorm2d(512),
            nn.LeakyReLU(0.2,),
            nn.Conv2d(512,512, kernel_size=4, stride=2,padding=1),
            nn.BatchNorm2d(512),
            ResBlock(512,512))
        #
        # Innermost encoder stage: no batch norm and no ResBlock.
        self.e8 = nn.Sequential(
            nn.LeakyReLU(0.2,),
            nn.Conv2d(512,512, kernel_size=4, stride=2,padding=1))
            #nn.BatchNorm2d(512))
        # ---- Decoder ----
        # d1..d3 include Dropout(p=0.5); d4..d8 do not.
        self.d1 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(512, 512, kernel_size=4, stride=2,padding=1),
            nn.BatchNorm2d(512),
            nn.Dropout(p=0.5),
            ResBlock(512,512))
        #
        # From d2 on, the input is [encoder skip, previous decoder output]
        # concatenated on the channel axis (e.g. 512 + 512 = 1024).
        self.d2 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(1024, 512, kernel_size=4, stride=2,padding=1),
            nn.BatchNorm2d(512),
            nn.Dropout(p=0.5),
            ResBlock(512,512))
        #
        self.d3 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(1024, 512, kernel_size=4, stride=2,padding=1),
            nn.BatchNorm2d(512),
            nn.Dropout(p=0.5),
            ResBlock(512,512))
        #
        self.d4 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(1024, 512, kernel_size=4, stride=2,padding=1),
            nn.BatchNorm2d(512),
            ResBlock(512,512))
        #
        self.d5 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(1024, 256, kernel_size=4, stride=2,padding=1),
            nn.BatchNorm2d(256),
            ResBlock(256,256))
        #
        self.d6 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(512, 128, kernel_size=4, stride=2,padding=1),
            nn.BatchNorm2d(128),
            ResBlock(128,128))
        #
        self.d7 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(256, 64, kernel_size=4, stride=2,padding=1),
            nn.BatchNorm2d(64),
            ResBlock(64,64))
        #
        self.d8 = nn.Sequential(
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2,padding=1))
            #nn.BatchNorm2d(64),
            #nn.ReLU())
        # Final 1x1 conv mapping the 64 decoder channels to num_classes.
        self.out_l = nn.Sequential(
            nn.Conv2d(64,num_classes,kernel_size=1,stride=1))
            #nn.ReLU())
    def forward(self, x):
        """Run the encoder, then the decoder with skip concatenations.

        Each torch.cat below joins an encoder feature map with a decoder
        output along dim 1, so their remaining dimensions must be equal.
        """
        #Image Encoder
        #### Encoder #####
        en1 = self.e1(x)
        en2 = self.e2(en1)
        en2add = self.e2add(en2)
        en3 = self.e3(en2add)
        en4 = self.e4(en3)
        en4add = self.e4add(en4)
        en5 = self.e5(en4add)
        en6 = self.e6(en5)
        en6add = self.e6add(en6)
        en7 = self.e7(en6add)
        en8 = self.e8(en7)
        #### Decoder ####
        # Skips are taken from en7, en6add, en5, en4add, en3, en2add, en1.
        de1_ = self.d1(en8)
        de1 = torch.cat([en7,de1_],1)
        de2_ = self.d2(de1)
        de2 = torch.cat([en6add,de2_],1)
        de3_ = self.d3(de2)
        de3 = torch.cat([en5,de3_],1)
        de4_ = self.d4(de3)
        de4 = torch.cat([en4add,de4_],1)
        de5_ = self.d5(de4)
        de5 = torch.cat([en3,de5_],1)
        de6_ = self.d6(de5)
        de6 = torch.cat([en2add,de6_],1)
        de7_ = self.d7(de6)
        de7 = torch.cat([en1,de7_],1)
        de8 = self.d8(de7)
        out_l_mask = self.out_l(de8)
        return out_l_mask
Here is how I attempt to test it:
# Forward pass on one single-channel 512x512 image.
in_shape = (1, 512, 512)
modl = ResUnet(in_shape, 1)
x = torch.rand(1, *in_shape)
modl(x)
This works fine, as does any input size that is a multiple of 64.
If I try:
# Forward pass on one single-channel 320x320 image.
in_shape = (1, 320, 320)
modl = ResUnet(in_shape, 1)
x = torch.rand(1, *in_shape)
modl(x)
It throws an error
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-46-4ddc821c365b> in <module>
----> 1 modl(x)
~/.conda/envs/torch0.4/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
475 result = self._slow_forward(*input, **kwargs)
476 else:
--> 477 result = self.forward(*input, **kwargs)
478 for hook in self._forward_hooks.values():
479 hook_result = hook(self, input, result)
<ipython-input-36-f9eeefa3c0b8> in forward(self, x)
221 de2_ = self.d2(de1)
222 #print de2_.size()
--> 223 de2 = torch.cat([en6add,de2_],1)
224 #print de2.size()
225
RuntimeError: invalid argument 0: Sizes of tensors must match except in dimension 1. Got 5 and 4 in dimension 2 at /opt/conda/conda-bld/pytorch_1535491974311/work/aten/src/TH/generic/THTensorMath.cpp:3616
I figure the problem is caused by the input size not being a power of 2, but I am not sure how to rectify it for the given input dimensions (320, 320).
This issue arises from mismatch in size between the variables in the downsampling (encoder) path and the upsampling (decoder) path. Your code is huge and difficult to understand, but by inserting print statements, we can check that
en6add is of size [1, 512, 5, 5]
en7 is [1, 512, 2, 2]
en8 is [1, 512, 1, 1]
then upsampling goes as powers of two: de1_ is [1, 512, 2, 2]
de1 [1, 1024, 2, 2]
de2_ [1, 512, 4, 4]
at which point you try to concatenate it with en6add, so apparently the code creating de2_ is not "upsampling enough". My strong guess is that you need to pay attention to the output_padding parameter of nn.ConvTranspose2d and possibly set it to 1 in a couple of places. I would try to fix this error for you, but that example is so far from being minimal that I can't wrap my head around the whole of it.