How can I duplicate the Resnet50 to five branches? - deep-learning

Below you see the network structure of the ResNet50. What I want to do is duplicate the last convolution layers to five branches for some spesific task, where each branch will consist of two FC layers. How can I do that in the Pytorch, where Resnet50 is already loaded as
ResNet50 = torchvision.models.resnet50(pretrained=True)

One way to accomplish this is to index the children of the resnet model and then attatch a sequential after that pair of conv blocks. One great implementation can be found here:
You can use this same principal to replace the vgg with your resnet.Play close attention to how they slice the model and then add a linear sequential.
class BCNN(nn.Module):
def __init__(self):
super(BCNN,self).__init__()
# Load pretrained model
vgg_model = models.vgg16_bn(pretrained=True)
self.Conv1 = nn.Sequential(*list(vgg_model.features.children())[0:7])
self.Conv2 = nn.Sequential(*list(vgg_model.features.children())[7:14])
# Level-1 classifier after second conv block
self.level_one_clf = nn.Sequential(nn.Linear(128*56*56, 256),
nn.ReLU(),
nn.BatchNorm1d(256),
nn.Dropout(0.5),
nn.Linear(256, 256),
nn.BatchNorm1d(256),
nn.Dropout(0.5),
nn.Linear(256, 2))
self.Conv3 = nn.Sequential(*list(vgg_model.features.children())[14:24])
# Level-2 classifier after third conv block
self.level_two_clf = nn.Sequential(nn.Linear(256*28*28, 1024),
nn.ReLU(),
nn.BatchNorm1d(1024),
nn.Dropout(0.5),
nn.Linear(1024, 1024),
nn.BatchNorm1d(1024),
nn.Dropout(0.5),
nn.Linear(1024, 7))
self.Conv4 = nn.Sequential(*list(vgg_model.features.children())[24:34])
self.Conv5 = nn.Sequential(*list(vgg_model.features.children())[34:44])
# Level-3 classifier after fifth conv block
self.level_three_clf = nn.Sequential(nn.Linear(512*7*7, 4096),
nn.ReLU(),
nn.BatchNorm1d(4096),
nn.Dropout(0.5),
nn.Linear(4096, 4096),
nn.BatchNorm1d(4096),
nn.Dropout(0.5),
nn.Linear(4096, 25))
def forward(self,x):
x = self.Conv1(x)
x = self.Conv2(x)
lvl_one = x.view(x.size(0), -1)
lvl_one = self.level_one_clf(lvl_one)
x = self.Conv3(x)
lvl_two = x.view(x.size(0), -1)
lvl_two = self.level_two_clf(lvl_two)
x = self.Conv4(x)
x = self.Conv5(x)
lvl_three = x.view(x.size(0), -1)
lvl_three = self.level_three_clf(lvl_three)
return lvl_one, lvl_two, lvl_three

Related

Temporal sequence feature extraction CNN, batches with different dimensions

I am using a CNN to extract features from temporal data of different lengths. I am using pad_sequence to pad the data in a batch. However as the max length in a batch will change, the padded sequence length differs by batch. This creates errors when i flatten the data for the FCN layer (as the dimension of the flattened vector changes). I am currently handling this by using an 'adaptive avg pooling layer' in before the FCN layers. As this is a global averaging, it fixes the output dimension for the FCN. However I am not sure if this is the correct thing to do.
Code is:
##pad tensors
def pad_collate(batch):
sequences = [item[0] for item in batch]
lengths = [len(seq) for seq in sequences]
padded_sequences = pad_sequence(sequences, batch_first=True, padding_value=0)
return padded_sequences, lengths
## Create dataloader
trainData = Sequence(root = path)
trainDataLoader = DataLoader(trainData, batch_size = BATCH_SIZE, collate_fn= pad_collate)
## CNN model
class FeatureExtractor(nn.Module):
def __init__(self, block, layers):
super(FeatureExtractor, self).__init__()
self.inplanes = 6
## 1st CONV layers
self.conv1 = nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 3, stride = 2, padding = 4)
self.bn1 = nn.BatchNorm2d(6)
self.relu1 = nn.ReLU()
self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride = 2, padding = 1)
## residual blocks
self.layer0 = self._make_layer(block, 12, layers[0], stride = 1)
self.layer1 = self._make_layer(block, 24, layers[1], stride = 2)
self.avgpool = nn.AdaptiveAvgPool2d((5,5)) ##### MY CURRENT SOLUTION #####
self.fc = nn.Linear(600, 128)
def _make_layer(self, block, planes, blocks, stride):
downsample = None
if stride != 1 or self.inplanes != planes:
downsample = nn.Sequential(nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
nn.BatchNorm2d(planes))
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def forward(self, x):
## first conv
x = self.conv1(x)
x = self.bn1(x)
x = self.relu1(x)
x = self.maxpool1(x)
## conv blocks
x = self.layer0(x)
x = self.layer1(x)
##FCN layer
x = self.avgpool(x)
x = torch.flatten(x, 1)
output = self.fc(x)
return output
Any other comments are also welcome (i am self-taught)

Neural Network cannot overfit even one sample

I am using neural network for a regression task.
My input is an gray image whose size is 100x70x1.
The gray area has a unique value 60.
The input will go through a preprocessing layer, which multiply 1./255 on every pixel value.
My output is just three double number: [0.87077969, 0.98989031, 0.98888382]
I used ResNet152 model as shown below:
class Bottleneck(tf.keras.Model):
expansion = 4
def __init__(self, in_channels, out_channels, strides=1):
super(Bottleneck, self).__init__()
self.conv1 = tf.keras.layers.Conv2D(out_channels, 1, 1, use_bias=False)
self.bn1 = tf.keras.layers.BatchNormalization()
self.conv2 = tf.keras.layers.Conv2D(out_channels, 3, strides, padding="same", use_bias=False)
self.bn2 = tf.keras.layers.BatchNormalization()
self.conv3 = tf.keras.layers.Conv2D(out_channels*self.expansion, 1, 1, use_bias=False)
self.bn3 = tf.keras.layers.BatchNormalization()
if strides != 1 or in_channels != self.expansion * out_channels:
self.shortcut = tf.keras.Sequential([
tf.keras.layers.Conv2D(self.expansion*out_channels, kernel_size=1,
strides=strides, use_bias=False),
tf.keras.layers.BatchNormalization()]
)
else:
self.shortcut = lambda x,_: x
def call(self, x, training=False):
out = tf.nn.elu(self.bn1(self.conv1(x), training))
out = tf.nn.elu(self.bn2(self.conv2(out), training))
out = self.bn3(self.conv3(out), training)
out += self.shortcut(x, training)
return tf.nn.elu(out)
class ResNet(tf.keras.Model):
def __init__(self, block, num_blocks):
super(ResNet, self).__init__()
self.in_channels = 64
self.conv1 = tf.keras.layers.Conv2D(64, 7, 2, padding="same", use_bias=False) # 60x60
self.bn1 = tf.keras.layers.BatchNormalization()
self.pool1 = tf.keras.layers.MaxPool2D(pool_size=(3, 3), strides=2, padding='same') # 30x30
self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
self.avg_pool2d = tf.keras.layers.GlobalAveragePooling2D()
self.flatten = tf.keras.layers.Flatten()
def _make_layer(self, block, out_channels, num_blocks, stride):
strides = [stride] + [1] * (num_blocks - 1)
layers = []
for stride in strides:
layers.append(block(self.in_channels, out_channels, stride))
self.in_channels = out_channels * block.expansion
return tf.keras.Sequential(layers)
def call(self, x, training=False):
out = self.pool1(tf.nn.elu(self.bn1(self.conv1(x), training)))
out = self.layer1(out, training=training)
out = self.layer2(out, training=training)
out = self.layer3(out, training=training)
out = self.layer4(out, training=training)
# For classification
out = self.flatten(out)
# out = tf.keras.layers.Reshape((out.shape[-1],))(out)
#out = self.linear(out)
return out
def model(self):
x = tf.keras.layers.Input(shape=(100,70,1))
return tf.keras.Model(inputs=[x], outputs=self.call(x))
def ResNet152():
return ResNet(Bottleneck, [3,8,36,3])
I used elu as activation function and changed the GlobalAveragePooling layer into flatten layer at the end of ResNet.
Before output I stack two Dense layer(2048 units and 3 units) on top of the ResNet model.
For training I used adam optimizer and inital learning rate is 1e-4, which will decreasing by factor 10 when the val_loss not decreasing for 3 epoch.
The loss is just mse error.
After early stopping while learning rate is 1e-8, the mse loss is still very high:8.6225
The prediction is [2.92318237, 5.53124916, 3.00686643] which is far away from the ground truth: [0.87077969, 0.98989031, 0.98888382]
I don't know why such a deep network cannot overfit such a sample.
Is this the reason that my input image has too few information? Could someone help me?

GAN, generate regression output by the real image, not from the random noise

Is this concept possible to be implemented with the GAN algorithm?
I want the GAN to generate a regression-output(G-Value) of the shape(4,) by the real-image, not from the random noise, and discriminate G-Value with real regression-value(R-Value) of the same shape(4, ). R-Value is of the "y-train" dataset.
It means that if an image has a pattern like circular, it generally has the 4 features of position x, y, z, and alpha. I call it Real-Value(R-Value) and I want the GAN to generate fake value (G-Value) fooling the discriminator.
I have tried to implement it as below.
class UTModel:
def __init__(self):
optimizer__ = Adam(2e-4)
self.__dropout = .3
self.optimizerGenerator = Adam(1e-4)
self.optimizerDiscriminator = Adam(1e-4)
self.generator, self.discriminator = self.build()
def build(self):
# build the generator
g = Sequential()
g.add(Conv2D(512, kernel_size=3, strides=2, input_shape=(128, 128, 1), padding='same'))
g.add(BatchNormalization(momentum=0.8))
g.add(LeakyReLU(alpha=0.2))
g.add(Dropout(self.__dropout))
g.add(Conv2D(256, kernel_size=3, strides=2, padding='same'))
g.add(BatchNormalization(momentum=0.8))
g.add(LeakyReLU(alpha=0.2))
g.add(Dropout(self.__dropout))
g.add(Conv2D(128, kernel_size=3, strides=2, padding='same'))
g.add(BatchNormalization(momentum=0.8))
g.add(LeakyReLU(alpha=0.2))
g.add(Dropout(self.__dropout))
g.add(Conv2D(64, kernel_size=3, strides=1, padding='same'))
g.add(BatchNormalization(momentum=0.8))
g.add(LeakyReLU(alpha=0.2))
g.add(Dropout(self.__dropout))
g.add(Flatten())
g.add(Dense(4, activation='linear'))
# build the discriminator
d = Sequential()
d.add(Dense(128, input_shape=(4,)))
d.add(LeakyReLU(alpha=0.2))
d.add(Dropout(self.__dropout))
d.add(Dense(64))
d.add(LeakyReLU(alpha=0.2))
d.add(Dropout(self.__dropout))
d.add(Dense(64))
d.add(LeakyReLU(alpha=0.2))
d.add(Dropout(self.__dropout))
d.add(Dense(32))
d.add(LeakyReLU(alpha=0.2))
d.add(Dropout(self.__dropout))
d.add(Dense(1, activation='sigmoid'))
return g, d
def computeLosses(self, rValid, fValid):
bce = BinaryCrossentropy(from_logits=True)
# Discriminator loss
rLoss = bce(tf.ones_like(rValid), rValid)
fLoss = bce(tf.zeros_like(fValid), fValid)
dLoss = rLoss + fLoss
# Generator loss
gLoss = bce(tf.zeros_like(fValid), fValid)
return dLoss, gLoss
def train(self, images, rValues):
with tf.GradientTape() as gTape, tf.GradientTape() as dTape:
gValues = self.generator(images, training=True)
rValid = self.discriminator(rValues, training=True)
fValid = self.discriminator(gValues, training=True)
dLoss, gLoss = self.computeLosses(rValid, fValid)
dGradients = dTape.gradient(dLoss, self.discriminator.trainable_variables)
gGradients = gTape.gradient(gLoss, self.generator.trainable_variables)
self.optimizerDiscriminator.apply_gradients(zip(dGradients, self.discriminator.trainable_variables))
self.optimizerGenerator.apply_gradients(zip(gGradients, self.generator.trainable_variables))
print (dLoss, gLoss)
class UTTrainer:
def __init__(self):
self.env = 3DPatterns()
self.model = UTModel()
def start(self):
if not self.env.available:
return
batch = 32
for epoch in range(1):
# set new episod
while self.env.setEpisod():
for i in range(0, self.env.episodelen, batch):
self.model.train(self.env.episode[i:i+batch], self.env.y[i:i+batch])
But the G-Values have not generated as valid values. It converges the 1 or -1 always. The proper value should be like [-0.192798, 0.212887, -0.034519, -0.015000]. Please help me to find the right way.
Thank you.

Custom max_pool layer: ValueError: The channel dimension of the inputs should be defined. Found `None`

I am working on tensorflow2 and I am trying to implement Max unpool with indices to implement SegNet.
When I run it I get the following problem. I am defining the def MaxUnpool2D and then calling it in the model. I suppose that the problem is given by the fact that updates and mask have got shape (None, H,W,ch).
def MaxUnpooling2D(updates, mask):
size = 2
mask = tf.cast(mask, 'int32')
input_shape = tf.shape(updates, out_type='int32')
# calculation new shape
output_shape = (
input_shape[0],
input_shape[1]*size,
input_shape[2]*size,
input_shape[3])
# calculation indices for batch, height, width and feature maps
one_like_mask = tf.ones_like(mask, dtype='int32')
batch_shape = tf.concat(
[[input_shape[0]], [1], [1], [1]],
axis=0)
batch_range = tf.reshape(
tf.range(output_shape[0], dtype='int32'),
shape=batch_shape)
b = one_like_mask * batch_range
y = mask // (output_shape[2] * output_shape[3])
x = (mask // output_shape[3]) % output_shape[2]
feature_range = tf.range(output_shape[3], dtype='int32')
f = one_like_mask * feature_range
updates_size = tf.size(updates)
indices = K.transpose(K.reshape(
tf.stack([b, y, x, f]),
[4, updates_size]))
values = tf.reshape(updates, [updates_size])
return tf.scatter_nd(indices, values, output_shape)
def segnet_conv(
inputs,
kernel_size=3,
kernel_initializer='glorot_uniform',
batch_norm = False,
**kwargs):
conv1 = Conv2D(
filters=64,
kernel_size=kernel_size,
padding='same',
activation=None,
kernel_initializer=kernel_initializer,
name='conv_1'
)(inputs)
if batch_norm:
conv1 = BatchNormalization(name='bn_1')(conv1)
conv1 = LeakyReLU(alpha=0.3, name='activation_1')(conv1)
conv1 = Conv2D(
filters=64,
kernel_size=kernel_size,
padding='same',
activation=None,
kernel_initializer=kernel_initializer,
name='conv_2'
)(conv1)
if batch_norm:
conv1 = BatchNormalization(name='bn_2')(conv1)
conv1 = LeakyReLU(alpha=0.3, name='activation_2')(conv1)
pool1, mask1 = tf.nn.max_pool_with_argmax(
input=conv1,
ksize=2,
strides=2,
padding='SAME'
)
def segnet_deconv(
pool1,
mask1,
kernel_size=3,
kernel_initializer='glorot_uniform',
batch_norm = False,
**kwargs
):
dec = MaxUnpooling2D(pool5, mask5)
dec = Conv2D(
filters=512,
kernel_size=kernel_size,
padding='same',
activation=None,
kernel_initializer=kernel_initializer,
name='upconv_13'
)(dec)
def classifier(
dec,
ch_out=2,
kernel_size=3,
final_activation=None,
batch_norm = False,
**kwargs
):
dec = Conv2D(
filters=64,
kernel_size=kernel_size,
activation='relu',
padding='same',
name='dec_out1'
)(dec)
#tf.function
def segnet(
inputs,
ch_out=2,
kernel_size=3,
kernel_initializer='glorot_uniform',
final_activation=None,
batch_norm = False,
**kwargs
):
pool5, mask1, mask2, mask3, mask4, mask5 = segnet_conv(
inputs,
kernel_size=3,
kernel_initializer='glorot_uniform',
batch_norm = False
)
dec = segnet_deconv(
pool5,
mask1,
mask2,
mask3,
mask4,
mask5,
kernel_size=kernel_size,
kernel_initializer=kernel_initializer,
batch_norm = batch_norm
)
output = classifier(
dec,
ch_out=2,
kernel_size=3,
final_activation=None,
batch_norm = batch_norm
)
return output
inputs = Input(shape=(*params['image_size'], params['num_channels']), name='input')
outputs = segnet(inputs, n_labels=2, kernel=3, pool_size=(2, 2), output_mode=None)
# we define our U-Net to output logits
model = Model(inputs, outputs)
Can you please help me with this problem?
I have solved the problem. If someone will need here is the code for MaxUnpooling2D:
def MaxUnpooling2D(pool, ind, output_shape, batch_size, name=None):
"""
Unpooling layer after max_pool_with_argmax.
Args:
pool: max pooled output tensor
ind: argmax indices
ksize: ksize is the same as for the pool
Return:
unpool: unpooling tensor
:param batch_size:
"""
with tf.compat.v1.variable_scope(name):
pool_ = tf.reshape(pool, [-1])
batch_range = tf.reshape(tf.range(batch_size, dtype=ind.dtype), [tf.shape(pool)[0], 1, 1, 1])
b = tf.ones_like(ind) * batch_range
b = tf.reshape(b, [-1, 1])
ind_ = tf.reshape(ind, [-1, 1])
ind_ = tf.concat([b, ind_], 1)
ret = tf.scatter_nd(ind_, pool_, shape=[batch_size, output_shape[1] * output_shape[2] * output_shape[3]])
# the reason that we use tf.scatter_nd: if we use tf.sparse_tensor_to_dense, then the gradient is None, which will cut off the network.
# But if we use tf.scatter_nd, the gradients for all the trainable variables will be tensors, instead of None.
# The usage for tf.scatter_nd is that: create a new tensor by applying sparse UPDATES(which is the pooling value) to individual values of slices within a
# zero tensor of given shape (FLAT_OUTPUT_SHAPE) according to the indices (ind_). If we ues the orignal code, the only thing we need to change is: changeing
# from tf.sparse_tensor_to_dense(sparse_tensor) to tf.sparse_add(tf.zeros((output_sahpe)),sparse_tensor) which will give us the gradients!!!
ret = tf.reshape(ret, [tf.shape(pool)[0], output_shape[1], output_shape[2], output_shape[3]])
return ret

Combining Two CNN's

I Want to Combine Two CNN Into Just One In Keras, What I Mean Is that I Want The Neural Network To Take Two Images And Process Each One in Separate CNN, and Then Concatenate Them Together Into The Flattening Layer and Use Fully Connected Layer to Do The Last Work, Here What I Did:
# Start With First Branch ############################################################
branch_one = Sequential()
# Adding The Convolution
branch_one.add(Conv2D(32, (3,3),input_shape = (64,64,3) , activation = 'relu'))
branch_one.add(Conv2D(32, (3, 3), activation='relu'))
# Doing The Pooling Phase
branch_one.add(MaxPooling2D(pool_size=(2, 2)))
branch_one.add(Dropout(0.25))
branch_one.add(Flatten())
# Start With Second Branch ############################################################
branch_two = Sequential()
# Adding The Convolution
branch_two.add(Conv2D(32, (3,3),input_shape = (64,64,3) , activation = 'relu'))
branch_two.add(Conv2D(32, (3, 3), activation='relu'))
# Doing The Pooling Phase
branch_two.add(MaxPooling2D(pool_size=(2, 2)))
branch_two.add(Dropout(0.25))
branch_two.add(Flatten())
# Making The Combinition ##########################################################
final = Sequential()
final.add(Concatenate([branch_one, branch_two]))
final.add(Dense(units = 128, activation = "relu"))
final.add(Dense(units = 1, activation = "sigmoid"))
# Doing The Compilation
final.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
# Adding and Pushing The Images to CNN
# use ImageDataGenerator to preprocess the data
from keras.preprocessing.image import ImageDataGenerator
# augment the data that we have
train_datagen = ImageDataGenerator(rescale = 1./255,
shear_range = 0.2,
zoom_range = 0.2,
horizontal_flip = True)
test_datagen = ImageDataGenerator(rescale = 1./255)
# prepare training data
X1 = train_datagen.flow_from_directory('./ddsm1000_resized/images/train',
target_size = (64, 64),
batch_size = 32,
class_mode = 'binary')
X2 = train_datagen.flow_from_directory('./ddsm1000_resized_canny/images/train',
target_size = (64, 64),
batch_size = 32,
class_mode = 'binary')
# prepare test data
Y1 = test_datagen.flow_from_directory('./ddsm1000_resized/images/test',
target_size = (64, 64),
batch_size = 32,
class_mode = 'binary')
Y2 = test_datagen.flow_from_directory('./ddsm1000_resized_canny/images/test',
target_size = (64, 64),
batch_size = 32,
class_mode = 'binary')
final.fit_generator([X1, X2], steps_per_epoch = (8000 / 32), epochs = 1, validation_data = [Y1,Y2], validation_steps = 2000)
Keras Telling Me
RuntimeError: You must compile your model before using it.
I Think That is The CNN Does not the shapes of input data, so what Can I Do Here ?? Thanks
Make the change as pointed below:
from keras.layers import Merge
...
...
# Making The Combinition ##########################################################
final = Sequential()
final.add(Merge([branch_one, branch_two], mode = 'concat'))
...
...