Trying to understand why my custom ResNet50 gives worse performance than transfer learning (without weights) - deep-learning

I am doing a deep learning project (binary classification) with a ResNet on a small dataset (896 total images). I have tried several models, and ResNet gives me the best performance, even though the model suffers from exploding gradients with the SGD optimizer (Adam converges faster but fluctuates much more). (Code source)
But the model performs better when I use ResNet50 via transfer learning without loading any pre-trained weights (weights=None).
To my understanding, both models should perform similarly, but with my limited coding experience I cannot work out why I get different results.
def identity_block(input_tensor, kernel_size, filters, stage, block):
    filters1, filters2, filters3 = filters
    bn_axis = 3
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    x = layers.Conv2D(filters1, (1, 1),
                      kernel_initializer='he_normal',
                      name=conv_name_base + '2a')(input_tensor)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(filters2, kernel_size,
                      padding='same',
                      kernel_initializer='he_normal',
                      name=conv_name_base + '2b')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(filters3, (1, 1),
                      kernel_initializer='he_normal',
                      name=conv_name_base + '2c')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
    x = layers.add([x, input_tensor])
    x = layers.Activation('relu')(x)
    return x
def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(2, 2)):
    filters1, filters2, filters3 = filters
    bn_axis = 3
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    x = layers.Conv2D(filters1, (1, 1), strides=strides,
                      kernel_initializer='he_normal',
                      name=conv_name_base + '2a')(input_tensor)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(filters2, kernel_size, padding='same',
                      kernel_initializer='he_normal',
                      name=conv_name_base + '2b')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = layers.Activation('relu')(x)
    x = layers.Conv2D(filters3, (1, 1),
                      kernel_initializer='he_normal',
                      name=conv_name_base + '2c')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
    shortcut = layers.Conv2D(filters3, (1, 1), strides=strides,
                             kernel_initializer='he_normal',
                             name=conv_name_base + '1')(input_tensor)
    shortcut = layers.BatchNormalization(
        axis=bn_axis, name=bn_name_base + '1')(shortcut)
    x = layers.add([x, shortcut])
    x = layers.Activation('relu')(x)
    return x
def ResNet50(input_shape, classes):
    bn_axis = 3
    img_input = Input(input_shape)
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
    x = layers.Conv2D(64, (7, 7),
                      strides=(2, 2),
                      padding='valid',
                      kernel_initializer='he_normal',
                      name='conv1')(img_input)
    x = layers.BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
    x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
    x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
    x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
    x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
    x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
    x = layers.Dense(classes, activation='sigmoid', name='fc')(x)
    # Create model.
    model = models.Model(inputs=img_input, outputs=x, name='resnet50')
    return model
model_resnet = ResNet50(input_shape=(3, 256, 256), classes=1)
# compile model
model_resnet.compile(loss='binary_crossentropy',
                     optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                     metrics=['accuracy', 'Recall', 'Precision'])
# make directory for logs
logdir = os.path.join('logs', model_name)
#os.mkdir(logdir)
from math import floor
N_FOLDS = 5
INIT_LR = 1e-4 # 0.001
T_BS = 16
V_BS = 16
decay_rate = 0.95
decay_step = 1
# early stopping
cp = EarlyStopping(monitor ='val_loss', mode = 'min', verbose = 2, patience = PATIENCE, restore_best_weights=True)
mc = ModelCheckpoint(model_name, monitor = 'val_loss', mode = 'min', verbose = 2, save_best_only = True)
tsb = TensorBoard(log_dir=logdir)
lrs = LearningRateScheduler(lambda epoch : INIT_LR * pow(decay_rate, floor(epoch / decay_step)))
# training
start = timer()
# Fit the model
history_resnet = model_resnet.fit(train_g1,
                                  epochs=1000,
                                  steps_per_epoch=len(train_g),
                                  validation_data=val_g,
                                  validation_steps=len(val_g),
                                  callbacks=[cp, mc, tsb])
end = timer()
elapsed = end - start
print('Total Time Elapsed: ', int(elapsed // 60), ' minutes ', round(elapsed % 60), ' seconds')
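For comparison, below is a minimal sketch of the transfer-learning baseline referred to above, built with the stock tf.keras.applications.ResNet50 and weights=None. It is an assumption-laden sketch, not the question's exact code: it uses a channels-last input of shape (256, 256, 3), which is what keras.applications expects, and reuses the same sigmoid head and compile settings as the custom model.

import tensorflow as tf
from tensorflow.keras import layers, models

# Randomly initialised ResNet50 backbone (no pre-trained weights) with the
# same binary-classification head as the custom model above.
base = tf.keras.applications.ResNet50(include_top=False,
                                      weights=None,
                                      input_shape=(256, 256, 3),
                                      pooling='avg')
out = layers.Dense(1, activation='sigmoid', name='fc')(base.output)
model_tl = models.Model(inputs=base.input, outputs=out, name='resnet50_no_weights')
model_tl.compile(loss='binary_crossentropy',
                 optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
                 metrics=['accuracy', 'Recall', 'Precision'])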

Related

training and validation losses decreasing slowly

I have implemented a 2D CNN model followed by a GRU layer.
class CNN2D(nn.Module):
def __init__(self, img_x=88, img_y=88, fc_hidden1=512, fc_hidden2=512, drop_p=0.3, CNN_embed_dim=512,num_classes=9):
super(CNN2D, self).__init__()
self.img_x = img_x
self.img_y = img_y
self.CNN_embed_dim = CNN_embed_dim
self.ch1, self.ch2, self.ch3, self.ch4 = 8, 16, 32, 64
self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3)
self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2)
self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0)
self.conv1_outshape = conv2D_output_size((self.img_x, self.img_y), self.pd1, self.k1, self.s1) # Conv1 output shape
self.conv2_outshape = conv2D_output_size(self.conv1_outshape, self.pd2, self.k2, self.s2)
self.conv3_outshape = conv2D_output_size(self.conv2_outshape, self.pd3, self.k3, self.s3)
self.conv4_outshape = conv2D_output_size(self.conv3_outshape, self.pd4, self.k4, self.s4)
# fully connected layer hidden nodes
self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2
self.drop_p = drop_p
self.conv1 = nn.Sequential(
nn.Conv2d(in_channels=1, out_channels=self.ch1, kernel_size=self.k1, stride=self.s1, padding=self.pd1),
nn.BatchNorm2d(self.ch1, momentum=0.01),
nn.ReLU(inplace=True),
# nn.MaxPool2d(kernel_size=2),
)
self.conv2 = nn.Sequential(
nn.Conv2d(in_channels=self.ch1, out_channels=self.ch2, kernel_size=self.k2, stride=self.s2, padding=self.pd2),
nn.BatchNorm2d(self.ch2, momentum=0.01),
nn.ReLU(inplace=True),
# nn.MaxPool2d(kernel_size=2),
)
self.conv3 = nn.Sequential(
nn.Conv2d(in_channels=self.ch2, out_channels=self.ch3, kernel_size=self.k3, stride=self.s3, padding=self.pd3),
nn.BatchNorm2d(self.ch3, momentum=0.01),
nn.ReLU(inplace=True),
# nn.MaxPool2d(kernel_size=2),
)
self.conv4 = nn.Sequential(
nn.Conv2d(in_channels=self.ch3, out_channels=self.ch4, kernel_size=self.k4, stride=self.s4, padding=self.pd4),
nn.BatchNorm2d(self.ch4, momentum=0.01),
nn.ReLU(inplace=True),
# nn.MaxPool2d(kernel_size=2),
)
self.drop = nn.Dropout2d(self.drop_p)
self.pool = nn.MaxPool2d(2)
#self.fc1 = nn.Linear(self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], self.fc_hidden1) # fully connected layer, output k classes
#self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2)
self.fc3 = nn.Linear(self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], self.CNN_embed_dim) # output = CNN embedding latent variables
self.num_classes = num_classes
self.gru = nn.GRU(
input_size=self.CNN_embed_dim,
hidden_size=256,
num_layers=1,
batch_first=True,  # input and output tensors are (batch, time_step, input_size)
)
#self.gfc1 = nn.Linear(256, 128)
self.gfc2 = nn.Linear(256, self.num_classes)
def forward(self, x_3d):
cnn_embed_seq = []
for t in range(x_3d.size(2)):
# CNNs
x = self.conv1(x_3d[:, :, t, :, :])
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = x.reshape(x.size(0), -1)
x = F.relu(self.fc1(x))
x = F.dropout(x, p=self.drop_p, training=self.training)
x = F.relu(self.fc2(x))
x = F.dropout(x, p=self.drop_p, training=self.training)
x = self.fc3(x)
cnn_embed_seq.append(x)
cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0).transpose_(0, 1)
RNN_out, _ = self.gru(cnn_embed_seq, None)
x = RNN_out[:, -1, :]
x = F.relu(x)
x = F.dropout(x, p=self.drop_p, training=self.training)
x = self.gfc2(x)
return x
The inputs are videos of shape [batch, channels, frames, height, width].
I used the Adam optimizer with lr=1e-5, weight_decay=5e-5, amsgrad=True, and cross-entropy loss.
The training and validation losses are decreasing slowly and the model is not converging.
What should I change?
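For reference, here is a minimal sketch of the training setup described above (the data loading and the training loop are omitted; names such as video_batch and labels are hypothetical):

import torch
import torch.nn as nn

model = CNN2D(img_x=88, img_y=88, num_classes=9)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=1e-5,            # learning rate stated in the question
                             weight_decay=5e-5,
                             amsgrad=True)

# video_batch: [batch, channels, frames, height, width]
# logits = model(video_batch)          # -> [batch, num_classes]
# loss = criterion(logits, labels)
# loss.backward(); optimizer.step(); optimizer.zero_grad()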

Keras Tuner on autoencoder - add condition: first hidden layer units greater than or equal to next hidden layer units

I want to use Keras Tuner to tune an autoencoder's hyperparameters.
It is a symmetric AE with two layers. I want the number of units in the first layer to always be greater than or equal to the number of units in the second layer, but I don't know how to implement this with keras-tuner. If someone can help, it would be great. Thank you in advance.
class DAE(tf.keras.Model):
'''
A DAE model
'''
def __init__(self, hp, **kwargs):
'''
DAE instantiation
args :
hp : Tuner
input_dim : input dimension
return:
None
'''
super(DAE, self).__init__(**kwargs)
input_dim = 15
latent_dim = hp.Choice("latent_space", [2,4,8])
units_0 = hp.Choice("units_0", [8, 16, 32, 64])
units_1 = hp.Choice("units_1", [8, 16, 32, 64])
for i in [8, 16, 32, 64]:
with hp.conditional_scope("units_0", [i]):
if units_0 == i:
......? # units_1 should be <= i
dropout = hp.Choice("dropout_rate", [0.1, 0.2, 0.3, 0.4, 0.5])
inputs = tf.keras.Input(shape = (input_dim,))
x = layers.Dense(units_0, activation="relu")(inputs)
x = layers.Dropout(dropout)(x)
x = layers.Dense(units_1, activation="relu")(x)
x = layers.Dropout(dropout)(x)
z = layers.Dense(latent_dim)(x)
self.encoder = tf.keras.Model(inputs, z, name="encoder")
inputs = tf.keras.Input(shape=(latent_dim,))
x = layers.Dense(units_1, activation="relu")(inputs)
x = layers.Dropout(dropout)(x)
x = layers.Dense(units_0, activation="relu")(x)
x = layers.Dropout(dropout)(x)
outputs = layers.Dense(input_dim, activation="linear")(x)
self.decoder = tf.keras.Model(inputs, outputs, name="decoder")
See my code above; it's a denoising autoencoder class.
I found the solution: we need to create a different units_1 hyperparameter for each units_0 value.
class DAE(tf.keras.Model):
'''
A DAE model
'''
def __init__(self, hp, training=None, **kwargs):
'''
DAE instantiation
args :
hp : Tuner
input_dim : input dimension
return:
None
'''
super(DAE, self).__init__(**kwargs)
self.input_dim = 15
l_units = [16, 32, 64, 128]
latent_dim = hp.Choice("latent_space", [2,4,8])
units_0 = hp.Choice("units_0", l_units)
dropout_0 = hp.Choice("dropout_rate_0", [0.1, 0.2, 0.3, 0.4, 0.5])
dropout_1 = hp.Choice("dropout_rate_1", [0.1, 0.2, 0.3, 0.4, 0.5])
for i in l_units:
name = "units_1_%d" % i # generates unique name for each hp.Int object
with hp.conditional_scope("units_0", [i]):
if units_0 == i:
locals()[name] = hp.Int(name, min_value = 8, max_value = i, step = 2, sampling = "log" )
inputs = tf.keras.Input(shape = (self.input_dim,))
x = layers.Dense(units_0, activation="relu")(inputs)
x = layers.Dropout(dropout_0)(x, training=training)
x = layers.Dense(locals()[name], activation="relu")(x)
x = layers.Dropout(dropout_1)(x, training=training)
z = layers.Dense(latent_dim)(x)
self.encoder = tf.keras.Model(inputs, z, name="encoder")
inputs = tf.keras.Input(shape=(latent_dim,))
x = layers.Dense(locals()[name], activation="relu")(inputs)
x = layers.Dropout(dropout_1)(x, training=training)
x = layers.Dense(units_0, activation="relu")(x)
x = layers.Dropout(dropout_0)(x, training=training)
outputs = layers.Dense(self.input_dim, activation="linear")(x)
self.decoder = tf.keras.Model(inputs, outputs, name="decoder")
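A rough usage sketch for the tuner follows. It assumes the DAE class also implements a call() method (not shown above) and that keras-tuner is installed; the learning-rate choice, loss, and trial count are hypothetical additions for illustration.

import keras_tuner as kt
import tensorflow as tf

def build_model(hp):
    # The tuner passes a fresh HyperParameters object for every trial,
    # so the conditional units_1_* parameters are re-created each time.
    model = DAE(hp)
    model.compile(optimizer=tf.keras.optimizers.Adam(hp.Choice("lr", [1e-3, 1e-4])),
                  loss="mse")
    return model

tuner = kt.RandomSearch(build_model,
                        objective="val_loss",
                        max_trials=20,
                        overwrite=True)
# tuner.search(x_train, x_train, validation_data=(x_val, x_val), epochs=50)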

validation and train metrics very low values (images and masks generator)

I have images (X_train) and mask data (y_train).
I want to train a U-Net. I am currently using an IoU metric, and the validation IoU is very low and constant!
I am not sure whether I am handling the scaling/preprocessing of the images and masks correctly.
I have tried using only rescale=1.0/255 in the generator, scaling only X_train and X_val (i.e. the images) and not the masks, and scaling inside the U-Net model (s = Lambda(lambda x: x / 255.0)(inputs)). I am not sure if that is the problem, just wondering.
Here you can download the X_train and y_train data.
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Conv2DTranspose, \
Dropout, Input, Concatenate, Lambda
from imgaug import augmenters as iaa
from tensorflow.keras import backend as K
# gpu setup
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
X_train = np.load('./X_train.npy')
y_train = np.load('./y_train.npy')
X_train = X_train.astype('uint8')
y_train = y_train.astype('uint8')
BATCH_SIZE=8
SEED=123
VAL_SPLIT = 0.2
IMG_HEIGHT = 256
IMG_WIDTH = 256
def augment(images):
seq = iaa.Sequential([
iaa.Fliplr(0.5), # horizontal flips
iaa.Flipud(0.5), # vertical flips
iaa.Sometimes(
0.1,
iaa.GaussianBlur(sigma=(0, 0.5))
),
iaa.LinearContrast((0.75, 1.5)),
iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),
iaa.BlendAlphaSimplexNoise(
iaa.EdgeDetect(0.3),
upscale_method="linear"),
], random_order=True)
return seq.augment_image(images)
def create_gen(X,
y,
batch_size=BATCH_SIZE,
seed=SEED):
X_train, X_val, y_train, y_val = \
train_test_split(X,
y,
test_size=VAL_SPLIT)
# Image data generator
data_gen_args = dict(rescale = 1.0/255,
preprocessing_function=augment)
data_gen_args_masks = dict( preprocessing_function=augment)
X_datagen = ImageDataGenerator(**data_gen_args)
y_datagen = ImageDataGenerator(**data_gen_args_masks)
X_datagen.fit(X_train, augment=True, seed=seed)
y_datagen.fit(y_train, augment=True, seed=seed)
X_train_augmented = X_datagen.flow(X_train,
batch_size=batch_size,
shuffle=True,
seed=seed)
y_train_augmented = y_datagen.flow(y_train,
batch_size=batch_size,
shuffle=True,
seed=seed)
# Validation data generator
data_gen_args_val = dict(rescale = 1.0/255)
X_datagen_val = ImageDataGenerator(**data_gen_args_val)
y_datagen_val = ImageDataGenerator()
X_datagen_val.fit(X_val, augment=True, seed=seed)
y_datagen_val.fit(y_val, augment=True, seed=seed)
X_val_after = X_datagen_val.flow(X_val,
batch_size=batch_size,
shuffle=False)
y_val_after = y_datagen_val.flow(y_val,
batch_size=batch_size,
shuffle=False)
train_generator = zip(X_train_augmented, y_train_augmented)
val_generator = zip(X_val_after, y_val_after)
steps_per_epoch = X_train_augmented.n // X_train_augmented.batch_size
validation_steps = X_val_after.n // X_val_after.batch_size
return train_generator, val_generator, steps_per_epoch, validation_steps
train_generator, val_generator, steps_per_epoch, validation_steps = \
create_gen(X_train,
y_train,
batch_size=BATCH_SIZE)
# Build U-Net model
inputs = Input((IMG_HEIGHT, IMG_WIDTH, 3))
#s = Lambda(lambda x: x / 255) (inputs) # rescale inputs
c1 = Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (inputs)
c1 = Dropout(0.1) (c1)
c1 = Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c1)
p1 = MaxPooling2D((2, 2)) (c1)
c2 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (p1)
c2 = Dropout(0.1) (c2)
c2 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c2)
p2 = MaxPooling2D((2, 2)) (c2)
c3 = Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (p2)
c3 = Dropout(0.2) (c3)
c3 = Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c3)
p3 = MaxPooling2D((2, 2)) (c3)
c4 = Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (p3)
c4 = Dropout(0.2) (c4)
c4 = Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c4)
p4 = MaxPooling2D(pool_size=(2, 2)) (c4)
c5 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (p4)
c5 = Dropout(0.3) (c5)
c5 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c5)
u6 = Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same') (c5)
u6 = Concatenate()([u6, c4])
c6 = Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (u6)
c6 = Dropout(0.2) (c6)
c6 = Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c6)
u7 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same') (c6)
u7 = Concatenate()([u7, c3])
c7 = Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (u7)
c7 = Dropout(0.2) (c7)
c7 = Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c7)
u8 = Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same') (c7)
u8 = Concatenate()([u8, c2])
c8 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (u8)
c8 = Dropout(0.1) (c8)
c8 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c8)
u9 = Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same') (c8)
u9 = Concatenate()([u9, c1])
c9 = Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (u9)
c9 = Dropout(0.1) (c9)
c9 = Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c9)
outputs = Conv2D(1, (1, 1), activation='sigmoid') (c9)
model = Model(inputs=[inputs], outputs=[outputs])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[iouMetric])
EPOCHS = 40
model.fit( train_generator,
validation_data=val_generator,
batch_size=BATCH_SIZE,
steps_per_epoch= steps_per_epoch,
validation_steps=validation_steps,
epochs=EPOCHS)
Code for iouMetric:
def castF(x):
return K.cast(x, K.floatx())
def castB(x):
return K.cast(x, bool)
def iou_loss_core(true,pred): #this can be used as a loss if you make it negative
intersection = true * pred
notTrue = 1 - true
union = true + (notTrue * pred)
return (K.sum(intersection, axis=-1) + K.epsilon()) / (K.sum(union, axis=-1) + K.epsilon())
def iouMetric(true, pred):
tresholds = [0.5 + (i * 0.05) for i in range(5)]
#flattened images (batch, pixels)
true = K.batch_flatten(true)
pred = K.batch_flatten(pred)
pred = castF(K.greater(pred, 0.5))
#total white pixels - (batch,)
trueSum = K.sum(true, axis=-1)
predSum = K.sum(pred, axis=-1)
#has mask or not per image - (batch,)
true1 = castF(K.greater(trueSum, 1))
pred1 = castF(K.greater(predSum, 1))
#to get images that have mask in both true and pred
truePositiveMask = castB(true1 * pred1)
#separating only the possible true positives to check iou
testTrue = tf.boolean_mask(true, truePositiveMask)
testPred = tf.boolean_mask(pred, truePositiveMask)
#getting iou and threshold comparisons
iou = iou_loss_core(testTrue,testPred)
truePositives = [castF(K.greater(iou, tres)) for tres in tresholds]
#mean of thressholds for true positives and total sum
truePositives = K.mean(K.stack(truePositives, axis=-1), axis=-1)
truePositives = K.sum(truePositives)
#to get images that don't have mask in both true and pred
trueNegatives = (1-true1) * (1 - pred1) # = 1 -true1 - pred1 + true1*pred1
trueNegatives = K.sum(trueNegatives)
return (truePositives + trueNegatives) / castF(K.shape(true)[0])
I tried other metrics as well; Dice loss is also constant and very low. Accuracy is around 79% and constant.
The problem is with the pre-processing. According to tf.keras.preprocessing.image.ImageDataGenerator documentation:
preprocessing_function: function that will be applied on each input. The function will run after the image is resized and augmented. The function should take one argument: one image (Numpy tensor with rank 3), and should output a Numpy tensor with the same shape.
So this function runs in addition to, and after, the augmentation options you set inside ImageDataGenerator. The problem is that you have already scaled the images with 1.0 / 255, so the augment() function receives a scaled image, whereas the imgaug documentation expects an unscaled image (see the comment inside its example):
'images' should be either a 4D numpy array of shape (N, height, width, channels)
or a list of 3D numpy arrays, each having shape (height, width, channels).
Grayscale images must have shape (height, width, 1) each.
All images must have numpy's dtype uint8. Values are expected to be in
range 0-255.
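One possible fix, as a sketch that assumes the rest of the generator setup stays as above: run imgaug on the raw uint8 image inside the preprocessing function and scale only afterwards, instead of passing rescale to the generator. The helper name augment_then_scale is hypothetical.

def augment_then_scale(image):
    image = augment(image.astype(np.uint8))     # imgaug expects uint8 values in [0, 255]
    return image.astype(np.float32) / 255.0     # scale to [0, 1] only after augmenting

X_datagen = ImageDataGenerator(preprocessing_function=augment_then_scale)  # no rescale here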
Edit:
In the output layer you are using a sigmoid activation function, which forces the output to always be within [0, 1]. But you are not scaling the masks, which means the masks will be within [0, 255]. So, for obvious reasons, the model will never be able to output such large values, as it is restricted to [0, 1]. Remove the sigmoid from the last layer and see what happens.
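An alternative that keeps the sigmoid, sketched under the assumption that the masks are meant to be binary: scale or binarize the masks into [0, 1] before training so they match the sigmoid's output range.

y_train = (y_train > 0).astype('float32')   # masks become {0, 1}, matching the sigmoid output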

U-Net with pre-trained ResNet throws "sizes of tensors must match" dimension error

I have an RGB image and a segmentation mask of dimensions 900x600 (width x height).
My U-Net code is below. I do not really want to resize the output too much; it is fine if it is resized without losing much of the aspect ratio.
import torch
import torch.nn as nn
from torchvision import models
def convrelu(in_channels, out_channels, kernel, padding):
return nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel, padding=padding),
nn.ReLU(inplace=True)
)
class ResNetUNet(nn.Module):
def __init__(self, n_class=1):
super().__init__()
self.base_model = models.resnet18(pretrained=True)
self.base_layers = list(self.base_model.children())
self.layer0 = nn.Sequential(*self.base_layers[:3]) # size=(N, 64, x.H/2, x.W/2)
self.layer0_1x1 = convrelu(64, 64, 1, 0)
self.layer1 = nn.Sequential(*self.base_layers[3:5]) # size=(N, 64, x.H/4, x.W/4)
self.layer1_1x1 = convrelu(64, 64, 1, 0)
self.layer2 = self.base_layers[5] # size=(N, 128, x.H/8, x.W/8)
self.layer2_1x1 = convrelu(128, 128, 1, 0)
self.layer3 = self.base_layers[6] # size=(N, 256, x.H/16, x.W/16)
self.layer3_1x1 = convrelu(256, 256, 1, 0)
self.layer4 = self.base_layers[7] # size=(N, 512, x.H/32, x.W/32)
self.layer4_1x1 = convrelu(512, 512, 1, 0)
self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
self.conv_up3 = convrelu(256 + 512, 512, 3, 1)
self.conv_up2 = convrelu(128 + 512, 256, 3, 1)
self.conv_up1 = convrelu(64 + 256, 256, 3, 1)
self.conv_up0 = convrelu(64 + 256, 128, 3, 1)
self.conv_original_size0 = convrelu(3, 64, 3, 1)
self.conv_original_size1 = convrelu(64, 64, 3, 1)
self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1)
self.conv_last = nn.Conv2d(64, n_class, 1)
def forward(self, input):
x_original = self.conv_original_size0(input)
x_original = self.conv_original_size1(x_original)
layer0 = self.layer0(input)
layer1 = self.layer1(layer0)
layer2 = self.layer2(layer1)
layer3 = self.layer3(layer2)
layer4 = self.layer4(layer3)
layer4 = self.layer4_1x1(layer4)
x = self.upsample(layer4)
layer3 = self.layer3_1x1(layer3)
x = torch.cat([x, layer3], dim=1)
x = self.conv_up3(x)
x = self.upsample(x)
layer2 = self.layer2_1x1(layer2)
x = torch.cat([x, layer2], dim=1)
x = self.conv_up2(x)
x = self.upsample(x)
layer1 = self.layer1_1x1(layer1)
x = torch.cat([x, layer1], dim=1)
x = self.conv_up1(x)
x = self.upsample(x)
layer0 = self.layer0_1x1(layer0)
x = torch.cat([x, layer0], dim=1)
x = self.conv_up0(x)
x = self.upsample(x)
x = torch.cat([x, x_original], dim=1)
x = self.conv_original_size2(x)
out = self.conv_last(x)
return out
For this command:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNetUNet()
model = model.to(device)
# check keras-like model summary using torchsummary
from torchsummary import summary
summary(model, input_size=(3, 600, 900))
it throws the error:
54 x = self.upsample(layer4)
55 layer3 = self.layer3_1x1(layer3)
---> 56 x = torch.cat([x, layer3], dim=1)
57 x = self.conv_up3(x)
58
RuntimeError: Sizes of tensors must match except in dimension 3. Got 57 and 58
I am not sure what to do here. Could someone help me solve this?
Try this. You just need to match tensor shapes before torch.cat.
import torch
import torch.nn as nn
from torchvision import models
import torch.nn.functional as F
def match_shapes(x, y):
if x.shape[-2:] != y.shape[-2:]:
x = F.interpolate(x, y.shape[-2:], mode='nearest')
return x
def convrelu(in_channels, out_channels, kernel, padding):
return nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel, padding=padding),
nn.ReLU(inplace=True)
)
class ResNetUNet(nn.Module):
def __init__(self, n_class=1):
super().__init__()
self.base_model = models.resnet18(pretrained=True)
self.base_layers = list(self.base_model.children())
self.layer0 = nn.Sequential(*self.base_layers[:3]) # size=(N, 64, x.H/2, x.W/2)
self.layer0_1x1 = convrelu(64, 64, 1, 0)
self.layer1 = nn.Sequential(*self.base_layers[3:5]) # size=(N, 64, x.H/4, x.W/4)
self.layer1_1x1 = convrelu(64, 64, 1, 0)
self.layer2 = self.base_layers[5] # size=(N, 128, x.H/8, x.W/8)
self.layer2_1x1 = convrelu(128, 128, 1, 0)
self.layer3 = self.base_layers[6] # size=(N, 256, x.H/16, x.W/16)
self.layer3_1x1 = convrelu(256, 256, 1, 0)
self.layer4 = self.base_layers[7] # size=(N, 512, x.H/32, x.W/32)
self.layer4_1x1 = convrelu(512, 512, 1, 0)
self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
self.conv_up3 = convrelu(256 + 512, 512, 3, 1)
self.conv_up2 = convrelu(128 + 512, 256, 3, 1)
self.conv_up1 = convrelu(64 + 256, 256, 3, 1)
self.conv_up0 = convrelu(64 + 256, 128, 3, 1)
self.conv_original_size0 = convrelu(3, 64, 3, 1)
self.conv_original_size1 = convrelu(64, 64, 3, 1)
self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1)
self.conv_last = nn.Conv2d(64, n_class, 1)
def forward(self, input):
x_original = self.conv_original_size0(input)
x_original = self.conv_original_size1(x_original)
layer0 = self.layer0(input)
layer1 = self.layer1(layer0)
layer2 = self.layer2(layer1)
layer3 = self.layer3(layer2)
layer4 = self.layer4(layer3)
layer4 = self.layer4_1x1(layer4)
x = self.upsample(layer4)
layer3 = self.layer3_1x1(layer3)
x = match_shapes(x, layer3)
x = torch.cat([x, layer3], dim=1)
x = self.conv_up3(x)
x = self.upsample(x)
layer2 = self.layer2_1x1(layer2)
x = match_shapes(x, layer2)
x = torch.cat([x, layer2], dim=1)
x = self.conv_up2(x)
x = self.upsample(x)
layer1 = self.layer1_1x1(layer1)
x = match_shapes(x, layer1)
x = torch.cat([x, layer1], dim=1)
x = self.conv_up1(x)
x = self.upsample(x)
layer0 = self.layer0_1x1(layer0)
x = torch.cat([x, layer0], dim=1)
x = self.conv_up0(x)
x = self.upsample(x)
x = torch.cat([x, x_original], dim=1)
x = self.conv_original_size2(x)
out = self.conv_last(x)
return out
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNetUNet()
model = model.to(device)
# check keras-like model summary using torchsummary
from torchsummary import summary
summary(model, input_size=(3, 600, 900))
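As a side note, the mismatch arises because 600 and 900 are not divisible by 32, so the repeated stride-2 downsampling and the x2 upsampling stages end up at slightly different sizes (57 vs. 58 in the error). An alternative sketch, with a hypothetical helper, is to pad the input to the next multiple of 32 before feeding the network:

import torch
import torch.nn.functional as F

def pad_to_multiple(x, multiple=32):
    h, w = x.shape[-2:]
    pad_h = (multiple - h % multiple) % multiple
    pad_w = (multiple - w % multiple) % multiple
    return F.pad(x, (0, pad_w, 0, pad_h))    # pad order: (left, right, top, bottom)

x = torch.randn(1, 3, 600, 900)
print(pad_to_multiple(x).shape)              # torch.Size([1, 3, 608, 928])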

How do I use Keras when the number of training data and test data is different?

The following code creates a model that predicts a specific value from multiple variables. My data form an 11947-by-9 matrix, and I have split them into 9000 rows for training and 2947 rows for testing.
import numpy as np
from keras.models import Model
from keras.layers import Dense, Input, concatenate, Conv1D
batchSize=1000
def get_model(rows, cols):
inputs1 = Input(shape=(1, rows))
inputs2 = Input(shape=(1, rows))
inputs3 = Input(shape=(1, rows))
inputs4 = Input(shape=(1, rows))
inputs5 = Input(shape=(1, rows))
inputs6 = Input(shape=(1, rows))
inputs7 = Input(shape=(1, rows))
conv1 = Conv1D(1024, 1, activation='relu', padding='same')(inputs1)
conv1 = Conv1D(512, 1, activation='relu', padding='same' )(conv1)
conv2 = Conv1D(1024, 1, activation='relu', padding='same')(inputs2)
conv2 = Conv1D(512, 1, activation='relu', padding='same')(conv2)
conv3 = Conv1D(1024, 1, activation='relu', padding='same')(inputs3)
conv3 = Conv1D(512, 1, activation='relu', padding='same')(conv3)
conv4 = Conv1D(1024, 1, activation='relu', padding='same')(inputs4)
conv4 = Conv1D(512, 1, activation='relu', padding='same')(conv4)
conv5 = Conv1D(1024, 1, activation='relu', padding='same')(inputs5)
conv5 = Conv1D(512, 1, activation='relu', padding='same')(conv5)
conv6 = Conv1D(1024, 1, activation='relu', padding='same')(inputs6)
conv6 = Conv1D(512, 1, activation='relu', padding='same')(conv6)
conv7 = Conv1D(1024, 1, activation='relu', padding='same')(inputs7)
conv7 = Conv1D(512, 1, activation='relu', padding='same')(conv7)
convConcat = concatenate([conv1, conv2, conv3, conv4, conv5, conv6, conv7])
convOut = Dense(rows, activation='relu')(convConcat)
model = Model(inputs=[inputs1, inputs2, inputs3, inputs4, inputs5, inputs6, inputs7], outputs=[convOut])
model.compile(optimizer='Adam', loss='mean_squared_error', metrics=['accuracy'])
return model
def train_and_predict():
dataT = data_train.transpose()
dataT_O = dataT[0].reshape((1, 1, -1))
dataT_1 = dataT[2].reshape((1, 1, -1))
dataT_2 = dataT[3].reshape((1, 1, -1))
dataT_3 = dataT[4].reshape((1, 1, -1))
dataT_4 = dataT[5].reshape((1, 1, -1))
dataT_5 = dataT[6].reshape((1, 1, -1))
dataT_6 = dataT[7].reshape((1, 1, -1))
dataT_7 = dataT[8].reshape((1, 1, -1))
model.fit([dataT_1, dataT_2, dataT_3, dataT_4, dataT_5, dataT_6, dataT_7], dataT_O, epochs=100, batch_size=batchSize)
dataT = data_test.transpose()
dataT_1 = dataT[2].reshape((1, 1, -1))
dataT_2 = dataT[3].reshape((1, 1, -1))
dataT_3 = dataT[4].reshape((1, 1, -1))
dataT_4 = dataT[5].reshape((1, 1, -1))
dataT_5 = dataT[6].reshape((1, 1, -1))
dataT_6 = dataT[7].reshape((1, 1, -1))
dataT_7 = dataT[8].reshape((1, 1, -1))
model.predict([dataT_1, dataT_2, dataT_3, dataT_4, dataT_5, dataT_6, dataT_7])
print('Done!')
if __name__ == '__main__':
data_train = np.genfromtxt('./trainingLossdata_train.txt')
data_test = np.genfromtxt('./trainingLossdata_test.txt')
model = get_model(data_train.shape[0], data_train.shape[1])
train_and_predict()
However, an error occurred because of the difference between the number of training and test samples. The exact error message is:
"ValueError: Error when checking: expected input_1 to have shape (None, 1, 9000) but got array with shape (1, 1, 2947)"
How can I solve this problem? I am a beginner at deep learning with Keras, so please give me some advice.
It seems to me that your input creation is wrong. It has nothing to do with the different number of training and test samples (that happens all the time). Now, you said:
My data compose 11947 by 9
This means you have 11947 samples, each with 9 features. So the ideal input shape should be (None, 9) or (None, 1, 9), where None stands for the number of samples in a batch and can vary between training and prediction.
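A minimal sketch of that suggestion (the layer sizes are hypothetical): keep the sample dimension as the batch dimension, so the 9000 training rows and 2947 test rows both fit the same model. As in the question, column 0 is the target and columns 2-8 are the seven features; data_train and data_test are assumed to be loaded as above.

import numpy as np
from keras.models import Model
from keras.layers import Input, Dense

features_train = data_train[:, 2:9]    # shape (9000, 7)
target_train   = data_train[:, 0]      # shape (9000,)
features_test  = data_test[:, 2:9]     # shape (2947, 7)

inputs = Input(shape=(7,))             # batch dimension stays None
x = Dense(512, activation='relu')(inputs)
x = Dense(512, activation='relu')(x)
outputs = Dense(1)(x)                  # single regression target

model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='Adam', loss='mean_squared_error')
model.fit(features_train, target_train, epochs=100, batch_size=1000)
preds = model.predict(features_test)   # works even though 2947 != 9000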