validation and train metrics very low values (images and masks generator) - deep-learning

I have images(X_train) and masks data (y_train).
I want to train a unet network. I am currently using iou metric and the validation iou is very low and constant!
I am not sure if I can handle right the scaling preprocessing of images and masks.
I have tried either to use only rescale=1.0/255 in the generator, either to scale only X_train and X_val hence (images) values and not masks values, either scale in the unet model (s = Lambda(lambda x: x / 255.0) (inputs)) . I am not sure if that is the problem, just wondering.
here you can download X_train and y_train data
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Conv2DTranspose, \
Dropout, Input, Concatenate, Lambda
from imgaug import augmenters as iaa
from tensorflow.keras import backend as K
# gpu setup
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
X_train = np.load('./X_train.npy')
y_train = np.load('/y_train.npy')
X_train = X_train.astype('uint8')
y_train = y_train.astype('uint8')
BATCH_SIZE=8
SEED=123
VAL_SPLIT = 0.2
IMG_HEIGHT = 256
IMG_WIDTH = 256
def augment(images):
seq = iaa.Sequential([
iaa.Fliplr(0.5), # horizontal flips
iaa.Flipud(0.5), # vertical flips
iaa.Sometimes(
0.1,
iaa.GaussianBlur(sigma=(0, 0.5))
),
iaa.LinearContrast((0.75, 1.5)),
iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),
iaa.BlendAlphaSimplexNoise(
iaa.EdgeDetect(0.3),
upscale_method="linear"),
], random_order=True)
return seq.augment_image(images)
def create_gen(X,
y,
batch_size=BATCH_SIZE,
seed=SEED):
X_train, X_val, y_train, y_val = \
train_test_split(X,
y,
test_size=VAL_SPLIT)
# Image data generator
data_gen_args = dict(rescale = 1.0/255,
preprocessing_function=augment)
data_gen_args_masks = dict( preprocessing_function=augment)
X_datagen = ImageDataGenerator(**data_gen_args)
y_datagen = ImageDataGenerator(**data_gen_args_masks)
X_datagen.fit(X_train, augment=True, seed=seed)
y_datagen.fit(y_train, augment=True, seed=seed)
X_train_augmented = X_datagen.flow(X_train,
batch_size=batch_size,
shuffle=True,
seed=seed)
y_train_augmented = y_datagen.flow(y_train,
batch_size=batch_size,
shuffle=True,
seed=seed)
# Validation data generator
data_gen_args_val = dict(rescale = 1.0/255)
X_datagen_val = ImageDataGenerator(**data_gen_args_val)
y_datagen_val = ImageDataGenerator()
X_datagen_val.fit(X_val, augment=True, seed=seed)
y_datagen_val.fit(y_val, augment=True, seed=seed)
X_val_after = X_datagen_val.flow(X_val,
batch_size=batch_size,
shuffle=False)
y_val_after = y_datagen_val.flow(y_val,
batch_size=batch_size,
shuffle=False)
train_generator = zip(X_train_augmented, y_train_augmented)
val_generator = zip(X_val_after, y_val_after)
steps_per_epoch = X_train_augmented.n // X_train_augmented.batch_size
validation_steps = X_val_after.n // X_val_after.batch_size
return train_generator, val_generator, steps_per_epoch, validation_steps
train_generator, val_generator, steps_per_epoch, validation_steps = \
create_gen(X_train,
y_train,
batch_size=BATCH_SIZE)
# Build U-Net model
inputs = Input((IMG_HEIGHT, IMG_WIDTH, 3))
#s = Lambda(lambda x: x / 255) (inputs) # rescale inputs
c1 = Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (inputs)
c1 = Dropout(0.1) (c1)
c1 = Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c1)
p1 = MaxPooling2D((2, 2)) (c1)
c2 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (p1)
c2 = Dropout(0.1) (c2)
c2 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c2)
p2 = MaxPooling2D((2, 2)) (c2)
c3 = Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (p2)
c3 = Dropout(0.2) (c3)
c3 = Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c3)
p3 = MaxPooling2D((2, 2)) (c3)
c4 = Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (p3)
c4 = Dropout(0.2) (c4)
c4 = Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c4)
p4 = MaxPooling2D(pool_size=(2, 2)) (c4)
c5 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (p4)
c5 = Dropout(0.3) (c5)
c5 = Conv2D(256, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c5)
u6 = Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same') (c5)
u6 = Concatenate()([u6, c4])
c6 = Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (u6)
c6 = Dropout(0.2) (c6)
c6 = Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c6)
u7 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same') (c6)
u7 = Concatenate()([u7, c3])
c7 = Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (u7)
c7 = Dropout(0.2) (c7)
c7 = Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c7)
u8 = Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same') (c7)
u8 = Concatenate()([u8, c2])
c8 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (u8)
c8 = Dropout(0.1) (c8)
c8 = Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c8)
u9 = Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same') (c8)
u9 = Concatenate()([u9, c1])
c9 = Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (u9)
c9 = Dropout(0.1) (c9)
c9 = Conv2D(16, (3, 3), activation='relu', kernel_initializer='he_normal', padding='same') (c9)
outputs = Conv2D(1, (1, 1), activation='sigmoid') (c9)
model = Model(inputs=[inputs], outputs=[outputs])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[iouMetric])
EPOCHS = 40
model.fit( train_generator,
validation_data=val_generator,
batch_size=BATCH_SIZE,
steps_per_epoch= steps_per_epoch,
validation_steps=validation_steps,
epochs=EPOCHS)
code for ioumetric:
def castF(x):
return K.cast(x, K.floatx())
def castB(x):
return K.cast(x, bool)
def iou_loss_core(true,pred): #this can be used as a loss if you make it negative
intersection = true * pred
notTrue = 1 - true
union = true + (notTrue * pred)
return (K.sum(intersection, axis=-1) + K.epsilon()) / (K.sum(union, axis=-1) + K.epsilon())
def iouMetric(true, pred):
tresholds = [0.5 + (i * 0.05) for i in range(5)]
#flattened images (batch, pixels)
true = K.batch_flatten(true)
pred = K.batch_flatten(pred)
pred = castF(K.greater(pred, 0.5))
#total white pixels - (batch,)
trueSum = K.sum(true, axis=-1)
predSum = K.sum(pred, axis=-1)
#has mask or not per image - (batch,)
true1 = castF(K.greater(trueSum, 1))
pred1 = castF(K.greater(predSum, 1))
#to get images that have mask in both true and pred
truePositiveMask = castB(true1 * pred1)
#separating only the possible true positives to check iou
testTrue = tf.boolean_mask(true, truePositiveMask)
testPred = tf.boolean_mask(pred, truePositiveMask)
#getting iou and threshold comparisons
iou = iou_loss_core(testTrue,testPred)
truePositives = [castF(K.greater(iou, tres)) for tres in tresholds]
#mean of thressholds for true positives and total sum
truePositives = K.mean(K.stack(truePositives, axis=-1), axis=-1)
truePositives = K.sum(truePositives)
#to get images that don't have mask in both true and pred
trueNegatives = (1-true1) * (1 - pred1) # = 1 -true1 - pred1 + true1*pred1
trueNegatives = K.sum(trueNegatives)
return (truePositives + trueNegatives) / castF(K.shape(true)[0])
I tried other metrics as well, dice loss is also constant and very low. Accuracy is around 79 and constant.

The problem is with the pre-processing. According to tf.keras.preprocessing.image.ImageDataGenerator documentation:
preprocessing_function: function that will be applied on each input. The function will run after the image is resized and augmented. The function should take one argument: one image (Numpy tensor with rank 3), and should output a Numpy tensor with the same shape.
So, this function will run additionally after the augmentation you mentioned inside ImageDataGenerator. But the problem is that you already have scaled the images with 1.0 / 255, so, the augment() function is getting a scaled image. But according to the documentation of imgaug, it wants an un-scaled image (see the comment inside the example):
'images' should be either a 4D numpy array of shape (N, height, width, channels)
or a list of 3D numpy arrays, each having shape (height, width, channels).
Grayscale images must have shape (height, width, 1) each.
All images must have numpy's dtype uint8. Values are expected to be in
range 0-255.
Edit:
In the output you are using sigmoid activation function. Which will force the output to be always within [0, 1]. But you are not scaling the mask, that means the mask will be within [0, 255]. So, for obvious reason, the model will never be able output these large values, as it is restricted to be within [0, 1]. So, remove the sigmoid from the last layer and see what happens.

Related

training and validation losses decreasing slowly

i have implemented 2DCNN model followed by GRU layer
class CNN2D(nn.Module):
def __init__(self, img_x=88, img_y=88, fc_hidden1=512, fc_hidden2=512, drop_p=0.3, CNN_embed_dim=512,num_classes=9):
super(CNN2D, self).__init__()
self.img_x = img_x
self.img_y = img_y
self.CNN_embed_dim = CNN_embed_dim
self.ch1, self.ch2, self.ch3, self.ch4 = 8, 16, 32, 64
self.k1, self.k2, self.k3, self.k4 = (5, 5), (3, 3), (3, 3), (3, 3)
self.s1, self.s2, self.s3, self.s4 = (2, 2), (2, 2), (2, 2), (2, 2)
self.pd1, self.pd2, self.pd3, self.pd4 = (0, 0), (0, 0), (0, 0), (0, 0)
self.conv1_outshape = conv2D_output_size((self.img_x, self.img_y), self.pd1, self.k1, self.s1) # Conv1 output shape
self.conv2_outshape = conv2D_output_size(self.conv1_outshape, self.pd2, self.k2, self.s2)
self.conv3_outshape = conv2D_output_size(self.conv2_outshape, self.pd3, self.k3, self.s3)
self.conv4_outshape = conv2D_output_size(self.conv3_outshape, self.pd4, self.k4, self.s4)
# fully connected layer hidden nodes
self.fc_hidden1, self.fc_hidden2 = fc_hidden1, fc_hidden2
self.drop_p = drop_p
self.conv1 = nn.Sequential(
nn.Conv2d(in_channels=1, out_channels=self.ch1, kernel_size=self.k1, stride=self.s1, padding=self.pd1),
nn.BatchNorm2d(self.ch1, momentum=0.01),
nn.ReLU(inplace=True),
# nn.MaxPool2d(kernel_size=2),
)
self.conv2 = nn.Sequential(
nn.Conv2d(in_channels=self.ch1, out_channels=self.ch2, kernel_size=self.k2, stride=self.s2, padding=self.pd2),
nn.BatchNorm2d(self.ch2, momentum=0.01),
nn.ReLU(inplace=True),
# nn.MaxPool2d(kernel_size=2),
)
self.conv3 = nn.Sequential(
nn.Conv2d(in_channels=self.ch2, out_channels=self.ch3, kernel_size=self.k3, stride=self.s3, padding=self.pd3),
nn.BatchNorm2d(self.ch3, momentum=0.01),
nn.ReLU(inplace=True),
# nn.MaxPool2d(kernel_size=2),
)
self.conv4 = nn.Sequential(
nn.Conv2d(in_channels=self.ch3, out_channels=self.ch4, kernel_size=self.k4, stride=self.s4, padding=self.pd4),
nn.BatchNorm2d(self.ch4, momentum=0.01),
nn.ReLU(inplace=True),
# nn.MaxPool2d(kernel_size=2),
)
self.drop = nn.Dropout2d(self.drop_p)
self.pool = nn.MaxPool2d(2)
#self.fc1 = nn.Linear(self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], self.fc_hidden1) # fully connected layer, output k classes
#self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2)
self.fc3 = nn.Linear(self.ch4 * self.conv4_outshape[0] * self.conv4_outshape[1], self.CNN_embed_dim) # output = CNN embedding latent variables
self.num_classes = num_classes
self.gru = nn.GRU(
input_size=self.CNN_embed_dim,
hidden_size=256,
num_layers=1,
batch_first=True,(batch, time_step, input_size)
)
#self.gfc1 = nn.Linear(256, 128)
self.gfc2 = nn.Linear(256, self.num_classes)
def forward(self, x_3d):
cnn_embed_seq = []
for t in range(x_3d.size(2)):
# CNNs
x = self.conv1(x_3d[:, :, t, :, :])
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = x.reshape(x.size(0), -1)
x = F.relu(self.fc1(x))
x = F.dropout(x, p=self.drop_p, training=self.training)
x = F.relu(self.fc2(x))
x = F.dropout(x, p=self.drop_p, training=self.training)
x = self.fc3(x)
cnn_embed_seq.append(x)
cnn_embed_seq = torch.stack(cnn_embed_seq, dim=0).transpose_(0, 1)
RNN_out, _ = self.gru(cnn_embed_seq, None)
x = RNN_out[:, -1, :]
x = F.relu(x)
x = F.dropout(x, p=self.drop_p, training=self.training) NEW UPDATE
x = self.gfc2(x)
return x
inputs are videos of shape [batch,channels,frames,height,width]
i used adam optimizer with lr=1e-5 ,weight_decay=5e-5 ,amsgrad=True and cross entropy loss
training and validation losses are decreasing slowly and model is not converging
what should i change ?

Trying to understad why my custom ResNetv50 gives worst performance that Transfer learning (without weight) performs better?

I am doing a deep learning project (binary classification) with a ResNet on small datasets (896 total images). I have tried several models where ResNet gives me the best performance even though the model sufferers from exploding gradients with SGD optimizers (Adam converges faster but fluctuates much more). (Code source)
But the model performs better when I try ResNetv50 using transfer learning without initializing any weights (weights = None).
To my understanding, both models should perform similarly, but due to less coding experience, I failed to understand I got a different result.
def identity_block(input_tensor, kernel_size, filters, stage, block):
filters1, filters2, filters3 = filters
bn_axis = 3
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = layers.Conv2D(filters1, (1, 1),
kernel_initializer='he_normal',
name=conv_name_base + '2a')(input_tensor)
x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = layers.Activation('relu')(x)
x = layers.Conv2D(filters2, kernel_size,
padding='same',
kernel_initializer='he_normal',
name=conv_name_base + '2b')(x)
x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = layers.Activation('relu')(x)
x = layers.Conv2D(filters3, (1, 1),
kernel_initializer='he_normal',
name=conv_name_base + '2c')(x)
x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
x = layers.add([x, input_tensor])
x = layers.Activation('relu')(x)
return x
def conv_block(input_tensor,
kernel_size,
filters,
stage,
block,
strides=(2, 2)):
filters1, filters2, filters3 = filters
bn_axis = 3
conv_name_base = 'res' + str(stage) + block + '_branch'
bn_name_base = 'bn' + str(stage) + block + '_branch'
x = layers.Conv2D(filters1, (1, 1), strides=strides,
kernel_initializer='he_normal',
name=conv_name_base + '2a')(input_tensor)
x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
x = layers.Activation('relu')(x)
x = layers.Conv2D(filters2, kernel_size, padding='same',
kernel_initializer='he_normal',
name=conv_name_base + '2b')(x)
x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
x = layers.Activation('relu')(x)
x = layers.Conv2D(filters3, (1, 1),
kernel_initializer='he_normal',
name=conv_name_base + '2c')(x)
x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
shortcut = layers.Conv2D(filters3, (1, 1), strides=strides,
kernel_initializer='he_normal',
name=conv_name_base + '1')(input_tensor)
shortcut = layers.BatchNormalization(
axis=bn_axis, name=bn_name_base + '1')(shortcut)
x = layers.add([x, shortcut])
x = layers.Activation('relu')(x)
return x
def ResNet50(input_shape, classes):
bn_axis = 3
img_input = Input(input_shape)
x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
x = layers.Conv2D(64, (7, 7),
strides=(2, 2),
padding='valid',
kernel_initializer='he_normal',
name='conv1')(img_input)
x = layers.BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
x = layers.Activation('relu')(x)
x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
x = layers.Dense(classes, activation='sigmoid', name='fc')(x)
# Create model.
model = models.Model(inputs = img_input, outputs = x, name='resnet50')
return model
model_resnet = ResNet50(input_shape = (3, 256, 256), classes = 1)
# compile model
model_resnet.compile(loss='binary_crossentropy',
optimizer=tf.keras.optimizers.Adam(learning_rate = 0.0001),
metrics=['accuracy', 'Recall', 'Precision'])
# make directory for logs
logdir = os.path.join('logs', model_name)
#os.mkdir(logdir)
from math import floor
N_FLODS = 5
INIT_LR = 1e-4 # 0.001
T_BS = 16
V_BS = 16
decay_rate = 0.95
decay_step = 1
# early stopping
cp = EarlyStopping(monitor ='val_loss', mode = 'min', verbose = 2, patience = PATIENCE, restore_best_weights=True)
mc = ModelCheckpoint(model_name, monitor = 'val_loss', mode = 'min', verbose = 2, save_best_only = True)
tsb = TensorBoard(log_dir=logdir)
lrs = LearningRateScheduler(lambda epoch : INIT_LR * pow(decay_rate, floor(epoch / decay_step)))
# training
start = timer()
# Fit the model
history_resnet= model_resnet.fit(train_g1,
epochs=1000,
steps_per_epoch=len(train_g),
validation_data=val_g,
validation_steps=len(val_g),
callbacks= [cp, mc, tsb])
end = timer()
elapsed = end - start
print('Total Time Elapsed: ', int(elapsed//60), ' minutes ', (round(elapsed%60)), ' seconds')

Pytorch network parameter calculation

Can someone tell me please about how the network parameter (10) is calculated? Thanks in advance.
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 6, 5)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16*5*5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
def forward(self, x):
x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
x = F.max_pool2d(F.relu(self.conv2(x)), 2)
x = x.view(x.size()[0], -1)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
net = Net()
print(net)
print(len(list(net.parameters())))
Output:
Net(
(conv1): Conv2d (1, 6, kernel_size=(5, 5), stride=(1, 1))
(conv2): Conv2d (6, 16, kernel_size=(5, 5), stride=(1, 1))
(fc1): Linear(in_features=400, out_features=120)
(fc2): Linear(in_features=120, out_features=84)
(fc3): Linear(in_features=84, out_features=10)
)
10
Best,
Zack
Most layer modules in PyTorch (e.g. Linear, Conv2d, etc.) group parameters into specific categories, such as weights and biases. Each of the five layer instances in your network has a "weight" and a "bias" parameter. This is why "10" is printed.
Of course, all of these "weight" and "bias" fields contain many parameters. For example, your first fully connected layer self.fc1 contains 16 * 5 * 5 * 120 = 48000 parameters. So len(params) doesn't tell you the number of parameters in the network--it gives you just the total number of "groupings" of parameters in the network.
Since Bill already answered why "10" is printed, I am just sharing a code snippet which you can use to find out the number of parameters associated with each layer in your network.
def count_parameters(model):
total_param = 0
for name, param in model.named_parameters():
if param.requires_grad:
num_param = numpy.prod(param.size())
if param.dim() > 1:
print(name, ':', 'x'.join(str(x) for x in list(param.size())), '=', num_param)
else:
print(name, ':', num_param)
total_param += num_param
return total_param
Use the above function as follows.
print('number of trainable parameters =', count_parameters(net))
Output:
conv1.weight : 6x1x5x5 = 150
conv1.bias : 6
conv2.weight : 16x6x5x5 = 2400
conv2.bias : 16
fc1.weight : 120x400 = 48000
fc1.bias : 120
fc2.weight : 84x120 = 10080
fc2.bias : 84
fc3.weight : 10x84 = 840
fc3.bias : 10
number of trainable parameters = 61706

CNN Regression predicts the same value for each test image.

So just a set up of the problem I am trying to solve. I have around 200k 64x64x3 RGB images of patches of terrain that a robot drove over. Each patch has a corresponding label of what the roughness of that image patch is. The roughness values range from 0-160. The data was collected with the robot driving at varying speeds, hence the range of the roughness values. My aim is to be able to predict the roughness of a patch. I am using the VGG-16 network, with the last layer modified to do regression. My batch size is 1024, the loss is mean sqaured error, the optimize is rmsprop. The network is shown below. My problem is that after training, the network predicts the exact same value for each test image. Another point to note is that the training loss is always higher than the validation loss which is odd. Lastly I tried with other optimizers such as SGD and Adam, as well as varying batch sizes. Right now I am trying to train the network from scratch but it does not seem too promising. I am not sure what is going wrong here, and I would really appreciate any help I can get. Thanks
if input_tensor is None:
img_input = Input(shape=input_shape)
else:
if not K.is_keras_tensor(input_tensor):
img_input = Input(tensor=input_tensor, shape=input_shape)
else:
img_input = input_tensor
# Block 1
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
# Block 2
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
# Block 3
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
# Block 4
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
# Block 5
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
x = Flatten(name='flatten')(x)
x = Dense(4096, activation='relu', name='fc1')(x)
x = Dense(4096, activation='relu', name='fc2')(x)
x = Dense(1,name='regression_dense')(x)
According to your explanation you have roughness values ranging from 0 to 160. Normalize these values in [-1, 1]. You can stick to your linear activation function in the last layer.
But in general I think you can solve this problem with a much shallower architecture with just a fraction of parameters.
Cheers

How do I use Keras when the number of training data and test data is different?

The following code creates a model that predicts a specific value using multiple variables. My data compose 11947 by 9 in matrix form and I have created data to use 9000 for training and 2947 for test.
import numpy as np
from keras.models import Model
from keras.layers import Dense, Input, concatenate, Conv1D
batchSize=1000
def get_model(rows, cols):
inputs1 = Input(shape=(1, rows))
inputs2 = Input(shape=(1, rows))
inputs3 = Input(shape=(1, rows))
inputs4 = Input(shape=(1, rows))
inputs5 = Input(shape=(1, rows))
inputs6 = Input(shape=(1, rows))
inputs7 = Input(shape=(1, rows))
conv1 = Conv1D(1024, 1, activation='relu', padding='same')(inputs1)
conv1 = Conv1D(512, 1, activation='relu', padding='same' )(conv1)
conv2 = Conv1D(1024, 1, activation='relu', padding='same')(inputs2)
conv2 = Conv1D(512, 1, activation='relu', padding='same')(conv2)
conv3 = Conv1D(1024, 1, activation='relu', padding='same')(inputs3)
conv3 = Conv1D(512, 1, activation='relu', padding='same')(conv3)
conv4 = Conv1D(1024, 1, activation='relu', padding='same')(inputs4)
conv4 = Conv1D(512, 1, activation='relu', padding='same')(conv4)
conv5 = Conv1D(1024, 1, activation='relu', padding='same')(inputs5)
conv5 = Conv1D(512, 1, activation='relu', padding='same')(conv5)
conv6 = Conv1D(1024, 1, activation='relu', padding='same')(inputs6)
conv6 = Conv1D(512, 1, activation='relu', padding='same')(conv6)
conv7 = Conv1D(1024, 1, activation='relu', padding='same')(inputs7)
conv7 = Conv1D(512, 1, activation='relu', padding='same')(conv7)
convConcat = concatenate([conv1, conv2, conv3, conv4, conv5, conv6, conv7])
convOut = Dense(rows, activation='relu')(convConcat)
model = Model(inputs=[inputs1, inputs2, inputs3, inputs4, inputs5, inputs6, inputs7], outputs=[convOut])
model.compile(optimizer='Adam', loss='mean_squared_error', metrics=['accuracy'])
return model
def train_and_predict():
dataT = data_train.transpose()
dataT_O = dataT[0].reshape((1, 1, -1))
dataT_1 = dataT[2].reshape((1, 1, -1))
dataT_2 = dataT[3].reshape((1, 1, -1))
dataT_3 = dataT[4].reshape((1, 1, -1))
dataT_4 = dataT[5].reshape((1, 1, -1))
dataT_5 = dataT[6].reshape((1, 1, -1))
dataT_6 = dataT[7].reshape((1, 1, -1))
dataT_7 = dataT[8].reshape((1, 1, -1))
model.fit([dataT_1, dataT_2, dataT_3, dataT_4, dataT_5, dataT_6, dataT_7], dataT_O, epochs=100, batch_size=batchSize)
dataT = data_test.transpose()
dataT_1 = dataT[2].reshape((1, 1, -1))
dataT_2 = dataT[3].reshape((1, 1, -1))
dataT_3 = dataT[4].reshape((1, 1, -1))
dataT_4 = dataT[5].reshape((1, 1, -1))
dataT_5 = dataT[6].reshape((1, 1, -1))
dataT_6 = dataT[7].reshape((1, 1, -1))
dataT_7 = dataT[8].reshape((1, 1, -1))
model.predict([dataT_1, dataT_2, dataT_3, dataT_4, dataT_5, dataT_6, dataT_7])
print('Done!')
if __name__ == '__main__':
data_train = np.genfromtxt('./trainingLossdata_train.txt')
data_test = np.genfromtxt('./trainingLossdata_test.txt')
model = get_model(data_train.shape[0], data_train.shape[1])
train_and_predict()
However, an error occured due to the difference between number of training data and test data. Exactly the following error message is displayed.
"ValueError: Error when checking: expected input_1 to have shape (None, 1, 9000) but got array with shape (1, 1, 2947)"
How can I solve this problem? In fact, I am a beginner of using Keras to do deep learning programming. Please give me some advices.
It seems to me that your input creation is wrong. It has nothing to do with different number of training and test sample (actually that happens all the time). Now, you said
My data compose 11947 by 9
This means you have 11947 samples each having 9 features. So the ideal shape for input should be None,9 or None,1,9. This None actually stands for the size of input.