Building neural network using k-fold cross validation - deep-learning

I am new to deep learning, trying to implement a neural network using 4-fold cross-validation for training, testing, and validating. The topic is to classify the vehicle using an existing dataset.
The accuracy result is 0.7.
Traning Accuracy
An example output for epochs
I also don't know whether the code is correct and what to do for increasing the accuracy.
Here is the code:
!pip install category_encoders
import tensorflow as tf
from sklearn.model_selection import KFold
import pandas as pd
import numpy as np
from tensorflow import keras
import category_encoders as ce
from category_encoders import OrdinalEncoder
car_data = pd.read_csv('car_data.csv')
car_data.columns = ['Purchasing', 'Maintenance', 'No_Doors','Capacity','BootSize','Safety','Evaluation']
# Extract the features and labels from the dataset
X = car_data.drop(['Evaluation'], axis=1)
Y = car_data['Evaluation']
encoder = ce.OrdinalEncoder(cols=['Purchasing', 'Maintenance', 'No_Doors','Capacity','BootSize','Safety'])
X = encoder.fit_transform(X)
X = X.to_numpy()
Y_df = pd.DataFrame(Y, columns=['Evaluation'])
encoder = OrdinalEncoder(cols=['Evaluation'])
Y_encoded = encoder.fit_transform(Y_df)
Y = Y_encoded.to_numpy()
input_layer = tf.keras.layers.Input(shape=(X.shape[1]))
# Define the hidden layers
hidden_layer_1 = tf.keras.layers.Dense(units=64, activation='relu', kernel_initializer='glorot_uniform')(input_layer)
hidden_layer_2 = tf.keras.layers.Dense(units=32, activation='relu', kernel_initializer='glorot_uniform')(hidden_layer_1)
# Define the output layer
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid', kernel_initializer='glorot_uniform')(hidden_layer_2)
# Create the model
model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
# Initialize the 4-fold cross-validation
kfold = KFold(n_splits=4, shuffle=True, random_state=42)
# Initialize a list to store the scores
scores = []
quality_weights= []
# Compile the model
model.compile(optimizer='adam',
loss=''sparse_categorical_crossentropy'',
metrics=['accuracy'],
sample_weight_mode='temporal')
for train_index, test_index in kfold.split(X,Y):
# Split the data into train and test sets
X_train, X_test = X[train_index], X[test_index]
Y_train, Y_test = Y[train_index], Y[test_index]
# Fit the model on the training data
model.fit(X_train, Y_train, epochs=300, batch_size=64, sample_weight=quality_weights)
# Evaluate the model on the test data
score = model.evaluate(X_test, Y_test)
# Append the score to the scores list
scores.append(score[1])
plt.plot(history.history['accuracy'])
plt.title('Model Training Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train'], loc='upper left')
plt.show()
# Print the mean and standard deviation of the scores
print(f'Mean accuracy: {np.mean(scores):.3f} +/- {np.std(scores):.3f}')

The first thing that caught my attention was here:
model.fit(X_train, Y_train, epochs=300, batch_size=64, sample_weight=quality_weights)
Your quality_weights should be a numpy array of size of the input.
Refer here: https://keras.io/api/models/model_training_apis/#fit-method
If changing that doesn't seemt to help then may be your network doesn't seem to be learning from the data. A few possible reasons could be:
The network is a bit too shallow. Try adding just one more hidden layer to see if that improves anything
From the code I can't see the size of your input data. Does it have enough datapoints for 4-fold cross-validation? Can you somehow augment the data?

Related

System exit 1 error after obtaining NaN losses from finetuning Mask R-CNN in Pytorch

I am following this tutorial from Pytorch for Finetuning a pre-trained model on my own dataset. I have my annotation in the COCO format in a json file, so, I first implemented the dataloader as follows:
import torch
import json
from torch.utils.data import Dataset
from pycocotools.coco import COCO
from PIL import Image
import os
import numpy as np
from torchvision import transforms
import Config
import transforms as T
from torchvision.transforms import functional as F
class CustomDataset(Dataset):
def __init__(self, root, json_file, transform=None):
self.root = root
with open(json_file) as f:
self.data = json.load(f)
self.transform = transform
self.image_ids = [img["id"] for img in self.data["images"]]
self.imgs = list(sorted(os.listdir(os.path.join(root, "Images"))))
self.masks = list(sorted(os.listdir(os.path.join(root, "Masks"))))
def __getitem__(self, idx):
# Get image ID
img_id = self.image_ids[idx]
img = next(image for image in self.data["images"] if image["id"] == img_id)
img_path = os.path.join(self.root, "Images")
mask_path = os.path.join(self.root, "Masks")
# Load image
image = Image.open(os.path.join(img_path, img['file_name'])).convert("RGB")
# extract annotations from the json file
annotations = [ann for ann in self.data["annotations"] if ann["image_id"] == img_id]
# extract labels from annotations
labels = [ann["label"] for ann in annotations]
# convert labels to integers
labels = [label for label in labels]
labels = torch.as_tensor(labels, dtype=torch.int64)
# extract boxes and convert them to format [x1, y1, x2, y2]
boxes = [ann["bbox"] for ann in annotations]
boxes = [[bbox[0], bbox[1], bbox[2], bbox[3]] for bbox in boxes]
num_objects = len(boxes)
# read the mask and include the number of objects in the first dimension
mask = np.array(Image.open(os.path.join(mask_path, img['file_name'])).convert("L"))
# Check if mask is empty
if mask.size == 0:
mask = np.zeros((num_objects, 1, 1), dtype=np.uint8)
else:
mask = np.expand_dims(mask, axis=0)
mask = np.repeat(mask, num_objects, axis=0)
# convert the binary mask array to a torch tensor
mask = torch.as_tensor(mask, dtype=torch.uint8)
# suppose all instances are not crowd
iscrowd = torch.zeros((num_objects,), dtype=torch.int64)
# convert bboxes to tensors
boxes = torch.as_tensor(boxes, dtype=torch.float32)
# calculate the area of the bounding box
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
# convert id to tensor
image_id = torch.tensor([idx])
# create target dictionary
target = {}
target["boxes"] = boxes
target["labels"] = labels
target["masks"] = mask
target["image_id"] = image_id
target["area"] = area
target["iscrowd"] = iscrowd
# apply the transform if any
if self.transform is not None:
image, target = self.transform(image, target)
return image, target
def __len__(self):
return len(self.imgs)
and I am using this code for training:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from engine import train_one_epoch
import utils
import transforms as T
from dataloader import CustomDataset
import Config
import torch
import utils
from tqdm import tqdm
from torch.optim.lr_scheduler import StepLR
from torchvision.transforms import functional as F
def get_instance_segmentation_model(num_classes):
# load an instance segmentation model pre-trained on COCO
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
# get the number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# now get the number of input features for the mask classifier
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256
# and replace the mask predictor with a new one
model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
hidden_layer,
num_classes)
return model
def get_transform(train):
transforms = []
# converts the image, a PIL image, into a PyTorch Tensor
transforms.append(T.PILToTensor())
if train:
# during training, randomly flip the training images
# and ground-truth for data augmentation
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
json_path = 'annotations.json'
# use our dataset and defined transformations
dataset = CustomDataset(root = Config.Dataset_dir, json_file=json_path, transform = get_transform(train=True))
# for image, target in dataset:
# print(image.shape)
# split the dataset in train and test set
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-500])
dataset_test = torch.utils.data.Subset(dataset, indices[-500:])
# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
dataset, batch_size=1, shuffle=True, num_workers=4,
collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
dataset_test, batch_size=1, shuffle=False, num_workers=4,
collate_fn=utils.collate_fn)
device = Config.DEVICE
# # our dataset has two classes only - background and person
num_classes = 2
# get the model using our helper function
model = get_instance_segmentation_model(num_classes)
# move model to the right device
model.to(device)
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.1,
momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.1)
# let's train it for 10 epochs
num_epochs = 10
for epoch in range(num_epochs):
# train for one epoch, printing every 10 iterations
train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
# update the learning rate
lr_scheduler.step()
# evaluate on the test dataset
evaluate(model, data_loader_test, device=device)
This training code is as stated in the tutorial is using some helper functions which can be accessed from here. I have run the training code and the training is working for the first 10 samples in the data, but then it gives the following error:
Epoch: [0] [ 0/2759] eta: 13:29:50 lr: 0.000200 loss: -136.8811 (-136.8811) loss_classifier: 0.9397 (0.9397) loss_box_reg: 0.0017 (0.0017) loss_mask: -137.9142 (-137.9142) loss_objectness: 0.0859 (0.0859) loss_rpn_box_reg: 0.0057 (0.0057) time: 17.6117 data: 10.0775
Loss is nan, stopping training
{'loss_classifier': tensor(nan, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(nan, grad_fn=<DivBackward0>), 'loss_mask': tensor(nan, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_objectness': tensor(nan, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(nan, grad_fn=<DivBackward0>)}
An exception has occurred, use %tb to see the full traceback.
SystemExit: 1
This error is raised from the engine.py train_one_epoch function, especially from this part of the function:
with torch.cuda.amp.autocast(enabled=scaler is not None):
loss_dict = model(images, targets)
losses = sum(loss for loss in loss_dict.values())
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = utils.reduce_dict(loss_dict)
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
loss_value = losses_reduced.item()
if not math.isfinite(loss_value):
print(f"Loss is {loss_value}, stopping training")
print(loss_dict_reduced)
sys.exit(1)
Which indicates that the losses returned after the first loop are NaN ... What could be wrong here please? I am running out of ideas and don't know what's going wrong anymore.

Train Caffe CNN to output multidimensional features

I'd like to build a feature extractor using Caffe's CNNs and I already have a large sample of input features and desired output features.
Now I need to train some convolutional layers to learn how to transform the input features into the output.
My question is: How can I achieve this on Caffe?
As a minimal example, suppose I wanted to train a CNN that inverts the values of a 2D array.
For example, if my input is
[[0,1,0],
[1,1,1],
[0,1,0]]
the CNN should output
[[1,0,1],
[0,0,0],
[1,0,1]].
For
[[0,0,0],
[0,1,0],
[0,0,0]]
the output should be
[[1,1,1],
[1,0,1],
[1,1,1]]
and so on.
Of course this is just a minimal example to share, the actual problem is nearly impossible to tackle without the use of multiple convolutions.
I was able to create this code for this problem. I used the Euclidean Loss at the end, but unfortunately the CNN is not learning anything.
ROOT_DIR = '/home'
from os.path import join
import numpy as np
import h5py
from itertools import product
import caffe
from caffe import layers
from caffe.proto import caffe_pb2
#%% GENERATE DATA
data_in = np.array([np.array(seq).reshape(1,3,3) for seq in product([0,1], repeat=9)])
data_out = np.array([-1*array+1 for array in data_in])
with open(join(ROOT_DIR, 'data.txt'), 'w') as ftxt:
with h5py.File(join(ROOT_DIR, 'data.hdf5'), 'w') as fhdf5:
fhdf5['data'] = data_in.astype(np.float32)
fhdf5['label'] = data_out.astype(np.float32)
ftxt.write(join(ROOT_DIR, 'data.hdf5'))
#%%DEFINE NET
net = caffe.NetSpec()
net.data, net.label = layers.HDF5Data(batch_size=64, source=join(ROOT_DIR, 'data.txt'), ntop=2)
net.conv1 = layers.Convolution(net.data, kernel_size=1, num_output=128)
net.relu1 = layers.ReLU(net.conv1, in_place=True)
net.conv2 = layers.Convolution(net.relu1, kernel_size=1, num_output=1)
net.relu2 = layers.ReLU(net.conv2, in_place=True)
net.loss = layers.EuclideanLoss(net.relu2, net.label)
net.to_proto()
with open(join(ROOT_DIR, 'invert_net.prototxt'), 'w') as f:
f.write(str(net.to_proto()))
#%% DEFINE SOLVER
solver = caffe_pb2.SolverParameter()
solver.train_net = join(ROOT_DIR, 'invert_net.prototxt')
solver.max_iter = 10000
solver.base_lr = 0.01
solver.lr_policy = 'fixed'
with open(join(ROOT_DIR, 'solver.prototxt'), 'w') as f:
f.write(str(solver))
#%% TRAIN NET
caffe.set_mode_cpu()
solver = caffe.SGDSolver(join(ROOT_DIR, 'solver.prototxt'))
solver.solve()

How to use keras to fine-tune inception v3 to do multi-class classification?

I want to use Keras to do two classes image classify using Cat vs. Dog dataset from Kaggle.com.
But I have some problem with param "class_mode" as below code.
if I use "binary" mode, accuracy is about 95%, but if I use "categorical" accuracy is abnormally low, only above 50%.
binary mode means only one output in last layer and use sigmoid activation to classify. sample's label is only one integer.
categorical means two output in last layer and use softmax activation to classify. sample's label is one hot format, eg.(1,0), (0,1).
I think these two ways should have the similar result. Anyone knows the reason for the difference? Thanks very much!
import os
import sys
import glob
import argparse
import matplotlib.pyplot as plt
from keras import __version__
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
set some params here
IM_WIDTH, IM_HEIGHT = 299, 299 #fixed size for InceptionV3
NB_EPOCHS = 1
BAT_SIZE = 32
FC_SIZE = 1024
NB_IV3_LAYERS_TO_FREEZE = 172
loss_mode = "binary_crossentropy"
def get_nb_files(directory):
"""Get number of files by searching directory recursively"""
if not os.path.exists(directory):
return 0
cnt = 0
for r, dirs, files in os.walk(directory):
for dr in dirs:
cnt += len(glob.glob(os.path.join(r, dr + "/*")))
return cnt
transfer_learn, keep the weights in inception v3
def setup_to_transfer_learn(model, base_model):
"""Freeze all layers and compile the model"""
for layer in base_model.layers:
layer.trainable = False
model.compile(optimizer='rmsprop', loss=loss_mode, metrics=['accuracy'])
Add last layer to do two classes classification.
def add_new_last_layer(base_model, nb_classes):
"""Add last layer to the convnet
Args:
base_model: keras model excluding top
nb_classes: # of classes
Returns:
new keras model with last layer
"""
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(FC_SIZE, activation='relu')(x) #new FC layer, random init
if args.class_mode == "binary":
predictions = Dense(1, activation='sigmoid')(x) #new softmax layer
else:
predictions = Dense(nb_classes, activation='softmax')(x) #new softmax layer
model = Model(inputs=base_model.input, outputs=predictions)
return model
Freeze the bottom NB_IV3_LAYERS and retrain the remaining top layers,
and fine tune weights.
def setup_to_finetune(model):
"""Freeze the bottom NB_IV3_LAYERS and retrain the remaining top layers.
note: NB_IV3_LAYERS corresponds to the top 2 inception blocks in the inceptionv3 arch
Args:
model: keras model
"""
for layer in model.layers[:NB_IV3_LAYERS_TO_FREEZE]:
layer.trainable = False
for layer in model.layers[NB_IV3_LAYERS_TO_FREEZE:]:
layer.trainable = True
model.compile(optimizer="rmsprop", loss=loss_mode, metrics=['accuracy'])
#model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])
def train(args):
"""Use transfer learning and fine-tuning to train a network on a new dataset"""
nb_train_samples = get_nb_files(args.train_dir)
nb_classes = len(glob.glob(args.train_dir + "/*"))
nb_val_samples = get_nb_files(args.val_dir)
nb_epoch = int(args.nb_epoch)
batch_size = int(args.batch_size)
print("nb_classes:{}".format(nb_classes))
data prepare
train_datagen = ImageDataGenerator(
preprocessing_function=preprocess_input,
rotation_range=30,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True
)
test_datagen = ImageDataGenerator(
preprocessing_function=preprocess_input,
rotation_range=30,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True
)
train_generator = train_datagen.flow_from_directory(
args.train_dir,
target_size=(IM_WIDTH, IM_HEIGHT),
batch_size=batch_size,
#class_mode='binary'
class_mode=args.class_mode
)
validation_generator = test_datagen.flow_from_directory(
args.val_dir,
target_size=(IM_WIDTH, IM_HEIGHT),
batch_size=batch_size,
#class_mode='binary'
class_mode=args.class_mode
)
setup model
base_model = InceptionV3(weights='imagenet', include_top=False) #include_top=False excludes final FC layer
model = add_new_last_layer(base_model, nb_classes)
transfer learning
setup_to_transfer_learn(model, base_model)
#model.summary()
history_tl = model.fit_generator(
train_generator,
epochs=nb_epoch,
steps_per_epoch=nb_train_samples//BAT_SIZE,
validation_data=validation_generator,
validation_steps=nb_val_samples//BAT_SIZE)
fine-tuning
setup_to_finetune(model)
history_ft = model.fit_generator(
train_generator,
steps_per_epoch=nb_train_samples//BAT_SIZE,
epochs=nb_epoch,
validation_data=validation_generator,
validation_steps=nb_val_samples//BAT_SIZE)
model.save(args.output_model_file)
if args.plot:
plot_training(history_ft)
def plot_training(history):
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'r.')
plt.plot(epochs, val_acc, 'r')
plt.title('Training and validation accuracy')
plt.figure()
plt.plot(epochs, loss, 'r.')
plt.plot(epochs, val_loss, 'r-')
plt.title('Training and validation loss')
plt.show()
main func
if __name__=="__main__":
a = argparse.ArgumentParser()
a.add_argument("--train_dir", default="train2")
a.add_argument("--val_dir", default="test2")
a.add_argument("--nb_epoch", default=NB_EPOCHS)
a.add_argument("--batch_size", default=BAT_SIZE)
a.add_argument("--output_model_file", default="inceptionv3-ft.model")
a.add_argument("--plot", action="store_true")
a.add_argument("--class_mode", default="binary")
args = a.parse_args()
if args.train_dir is None or args.val_dir is None:
a.print_help()
sys.exit(1)
if args.class_mode != "binary" and args.class_mode != "categorical":
print("set class_mode as 'binary' or 'categorical'")
if args.class_mode == "categorical":
loss_mode = "categorical_crossentropy"
#set class_mode
print("class_mode:{}, loss_mode:{}".format(args.class_mode, loss_mode))
if (not os.path.exists(args.train_dir)) or (not os.path.exists(args.val_dir)):
print("directories do not exist")
sys.exit(1)
train(args)
I had this problem on several tasks when the learning rate was too high. Try something like 0.0001 or even less.
According to the Keras Documentation, the default rate ist 0.001:
keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
See https://keras.io/optimizers/#rmsprop
I found that if I use SDG or Adam optimizer, the accuracy can go up normally. So is there something wrong using RMSprop optimizer with default learning rate=0.001?

How to continue training for a saved and then loaded Keras model?

Following the official keras documentation , I was able to save and load a model. Keras is using tensorflow as the backend.
However, is it possible to run more training for such saved and loaded models.
Following is the code borrowed from Link. Then edited.
In the following code, the model is trained for 75 epochs and saved then loaded again.
However, when I tried to train it further with more 75 epochs it seems model was not trained and I got the same result without any modifications.
# -*- coding: utf-8 -*-
from keras.models import Sequential
from keras.layers import Dense
from keras.models import model_from_json
import numpy
import os
# fix random seed for reproducibility
numpy.random.seed(7)
# load pima indians dataset
dataset = numpy.loadtxt("pima-indians-diabetes.txt", delimiter=",")
# split into input (X) and output (Y) variables
X = dataset[:,0:8]
Y = dataset[:,8]
# create model
model = Sequential()
model.add(Dense(12, input_dim=8, kernel_initializer='uniform', activation='relu'))
model.add(Dense(8, kernel_initializer='uniform', activation='relu'))
model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
# Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# Fit the model
model.fit(X, Y, epochs=75, batch_size=10, verbose=0)
# evaluate the model
scores = model.evaluate(X, Y, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
# serialize model to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file: json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")
# later...
# load json and create model
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")
print("Loaded model from disk")
# evaluate loaded model on test data
loaded_model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
score = loaded_model.evaluate(X, Y, verbose=0)
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))
model.fit(X, Y, epochs=75, batch_size=10, verbose=0)
score = loaded_model.evaluate(X, Y, verbose=0)
print("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))
It looks like from that code that your evaluating the loaded_model twice, but your extra training is done on just the model. Instead of copying and pasting different variable names you could try something like this... I find it a little easier to keep track of. Also, add some white space to your code between comments, it will help keep things clear and organized.
# Save a model you have trained
model.save('trained_model.h5')
# Delete the model
del model
# Load the model
model = load_model('trained_model.h5')
# Train more on the loaded model
model.fit(data, labels, epochs, batch_size)

Different between Caffe and Keras

I have trained LeNet for MNIST using Caffe and now I would like to export this model to be used within Keras.
To this end I tried to extract weights from caffe.Net and use them to initialize Keras's network. However, I received different predictions from the two models. So I have tried to debug them layer by layer, starting with the first one. The code I have tested is as follows:
# import caffe and load facce.Net from prototxt and caffemodel files
import sys
sys.path.append('/opt/caffe/python')
import caffe
net = caffe.Net('lenet_train_test.prototxt', 'save/mnist_iter_500000.caffemodel', caffe.TEST)
# this should be the kernel weights and bias for the first convolution layer 'conv1'
c1_w = net.params['conv1'][0].data
c1_b = net.params['conv1'][1].data
# import Keras and build a convolution layer using the same parameters as in lenet_train_test.prototxt and instantiate it with the weights above
import keras
from keras.layers import Convolution2D
conv1 = Convolution2D(20, 5, 5, border_mode='valid', input_shape=(1, 28, 28), weights=[c1_w,c1_b], activation='linear')
from keras.models import Sequential
model=Sequential()
model.add(conv1)
model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
# load MNIST data and do scaling like I did when training Caffe model
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_test = X_test.reshape(X_test.shape[0], 1, 28, 28)
X_test = X_test.astype('float32')
X_test *= 0.00392157
# set the first test example and get the prediction
net.blobs['data'].data[0] = X_test[0][0]
net.forward()
out0 = net.blobs['conv1'].data[0] # this is the prediction for 20 kernels
m0 = out0[0] # just consider the first one
m0.shape # >>> (24,24)
# pass the same example through the conv1 layer in Keras model
import numpy as np
a = np.zeros( (1,1,28,28) )
a[0] = X_test[0]
out1 = model.predict_on_batch(a) # this is the prediction for 20 kernels
m1 = out1[0][0] # just consider the first one
m1.shape # >>> (24,24) the same size
# I get a lots of 'False'
m0 == m1
Have I done anything wrong in the layer construction? or maybe Caffe and Keras implement the convolution2D differently?