I tried running an example on SHAP Deep Explainer from this link using this Titanic dataset. This is the code from the example:
# import package
import shap
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import optimizers
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import os
# load data
os.chdir('/titanic/')
train_data = pd.read_csv('./train.csv', index_col=0)
test_data = pd.read_csv('./test.csv', index_col=0)
train_data.head()
def data_preprocessing(df):
df = df.drop(columns=['Name', 'Ticket', 'Cabin'])
# fill na
df[['Age']] = df[['Age']].fillna(value=df[['Age']].mean())
df[['Embarked']] = df[['Embarked']].fillna(value=df['Embarked'].value_counts().idxmax())
df[['Fare']] = df[['Fare']].fillna(value=df[['Fare']].mean())
# categorical features into numeric
df['Sex'] = df['Sex'].map( {'female': 1, 'male': 0} ).astype(int)
# one-hot encoding
embarked_one_hot = pd.get_dummies(df['Embarked'], prefix='Embarked')
df = df.drop('Embarked', axis=1)
df = df.join(embarked_one_hot)
return df
# train data processing
train_data = data_preprocessing(train_data)
train_data.isnull().sum()
# create data for training
x_train = train_data.drop(['Survived'], axis=1).values
# Check test data
test_data.isnull().sum()
# scale
scale = StandardScaler()
x_train = scale.fit_transform(x_train)
# prepare y_train
y_train = train_data['Survived'].values
test_data = data_preprocessing(test_data)
x_test = test_data.values.astype(float)
# scaling
x_test = scale.transform(x_test)
# Check test data
test_data.isnull().sum()
# build mlp
model = Sequential()
model.add(Dense(32, input_dim=x_train.shape[1], activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(8, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(2, activation='softmax'))
# compile model
model.compile(loss='sparse_categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
# fit model
model.fit(x_train, y_train, epochs=10, batch_size=64)
# compute SHAP values
explainer = shap.DeepExplainer(model, x_train)
shap_values = explainer.shap_values(x_test)
shap.summary_plot(shap_values[0], plot_type = 'bar', feature_names = test_data.columns)
shap.initjs()
shap.force_plot(explainer.expected_value[0].numpy(), shap_values[0][0], features = test_data.columns)
shap.decision_plot(explainer.expected_value[0].numpy(), shap_values[0][0], features = test_data.iloc[0,:], feature_names = test_data.columns.tolist())
shap.plots._waterfall.waterfall_legacy(explainer.expected_value[0].numpy(), shap_values[0][0], feature_names = test_data.columns)
There is no code for generating a beeswarm plot in the example, but I used
shap.summary_plot(shap_values[0], feature_names = test_data.columns)
and got a beeswarm plot. From my understanding, the color of the dots displays the original value of each feature, falling along a gradient of blue to red. However, the plot I got only has blue dots and doesn't have a gradient ruler on the side.
Here is the plot I got:
And here is what I expected (photo from https://shap.readthedocs.io/en/latest/example_notebooks/api_examples/plots/beeswarm.html):
Any suggestions on what could have caused this and what I can do to get the colors would be greatly appreciated. Thank you!
I am new to deep learning, trying to implement a neural network using 4-fold cross-validation for training, testing, and validating. The topic is to classify the vehicle using an existing dataset.
The accuracy result is 0.7.
Traning Accuracy
An example output for epochs
I also don't know whether the code is correct and what to do for increasing the accuracy.
Here is the code:
!pip install category_encoders
import tensorflow as tf
from sklearn.model_selection import KFold
import pandas as pd
import numpy as np
from tensorflow import keras
import category_encoders as ce
from category_encoders import OrdinalEncoder
car_data = pd.read_csv('car_data.csv')
car_data.columns = ['Purchasing', 'Maintenance', 'No_Doors','Capacity','BootSize','Safety','Evaluation']
# Extract the features and labels from the dataset
X = car_data.drop(['Evaluation'], axis=1)
Y = car_data['Evaluation']
encoder = ce.OrdinalEncoder(cols=['Purchasing', 'Maintenance', 'No_Doors','Capacity','BootSize','Safety'])
X = encoder.fit_transform(X)
X = X.to_numpy()
Y_df = pd.DataFrame(Y, columns=['Evaluation'])
encoder = OrdinalEncoder(cols=['Evaluation'])
Y_encoded = encoder.fit_transform(Y_df)
Y = Y_encoded.to_numpy()
input_layer = tf.keras.layers.Input(shape=(X.shape[1]))
# Define the hidden layers
hidden_layer_1 = tf.keras.layers.Dense(units=64, activation='relu', kernel_initializer='glorot_uniform')(input_layer)
hidden_layer_2 = tf.keras.layers.Dense(units=32, activation='relu', kernel_initializer='glorot_uniform')(hidden_layer_1)
# Define the output layer
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid', kernel_initializer='glorot_uniform')(hidden_layer_2)
# Create the model
model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
# Initialize the 4-fold cross-validation
kfold = KFold(n_splits=4, shuffle=True, random_state=42)
# Initialize a list to store the scores
scores = []
quality_weights= []
# Compile the model
model.compile(optimizer='adam',
loss=''sparse_categorical_crossentropy'',
metrics=['accuracy'],
sample_weight_mode='temporal')
for train_index, test_index in kfold.split(X,Y):
# Split the data into train and test sets
X_train, X_test = X[train_index], X[test_index]
Y_train, Y_test = Y[train_index], Y[test_index]
# Fit the model on the training data
model.fit(X_train, Y_train, epochs=300, batch_size=64, sample_weight=quality_weights)
# Evaluate the model on the test data
score = model.evaluate(X_test, Y_test)
# Append the score to the scores list
scores.append(score[1])
plt.plot(history.history['accuracy'])
plt.title('Model Training Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train'], loc='upper left')
plt.show()
# Print the mean and standard deviation of the scores
print(f'Mean accuracy: {np.mean(scores):.3f} +/- {np.std(scores):.3f}')
The first thing that caught my attention was here:
model.fit(X_train, Y_train, epochs=300, batch_size=64, sample_weight=quality_weights)
Your quality_weights should be a numpy array of size of the input.
Refer here: https://keras.io/api/models/model_training_apis/#fit-method
If changing that doesn't seemt to help then may be your network doesn't seem to be learning from the data. A few possible reasons could be:
The network is a bit too shallow. Try adding just one more hidden layer to see if that improves anything
From the code I can't see the size of your input data. Does it have enough datapoints for 4-fold cross-validation? Can you somehow augment the data?
Thanks in advance to anyone who takes time to answer this. I'm learning Keras and got stuck with a problem where I have 3 classes and the test set accuracy moves up to 0.6667 and then stalls on that exact number for 50 epochs. The accuracy is also way higher than what it should be if it were correct. This worked fine when I only had 2 classes.
What am I doing wrong here?
import pandas as pd
import numpy as np
import keras.utils
#Create train and test data
def create_Xt_Yt(X, y, percentage=0.8):
p = int(len(X) * percentage)
X_train = X[0:p]
Y_train = y[0:p]
X_test = X[p:]
Y_test = y[p:]
return X_train, X_test, Y_train, Y_test
df = pd.read_csv('data.csv', parse_dates=['Date'])
df.set_index(['Date'], inplace=True)
df.drop(['Volume'],1, inplace=True)
df.dropna(inplace=True)
data = df.loc[:, 'AMD-close'].tolist()
window = 30
forecast = 3
forecast_target_long = 1.015
forecast_target_short= 0.985
x_holder = []
y_holder = []
for i in range(len(data)):
try:
x_class = data[i:i+window]
y_class = data[i+window+forecast]
window_last_price = data[i+window]
forecast_price = y_class
if forecast_price > (window_last_price*forecast_target_long):
y_class = [1]
elif forecast_price < (window_last_price*forecast_target_short):
y_class = [-1]
else:
y_class = [0]
y_holder.append(y_class)
x_holder.append(x_class)
except Exception as e:
print(e)
break
normalize = [(np.array(i) - np.mean(i)) / np.std(i) for i in x_holder]
y_holder = keras.utils.to_categorical(y_holder, 3)
x_holder, y_holder = np.array(x_holder), np.array(y_holder)
X_train, X_test, Y_train, Y_test = create_Xt_Yt(x_holder, y_holder)
This is the model:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.normalization import BatchNormalization
from keras.optimizers import RMSprop, Adam, SGD, Nadam
from keras.callbacks import ReduceLROnPlateau
from keras import regularizers
from keras import losses
model = Sequential()
model.add(Dense(64, input_dim=window, activity_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(16, activity_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(3))
model.add(Activation('sigmoid'))
reduce_learning_ontop = ReduceLROnPlateau(monitor='val_acc', factor=0.9, patience=25, min_lr=0.000001, verbose=1)
model.compile(Adam(lr=.0001),loss='binary_crossentropy', metrics=['accuracy'])
myModel = model.fit(X_train, Y_train, batch_size=128, epochs=160, verbose=1, shuffle=True, validation_data=(X_test, Y_test))
So two thing here:
Change activation:
model.add(Activation('softmax'))
sigmoid is designed for binary classification - in case of multiclass classification - softmax is the state of the art activation.
Change loss:
model.compile(
Adam(lr=.0001),
loss='categorical_crossentropy', metrics=['accuracy'])
binary_crossentropy is also designed for binary_classification. An equivalent to this is categorical_crossentropy.
I want to use Keras to do two classes image classify using Cat vs. Dog dataset from Kaggle.com.
But I have some problem with param "class_mode" as below code.
if I use "binary" mode, accuracy is about 95%, but if I use "categorical" accuracy is abnormally low, only above 50%.
binary mode means only one output in last layer and use sigmoid activation to classify. sample's label is only one integer.
categorical means two output in last layer and use softmax activation to classify. sample's label is one hot format, eg.(1,0), (0,1).
I think these two ways should have the similar result. Anyone knows the reason for the difference? Thanks very much!
import os
import sys
import glob
import argparse
import matplotlib.pyplot as plt
from keras import __version__
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
set some params here
IM_WIDTH, IM_HEIGHT = 299, 299 #fixed size for InceptionV3
NB_EPOCHS = 1
BAT_SIZE = 32
FC_SIZE = 1024
NB_IV3_LAYERS_TO_FREEZE = 172
loss_mode = "binary_crossentropy"
def get_nb_files(directory):
"""Get number of files by searching directory recursively"""
if not os.path.exists(directory):
return 0
cnt = 0
for r, dirs, files in os.walk(directory):
for dr in dirs:
cnt += len(glob.glob(os.path.join(r, dr + "/*")))
return cnt
transfer_learn, keep the weights in inception v3
def setup_to_transfer_learn(model, base_model):
"""Freeze all layers and compile the model"""
for layer in base_model.layers:
layer.trainable = False
model.compile(optimizer='rmsprop', loss=loss_mode, metrics=['accuracy'])
Add last layer to do two classes classification.
def add_new_last_layer(base_model, nb_classes):
"""Add last layer to the convnet
Args:
base_model: keras model excluding top
nb_classes: # of classes
Returns:
new keras model with last layer
"""
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(FC_SIZE, activation='relu')(x) #new FC layer, random init
if args.class_mode == "binary":
predictions = Dense(1, activation='sigmoid')(x) #new softmax layer
else:
predictions = Dense(nb_classes, activation='softmax')(x) #new softmax layer
model = Model(inputs=base_model.input, outputs=predictions)
return model
Freeze the bottom NB_IV3_LAYERS and retrain the remaining top layers,
and fine tune weights.
def setup_to_finetune(model):
"""Freeze the bottom NB_IV3_LAYERS and retrain the remaining top layers.
note: NB_IV3_LAYERS corresponds to the top 2 inception blocks in the inceptionv3 arch
Args:
model: keras model
"""
for layer in model.layers[:NB_IV3_LAYERS_TO_FREEZE]:
layer.trainable = False
for layer in model.layers[NB_IV3_LAYERS_TO_FREEZE:]:
layer.trainable = True
model.compile(optimizer="rmsprop", loss=loss_mode, metrics=['accuracy'])
#model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])
def train(args):
"""Use transfer learning and fine-tuning to train a network on a new dataset"""
nb_train_samples = get_nb_files(args.train_dir)
nb_classes = len(glob.glob(args.train_dir + "/*"))
nb_val_samples = get_nb_files(args.val_dir)
nb_epoch = int(args.nb_epoch)
batch_size = int(args.batch_size)
print("nb_classes:{}".format(nb_classes))
data prepare
train_datagen = ImageDataGenerator(
preprocessing_function=preprocess_input,
rotation_range=30,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True
)
test_datagen = ImageDataGenerator(
preprocessing_function=preprocess_input,
rotation_range=30,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True
)
train_generator = train_datagen.flow_from_directory(
args.train_dir,
target_size=(IM_WIDTH, IM_HEIGHT),
batch_size=batch_size,
#class_mode='binary'
class_mode=args.class_mode
)
validation_generator = test_datagen.flow_from_directory(
args.val_dir,
target_size=(IM_WIDTH, IM_HEIGHT),
batch_size=batch_size,
#class_mode='binary'
class_mode=args.class_mode
)
setup model
base_model = InceptionV3(weights='imagenet', include_top=False) #include_top=False excludes final FC layer
model = add_new_last_layer(base_model, nb_classes)
transfer learning
setup_to_transfer_learn(model, base_model)
#model.summary()
history_tl = model.fit_generator(
train_generator,
epochs=nb_epoch,
steps_per_epoch=nb_train_samples//BAT_SIZE,
validation_data=validation_generator,
validation_steps=nb_val_samples//BAT_SIZE)
fine-tuning
setup_to_finetune(model)
history_ft = model.fit_generator(
train_generator,
steps_per_epoch=nb_train_samples//BAT_SIZE,
epochs=nb_epoch,
validation_data=validation_generator,
validation_steps=nb_val_samples//BAT_SIZE)
model.save(args.output_model_file)
if args.plot:
plot_training(history_ft)
def plot_training(history):
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))
plt.plot(epochs, acc, 'r.')
plt.plot(epochs, val_acc, 'r')
plt.title('Training and validation accuracy')
plt.figure()
plt.plot(epochs, loss, 'r.')
plt.plot(epochs, val_loss, 'r-')
plt.title('Training and validation loss')
plt.show()
main func
if __name__=="__main__":
a = argparse.ArgumentParser()
a.add_argument("--train_dir", default="train2")
a.add_argument("--val_dir", default="test2")
a.add_argument("--nb_epoch", default=NB_EPOCHS)
a.add_argument("--batch_size", default=BAT_SIZE)
a.add_argument("--output_model_file", default="inceptionv3-ft.model")
a.add_argument("--plot", action="store_true")
a.add_argument("--class_mode", default="binary")
args = a.parse_args()
if args.train_dir is None or args.val_dir is None:
a.print_help()
sys.exit(1)
if args.class_mode != "binary" and args.class_mode != "categorical":
print("set class_mode as 'binary' or 'categorical'")
if args.class_mode == "categorical":
loss_mode = "categorical_crossentropy"
#set class_mode
print("class_mode:{}, loss_mode:{}".format(args.class_mode, loss_mode))
if (not os.path.exists(args.train_dir)) or (not os.path.exists(args.val_dir)):
print("directories do not exist")
sys.exit(1)
train(args)
I had this problem on several tasks when the learning rate was too high. Try something like 0.0001 or even less.
According to the Keras Documentation, the default rate ist 0.001:
keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
See https://keras.io/optimizers/#rmsprop
I found that if I use SDG or Adam optimizer, the accuracy can go up normally. So is there something wrong using RMSprop optimizer with default learning rate=0.001?
I am trying, just for practising with Keras, to train a network to learn a very easy function.
The input of the network is 2Dimensional . The output is one dimensional.
The function can indeed represented with an image, and the same is for the approximate function.
At the moment I'm not looking for any good generalization, I just want that the network is at least good in representing the training set.
Here I place my code:
import matplotlib.pyplot as plt
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
import random as rnd
import math
m = [
[1,1,1,1,0,0,0,0,1,1],
[1,1,0,0,0,0,0,0,1,1],
[1,0,0,0,1,1,0,1,0,0],
[1,0,0,1,0,0,0,0,0,0],
[0,0,0,0,1,1,0,0,0,0],
[0,0,0,0,1,1,0,0,0,0],
[0,0,0,0,0,0,1,0,0,1],
[0,0,1,0,1,1,0,0,0,1],
[1,1,0,0,0,0,0,0,1,1],
[1,1,0,0,0,0,1,1,1,1]] #A representation of the function that I would like to approximize
matrix = np.matrix(m)
evaluation = np.zeros((100,100))
x_train = np.zeros((10000,2))
y_train = np.zeros((10000,1))
for x in range(0,100):
for y in range(0,100):
x_train[x+100*y,0] = x/100. #I normilize the input of the function, between [0,1)
x_train[x+100*y,1] = y/100.
y_train[x+100*y,0] = matrix[int(x/10),int(y/10)] +0.0
#Here I show graphically what I would like to have
plt.matshow(matrix, interpolation='nearest', cmap=plt.cm.ocean, extent=(0,1,0,1))
#Here I built the model
model = Sequential()
model.add(Dense(20, input_dim=2, init='uniform'))
model.add(Activation('tanh'))
model.add(Dense(1, init='uniform'))
model.add(Activation('sigmoid'))
#Here I train it
sgd = SGD(lr=0.5)
model.compile(loss='mean_squared_error', optimizer=sgd)
model.fit(x_train, y_train,
nb_epoch=100,
batch_size=100,
show_accuracy=True)
#Here (I'm not sure), I'm using the network over the given example
x = model.predict(x_train,batch_size=1)
#Here I show the approximated function
print x
print x_train
for i in range(0, 10000):
evaluation[int(x_train[i,0]*100),int(x_train[i,1]*100)] = x[i]
plt.matshow(evaluation, interpolation='nearest', cmap=plt.cm.ocean, extent=(0,1,0,1))
plt.colorbar()
plt.show()
As you can see, the two function are completely different, and I can't understand why.
I think that maybe model.predict doesn't work as I axpect.
Your understanding is correct; it's just a question of hyperparameter tuning.
I just tried your code, and it looks like you're not giving your training enough time:
Look at the loss, under 100 epochs, it's stuck at around 0.23. But try using the 'adam' otimizer instead of SGD, and increase the number of epochs up to 10,000: the loss now decreases down to 0.09 and your picture looks much better.
If it's still not precise enough for you, you may also want to try increasing the number of parameters: just add a few layers; this will make overfitting much easier ! :-)
I have changed just your network structure and added a training dataset. The loss decreases down to 0.01.
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 16 15:26:52 2017
#author: Administrator
"""
import matplotlib.pyplot as plt
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
import random as rnd
import math
from keras.optimizers import Adam,SGD
m = [
[1,1,1,1,0,0,0,0,1,1],
[1,1,0,0,0,0,0,0,1,1],
[1,0,0,0,1,1,0,1,0,0],
[1,0,0,1,0,0,0,0,0,0],
[0,0,0,0,1,1,0,0,0,0],
[0,0,0,0,1,1,0,0,0,0],
[0,0,0,0,0,0,1,0,0,1],
[0,0,1,0,1,1,0,0,0,1],
[1,1,0,0,0,0,0,0,1,1],
[1,1,0,0,0,0,1,1,1,1]] #A representation of the function that I would like to approximize
matrix = np.matrix(m)
evaluation = np.zeros((1000,1000))
x_train = np.zeros((1000000,2))
y_train = np.zeros((1000000,1))
for x in range(0,1000):
for y in range(0,1000):
x_train[x+1000*y,0] = x/1000. #I normilize the input of the function, between [0,1)
x_train[x+1000*y,1] = y/1000.
y_train[x+1000*y,0] = matrix[int(x/100),int(y/100)] +0.0
#Here I show graphically what I would like to have
plt.matshow(matrix, interpolation='nearest', cmap=plt.cm.ocean, extent=(0,1,0,1))
#Here I built the model
model = Sequential()
model.add(Dense(50, input_dim=2, init='uniform'))## init是关键字,’uniform’表示用均匀分布去初始化权重
model.add(Activation('tanh'))
model.add(Dense(20, init='uniform'))
model.add(Activation('tanh'))
model.add(Dense(1, init='uniform'))
model.add(Activation('sigmoid'))
#Here I train it
#sgd = SGD(lr=0.01)
adam = Adam(lr = 0.01)
model.compile(loss='mean_squared_error', optimizer=adam)
model.fit(x_train, y_train,
nb_epoch=100,
batch_size=100,
show_accuracy=True)
#Here (I'm not sure), I'm using the network over the given example
x = model.predict(x_train,batch_size=1)
#Here I show the approximated function
print (x)
print (x_train)
for i in range(0, 1000000):
evaluation[int(x_train[i,0]*1000),int(x_train[i,1]*1000)] = x[i]
plt.matshow(evaluation, interpolation='nearest', cmap=plt.cm.ocean, extent=(0,1,0,1))
plt.colorbar()
plt.show()