Multiple classes in Keras - deep-learning

Thanks in advance to anyone who takes the time to answer this. I'm learning Keras and got stuck on a problem where I have 3 classes: the test-set accuracy climbs to 0.6667 and then stalls at exactly that number for 50 epochs. That accuracy is also far higher than it plausibly should be. This worked fine when I only had 2 classes.
What am I doing wrong here?
import pandas as pd
import numpy as np
import keras.utils
#Create train and test data
def create_Xt_Yt(X, y, percentage=0.8):
    p = int(len(X) * percentage)
    X_train = X[0:p]
    Y_train = y[0:p]
    X_test = X[p:]
    Y_test = y[p:]
    return X_train, X_test, Y_train, Y_test

df = pd.read_csv('data.csv', parse_dates=['Date'])
df.set_index(['Date'], inplace=True)
df.drop(['Volume'], 1, inplace=True)
df.dropna(inplace=True)

data = df.loc[:, 'AMD-close'].tolist()
window = 30
forecast = 3
forecast_target_long = 1.015
forecast_target_short = 0.985

x_holder = []
y_holder = []
for i in range(len(data)):
    try:
        x_class = data[i:i+window]
        y_class = data[i+window+forecast]
        window_last_price = data[i+window]
        forecast_price = y_class
        if forecast_price > (window_last_price*forecast_target_long):
            y_class = [1]
        elif forecast_price < (window_last_price*forecast_target_short):
            y_class = [-1]
        else:
            y_class = [0]
        y_holder.append(y_class)
        x_holder.append(x_class)
    except Exception as e:
        print(e)
        break

normalize = [(np.array(i) - np.mean(i)) / np.std(i) for i in x_holder]
y_holder = keras.utils.to_categorical(y_holder, 3)
x_holder, y_holder = np.array(x_holder), np.array(y_holder)
X_train, X_test, Y_train, Y_test = create_Xt_Yt(x_holder, y_holder)
This is the model:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.normalization import BatchNormalization
from keras.optimizers import RMSprop, Adam, SGD, Nadam
from keras.callbacks import ReduceLROnPlateau
from keras import regularizers
from keras import losses
model = Sequential()
model.add(Dense(64, input_dim=window, activity_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(16, activity_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(3))
model.add(Activation('sigmoid'))
reduce_learning_ontop = ReduceLROnPlateau(monitor='val_acc', factor=0.9, patience=25, min_lr=0.000001, verbose=1)
model.compile(Adam(lr=.0001),loss='binary_crossentropy', metrics=['accuracy'])
myModel = model.fit(X_train, Y_train, batch_size=128, epochs=160, verbose=1, shuffle=True, validation_data=(X_test, Y_test))

So, two things here:
Change the activation:
model.add(Activation('softmax'))
sigmoid is designed for binary classification; for multiclass classification, softmax is the standard output activation.
Change the loss:
model.compile(
    Adam(lr=.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'])
binary_crossentropy is likewise designed for binary classification; its multiclass counterpart is categorical_crossentropy.
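Putting both fixes together, the tail of the model would look like the sketch below. (One caveat I'd add: keras.utils.to_categorical expects non-negative class indices, and with NumPy-style indexing a label of -1 typically lands on the last class, so mapping the -1/0/1 labels to 0/1/2 before the to_categorical call is safer.)

model.add(Dense(3))
model.add(Activation('softmax'))   # mutually exclusive probabilities over the 3 classes

model.compile(Adam(lr=.0001),
              loss='categorical_crossentropy',   # multiclass counterpart of binary_crossentropy
              metrics=['accuracy'])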

Related

How to create a CNN model using PyTorch from a CSV file with 209 rows and 8 columns?

I am new to deep learning, and after searching I could only find examples of CNN models for images. My dataset is simply a CSV file with 209 rows and 8 columns, and I cannot figure out how to pass the input shape to the CNN model for it.
I want a simple CNN model using PyTorch for a CSV file with 209 rows and 8 columns.
This is just general code that you will have to adjust based on your needs.
First off, you want to read your CSV file into a dataframe, using pandas:
import pandas as pd
data = pd.read_csv('your_csv_file.csv')
Then you will split the features from the labels:
features = data.iloc[:, :-1].values
labels = data.iloc[:, -1].values
Then you can go ahead and perform some sort of normalization/standardization of the features to have them scaled. StandardScaler from sklearn is a good tool for this:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
features = scaler.fit_transform(features)
You will need the input in a certain shape: Conv1d expects (batch, channels, length), so given your data you can treat each row as a single channel of length 8:
features = features.reshape(-1, 1, 8)
Now you can define a CNN model, a loss function, and an optimizer, and train the model:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # two 1-D conv blocks over the 8-feature "signal"
        self.conv1 = nn.Conv1d(1, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, stride=1, padding=1)
        # length 8 -> 4 -> 2 after the two pooling steps, with 32 channels
        self.fc1 = nn.Linear(32 * 2, 64)
        self.fc2 = nn.Linear(64, 1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.view(-1, 32 * 2)   # flatten for the fully connected head
        x = F.relu(self.fc1(x))
        x = self.fc2(x)          # raw logit; pair with BCEWithLogitsLoss
        return x

net = Net()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# num_epochs and trainloader are up to you to define (see the sketch below)
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 10 == 9:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 10))
            running_loss = 0.0
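Since the loop above assumes a trainloader, here is one way to build it from the scaled features and labels (a sketch under the assumption that the labels are binary, to match the single-logit output and BCEWithLogitsLoss):

from torch.utils.data import TensorDataset, DataLoader

# BCEWithLogitsLoss expects float targets shaped like the model output (N, 1)
inputs_t = torch.tensor(features, dtype=torch.float32)   # (N, 1, 8) after the reshape above
labels_t = torch.tensor(labels, dtype=torch.float32).reshape(-1, 1)
trainloader = DataLoader(TensorDataset(inputs_t, labels_t),
                         batch_size=32, shuffle=True)
num_epochs = 10   # illustrative value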
I wrote my code like below:
from __future__ import division
import argparse
import torch
from torch.utils import model_zoo
from torch.autograd import Variable
from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import models
import utils
import os
import pickle
import pandas as pd
# from Lenet import *
# from Utils import *
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
from data_loader import get_train_test_loader, get_office31_dataloader
from sklearn.utils import resample
import warnings
warnings.filterwarnings("ignore")
import logging
handler=logging.basicConfig(level=logging.INFO)
lgr = logging.getLogger(__name__)
from sklearn.metrics import roc_auc_score, log_loss, roc_auc_score, roc_curve, auc,accuracy_score
from utils import accuracy, Tracker
from torchmetrics.classification import BinaryAccuracy
from sklearn.preprocessing import StandardScaler
########################################################################
fnameand='vectors_Qv_vlen1_updated_location_variance_android.csv'
fnameios='vectors_Qv_vlen1_updated_location_variance_ios.csv'
#figure, ax = plt.subplots()
dfand = pd.read_csv(fnameand, sep=',')
dfios = pd.read_csv(fnameios, sep=',')
# upsampling
dfandupsample = resample(dfand,replace=True,n_samples=len(dfios),random_state=42)
Xs=dfios[["location_variance0","time_spent_moving0","total_distance0","AMS0","unique_locations0","entropy0","normalized_entropy0","time_home0"]]
ys = dfios[['finallabel']]
# changing labels to 1 or 0
ys.loc[ys["finallabel"] == "improved", "finallabel"] = 0
ys.loc[ys["finallabel"] == "nonImproved", "finallabel"] = 1
#ys=np.array(ys).astype("float32")
#dfand = pd.read_csv(fnameand, sep=',')
Xt=dfandupsample[["location_variance0","time_spent_moving0","total_distance0","AMS0","unique_locations0","entropy0","normalized_entropy0","time_home0"]]
yt = dfandupsample[['finallabel']]
# changing labels to 1 or 0
yt.loc[yt["finallabel"] == "improved", "finallabel"] = 0
yt.loc[yt["finallabel"] == "nonImproved", "finallabel"] = 1
#yt=np.array(yt).astype("float32")
trainX, trainY = Xs, ys
targetX,targetY=Xt,yt
print (trainX.shape,trainY.shape,targetX.shape,targetY.shape)# (209, 8) (209, 1) (209, 8) (209, 1)
########################################################################################
######################################################################################
features = trainX.values
labels=trainY.values
scaler = StandardScaler()
features = scaler.fit_transform(features)
features = features.reshape(-1, 1, 8)
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(32 * 2, 64)
        self.fc2 = nn.Linear(64, 1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.view(-1, 32 * 2)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

net = Net()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

num_epochs = 2
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(features, 0):
        inputs = data
        optimizer.zero_grad()
        outputs = net(inputs)
But I am getting this error:
TypeError: conv1d() received an invalid combination of arguments - got (numpy.ndarray, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
How to resolve this?
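The error says conv1d() was handed a numpy.ndarray where it expects a Tensor: the training loop iterates directly over the NumPy features array. A minimal sketch of the usual fix (my suggestion, not from the original thread) is to convert the arrays to float tensors and batch them with a DataLoader before the forward pass:

import torch
from torch.utils.data import TensorDataset, DataLoader

features_t = torch.tensor(features, dtype=torch.float32)          # (209, 1, 8)
labels_t = torch.tensor(labels.astype('float32')).reshape(-1, 1)  # float targets for BCEWithLogitsLoss
trainloader = DataLoader(TensorDataset(features_t, labels_t),
                         batch_size=16, shuffle=True)

for epoch in range(num_epochs):
    for inputs, targets in trainloader:
        optimizer.zero_grad()
        outputs = net(inputs)          # inputs is now a Tensor, so conv1d accepts it
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()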

How can I fix the Xception model

I would like to train a model that can separate different cat cans, but it seems to be failing, and I don't know why the model can't predict correctly.
We made the dataset ourselves, taking photos from different angles.
I'm not sure which part is wrong.
Could you please help me see how to fix the model?
Here is my code:
import keras.backend
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers import Input, Dense, GlobalAveragePooling2D, BatchNormalization, Flatten, Dropout
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from keras.models import Model
import tensorflow as tf
from keras.applications import Xception
train_path = './train'
test_path = './test'
batch_size = 16
image_size = (224,224)
epoch = 30
FREEZE_LAYERS = 2
model = Xception(include_top=False,
                 weights='imagenet',
                 input_shape=(224, 224, 3))
x = model.output
x = GlobalAveragePooling2D()(x)
x = Flatten()(x)
x = Dropout(0.5)(x)
predictions = Dense(26, activation='softmax')(x)
model = Model(inputs=model.input, outputs=predictions)
model.compile(optimizer=Adam(lr=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

estop = EarlyStopping(monitor='val_loss', patience=10, mode='min', verbose=1)
checkpoint = ModelCheckpoint('Xception_checkpoint.h5', verbose=1,
                             monitor='val_loss', save_best_only=True,
                             mode='max')
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                              patience=5, mode='min', verbose=1,
                              min_lr=1e-4)

train_datagen = ImageDataGenerator(rescale=1.0/255,
                                   rotation_range=30,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   channel_shift_range=10,
                                   horizontal_flip=True,
                                   fill_mode='nearest')
val_datagen = ImageDataGenerator(rescale=1.0/255)
train_generator = train_datagen.flow_from_directory(train_path,
                                                    target_size=image_size,
                                                    class_mode='categorical',
                                                    shuffle=True,
                                                    batch_size=batch_size)
valid_generator = val_datagen.flow_from_directory(test_path,
                                                  target_size=image_size,
                                                  class_mode='categorical',
                                                  shuffle=False,
                                                  batch_size=batch_size)
history = model.fit_generator(train_generator,
                              epochs=epoch, verbose=1,
                              steps_per_epoch=train_generator.samples//batch_size,
                              validation_data=valid_generator,
                              validation_steps=valid_generator.samples//batch_size,
                              callbacks=[checkpoint, estop, reduce_lr])
                              #class_weight=class_weights)
model.save('./Xception_retrained_v2.h5')
print('saved Xception_retrained_v2.h5')
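One detail worth flagging in the code above (an observation, not a confirmed diagnosis of the prediction problem): ModelCheckpoint monitors val_loss with mode='max', so it saves the weights whenever the validation loss is at its highest. Since lower loss is better, mode='min' (or the default 'auto') is presumably what was intended:

checkpoint = ModelCheckpoint('Xception_checkpoint.h5', verbose=1,
                             monitor='val_loss', save_best_only=True,
                             mode='min')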

DeepSHAP beeswarm plot only shows blue dots and does not have a color gradient to show feature values

I tried running an example on SHAP Deep Explainer from this link using this Titanic dataset. This is the code from the example:
# import package
import shap
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import optimizers
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import os
# load data
os.chdir('/titanic/')
train_data = pd.read_csv('./train.csv', index_col=0)
test_data = pd.read_csv('./test.csv', index_col=0)
train_data.head()
def data_preprocessing(df):
    df = df.drop(columns=['Name', 'Ticket', 'Cabin'])
    # fill na
    df[['Age']] = df[['Age']].fillna(value=df[['Age']].mean())
    df[['Embarked']] = df[['Embarked']].fillna(value=df['Embarked'].value_counts().idxmax())
    df[['Fare']] = df[['Fare']].fillna(value=df[['Fare']].mean())
    # categorical features into numeric
    df['Sex'] = df['Sex'].map({'female': 1, 'male': 0}).astype(int)
    # one-hot encoding
    embarked_one_hot = pd.get_dummies(df['Embarked'], prefix='Embarked')
    df = df.drop('Embarked', axis=1)
    df = df.join(embarked_one_hot)
    return df
# train data processing
train_data = data_preprocessing(train_data)
train_data.isnull().sum()
# create data for training
x_train = train_data.drop(['Survived'], axis=1).values
# Check test data
test_data.isnull().sum()
# scale
scale = StandardScaler()
x_train = scale.fit_transform(x_train)
# prepare y_train
y_train = train_data['Survived'].values
test_data = data_preprocessing(test_data)
x_test = test_data.values.astype(float)
# scaling
x_test = scale.transform(x_test)
# Check test data
test_data.isnull().sum()
# build mlp
model = Sequential()
model.add(Dense(32, input_dim=x_train.shape[1], activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(8, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(2, activation='softmax'))
# compile model
model.compile(loss='sparse_categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
# fit model
model.fit(x_train, y_train, epochs=10, batch_size=64)
# compute SHAP values
explainer = shap.DeepExplainer(model, x_train)
shap_values = explainer.shap_values(x_test)
shap.summary_plot(shap_values[0], plot_type = 'bar', feature_names = test_data.columns)
shap.initjs()
shap.force_plot(explainer.expected_value[0].numpy(), shap_values[0][0], features = test_data.columns)
shap.decision_plot(explainer.expected_value[0].numpy(), shap_values[0][0], features = test_data.iloc[0,:], feature_names = test_data.columns.tolist())
shap.plots._waterfall.waterfall_legacy(explainer.expected_value[0].numpy(), shap_values[0][0], feature_names = test_data.columns)
There is no code for generating a beeswarm plot in the example, but I used
shap.summary_plot(shap_values[0], feature_names = test_data.columns)
and got a beeswarm plot. From my understanding, the color of the dots displays the original value of each feature, falling along a gradient of blue to red. However, the plot I got only has blue dots and doesn't have a gradient ruler on the side.
Here is the plot I got:
And here is what I expected (photo from https://shap.readthedocs.io/en/latest/example_notebooks/api_examples/plots/beeswarm.html):
Any suggestions on what could have caused this and what I can do to get the colors would be greatly appreciated. Thank you!
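A plausible cause (my suggestion; it is not addressed in the example itself): summary_plot can only color the dots by feature value when it is given the raw feature matrix, and the call above passes only shap_values and feature_names, so there is nothing to map onto the blue-to-red gradient. Passing the features explicitly may restore the coloring:

shap.summary_plot(shap_values[0], x_test, feature_names=test_data.columns)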

Filters are not being learnt in sparse coding

Could you please take a look at the code below? I am trying to implement a simple sparse coding algorithm. I try to visualize the filters at the end, but it seems the filters are not being learned at all.
In the code, phi and the weights should be learned independently. I used the ISTA algorithm to learn phi.
I appreciate it if you could take a look.
Thank you.
import torch
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('device is:', device)

# dataset definition
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
mnist_trainset.data = mnist_trainset.data[:10000]
mnist_testset.data = mnist_testset.data[:5000]

from torch.utils.data import DataLoader
train_dl = DataLoader(mnist_trainset, batch_size=32, shuffle=True)
test_dl = DataLoader(mnist_testset, batch_size=1024, shuffle=False)

from numpy import vstack
from sklearn.metrics import accuracy_score
from torch.optim import SGD
from torch.nn import Module
from torch.nn import Linear
from tqdm import tqdm

class MNIST_ISTA(Module):
    # define model elements
    def __init__(self, n_inputs):
        self.lambda_ = 0.5e-5
        super(MNIST_ISTA, self).__init__()
        # input to first hidden layer
        # self.sc = Scattering2D(shape=(28,28), J=2)
        # self.view = Vi
        self.neurons = 400
        self.receptive_field = 10
        self.output = Linear(self.neurons, 28*28)
        self.phi = None

    # forward propagate input
    def ista_(self, img_batch):
        self.phi = torch.zeros((img_batch.shape[0], 400), requires_grad=True)
        converged = False
        # optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
        optimizer = torch.optim.SGD([{'params': self.phi, "lr": 0.1e-3},
                                     {'params': self.parameters(), "lr": 0.1e-3}])
        while not converged:
            phi_old = self.phi.clone().detach()
            pred = self.output(self.phi)
            loss = ((img_batch - pred)**2).sum() + torch.norm(self.phi, p=1)
            loss.backward()
            optimizer.step()
            self.zero_grad()
            self.phi.data = self.soft_thresholding_(self.phi, self.lambda_)
            converged = torch.norm(self.phi - phi_old)/torch.norm(phi_old) < 1e-1

    def soft_thresholding_(self, x, alpha):
        with torch.no_grad():
            rtn = F.relu(x - alpha) - F.relu(-x - alpha)
        return rtn.data

    def zero_grad(self):
        self.phi.grad.zero_()
        self.output.zero_grad()

    def forward(self, img_batch):
        self.ista_(img_batch)
        pred = self.output(self.phi)
        return pred

ista_model = MNIST_ISTA(400)
optim = torch.optim.SGD([{'params': ista_model.output.weight, "lr": 0.01}])

for epoch in range(100):
    running_loss = 0
    c = 0
    for img_batch in tqdm(train_dl, desc='training', total=len(train_dl)):
        img_batch = img_batch[0]
        img_batch = img_batch.reshape(img_batch.shape[0], -1)
        pred = ista_model(img_batch)
        loss = ((img_batch - pred) ** 2).sum()
        running_loss += loss.item()
        loss.backward()
        optim.step()
        # zero grad
        ista_model.zero_grad()

weight = ista_model.output.weight.data.numpy()
print(weight.shape)

import matplotlib.pyplot as plt
fig = plt.figure(figsize=(20, 20))
for i in range(20):
    for j in range(20):
        ax = fig.add_subplot(20, 20, i*20+j+1)
        ax.imshow(weight[:, 1].reshape((28, 28)))
        ax.axis('off')
# plt.close(fig)
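One detail worth flagging (an observation from reading the code, not a posted answer): the visualization indexes weight[:, 1] in every subplot, so all 400 panels draw the same filter and the grid will look uniform even if distinct filters were learned. Indexing by the subplot position shows each filter separately:

ax.imshow(weight[:, i*20 + j].reshape((28, 28)))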

Neural network predicts very poorly though it has high accuracy

I am working on an RNN. After training, I get high accuracy on the test data set, but when I make predictions on external data the model predicts very poorly. I also used the same data set, which has over 300,000 texts and 57 classes, with a plain artificial neural network, and it still predicted very poorly, whereas a classical machine learning model on the same data worked fine.
Here is my code:
import pandas as pd  # needed for pd.read_excel below
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, LSTM, BatchNormalization
from keras.layers.embeddings import Embedding
from sklearn.model_selection import train_test_split
df = pd.read_excel("data.xlsx", usecols=["X", "y"])
df = df.sample(frac = 1)
X = np.array(df["X"])
y = np.array(df["y"])
le = LabelEncoder()
y = le.fit_transform(y)
y = y.reshape(-1,1)
encoder = OneHotEncoder(sparse=False)
y = encoder.fit_transform(y)
num_words = 100000
token = Tokenizer(num_words=num_words)
token.fit_on_texts(X)
seq = token.texts_to_sequences(X)
X = sequence.pad_sequences(seq, padding = "pre", truncating = "pre")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = Sequential()
model.add(Embedding(num_words, 96, input_length = X.shape[1]))
model.add(LSTM(108, activation='relu', dropout=0.1, recurrent_dropout = 0.2))
model.add(BatchNormalization())
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])
model.summary()
history = model.fit(X_train, y_train, epochs=4, batch_size=64, validation_data = (X_test, y_test))
loss, accuracy = model.evaluate(X_test, y_test)
Here are the history plots of the model:
After doing some research, I realized that the model was actually working fine; the problem was that I was using the Keras Tokenizer incorrectly.
At the end of the code, I had used the following:
sentence = ["Example Sentence to Make Prediction."]
token.fit_on_texts(sentence) # <- This row is redundant.
seq = token.texts_to_sequences(sentence)
cx = sequence.pad_sequences(seq, maxlen = X.shape[1])
sx = np.argmax(model.predict(cx), axis=1)
The problem occurs when the Tokenizer is fit again on new data: fit_on_texts updates the tokenizer's vocabulary and word counts, so the word indices no longer match the ones the embedding was trained on. Removing that line solved the problem for me.
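For completeness, the corrected inference block is simply the same code without the refit (the tokenizer keeps the vocabulary fitted on the training texts):

sentence = ["Example Sentence to Make Prediction."]
seq = token.texts_to_sequences(sentence)          # reuse the training vocabulary
cx = sequence.pad_sequences(seq, maxlen=X.shape[1])
sx = np.argmax(model.predict(cx), axis=1)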