Filters are not being learnt in sparse coding

Could you please take a look at the code below? I am trying to implement a simple sparse coding algorithm. I visualize the filters at the end, but it seems that the filters are not being learnt at all.
In the code, the sparse codes (phi) and the dictionary weights should be learnt independently; I used the ISTA algorithm to learn phi.
I would appreciate it if you could take a look.
Thank you.
import torch
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('device is:', device)
# dataset definition
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
mnist_trainset.data = mnist_trainset.data[:10000]
mnist_testset.data = mnist_testset.data[:5000]
from torch.utils.data import DataLoader
train_dl = DataLoader(mnist_trainset, batch_size=32, shuffle=True)
test_dl = DataLoader(mnist_testset, batch_size=1024, shuffle=False)
from numpy import vstack
from sklearn.metrics import accuracy_score
from torch.optim import SGD
from torch.nn import Module
from torch.nn import Linear
from tqdm import tqdm
class MNIST_ISTA(Module):
    # define model elements
    def __init__(self, n_inputs):
        self.lambda_ = 0.5e-5
        super(MNIST_ISTA, self).__init__()
        # input to first hidden layer
        # self.sc = Scattering2D(shape=(28,28), J=2)
        # self.view = Vi
        self.neurons = 400
        self.receptive_field = 10
        self.output = Linear(self.neurons, 28*28)
        self.phi = None

    # forward propagate input
    def ista_(self, img_batch):
        self.phi = torch.zeros((img_batch.shape[0], 400), requires_grad=True)
        converged = False
        # optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
        optimizer = torch.optim.SGD([{'params': self.phi, "lr": 0.1e-3}, {'params': self.parameters(), "lr": 0.1e-3}])
        while not converged:
            phi_old = self.phi.clone().detach()
            pred = self.output(self.phi)
            loss = ((img_batch - pred)**2).sum() + torch.norm(self.phi, p=1)
            loss.backward()
            optimizer.step()
            self.zero_grad()
            self.phi.data = self.soft_thresholding_(self.phi, self.lambda_)
            converged = torch.norm(self.phi - phi_old)/torch.norm(phi_old) < 1e-1

    def soft_thresholding_(self, x, alpha):
        with torch.no_grad():
            rtn = F.relu(x - alpha) - F.relu(-x - alpha)
            return rtn.data

    def zero_grad(self):
        self.phi.grad.zero_()
        self.output.zero_grad()

    def forward(self, img_batch):
        self.ista_(img_batch)
        pred = self.output(self.phi)
        return pred
ista_model = MNIST_ISTA(400)
optim = torch.optim.SGD([{'params': ista_model.output.weight, "lr": 0.01}])
for epoch in range(100):
    running_loss = 0
    c = 0
    for img_batch in tqdm(train_dl, desc='training', total=len(train_dl)):
        img_batch = img_batch[0]
        img_batch = img_batch.reshape(img_batch.shape[0], -1)
        pred = ista_model(img_batch)
        loss = ((img_batch - pred) ** 2).sum()
        running_loss += loss.item()
        loss.backward()
        optim.step()
        # zero grad
        ista_model.zero_grad()
weight = ista_model.output.weight.data.numpy()
print(weight.shape)
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(20,20))
for i in range(20):
    for j in range(20):
        ax = fig.add_subplot(20, 20, i*20 + j + 1)
        ax.imshow(weight[:, 1].reshape((28, 28)))
        ax.axis('off')
# plt.close(fig)
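For reference, the textbook ISTA iteration I am trying to reproduce alternates a gradient step on the reconstruction error with soft-thresholding of the codes. A minimal sketch with generic names (not my exact code above) is:

import torch

def ista_step(z, D, x, eta, lam):
    # z: (batch, n_atoms) sparse codes, D: (n_atoms, n_pixels) dictionary, x: (batch, n_pixels) images
    residual = z @ D - x                        # reconstruction error
    z = z - eta * residual @ D.t()              # gradient step on 0.5 * ||x - z D||^2
    return torch.sign(z) * torch.clamp(z.abs() - eta * lam, min=0.0)  # soft-threshold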

Related

How to create a CNN model using Pytorch from a csv file with 209 rows and 8 columns?

I am new to deep learning, and after searching I could only find examples of CNN models for images. My dataset is simply a CSV file with 209 rows and 8 columns. I cannot figure out how to pass the input shape to the CNN model for my dataset.
I want a simple CNN model using PyTorch for a CSV file with 209 rows and 8 columns.
This is just general code that you will have to adjust based on your needs.
First off, you want to read your CSV file into a DataFrame, which you can do with pandas:
import pandas as pd
data = pd.read_csv('your_csv_file.csv')
Then you will split the features from the labels:
features = data.iloc[:, :-1].values
labels = data.iloc[:, -1].values
Then you can perform some sort of normalization/standardization of the features so they are on a comparable scale. StandardScaler from sklearn is a good tool for this:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
features = scaler.fit_transform(features)
The Conv1d layers below expect input of shape (batch, channels, length), so given your data you can reshape it with something like:
features = features.reshape(-1, 1, 8)
Now you can define a CNN model, define loss and optimization functions, and train the defined model:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(32 * 2, 64)
        self.fc2 = nn.Linear(64, 1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.view(-1, 32 * 2)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

net = Net()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 10 == 9:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 10))
            running_loss = 0.0
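The training loop above assumes a trainloader that yields batches of tensors and a num_epochs value; a minimal sketch of how you might build those from the scaled numpy features and labels (names here are assumptions, not from the question) is:

import torch
from torch.utils.data import TensorDataset, DataLoader

# wrap the scaled numpy arrays in tensors so the Conv1d receives Tensors, not ndarrays
inputs_t = torch.from_numpy(features).float()             # shape (N, 1, 8) after the reshape above
targets_t = torch.from_numpy(labels).float().view(-1, 1)  # BCEWithLogitsLoss expects float targets
trainloader = DataLoader(TensorDataset(inputs_t, targets_t), batch_size=16, shuffle=True)
num_epochs = 10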
I wrote my code like below:
from __future__ import division
import argparse
import torch
from torch.utils import model_zoo
from torch.autograd import Variable
from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import models
import utils
import os
import pickle
import pandas as pd
# from Lenet import *
# from Utils import *
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
from data_loader import get_train_test_loader, get_office31_dataloader
from sklearn.utils import resample
import warnings
warnings.filterwarnings("ignore")
import logging
handler=logging.basicConfig(level=logging.INFO)
lgr = logging.getLogger(__name__)
from sklearn.metrics import roc_auc_score, log_loss, roc_auc_score, roc_curve, auc,accuracy_score
from utils import accuracy, Tracker
from torchmetrics.classification import BinaryAccuracy
from sklearn.preprocessing import StandardScaler
########################################################################
fnameand='vectors_Qv_vlen1_updated_location_variance_android.csv'
fnameios='vectors_Qv_vlen1_updated_location_variance_ios.csv'
#figure, ax = plt.subplots()
dfand = pd.read_csv(fnameand, sep=',')
dfios = pd.read_csv(fnameios, sep=',')
# upsampling
dfandupsample = resample(dfand,replace=True,n_samples=len(dfios),random_state=42)
Xs=dfios[["location_variance0","time_spent_moving0","total_distance0","AMS0","unique_locations0","entropy0","normalized_entropy0","time_home0"]]
ys = dfios[['finallabel']]
# changing labels to 1 or 0
ys.loc[ys["finallabel"] == "improved", "finallabel"] = 0
ys.loc[ys["finallabel"] == "nonImproved", "finallabel"] = 1
#ys=np.array(ys).astype("float32")
#dfand = pd.read_csv(fnameand, sep=',')
Xt=dfandupsample[["location_variance0","time_spent_moving0","total_distance0","AMS0","unique_locations0","entropy0","normalized_entropy0","time_home0"]]
yt = dfandupsample[['finallabel']]
# changing labels to 1 or 0
yt.loc[yt["finallabel"] == "improved", "finallabel"] = 0
yt.loc[yt["finallabel"] == "nonImproved", "finallabel"] = 1
#yt=np.array(yt).astype("float32")
trainX, trainY = Xs, ys
targetX,targetY=Xt,yt
print (trainX.shape,trainY.shape,targetX.shape,targetY.shape)# (209, 8) (209, 1) (209, 8) (209, 1)
########################################################################################
######################################################################################
features = trainX.values
labels=trainY.values
scaler = StandardScaler()
features = scaler.fit_transform(features)
features = features.reshape(-1, 1, 8)
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv1d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(32 * 2, 64)
        self.fc2 = nn.Linear(64, 1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.view(-1, 32 * 2)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
net = Net()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
num_epochs=2
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(features, 0):
        inputs = data
        optimizer.zero_grad()
        outputs = net(inputs)
But I am getting this error:
TypeError: conv1d() received an invalid combination of arguments - got (numpy.ndarray, Parameter, Parameter, tuple, tuple, tuple, int), but expected one of:
(Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
(Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !Parameter!, !tuple!, !tuple!, !tuple!, int)
How to resolve this?
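For what it's worth, the error text says conv1d received a numpy.ndarray where a Tensor was expected, which usually means the input batch was never converted to a torch tensor. A minimal sketch of that conversion (an assumption about the fix, not from the original thread) is:

import torch

# convert the scaled numpy features once, up front, so every batch fed to the network is a Tensor
features_t = torch.from_numpy(features).float()   # shape (209, 1, 8)
outputs = net(features_t[:16])                    # e.g. feed the first 16 samples as one batch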

How can I fix the Xception model

I would like to train a model that can separate different cats' cans, but it seems to be wrong, and I don't know why the model can't predict correctly.
We made the dataset ourselves, taking photos from different angles.
I'm not sure which part is wrong.
Would you please help me see how I can get the model working?
Here is my code:
import keras.backend
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers import Input, Dense, GlobalAveragePooling2D, BatchNormalization, Flatten, Dropout
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from keras.models import Model
import tensorflow as tf
from keras.applications import Xception
train_path = './train'
test_path = './test'
batch_size = 16
image_size = (224,224)
epoch = 30
FREEZE_LAYERS = 2
model = Xception(include_top=False,
                 weights='imagenet',
                 input_shape=(224, 224, 3))
x = model.output
x = GlobalAveragePooling2D()(x)
x = Flatten()(x)
x = Dropout(0.5)(x)
predictions = Dense(26, activation='softmax')(x)
model = Model(inputs=model.input, outputs=predictions)
model.compile(optimizer=Adam(lr=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
estop = EarlyStopping(monitor='val_loss', patience=10, mode='min', verbose=1)
checkpoint = ModelCheckpoint('Xception_checkpoint.h5', verbose=1,
                             monitor='val_loss', save_best_only=True,
                             mode='max')
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                              patience=5, mode='min', verbose=1,
                              min_lr=1e-4)
train_datagen = ImageDataGenerator(rescale=1.0/255,
                                   rotation_range=30,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   channel_shift_range=10,
                                   horizontal_flip=True,
                                   fill_mode='nearest')
val_datagen = ImageDataGenerator(rescale=1.0/255)
train_generator = train_datagen.flow_from_directory(train_path,
                                                    target_size=image_size,
                                                    class_mode='categorical',
                                                    shuffle=True,
                                                    batch_size=batch_size)
valid_generator = val_datagen.flow_from_directory(test_path,
                                                  target_size=image_size,
                                                  class_mode='categorical',
                                                  shuffle=False,
                                                  batch_size=batch_size)
history = model.fit_generator(train_generator,
                              epochs=epoch, verbose=1,
                              steps_per_epoch=train_generator.samples//batch_size,
                              validation_data=valid_generator,
                              validation_steps=valid_generator.samples//batch_size,
                              callbacks=[checkpoint, estop, reduce_lr])
                              #class_weight=class_weights)
model.save('./Xception_retrained_v2.h5')
print('saved Xception_retrained_v2.h5')

Neural network predicts very poorly though it has high accuracy

I am working on an RNN. After training, I got high accuracy on the test data set. However, when I make a prediction on some external data, it predicts very poorly. I also used the same data set, which has over 300,000 texts and 57 classes, with a plain artificial neural network, and it still predicts very poorly. When I tried the same data set with a classical machine learning model, it worked fine.
Here is my code:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, LSTM, BatchNormalization
from keras.layers.embeddings import Embedding
from sklearn.model_selection import train_test_split
df = pd.read_excel("data.xlsx", usecols=["X", "y"])
df = df.sample(frac = 1)
X = np.array(df["X"])
y = np.array(df["y"])
le = LabelEncoder()
y = le.fit_transform(y)
y = y.reshape(-1,1)
encoder = OneHotEncoder(sparse=False)
y = encoder.fit_transform(y)
num_words = 100000
token = Tokenizer(num_words=num_words)
token.fit_on_texts(X)
seq = token.texts_to_sequences(X)
X = sequence.pad_sequences(seq, padding = "pre", truncating = "pre")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = Sequential()
model.add(Embedding(num_words, 96, input_length = X.shape[1]))
model.add(LSTM(108, activation='relu', dropout=0.1, recurrent_dropout = 0.2))
model.add(BatchNormalization())
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer="rmsprop", metrics=['accuracy'])
model.summary()
history = model.fit(X_train, y_train, epochs=4, batch_size=64, validation_data = (X_test, y_test))
loss, accuracy = model.evaluate(X_test, y_test)
Here are the history plots of the model:
After doing some research, I realized that the model was actually working fine. The problem was that I was using the Keras Tokenizer wrongly.
At the end of the code, I used the following:
sentence = ["Example Sentence to Make Prediction."]
token.fit_on_texts(sentence)  # <- This line is the problem.
seq = token.texts_to_sequences(sentence)
cx = sequence.pad_sequences(seq, maxlen = X.shape[1])
sx = np.argmax(model.predict(cx), axis=1)
The problem occurs when the Tokenizer is fit again on the new data, which changes the word index the model was trained with. So, removing that line solved the problem for me.
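For completeness, the corrected prediction block is simply the same code without the extra fit:

sentence = ["Example Sentence to Make Prediction."]
seq = token.texts_to_sequences(sentence)             # reuse the tokenizer fitted on the training texts
cx = sequence.pad_sequences(seq, maxlen=X.shape[1])
sx = np.argmax(model.predict(cx), axis=1)            # predicted class index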

LSTM layer returns nan when fed by its own output in PyTorch

I’m trying to generate time-series data with an LSTM and a Mixture Density Network as described in https://arxiv.org/pdf/1308.0850.pdf
Here is a link to my implementation: https://github.com/NeoVand/MDNLSTM
The repository contains a toy dataset to train the network.
On training, the LSTM layer returns nan for its hidden state after one iteration. A similar issue is reported here.
For your convenience, here is the code:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import numpy.random as npr
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
ts = torch.load('LDS_Toy_Data.pt')
def detach(states):
    return [state.detach() for state in states]
class MDNLSTM(nn.Module):
    def __init__(self, d_obs, d_lat=2, n_gaussians=2, n_layers=1):
        super(MDNLSTM, self).__init__()
        self.d_obs = d_obs
        self.d_lat = d_lat
        self.n_gaussians = n_gaussians
        self.n_layers = n_layers
        self.lstm = nn.LSTM(d_obs, d_lat, n_layers, batch_first=True)
        self.fcPi = nn.Linear(d_lat, n_gaussians*d_obs)
        self.fcMu = nn.Linear(d_lat, n_gaussians*d_obs)
        self.fcSigma = nn.Linear(d_lat, n_gaussians*d_obs)

    def get_mixture_coef(self, y):
        time_steps = y.size(1)
        pi, mu, sigma = self.fcPi(y), self.fcMu(y), self.fcSigma(y)
        pi = pi.view(-1, time_steps, self.n_gaussians, self.d_obs)
        mu = mu.view(-1, time_steps, self.n_gaussians, self.d_obs)
        sigma = sigma.view(-1, time_steps, self.n_gaussians, self.d_obs)
        pi = F.softmax(pi, 2)
        sigma = torch.exp(sigma)
        return pi, mu, sigma

    def forward(self, x, h):
        y, (h, c) = self.lstm(x, h)
        #print(h)
        pi, mu, sigma = self.get_mixture_coef(y)
        return (pi, mu, sigma), (h, c)

    def init_hidden(self, bsz):
        return (torch.zeros(self.n_layers, bsz, self.d_lat).to(device),
                torch.zeros(self.n_layers, bsz, self.d_lat).to(device))

def mdn_loss_fn(y, pi, mu, sigma):
    m = torch.distributions.Normal(loc=mu, scale=sigma)
    loss = torch.exp(m.log_prob(y))
    loss = torch.sum(loss * pi, dim=2)
    loss = -torch.log(loss)
    return loss.mean()

def criterion(y, pi, mu, sigma):
    y = y.unsqueeze(2)
    return mdn_loss_fn(y, pi, mu, sigma)
DOBS = 10
DLAT = 2
INSTS = 100
seqlen = 30
epochs = 200
mdnlstm = MDNLSTM(DOBS, DLAT).to(device)
optimizer = torch.optim.Adam(mdnlstm.parameters())
z = torch.from_numpy(ts[:INSTS,:,:]).float().to(device)
# hiddens=[]
# Train the model
for epoch in range(epochs):
    # Set initial hidden and cell states
    hidden = mdnlstm.init_hidden(INSTS)
    for i in range(0, z.size(1) - seqlen, seqlen):
        # Get mini-batch inputs and targets
        inputs = z[:, i:i+seqlen, :]
        targets = z[:, (i+1):(i+1)+seqlen, :]
        hidden = detach(hidden)
        # hiddens.append(hidden)
        (pi, mu, sigma), hidden = mdnlstm(inputs, hidden)
        loss = criterion(targets, pi, mu, sigma)
        mdnlstm.zero_grad()
        loss.backward()
        optimizer.step()
    if epoch % 100 == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'
              .format(epoch, epochs, loss.item()))
I would appreciate any help on this.
The issue was caused by the log-sum-exp operation not being done in a numerically stable way. Here is an implementation of a weighted log-sum-exp trick that I used, which fixed the problem:
def weighted_logsumexp(x, w, dim=None, keepdim=False):
    if dim is None:
        x, dim = x.view(-1), 0
    xm, _ = torch.max(x, dim, keepdim=True)
    x = torch.where(
        # to prevent nasty nan's
        (xm == float('inf')) | (xm == float('-inf')),
        xm,
        xm + torch.log(torch.sum(torch.exp(x - xm)*w, dim, keepdim=True)))
    return x if keepdim else x.squeeze(dim)
and, using that, I implemented the stable loss function:
def mdn_loss_stable(y, pi, mu, sigma):
    m = torch.distributions.Normal(loc=mu, scale=sigma)
    m_lp_y = m.log_prob(y)
    loss = -weighted_logsumexp(m_lp_y, pi, dim=2)
    return loss.mean()
This worked like a charm. In general, the problem is that torch won't report underflows: exp of a very negative log-probability silently becomes 0, the subsequent -log(0) becomes inf, and its gradient becomes nan.
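A tiny illustration of that silent underflow (my own example, not from the original code):

import torch

log_p = torch.tensor(-1000.0, requires_grad=True)  # a very unlikely sample under some Gaussian
p = torch.exp(log_p)                                # silently underflows to 0.0, no warning
loss = -torch.log(p)                                # -log(0) = inf
loss.backward()                                     # gradient of inf through exp -> nan
print(p.item(), loss.item(), log_p.grad.item())     # 0.0 inf nan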

Multiple classes in Keras

Thanks in advance to anyone who takes the time to answer this. I'm learning Keras and got stuck with a problem where I have 3 classes and the test set accuracy moves up to 0.6667 and then stalls on that exact number for 50 epochs. The accuracy is also way higher than it should be if it were correct. This worked fine when I only had 2 classes.
What am I doing wrong here?
import pandas as pd
import numpy as np
import keras.utils
#Create train and test data
def create_Xt_Yt(X, y, percentage=0.8):
    p = int(len(X) * percentage)
    X_train = X[0:p]
    Y_train = y[0:p]
    X_test = X[p:]
    Y_test = y[p:]
    return X_train, X_test, Y_train, Y_test
df = pd.read_csv('data.csv', parse_dates=['Date'])
df.set_index(['Date'], inplace=True)
df.drop(['Volume'],1, inplace=True)
df.dropna(inplace=True)
data = df.loc[:, 'AMD-close'].tolist()
window = 30
forecast = 3
forecast_target_long = 1.015
forecast_target_short= 0.985
x_holder = []
y_holder = []
for i in range(len(data)):
    try:
        x_class = data[i:i+window]
        y_class = data[i+window+forecast]
        window_last_price = data[i+window]
        forecast_price = y_class
        if forecast_price > (window_last_price*forecast_target_long):
            y_class = [1]
        elif forecast_price < (window_last_price*forecast_target_short):
            y_class = [-1]
        else:
            y_class = [0]
        y_holder.append(y_class)
        x_holder.append(x_class)
    except Exception as e:
        print(e)
        break
normalize = [(np.array(i) - np.mean(i)) / np.std(i) for i in x_holder]
y_holder = keras.utils.to_categorical(y_holder, 3)
x_holder, y_holder = np.array(x_holder), np.array(y_holder)
X_train, X_test, Y_train, Y_test = create_Xt_Yt(x_holder, y_holder)
This is the model:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.normalization import BatchNormalization
from keras.optimizers import RMSprop, Adam, SGD, Nadam
from keras.callbacks import ReduceLROnPlateau
from keras import regularizers
from keras import losses
model = Sequential()
model.add(Dense(64, input_dim=window, activity_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(16, activity_regularizer=regularizers.l2(0.01)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dense(3))
model.add(Activation('sigmoid'))
reduce_learning_ontop = ReduceLROnPlateau(monitor='val_acc', factor=0.9, patience=25, min_lr=0.000001, verbose=1)
model.compile(Adam(lr=.0001),loss='binary_crossentropy', metrics=['accuracy'])
myModel = model.fit(X_train, Y_train, batch_size=128, epochs=160, verbose=1, shuffle=True, validation_data=(X_test, Y_test))
So two things here:
Change the activation:
model.add(Activation('softmax'))
sigmoid is designed for binary classification; for multiclass classification, softmax is the standard activation.
Change the loss:
model.compile(
    Adam(lr=.0001),
    loss='categorical_crossentropy', metrics=['accuracy'])
binary_crossentropy is likewise designed for binary classification; its multiclass equivalent is categorical_crossentropy.
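Putting both suggestions together, the tail of the model would look roughly like this (a sketch of the proposed fix, keeping the rest of the question's architecture unchanged):

model.add(Dense(3))
model.add(Activation('softmax'))                   # softmax so the 3 class probabilities sum to 1
model.compile(Adam(lr=.0001),
              loss='categorical_crossentropy',     # matches the one-hot labels from to_categorical
              metrics=['accuracy'])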