How to plot polynomial regression line - regression

I'm having trouble plotting the regression line. Here is the code and my plot so far. I wanted to see the regression line between the points. I'd appreciate the help. I'm trying to do this in Python; forgive my code, I'm new at this.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.metrics import mean_squared_error

# UCI blood transfusion dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/blood-transfusion/transfusion.data'
df = pd.read_csv(url)
x = df.iloc[:, 2:3]
y = df.iloc[:, -1]
x_train = np.array(x[:-20])
x_test = np.array(x[-20:])
y_train = np.array(y[:-20])
y_test = np.array(y[-20:])

for i in range(1, 11):
    poly_regr = PolynomialFeatures(degree=i)
    x_train_poly = poly_regr.fit_transform(x_train)
    x_test_poly = poly_regr.fit_transform(x_test)
    clf = LinearRegression()
    clf.fit(x_train_poly, y_train)
    train_pred = clf.predict(x_train_poly)
    test_pred = clf.predict(x_test_poly)
    train_error = mean_squared_error(y_train, train_pred)
    test_error = mean_squared_error(y_test, test_pred)
    x_axis = np.arange(0, 10, .1)  # unused
    plt.scatter(i, train_error, color='green')
    plt.scatter(i, test_error, color='black')
plt.grid(True)

for i in range(1, 11):
    clf = Lasso(alpha=i)
    clf.fit(x_train, y_train)
    train_pred = clf.predict(x_train)
    test_pred = clf.predict(x_test)
    train_error = mean_squared_error(y_train, train_pred)
    test_error = mean_squared_error(y_test, test_pred)
    x_axis = np.arange(0, 10)  # unused
    plt.scatter(i, train_error, color='red')
    plt.scatter(i, test_error, color='brown')
I'd like to see the regression line.

It looks like you have train and test errors on one plot, so I assume you want two regression lines: one for train and one for test.
I suggest you put the errors in a list/array and move the plotting outside the loop. That lets you use np.polyfit to find the slope and intercept, which is what you need for plotting the line.
P.S. I used train_test_split(), so the plot looks a bit different.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

df = pd.read_csv('transfusion.data', sep=',', names=['R', 'F', 'M', 'T', 'class'])
df = df.drop(0)        # drop the original header row, which was read as data
df = df.astype('int32')
X = df.drop('class', axis=1)
y = df['class']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=27)

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error

# initialize tr and te arrays for storing training and testing
# errors respectively

### Linear Regression ###
tr = []
te = []
for i in range(1, 11):
    poly_regr = PolynomialFeatures(degree=i)
    x_train_poly = poly_regr.fit_transform(X_train)
    x_test_poly = poly_regr.fit_transform(X_test)
    clf = LinearRegression()
    clf.fit(x_train_poly, y_train)
    train_pred = clf.predict(x_train_poly)
    test_pred = clf.predict(x_test_poly)
    train_error = mean_squared_error(y_train, train_pred)
    tr.append(train_error)
    test_error = mean_squared_error(y_test, test_pred)
    te.append(test_error)

x_axis = np.arange(1, 11)
plt.scatter(x_axis, tr, color='green')
plt.scatter(x_axis, te, color='black')
m, b = np.polyfit(x_axis, tr, 1)   # slope and intercept of the train-error trend
plt.plot(x_axis, m*x_axis + b)
m, b = np.polyfit(x_axis, te, 1)   # same for the test-error trend
plt.plot(x_axis, m*x_axis + b)
plt.grid(True)

### Lasso ###
tr = []
te = []
for i in range(1, 11):
    clf = Lasso(alpha=i)
    clf.fit(X_train, y_train)
    train_pred = clf.predict(X_train)
    test_pred = clf.predict(X_test)
    train_error = mean_squared_error(y_train, train_pred)
    tr.append(train_error)
    test_error = mean_squared_error(y_test, test_pred)
    te.append(test_error)

x_axis = np.arange(1, 11)
plt.scatter(x_axis, tr, color='red')
plt.scatter(x_axis, te, color='brown')
m, b = np.polyfit(x_axis, tr, 1)
plt.plot(x_axis, m*x_axis + b)
m, b = np.polyfit(x_axis, te, 1)
plt.plot(x_axis, m*x_axis + b)
Output for Linear Regression
And output for Lasso
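P.S. If what you wanted is the fitted regression curve drawn over the data points themselves (rather than the error trends above), you can predict on a sorted grid of x values and plot that. A minimal sketch, assuming the X_train/y_train split above and using the 'M' column (the same third column the question's df.iloc[:, 2:3] selects); the degree here is just an example:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

x = X_train[['M']].values               # single feature, shape (n, 1)
poly = PolynomialFeatures(degree=3)     # pick whichever degree you settled on
clf = LinearRegression().fit(poly.fit_transform(x), y_train)

# predict on a dense, sorted grid so the curve is smooth
grid = np.linspace(x.min(), x.max(), 200).reshape(-1, 1)
plt.scatter(x, y_train, s=10, alpha=0.5)
plt.plot(grid, clf.predict(poly.transform(grid)), color='red', label='fitted curve')
plt.legend()
plt.show()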

Related

Filters are not being learnt in sparse coding

Could you please take a look at the code below? I am trying to implement a simple sparse coding algorithm. I try to visualize the filters at the end, but it seems that the filters are not learned at all.
In the code, phi and the weights should be learned independently. I tried the ISTA algorithm to learn phi.
I'd appreciate it if you could take a look.
Thank you.
import torch
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('device is:', device)

# dataset definition
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
mnist_trainset.data = mnist_trainset.data[:10000]
mnist_testset.data = mnist_testset.data[:5000]

from torch.utils.data import DataLoader
train_dl = DataLoader(mnist_trainset, batch_size=32, shuffle=True)
test_dl = DataLoader(mnist_testset, batch_size=1024, shuffle=False)

from numpy import vstack
from sklearn.metrics import accuracy_score
from torch.optim import SGD
from torch.nn import Module
from torch.nn import Linear
from tqdm import tqdm

class MNIST_ISTA(Module):
    # define model elements
    def __init__(self, n_inputs):
        self.lambda_ = 0.5e-5
        super(MNIST_ISTA, self).__init__()
        # input to first hidden layer
        # self.sc = Scattering2D(shape=(28,28), J=2)
        # self.view = Vi
        self.neurons = 400
        self.receptive_field = 10
        self.output = Linear(self.neurons, 28*28)
        self.phi = None

    # infer the sparse code phi for a batch via ISTA
    def ista_(self, img_batch):
        self.phi = torch.zeros((img_batch.shape[0], 400), requires_grad=True)
        converged = False
        # optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
        optimizer = torch.optim.SGD([{'params': self.phi, "lr": 0.1e-3}, {'params': self.parameters(), "lr": 0.1e-3}])
        while not converged:
            phi_old = self.phi.clone().detach()
            pred = self.output(self.phi)
            loss = ((img_batch - pred)**2).sum() + torch.norm(self.phi, p=1)
            loss.backward()
            optimizer.step()
            self.zero_grad()
            self.phi.data = self.soft_thresholding_(self.phi, self.lambda_)
            converged = torch.norm(self.phi - phi_old)/torch.norm(phi_old) < 1e-1

    def soft_thresholding_(self, x, alpha):
        # proximal operator of the L1 penalty
        with torch.no_grad():
            rtn = F.relu(x - alpha) - F.relu(-x - alpha)
        return rtn.data

    def zero_grad(self):
        self.phi.grad.zero_()
        self.output.zero_grad()

    def forward(self, img_batch):
        self.ista_(img_batch)
        pred = self.output(self.phi)
        return pred

ista_model = MNIST_ISTA(400)
optim = torch.optim.SGD([{'params': ista_model.output.weight, "lr": 0.01}])

for epoch in range(100):
    running_loss = 0
    c = 0
    for img_batch in tqdm(train_dl, desc='training', total=len(train_dl)):
        img_batch = img_batch[0]
        img_batch = img_batch.reshape(img_batch.shape[0], -1)
        pred = ista_model(img_batch)
        loss = ((img_batch - pred) ** 2).sum()
        running_loss += loss.item()
        loss.backward()
        optim.step()
        # zero grad
        ista_model.zero_grad()

weight = ista_model.output.weight.data.numpy()
print(weight.shape)

import matplotlib.pyplot as plt
fig = plt.figure(figsize=(20, 20))
for i in range(20):
    for j in range(20):
        ax = fig.add_subplot(20, 20, i*20 + j + 1)
        # show each filter (column of the weight matrix) as a 28x28 image
        ax.imshow(weight[:, i*20 + j].reshape((28, 28)))
        ax.axis('off')
# plt.close(fig)

Binary DenseNet 121 Classifier only predicting positive with probability >0.5

I borrowed code from this GitHub repo for training a DenseNet-121: https://github.com/gaetandi/cheXpert/blob/master/cheXpert_final.ipynb
The GitHub code is for 14-class classification on the CheXpert chest X-ray dataset. I've revised it for binary classification.
# initialize and load the model
pathModel = "/ds2/images/model_ones_2epoch_densenet.tar"#"m-epoch0-07032019-213933.pth.tar"
I initialize the 14-class model so I can use the pretrained weights:
model = DenseNet121(nnClassCount).cuda()
model = torch.nn.DataParallel(model).cuda()
modelCheckpoint = torch.load(pathModel)
model.load_state_dict(modelCheckpoint['state_dict'])
And then convert to binary classification:
nnClassCount = 1
model.module.densenet121.classifier = nn.Sequential(
    nn.Linear(1024, nnClassCount),
    nn.Sigmoid()
).cuda()
model = torch.nn.DataParallel(model).cuda()
And then train via:
batch, losst, losse = CheXpertTrainer.train(model, dataLoaderTrain, dataLoaderVal, nnClassCount, 100, timestampLaunch, checkpoint = None, weight_path = weight_path)
My training data is laid out in a two-column CSV with headers 'Path' and 'Class-Positive': path locations in the first column and 0 or 1 in the second. I used oversampling when compiling the training list, so the paths in the CSV are roughly a 50/50 split between 0s and 1s, shuffled.
I use livelossplot to monitor training/validation loss and accuracy. My loss plots look as expected, but the accuracy plots are flatlined around 0.5 (which makes sense given the 50/50 data if the net always predicts positive, or always negative). I'm assuming I'm doing something wrong in how I make predictions, but maybe something in the training is incorrect.
For predictions and probabilities I'm running:
varOutput = model(varInput)
_, preds = torch.max(varOutput, 1)
print('varshape: ',varOutput.shape)
probs = torch.sigmoid(varOutput)
My issue: preds all come out as 0, and probs are all above 0.5.
Here is the initial code from github:
import os
import numpy as np
import time
import sys
import csv
import cv2
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as tfunc
from torch.utils.data import Dataset
from torch.utils.data.dataset import random_split
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from PIL import Image
import torch.nn.functional as func

from sklearn.metrics import roc_auc_score  # sklearn.metrics.ranking is private/removed in newer sklearn
import sklearn.metrics as metrics
import random

use_gpu = torch.cuda.is_available()

# Paths to the files with training, and validation sets.
# Each file contains pairs (path to image, output vector)
pathFileTrain = '../CheXpert-v1.0-small/train.csv'
pathFileValid = '../CheXpert-v1.0-small/valid.csv'

# Neural network parameters:
nnIsTrained = False  # pre-trained using ImageNet
nnClassCount = 14    # dimension of the output

# Training settings: batch size, maximum number of epochs
trBatchSize = 64
trMaxEpoch = 3

# Parameters related to image transforms: size of the down-scaled image, cropped image
imgtransResize = (320, 320)
imgtransCrop = 224

# Class names
class_names = ['No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity',
               'Lung Lesion', 'Edema', 'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax',
               'Pleural Effusion', 'Pleural Other', 'Fracture', 'Support Devices']

class CheXpertDataSet(Dataset):
    def __init__(self, image_list_file, transform=None, policy="ones"):
        """
        image_list_file: path to the file containing images with corresponding labels.
        transform: optional transform to be applied on a sample.
        policy: how to handle the uncertain (-1) labels.
        """
        image_names = []
        labels = []
        with open(image_list_file, "r") as f:
            csvReader = csv.reader(f)
            next(csvReader, None)
            k = 0
            for line in csvReader:
                k += 1
                image_name = line[0]
                label = line[5:]
                for i in range(14):
                    if label[i]:
                        a = float(label[i])
                        if a == 1:
                            label[i] = 1
                        elif a == -1:
                            if policy == "ones":
                                label[i] = 1
                            elif policy == "zeroes":
                                label[i] = 0
                            else:
                                label[i] = 0
                        else:
                            label[i] = 0
                    else:
                        label[i] = 0
                image_names.append('../' + image_name)
                labels.append(label)
        self.image_names = image_names
        self.labels = labels
        self.transform = transform

    def __getitem__(self, index):
        """Take the index of item and returns the image and its labels"""
        image_name = self.image_names[index]
        image = Image.open(image_name).convert('RGB')
        label = self.labels[index]
        if self.transform is not None:
            image = self.transform(image)
        return image, torch.FloatTensor(label)

    def __len__(self):
        return len(self.image_names)

#TRANSFORM DATA
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
transformList = []
#transformList.append(transforms.Resize(imgtransCrop))
transformList.append(transforms.RandomResizedCrop(imgtransCrop))
transformList.append(transforms.RandomHorizontalFlip())
transformList.append(transforms.ToTensor())
transformList.append(normalize)
transformSequence = transforms.Compose(transformList)

#LOAD DATASET
dataset = CheXpertDataSet(pathFileTrain, transformSequence, policy="ones")
datasetTest, datasetTrain = random_split(dataset, [500, len(dataset) - 500])
datasetValid = CheXpertDataSet(pathFileValid, transformSequence)
# Possible issues: patient overlap between splits, and identical transforms?
dataLoaderTrain = DataLoader(dataset=datasetTrain, batch_size=trBatchSize, shuffle=True, num_workers=24, pin_memory=True)
dataLoaderVal = DataLoader(dataset=datasetValid, batch_size=trBatchSize, shuffle=False, num_workers=24, pin_memory=True)
dataLoaderTest = DataLoader(dataset=datasetTest, num_workers=24, pin_memory=True)
class CheXpertTrainer():

    def train(model, dataLoaderTrain, dataLoaderVal, nnClassCount, trMaxEpoch, launchTimestamp, checkpoint):
        #SETTINGS: OPTIMIZER & SCHEDULER
        optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-5)

        #SETTINGS: LOSS
        loss = torch.nn.BCELoss(size_average=True)

        #LOAD CHECKPOINT
        if checkpoint != None and use_gpu:
            modelCheckpoint = torch.load(checkpoint)
            model.load_state_dict(modelCheckpoint['state_dict'])
            optimizer.load_state_dict(modelCheckpoint['optimizer'])

        #TRAIN THE NETWORK
        lossMIN = 100000
        for epochID in range(0, trMaxEpoch):
            timestampTime = time.strftime("%H%M%S")
            timestampDate = time.strftime("%d%m%Y")
            timestampSTART = timestampDate + '-' + timestampTime
            batchs, losst, losse = CheXpertTrainer.epochTrain(model, dataLoaderTrain, optimizer, trMaxEpoch, nnClassCount, loss)
            lossVal = CheXpertTrainer.epochVal(model, dataLoaderVal, optimizer, trMaxEpoch, nnClassCount, loss)
            timestampTime = time.strftime("%H%M%S")
            timestampDate = time.strftime("%d%m%Y")
            timestampEND = timestampDate + '-' + timestampTime
            if lossVal < lossMIN:
                lossMIN = lossVal
                torch.save({'epoch': epochID + 1, 'state_dict': model.state_dict(), 'best_loss': lossMIN, 'optimizer': optimizer.state_dict()}, 'm-epoch'+str(epochID)+'-' + launchTimestamp + '.pth.tar')
                print('Epoch [' + str(epochID + 1) + '] [save] [' + timestampEND + '] loss= ' + str(lossVal))
            else:
                print('Epoch [' + str(epochID + 1) + '] [----] [' + timestampEND + '] loss= ' + str(lossVal))
        return batchs, losst, losse

    #--------------------------------------------------------------------------------

    def epochTrain(model, dataLoader, optimizer, epochMax, classCount, loss):
        batch = []
        losstrain = []
        losseval = []
        model.train()
        # note: this iterates the global dataLoaderTrain, not the dataLoader argument
        for batchID, (varInput, target) in enumerate(dataLoaderTrain):
            varTarget = target.cuda(non_blocking=True)
            #varTarget = target.cuda()
            varOutput = model(varInput)
            lossvalue = loss(varOutput, varTarget)
            optimizer.zero_grad()
            lossvalue.backward()
            optimizer.step()
            l = lossvalue.item()
            losstrain.append(l)
            if batchID % 35 == 0:
                print(batchID//35, "% batches computed")
                #Fill three arrays to see the evolution of the loss
                batch.append(batchID)
                le = CheXpertTrainer.epochVal(model, dataLoaderVal, optimizer, trMaxEpoch, nnClassCount, loss).item()
                losseval.append(le)
                print(batchID)
                print(l)
                print(le)
        return batch, losstrain, losseval

    #--------------------------------------------------------------------------------

    def epochVal(model, dataLoader, optimizer, epochMax, classCount, loss):
        model.eval()
        lossVal = 0
        lossValNorm = 0
        with torch.no_grad():
            for i, (varInput, target) in enumerate(dataLoaderVal):
                target = target.cuda(non_blocking=True)
                varOutput = model(varInput)
                losstensor = loss(varOutput, target)
                lossVal += losstensor
                lossValNorm += 1
        outLoss = lossVal / lossValNorm
        return outLoss

    #--------------------------------------------------------------------------------
    #---- Computes area under ROC curve
    #---- dataGT - ground truth data
    #---- dataPRED - predicted data
    #---- classCount - number of classes

    def computeAUROC(dataGT, dataPRED, classCount):
        outAUROC = []
        datanpGT = dataGT.cpu().numpy()
        datanpPRED = dataPRED.cpu().numpy()
        for i in range(classCount):
            try:
                outAUROC.append(roc_auc_score(datanpGT[:, i], datanpPRED[:, i]))
            except ValueError:
                pass
        return outAUROC

    #--------------------------------------------------------------------------------

    def test(model, dataLoaderTest, nnClassCount, checkpoint, class_names):
        cudnn.benchmark = True
        if checkpoint != None and use_gpu:
            modelCheckpoint = torch.load(checkpoint)
            model.load_state_dict(modelCheckpoint['state_dict'])
        if use_gpu:
            outGT = torch.FloatTensor().cuda()
            outPRED = torch.FloatTensor().cuda()
        else:
            outGT = torch.FloatTensor()
            outPRED = torch.FloatTensor()
        model.eval()
        with torch.no_grad():
            for i, (input, target) in enumerate(dataLoaderTest):
                target = target.cuda()
                outGT = torch.cat((outGT, target), 0).cuda()
                bs, c, h, w = input.size()
                varInput = input.view(-1, c, h, w)
                out = model(varInput)
                outPRED = torch.cat((outPRED, out), 0)
        aurocIndividual = CheXpertTrainer.computeAUROC(outGT, outPRED, nnClassCount)
        aurocMean = np.array(aurocIndividual).mean()
        print('AUROC mean ', aurocMean)
        for i in range(0, len(aurocIndividual)):
            print(class_names[i], ' ', aurocIndividual[i])
        return outGT, outPRED


class DenseNet121(nn.Module):
    """Model modified.
    The architecture of our model is the same as standard DenseNet121
    except the classifier layer which has an additional sigmoid function.
    """
    def __init__(self, out_size):
        super(DenseNet121, self).__init__()
        self.densenet121 = torchvision.models.densenet121(pretrained=True)
        num_ftrs = self.densenet121.classifier.in_features
        self.densenet121.classifier = nn.Sequential(
            nn.Linear(num_ftrs, out_size),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.densenet121(x)
        return x


# initialize and load the model
model = DenseNet121(nnClassCount).cuda()
model = torch.nn.DataParallel(model).cuda()
timestampTime = time.strftime("%H%M%S")
timestampDate = time.strftime("%d%m%Y")
timestampLaunch = timestampDate + '-' + timestampTime
batch, losst, losse = CheXpertTrainer.train(model, dataLoaderTrain, dataLoaderVal, nnClassCount, trMaxEpoch, timestampLaunch, checkpoint=None)
print("Model trained")
It looks like you have adapted the training correctly for binary classification, but the prediction step wasn't adapted: you are still treating it as a multi-class prediction.
The output of your model (varOutput) has size (batch_size, 1), since there is only one output. The index of the maximum across that dimension will always be 0, because it is the only entry; there is no separate output for class 1.
This single output represents both cases (0 and 1), so you can read it as the probability of the positive class (1). To get a discrete 0 or 1, simply threshold at 0.5: everything below that receives class 0 and everything above it class 1. This can easily be done with torch.round.
But you also have another problem: you're applying the sigmoid function twice in a row, once in the classifier (nn.Sigmoid()) and then again afterwards (torch.sigmoid(varOutput)). That is problematic, because sigmoid(0) = 0.5, so applying it a second time to values already in (0, 1) maps everything above 0.5; hence all your probabilities are over 0.5.
The outputs of your model are already the probabilities; the only thing left is to round them:
probs = model(varInput)
# The .squeeze(1) is to get rid of the singular class dimension
preds = torch.round(probs).squeeze(1)
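To see the double-sigmoid effect concretely, here is a minimal standalone sketch (not part of the original code):
import torch

logits = torch.tensor([-3.0, 0.0, 3.0])
probs = torch.sigmoid(logits)    # proper probabilities: ~0.047, 0.500, 0.953
double = torch.sigmoid(probs)    # sigmoid applied again: ~0.512, 0.622, 0.722
print(probs, double)             # the second pass squeezes everything above 0.5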

LSTM layer returns nan when fed by its own output in PyTorch

I’m trying to generate time-series data with an LSTM and a Mixture Density Network as described in https://arxiv.org/pdf/1308.0850.pdf
Here is a link to my implementation: https://github.com/NeoVand/MDNLSTM
The repository contains a toy dataset to train the network.
On training, the LSTM layer returns nan for its hidden state after one iteration. A similar issue is reported here.
For your convenience, here is the code:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import numpy.random as npr

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

ts = torch.load('LDS_Toy_Data.pt')

def detach(states):
    return [state.detach() for state in states]

class MDNLSTM(nn.Module):
    def __init__(self, d_obs, d_lat=2, n_gaussians=2, n_layers=1):
        super(MDNLSTM, self).__init__()
        self.d_obs = d_obs
        self.d_lat = d_lat
        self.n_gaussians = n_gaussians
        self.n_layers = n_layers
        self.lstm = nn.LSTM(d_obs, d_lat, n_layers, batch_first=True)
        self.fcPi = nn.Linear(d_lat, n_gaussians*d_obs)
        self.fcMu = nn.Linear(d_lat, n_gaussians*d_obs)
        self.fcSigma = nn.Linear(d_lat, n_gaussians*d_obs)

    def get_mixture_coef(self, y):
        time_steps = y.size(1)
        pi, mu, sigma = self.fcPi(y), self.fcMu(y), self.fcSigma(y)
        pi = pi.view(-1, time_steps, self.n_gaussians, self.d_obs)
        mu = mu.view(-1, time_steps, self.n_gaussians, self.d_obs)
        sigma = sigma.view(-1, time_steps, self.n_gaussians, self.d_obs)
        pi = F.softmax(pi, 2)
        sigma = torch.exp(sigma)
        return pi, mu, sigma

    def forward(self, x, h):
        y, (h, c) = self.lstm(x, h)
        #print(h)
        pi, mu, sigma = self.get_mixture_coef(y)
        return (pi, mu, sigma), (h, c)

    def init_hidden(self, bsz):
        return (torch.zeros(self.n_layers, bsz, self.d_lat).to(device),
                torch.zeros(self.n_layers, bsz, self.d_lat).to(device))

def mdn_loss_fn(y, pi, mu, sigma):
    m = torch.distributions.Normal(loc=mu, scale=sigma)
    loss = torch.exp(m.log_prob(y))
    loss = torch.sum(loss * pi, dim=2)
    loss = -torch.log(loss)
    return loss.mean()

def criterion(y, pi, mu, sigma):
    y = y.unsqueeze(2)
    return mdn_loss_fn(y, pi, mu, sigma)

DOBS = 10
DLAT = 2
INSTS = 100
seqlen = 30
epochs = 200

mdnlstm = MDNLSTM(DOBS, DLAT).to(device)
optimizer = torch.optim.Adam(mdnlstm.parameters())

z = torch.from_numpy(ts[:INSTS, :, :]).float().to(device)
# hiddens=[]

# Train the model
for epoch in range(epochs):
    # Set initial hidden and cell states
    hidden = mdnlstm.init_hidden(INSTS)
    for i in range(0, z.size(1) - seqlen, seqlen):
        # Get mini-batch inputs and targets
        inputs = z[:, i:i+seqlen, :]
        targets = z[:, (i+1):(i+1)+seqlen, :]
        hidden = detach(hidden)
        # hiddens.append(hidden)
        (pi, mu, sigma), hidden = mdnlstm(inputs, hidden)
        loss = criterion(targets, pi, mu, sigma)
        mdnlstm.zero_grad()
        loss.backward()
        optimizer.step()
    if epoch % 100 == 0:
        print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch, epochs, loss.item()))
I would appreciate any help on this.
The issue was caused by the log-sum-exp operation not being done in a numerically stable way. Here is an implementation of a weighted log-sum-exp trick that I used, which fixed the problem:
def weighted_logsumexp(x, w, dim=None, keepdim=False):
    if dim is None:
        x, dim = x.view(-1), 0
    xm, _ = torch.max(x, dim, keepdim=True)
    x = torch.where(
        # guard against inf - inf producing nasty nan's
        (xm == float('inf')) | (xm == float('-inf')),
        xm,
        xm + torch.log(torch.sum(torch.exp(x - xm)*w, dim, keepdim=True)))
    return x if keepdim else x.squeeze(dim)
and, using that, the stable loss function:
def mdn_loss_stable(y, pi, mu, sigma):
    m = torch.distributions.Normal(loc=mu, scale=sigma)
    m_lp_y = m.log_prob(y)
    loss = -weighted_logsumexp(m_lp_y, pi, dim=2)
    return loss.mean()
This worked like a charm. In general, the problem is that torch won't report underflows.
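For completeness, a minimal way to wire the stable loss into the training loop above is to swap it into criterion in place of mdn_loss_fn (a sketch based on the question's code):
def criterion(y, pi, mu, sigma):
    # broadcast targets against the mixture dimension as before,
    # but evaluate the mixture in log space via the stable loss
    y = y.unsqueeze(2)
    return mdn_loss_stable(y, pi, mu, sigma)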

Getting polynomial regression to overfit with TensorFlow

The scikit-learn documentation contains an example of polynomial regression that beautifully illustrates the idea of overfitting (link).
The third plot shows a 15th-order polynomial that overfits the simulated data. I replicated this model in TensorFlow, but I cannot get it to overfit.
Even when tuning the learning rate and the numbers of learning epochs, I cannot get the model to overfit. What am I missing?
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

def true_fun(X):
    return np.cos(1.5 * np.pi * X)

# Generate dataset
n_samples = 30
np.random.seed(0)
x_train = np.sort(np.random.rand(n_samples))  # Draw from uniform distribution
y_train = true_fun(x_train) + np.random.randn(n_samples) * 0.1
x_test = np.linspace(0, 1, 100)
y_true = true_fun(x_test)

# Helper function
def run_dir(base_dir, dirname='run'):
    "Number log directories incrementally"
    import os
    import re
    pattern = re.compile(dirname + r'_(\d+)')
    try:
        previous_runs = os.listdir(base_dir)
    except FileNotFoundError:
        previous_runs = []
    run_number = 0
    for name in previous_runs:
        match = pattern.search(name)
        if match:
            number = int(match.group(1))
            if number > run_number:
                run_number = number
    run_number += 1
    logdir = os.path.join(base_dir, dirname + '_%02d' % run_number)
    return logdir

# Define the polynomial model
def model(X, w):
    """Polynomial model
    param X: data
    param w: coefficients of the polynomial regression
    returns: polynomial function Y(X, w)
    """
    terms = []
    for i in range(int(w.shape[0])):
        term = tf.multiply(w[i], tf.pow(X, i))
        terms.append(term)
    return tf.add_n(terms)

# Create the computation graph
order = 15
tf.reset_default_graph()
X = tf.placeholder("float")
Y = tf.placeholder("float")
w = tf.Variable([0.]*order, name="parameters")
lambda_reg = tf.placeholder('float', shape=[])
learning_rate_ph = tf.placeholder('float', shape=[])
y_model = model(X, w)
loss = tf.div(tf.reduce_mean(tf.square(Y - y_model)), 2)        # Squared error
loss_rg = tf.multiply(lambda_reg, tf.reduce_sum(tf.square(w)))  # L2 penalty
loss_total = tf.add(loss, loss_rg)
loss_hist1 = tf.summary.scalar('loss', loss)
loss_hist2 = tf.summary.scalar('loss_rg', loss_rg)
loss_hist3 = tf.summary.scalar('loss_total', loss_total)
summary = tf.summary.merge([loss_hist1, loss_hist2, loss_hist3])
train_op = tf.train.GradientDescentOptimizer(learning_rate_ph).minimize(loss_total)
init = tf.global_variables_initializer()

def train(sess, x_train, y_train, lambda_val=0, epochs=2000, learning_rate=0.01):
    feed_dict = {X: x_train, Y: y_train, lambda_reg: lambda_val, learning_rate_ph: learning_rate}
    logdir = run_dir("logs/polynomial_regression2/")
    writer = tf.summary.FileWriter(logdir)
    sess.run(init)
    for epoch in range(epochs):
        _, summary_str = sess.run([train_op, summary], feed_dict=feed_dict)
        writer.add_summary(summary_str, global_step=epoch)
    final_cost, final_cost_rg, w_learned = sess.run([loss, loss_rg, w], feed_dict=feed_dict)
    return final_cost, final_cost_rg, w_learned

def plot_test(w_learned, x_test, x_train, y_train):
    y_learned = calculate_y(x_test, w_learned)
    plt.scatter(x_train, y_train)
    plt.plot(x_test, y_true, label="true function")
    plt.plot(x_test, y_learned, 'r', label="learned function")
    #plt.title('$\lambda = {:03.2f}$'.format(lambda_values[i]))
    plt.ylabel('y')
    plt.xlabel('x')
    plt.legend()
    plt.show()

def calculate_y(x, w):
    y = 0
    for i in range(w.shape[0]):
        y += w[i] * np.power(x, i)
    return y

sess = tf.Session()
final_cost, final_cost_rg, w_learned = train(sess, x_train, y_train, lambda_val=0,
                                             learning_rate=0.3, epochs=2000)
sess.close()
plot_test(w_learned, x_test, x_train, y_train)
I had the same problem: when I do polynomial regression, I also can't overfit the data using gradient descent in TensorFlow.
I then compared the coefficients (weights) of the model against sklearn's LinearRegression, and found that when the polynomial degree is larger, the high-order coefficients are very small (e.g. 1e-4) while the low-order ones are relatively large (e.g. 0.1).
That means that when you use gradient descent to search for the best weights, the high-order coefficients are extremely sensitive to value changes, while the low-order ones are not.
And I guess the best (overfitting) coefficients are large for the low-order terms and tiny for the high-order terms. If you set a large learning rate, it's impossible to find the right answer, and if you set a tiny learning rate, you need a huge number of iterations.
This shows up clearly when you try to overfit a small data set with gradient descent.
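To check the coefficient scales described above, one can fit the same 15th-order polynomial with scikit-learn in closed form and print the weights (a sketch, reusing x_train and y_train from the question's code):
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

# closed-form least-squares fit of the same degree-15 polynomial
X_poly = PolynomialFeatures(degree=15).fit_transform(x_train.reshape(-1, 1))
lr = LinearRegression().fit(X_poly, y_train)

# the coefficients span many orders of magnitude, which is what makes
# plain gradient descent on this problem so badly conditioned
for i, c in enumerate(lr.coef_):
    print(f'w[{i:2d}] = {c: .3e}')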

EM-Algorithm with Pykalman

I am trying to implement a simple application of the Kalman filter using Pykalman, but I am getting an error in the estimation step of the EM algorithm that comes with the Pykalman package.
It is a simple linear regression with a time-varying coefficient, based on simulated data. The code below simulates the data and sets up the Kalman filter, but when I try to estimate the parameters from the observations using kf.em(Data), it returns the error: ValueError: object arrays are not supported.
Am I doing something wrong with pykalman?
Model and full code below. The error occurs on the last line of the code.
Model (images): description of the problem, and state-space representation of the model.
Full Code
import pandas as pd
import scipy as sp
import numpy as np
import matplotlib.pyplot as plt
import pylab as pl
from pykalman import KalmanFilter

# generates the data
Data = pd.DataFrame(columns=['NoiseAR', 'NoiseReg', 'x', 'beta', 'y'], index=range(1000))
Data['NoiseAR'] = np.random.normal(loc=0.0, scale=1.0, size=1000)
Data['NoiseReg'] = np.random.normal(loc=0.0, scale=1.0, size=1000)

for i in range(1000):
    if i == 0:
        Data.loc[i, 'x'] = Data.loc[i, 'NoiseAR']
    else:
        Data.loc[i, 'x'] = 0.95*Data.loc[i-1, 'x'] + Data.loc[i, 'NoiseAR']

for i in range(1000):
    Data.loc[i, 'beta'] = np.sin(np.radians(i))

Data['y'] = Data['x']*Data['beta'] + Data['NoiseReg']

# set up the kalman filter
F = [1.]
H = Data['x'].values.reshape(1000, 1, 1)
Q = [2.]
R = [2.]
init_state_mean = [0.]
init_state_cov = [2.]

kf = KalmanFilter(
    transition_matrices=F,
    observation_matrices=H,
    transition_covariance=Q,
    observation_covariance=R,
    initial_state_mean=init_state_mean,
    initial_state_covariance=init_state_cov,
    em_vars=['transition_covariance', 'observation_covariance', 'initial_state_mean', 'initial_state_covariance']
)

# estimate the parameters from em_vars using the EM algorithm
kf = kf.em(Data['y'].values)
I figured it out! Data['y'].values is a numpy array with dtype=object. All I had to do was change the type of the array to float using .astype(float). This has to be done with everything that goes into the pykalman KalmanFilter object, so I also had to change the type of the H matrix.
Hope this helps somebody in the future!
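To see the dtype problem concretely, here is a minimal check (a sketch, using the Data frame built in the question):
# Columns filled element-wise via .loc end up as dtype=object,
# which pykalman's EM step rejects with "object arrays are not supported".
print(Data['y'].values.dtype)                  # object
print(Data['y'].values.astype(float).dtype)    # float64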
Here is what the final working code looks like:
import pandas as pd
import scipy as sp
import numpy as np
import matplotlib.pyplot as plt
import pylab as pl
from pykalman import KalmanFilter

Data = pd.DataFrame(columns=['NoiseAR', 'NoiseReg', 'x', 'beta', 'y'], index=range(1000))
Data['NoiseAR'] = np.random.normal(loc=0.0, scale=1.0, size=1000)
Data['NoiseReg'] = np.random.normal(loc=0.0, scale=1.0, size=1000)
plt.plot(Data[['NoiseAR', 'NoiseReg']])
plt.show()

for i in range(1000):
    if i == 0:
        Data.loc[i, 'x'] = Data.loc[i, 'NoiseAR']
    else:
        Data.loc[i, 'x'] = 0.95 * Data.loc[i - 1, 'x'] + Data.loc[i, 'NoiseAR']
plt.plot(Data['x'])
plt.show()

for i in range(1000):
    Data.loc[i, 'beta'] = np.sin(np.radians(i))
plt.plot(Data['beta'])
plt.show()

Data['y'] = Data['x']*Data['beta'] + Data['NoiseReg']
plt.plot(Data[['x', 'y']])
plt.show()

F = [1.]
H = Data['x'].values.reshape(1000, 1, 1).astype(float)  # cast to float for pykalman
Q = [2.]
R = [2.]
init_state_mean = [0.]
init_state_cov = [2.]

kf = KalmanFilter(
    transition_matrices=F,
    observation_matrices=H,
    transition_covariance=Q,
    observation_covariance=R,
    initial_state_mean=init_state_mean,
    initial_state_covariance=init_state_cov,
    em_vars=['transition_covariance', 'observation_covariance', 'initial_state_mean', 'initial_state_covariance']
)

kf = kf.em(Data['y'].values.astype(float))
filtered_state_estimates = kf.filter(Data['y'].values.astype(float))[0]
smoothed_state_estimates = kf.smooth(Data['y'].values.astype(float))[0]

pl.figure(figsize=(10, 6))
lines_true = pl.plot(Data['beta'].values, linestyle='-', color='b')
lines_filt = pl.plot(filtered_state_estimates, linestyle='--', color='g')
lines_smooth = pl.plot(smoothed_state_estimates, linestyle='-.', color='r')
pl.legend(
    (lines_true[0], lines_filt[0], lines_smooth[0]),
    ('true', 'filtered', 'smoothed')
)
pl.xlabel('time')
pl.ylabel('state')
pl.show()