EM-Algorithm with Pykalman

I am trying to implement a simple application of the Kalman filter using pykalman, but I am getting an error on the estimation step of the EM algorithm that comes with the pykalman package.
It is a simple linear regression with a time-varying coefficient, based on simulated data. The code below simulates the data and sets up the Kalman filter, but when I try to estimate the parameters from the observations using kf.em(Data), it returns the error: ValueError: object arrays are not supported.
Am I doing something wrong with pykalman?
Model and full code below. The error occurs on the last line of the code.
Model (originally two small images: a description of the problem and the state-space representation)
The model is a regression with a time-varying coefficient: the observation equation is y_t = x_t*beta_t + e_t with e_t ~ N(0, R), and the coefficient follows the random-walk state equation beta_t = beta_{t-1} + w_t with w_t ~ N(0, Q). This corresponds to the transition matrix F = [1] and the time-varying observation matrices H_t = x_t used in the code below.
Full Code
import pandas as pd
import scipy as sp
import numpy as np
import matplotlib.pyplot as plt
import pylab as pl
from pykalman import KalmanFilter
# generates the data
Data = pd.DataFrame(columns=['NoiseAR','NoiseReg', 'x', 'beta', 'y'], index=range(1000))
Data['NoiseAR'] = np.random.normal(loc=0.0, scale=1.0, size=1000)
Data['NoiseReg'] = np.random.normal(loc=0.0, scale=1.0, size=1000)
for i in range(1000):
    if i == 0:
        Data.loc[i, 'x'] = Data.loc[i, 'NoiseAR']
    else:
        Data.loc[i, 'x'] = 0.95*Data.loc[i-1, 'x'] + Data.loc[i, 'NoiseAR']
for i in range(1000):
    Data.loc[i, 'beta'] = np.sin(np.radians(i))
Data['y'] = Data['x']*Data['beta'] + Data['NoiseReg']
# set up the kalman filter
F = [1.]
H = Data['x'].values.reshape(1000,1,1)
Q = [2.]
R = [2.]
init_state_mean = [0.]
init_state_cov = [2.]
kf = KalmanFilter(
    transition_matrices=F,
    observation_matrices=H,
    transition_covariance=Q,
    observation_covariance=R,
    initial_state_mean=init_state_mean,
    initial_state_covariance=init_state_cov,
    em_vars=['transition_covariance', 'observation_covariance',
             'initial_state_mean', 'initial_state_covariance']
)
# estimate the parameters from em_vars using the EM algorithm
kf = kf.em(Data['y'].values)

I figured it out! Data['y'].values is a numpy array with dtype=object. All I had to do was change the type of the array to float using .astype(float). This has to be done with everything that goes into the pykalman KalmanFilter object, so I also had to change the type of the H matrix.
Hope this helps somebody in the future!
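For anyone curious why the dtype ends up as object in the first place, here is a minimal sketch of the pattern above (exact inference behavior may vary with your pandas version): a column that is created empty and filled row by row keeps dtype=object until you convert it explicitly.
import pandas as pd
# toy frame mirroring the pattern above: the column exists before it is filled
df = pd.DataFrame(columns=['y'], index=range(3))
for i in range(3):
    df.loc[i, 'y'] = float(i)
print(df['y'].values.dtype)                 # object
print(df['y'].values.astype(float).dtype)   # float64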
Here is what the final working code looks like:
import pandas as pd
import scipy as sp
import numpy as np
import matplotlib.pyplot as plt
import pylab as pl
from pykalman import KalmanFilter
Data = pd.DataFrame(columns=['NoiseAR','NoiseReg', 'x', 'beta', 'y'], index=range(1000))
Data['NoiseAR'] = np.random.normal(loc=0.0, scale=1.0, size=1000)
Data['NoiseReg'] = np.random.normal(loc=0.0, scale=1.0, size=1000)
plt.plot(Data[['NoiseAR','NoiseReg']])
plt.show()
for i in range(1000):
    if i == 0:
        Data.loc[i, 'x'] = Data.loc[i, 'NoiseAR']
    else:
        Data.loc[i, 'x'] = 0.95 * Data.loc[i - 1, 'x'] + Data.loc[i, 'NoiseAR']
plt.plot(Data['x'])
plt.show()
for i in range(1000):
    Data.loc[i, 'beta'] = np.sin(np.radians(i))
plt.plot(Data['beta'])
plt.show()
Data['y'] = Data['x']*Data['beta'] + Data['NoiseReg']
plt.plot(Data[['x', 'y']])
plt.show()
F = [1.]
H = Data['x'].values.reshape(1000,1,1).astype(float)
Q = [2.]
R = [2.]
init_state_mean = [0.]
init_state_cov = [2.]
kf = KalmanFilter(
    transition_matrices=F,
    observation_matrices=H,
    transition_covariance=Q,
    observation_covariance=R,
    initial_state_mean=init_state_mean,
    initial_state_covariance=init_state_cov,
    em_vars=['transition_covariance', 'observation_covariance',
             'initial_state_mean', 'initial_state_covariance']
)
kf = kf.em(Data['y'].values.astype(float))
filtered_state_estimates = kf.filter(Data['y'].values.astype(float))[0]
smoothed_state_estimates = kf.smooth(Data['y'].values.astype(float))[0]
pl.figure(figsize=(10, 6))
lines_true = pl.plot(Data['beta'].values, linestyle='-', color='b')
lines_filt = pl.plot(filtered_state_estimates, linestyle='--', color='g')
lines_smooth = pl.plot(smoothed_state_estimates, linestyle='-.', color='r')
pl.legend(
    (lines_true[0], lines_filt[0], lines_smooth[0]),
    ('true', 'filtered', 'smoothed')
)
pl.xlabel('time')
pl.ylabel('state')
pl.show()

Related

Differentiation with GradientTape of Tensorflow in 3 dimensions

I am studying PDEs using PINNs and constructed the code below with TensorFlow.
More specifically, I deal with the heat equation in 3-dimensional space (2 space dimensions + 1 time dimension).
However, the results are not very good.
I guess there is a problem with the second derivative.
Here, I attached my code.
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input
from tensorflow.keras import layers
NN = Sequential()
NN.add(Input((3,)))
NN.add(Dense(units = 36, activation = 'tanh'))
NN.add(Dense(units = 60, activation = 'tanh'))
NN.add(Dense(units = 1))
def dim3_neural_network(self, neural_network, train):
    with tf.GradientTape(persistent=True) as tape2:
        with tf.GradientTape(persistent=True) as tape1:
            x = tf.Variable(train[0], trainable=True)
            y = tf.Variable(train[1], trainable=True)
            z = tf.Variable(train[2], trainable=True)
            u = tf.transpose(neural_network(tf.transpose(tf.stack([x, y, z], axis=0))))
        du_dx = tape1.gradient(u, x, unconnected_gradients=tf.UnconnectedGradients.ZERO)
        du_dy = tape1.gradient(u, y, unconnected_gradients=tf.UnconnectedGradients.ZERO)
        du_dz = tape1.gradient(u, z, unconnected_gradients=tf.UnconnectedGradients.ZERO)
    d2u_dxx = tape2.gradient(du_dx, x, unconnected_gradients=tf.UnconnectedGradients.ZERO)
    d2u_dyy = tape2.gradient(du_dy, y, unconnected_gradients=tf.UnconnectedGradients.ZERO)
    d2u_dzz = tape2.gradient(du_dz, z, unconnected_gradients=tf.UnconnectedGradients.ZERO)
    result = tf.convert_to_tensor([x.numpy(), y.numpy(), z.numpy(), u.numpy(),
                                   du_dx.numpy(), du_dy.numpy(), du_dz.numpy(),
                                   d2u_dxx.numpy(), d2u_dyy.numpy(), d2u_dzz.numpy()],
                                  dtype=tf.float32)
Since my code is too long, I did not attach the whole thing here.
However, you can see the whole code on my GitHub.
Whole Code
I am hoping someone can spot the problem.
Thanks.
TensorFlow version: 2.10.0
Python version: 3.8.8
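For reference (not the author's code), here is a minimal nested-tape sketch of the standard pattern for second derivatives, with tf.sin standing in for the network; the dim3_neural_network above follows the same structure:
import tensorflow as tf
x = tf.Variable([1.0, 2.0, 3.0])
with tf.GradientTape() as tape2:
    with tf.GradientTape() as tape1:
        u = tf.sin(x)                   # stand-in for the network output
    du_dx = tape1.gradient(u, x)        # first derivative, taken inside tape2's context
d2u_dxx = tape2.gradient(du_dx, x)      # second derivative
print(d2u_dxx)                          # equals -sin(x)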

How to plot polynomial regression line

I'm having trouble plotting the regression line. Here is the code and my plot so far. I wanted to see the regression line between the points. Appreciate the help. I'm trying to do this in Python. Forgive my code, I'm a newb at this.
data = 'https://archive.ics.uci.edu/ml/machine-learning-databases/blood-transfusion/transfusion.data'
df = pd.read_csv(data)
x = df.iloc[:, 2:3]
y= df.iloc[:, -1]
x_train = np.array(x[:-20])
x_test = np.array(x[-20:])
y_train = np.array(y[:-20])
y_test = np.array(y[-20:])
for i in range(1,11):
    poly_regr = PolynomialFeatures(degree = i)
    x_train_poly = poly_regr.fit_transform(x_train)
    x_test_poly = poly_regr.fit_transform(x_test)
    clf = LinearRegression()
    clf.fit(x_train_poly, y_train)
    train_pred = clf.predict(x_train_poly)
    test_pred = clf.predict(x_test_poly)
    train_error = (mean_squared_error(y_train, train_pred))
    test_error = (mean_squared_error(y_test, test_pred))
    x_axis = np.arange(0,10,.1)
    plt.scatter(i, train_error, color='green')
    plt.scatter(i, test_error, color='black')
plt.grid(True)
for i in range(1,11):
    clf = Lasso(alpha = i)
    clf.fit(x_train, y_train)
    train_pred = clf.predict(x_train)
    test_pred = clf.predict(x_test)
    train_error = (mean_squared_error(y_train, train_pred))
    test_error = (mean_squared_error(y_test, test_pred))
    x_axis = np.arange(0,10)
    plt.scatter(i, train_error, color='red')
    plt.scatter(i, test_error, color='brown')
I'd like to see the regression line.
Looks like you have two plots, train and test, on one figure. I assume you want two regression lines, one for train and one for test.
I suggest you store the errors in a list/array and move the plotting outside the loop. You can then use np.polyfit to find the slope and intercept of each line, which makes plotting straightforward (see the full code below).
P.S. I used train_test_split(), so my plot looks a bit different.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
df = pd.read_csv('transfusion.data', sep=',', names=['R', 'F', 'M', 'T', 'class'])
df = df.drop(0)
df = df.astype('int32')
X = df.drop('class', axis=1)
y = df['class']
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=27)
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
# initialize tr and te arrays for storing training and testing
# accuracies respectively
### Linear Regression ###
tr = []
te = []
for i in range(1,11):
    poly_regr = PolynomialFeatures(degree = i)
    x_train_poly = poly_regr.fit_transform(X_train)
    x_test_poly = poly_regr.fit_transform(X_test)
    clf = LinearRegression()
    clf.fit(x_train_poly, y_train)
    train_pred = clf.predict(x_train_poly)
    test_pred = clf.predict(x_test_poly)
    train_error = (mean_squared_error(y_train, train_pred))
    tr.append(train_error)
    test_error = (mean_squared_error(y_test, test_pred))
    te.append(test_error)
x_axis = np.arange(1,11)
plt.scatter(x_axis, tr, color='green')
plt.scatter(x_axis, te, color='black')
m, b = np.polyfit(x_axis, tr, 1)
plt.plot(x_axis, m*x_axis+b)
m, b = np.polyfit(x_axis, te, 1)
plt.plot(x_axis, m*x_axis+b)
plt.grid(True)
### Lasso ###
tr = []
te = []
for i in range(1,11):
    clf = Lasso(alpha = i)
    clf.fit(X_train, y_train)
    train_pred = clf.predict(X_train)
    test_pred = clf.predict(X_test)
    train_error = (mean_squared_error(y_train, train_pred))
    tr.append(train_error)
    test_error = (mean_squared_error(y_test, test_pred))
    te.append(test_error)
x_axis = np.arange(1,11)
plt.scatter(x_axis, tr, color='red')
plt.scatter(x_axis, te, color='brown')
m, b = np.polyfit(x_axis, tr, 1)
plt.plot(x_axis, m*x_axis+b)
m, b = np.polyfit(x_axis, te, 1)
plt.plot(x_axis, m*x_axis+b)
Output for Linear Regression
And output for Lasso

Filters are not being learnt in sparse coding

Could you please take a look at the code below? I am trying to implement a simple sparse coding algorithm. I try to visualize the filters at the end, but it seems that the filters are not learned at all.
In the code, phi and the weights should be learned independently. I used the ISTA algorithm to learn phi.
I'd appreciate it if you could take a look.
Thank you.
import torch
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('device is:', device)
# dataset definition
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
mnist_trainset.data = mnist_trainset.data[:10000]
mnist_testset.data = mnist_testset.data[:5000]
from torch.utils.data import DataLoader
train_dl = DataLoader(mnist_trainset, batch_size=32, shuffle=True)
test_dl = DataLoader(mnist_testset, batch_size=1024, shuffle=False)
from numpy import vstack
from sklearn.metrics import accuracy_score
from torch.optim import SGD
from torch.nn import Module
from torch.nn import Linear
from tqdm import tqdm
class MNIST_ISTA(Module):
    # define model elements
    def __init__(self, n_inputs):
        self.lambda_ = 0.5e-5
        super(MNIST_ISTA, self).__init__()
        # input to first hidden layer
        # self.sc = Scattering2D(shape=(28,28), J=2)
        # self.view = Vi
        self.neurons = 400
        self.receptive_field = 10
        self.output = Linear(self.neurons, 28*28)
        self.phi = None
    # forward propagate input
    def ista_(self, img_batch):
        self.phi = torch.zeros((img_batch.shape[0], 400), requires_grad=True)
        converged = False
        # optimizer = SGD(model.parameters(), lr=0.001, momentum=0.9)
        optimizer = torch.optim.SGD([{'params': self.phi, "lr": 0.1e-3},
                                     {'params': self.parameters(), "lr": 0.1e-3}])
        while not converged:
            phi_old = self.phi.clone().detach()
            pred = self.output(self.phi)
            loss = ((img_batch-pred)**2).sum() + torch.norm(self.phi, p=1)
            loss.backward()
            optimizer.step()
            self.zero_grad()
            self.phi.data = self.soft_thresholding_(self.phi, self.lambda_)
            converged = torch.norm(self.phi - phi_old)/torch.norm(phi_old) < 1e-1
    def soft_thresholding_(self, x, alpha):
        with torch.no_grad():
            rtn = F.relu(x-alpha) - F.relu(-x-alpha)
        return rtn.data
    def zero_grad(self):
        self.phi.grad.zero_()
        self.output.zero_grad()
    def forward(self, img_batch):
        self.ista_(img_batch)
        pred = self.output(self.phi)
        return pred
ista_model = MNIST_ISTA(400)
optim = torch.optim.SGD([{'params': ista_model.output.weight, "lr": 0.01}])
for epoch in range(100):
    running_loss = 0
    c = 0
    for img_batch in tqdm(train_dl, desc='training', total=len(train_dl)):
        img_batch = img_batch[0]
        img_batch = img_batch.reshape(img_batch.shape[0], -1)
        pred = ista_model(img_batch)
        loss = ((img_batch - pred) ** 2).sum()
        running_loss += loss.item()
        loss.backward()
        optim.step()
        # zero grad
        ista_model.zero_grad()
weight = ista_model.output.weight.data.numpy()
print(weight.shape)
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(20,20))
for i in range(20):
    for j in range(20):
        ax = fig.add_subplot(20, 20, i*20+j+1)
        # note: this draws weight column 1 in every subplot; to see all 400
        # filters it would need to index column i*20+j instead
        ax.imshow(weight[:, 1].reshape((28, 28)))
        ax.axis('off')
# plt.close(fig)
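As a side note, the soft_thresholding_ step above implements the proximal operator of the L1 norm, S_alpha(x) = sign(x) * max(|x| - alpha, 0). A quick sanity check of the relu form used in the code (a sketch, not part of the original post):
import torch
import torch.nn.functional as F
x = torch.tensor([-2.0, -0.3, 0.0, 0.3, 2.0])
alpha = 0.5
relu_form = F.relu(x - alpha) - F.relu(-x - alpha)                 # form used in the code
sign_form = torch.sign(x) * torch.clamp(x.abs() - alpha, min=0.0)  # textbook definition
print(relu_form)                             # tensor([-1.5000, 0.0000, 0.0000, 0.0000, 1.5000])
print(torch.allclose(relu_form, sign_form))  # True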

Binary DenseNet 121 Classifier only predicting positive with probability >0.5

I borrowed code from this GitHub repo for training a DenseNet-121: https://github.com/gaetandi/cheXpert/blob/master/cheXpert_final.ipynb
The GitHub code is for 14-class classification on the CheXpert chest X-ray dataset. I've revised it for binary classification.
# initialize and load the model
pathModel = "/ds2/images/model_ones_2epoch_densenet.tar"#"m-epoch0-07032019-213933.pth.tar"
I initialize the 14 class model so I can use the pretrained weights:
model = DenseNet121(nnClassCount).cuda()
model = torch.nn.DataParallel(model).cuda()
modelCheckpoint = torch.load(pathModel)
model.load_state_dict(modelCheckpoint['state_dict'])
And then convert to binary classification:
nnClassCount = 1
model.module.densenet121.classifier = nn.Sequential(
    nn.Linear(1024, nnClassCount),
    nn.Sigmoid()
).cuda()
model = torch.nn.DataParallel(model).cuda()
And then train via:
batch, losst, losse = CheXpertTrainer.train(model, dataLoaderTrain, dataLoaderVal, nnClassCount, 100, timestampLaunch, checkpoint = None, weight_path = weight_path)
My training data is laid out in a 2-column CSV with headers 'Path' and 'Class-Positive': path locations in the first column and 0 or 1 in the second. I used oversampling when compiling the training list, so the paths in the CSV are roughly a 50/50 split between 0's and 1's, shuffled.
I use livelossplot to monitor training/validation loss and accuracy. My loss plots look as expected, but the accuracy plots are flatlined around 0.5 (which makes sense given the 50/50 data if the net is predicting 100% positive or negative). I'm assuming I'm doing something wrong in how I'm making predictions, but maybe something in the training is incorrect.
For predictions and probabilities I'm running:
varOutput = model(varInput)
_, preds = torch.max(varOutput, 1)
print('varshape: ',varOutput.shape)
probs = torch.sigmoid(varOutput)
My issue: preds are all coming out as 0 and probs are all above 0.5.
Here is the initial code from github:
import os
import numpy as np
import time
import sys
import csv
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as tfunc
from torch.utils.data import Dataset
from torch.utils.data.dataset import random_split
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from PIL import Image
import torch.nn.functional as func
from sklearn.metrics import roc_auc_score
import sklearn.metrics as metrics
import random
use_gpu = torch.cuda.is_available()
# Paths to the files with training, and validation sets.
# Each file contains pairs (path to image, output vector)
pathFileTrain = '../CheXpert-v1.0-small/train.csv'
pathFileValid = '../CheXpert-v1.0-small/valid.csv'
# Neural network parameters:
nnIsTrained = False #pre-trained using ImageNet
nnClassCount = 14 #dimension of the output
# Training settings: batch size, maximum number of epochs
trBatchSize = 64
trMaxEpoch = 3
# Parameters related to image transforms: size of the down-scaled image, cropped image
imgtransResize = (320, 320)
imgtransCrop = 224
# Class names
class_names = ['No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly', 'Lung Opacity',
'Lung Lesion', 'Edema', 'Consolidation', 'Pneumonia', 'Atelectasis', 'Pneumothorax',
'Pleural Effusion', 'Pleural Other', 'Fracture', 'Support Devices']
class CheXpertDataSet(Dataset):
    def __init__(self, image_list_file, transform=None, policy="ones"):
        """
        image_list_file: path to the file containing images with corresponding labels.
        transform: optional transform to be applied on a sample.
        policy: name of the policy with regard to the uncertain labels.
        """
        image_names = []
        labels = []
        with open(image_list_file, "r") as f:
            csvReader = csv.reader(f)
            next(csvReader, None)
            k = 0
            for line in csvReader:
                k += 1
                image_name = line[0]
                label = line[5:]
                for i in range(14):
                    if label[i]:
                        a = float(label[i])
                        if a == 1:
                            label[i] = 1
                        elif a == -1:
                            if policy == "ones":
                                label[i] = 1
                            elif policy == "zeroes":
                                label[i] = 0
                            else:
                                label[i] = 0
                        else:
                            label[i] = 0
                    else:
                        label[i] = 0
                image_names.append('../' + image_name)
                labels.append(label)
        self.image_names = image_names
        self.labels = labels
        self.transform = transform
    def __getitem__(self, index):
        """Take the index of an item and return the image and its labels"""
        image_name = self.image_names[index]
        image = Image.open(image_name).convert('RGB')
        label = self.labels[index]
        if self.transform is not None:
            image = self.transform(image)
        return image, torch.FloatTensor(label)
    def __len__(self):
        return len(self.image_names)
#TRANSFORM DATA
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
transformList = []
#transformList.append(transforms.Resize(imgtransCrop))
transformList.append(transforms.RandomResizedCrop(imgtransCrop))
transformList.append(transforms.RandomHorizontalFlip())
transformList.append(transforms.ToTensor())
transformList.append(normalize)
transformSequence=transforms.Compose(transformList)
#LOAD DATASET
dataset = CheXpertDataSet(pathFileTrain ,transformSequence, policy="ones")
datasetTest, datasetTrain = random_split(dataset, [500, len(dataset) - 500])
datasetValid = CheXpertDataSet(pathFileValid, transformSequence)
# Possible issues: overlapping patients and identical transforms?
dataLoaderTrain = DataLoader(dataset=datasetTrain, batch_size=trBatchSize, shuffle=True, num_workers=24, pin_memory=True)
dataLoaderVal = DataLoader(dataset=datasetValid, batch_size=trBatchSize, shuffle=False, num_workers=24, pin_memory=True)
dataLoaderTest = DataLoader(dataset=datasetTest, num_workers=24, pin_memory=True)
class CheXpertTrainer():
    def train(model, dataLoaderTrain, dataLoaderVal, nnClassCount, trMaxEpoch, launchTimestamp, checkpoint):
        #SETTINGS: OPTIMIZER & SCHEDULER
        optimizer = optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999), eps=1e-08, weight_decay=1e-5)
        #SETTINGS: LOSS
        loss = torch.nn.BCELoss(size_average=True)
        #LOAD CHECKPOINT
        if checkpoint != None and use_gpu:
            modelCheckpoint = torch.load(checkpoint)
            model.load_state_dict(modelCheckpoint['state_dict'])
            optimizer.load_state_dict(modelCheckpoint['optimizer'])
        #TRAIN THE NETWORK
        lossMIN = 100000
        for epochID in range(0, trMaxEpoch):
            timestampTime = time.strftime("%H%M%S")
            timestampDate = time.strftime("%d%m%Y")
            timestampSTART = timestampDate + '-' + timestampTime
            batchs, losst, losse = CheXpertTrainer.epochTrain(model, dataLoaderTrain, optimizer, trMaxEpoch, nnClassCount, loss)
            lossVal = CheXpertTrainer.epochVal(model, dataLoaderVal, optimizer, trMaxEpoch, nnClassCount, loss)
            timestampTime = time.strftime("%H%M%S")
            timestampDate = time.strftime("%d%m%Y")
            timestampEND = timestampDate + '-' + timestampTime
            if lossVal < lossMIN:
                lossMIN = lossVal
                torch.save({'epoch': epochID + 1, 'state_dict': model.state_dict(), 'best_loss': lossMIN,
                            'optimizer': optimizer.state_dict()},
                           'm-epoch'+str(epochID)+'-' + launchTimestamp + '.pth.tar')
                print('Epoch [' + str(epochID + 1) + '] [save] [' + timestampEND + '] loss= ' + str(lossVal))
            else:
                print('Epoch [' + str(epochID + 1) + '] [----] [' + timestampEND + '] loss= ' + str(lossVal))
        return batchs, losst, losse
    #--------------------------------------------------------------------------------
    def epochTrain(model, dataLoader, optimizer, epochMax, classCount, loss):
        batch = []
        losstrain = []
        losseval = []
        model.train()
        for batchID, (varInput, target) in enumerate(dataLoaderTrain):
            varTarget = target.cuda(non_blocking=True)
            #varTarget = target.cuda()
            varOutput = model(varInput)
            lossvalue = loss(varOutput, varTarget)
            optimizer.zero_grad()
            lossvalue.backward()
            optimizer.step()
            l = lossvalue.item()
            losstrain.append(l)
            if batchID % 35 == 0:
                print(batchID//35, "% batches computed")
                #Fill three arrays to see the evolution of the loss
                batch.append(batchID)
                le = CheXpertTrainer.epochVal(model, dataLoaderVal, optimizer, trMaxEpoch, nnClassCount, loss).item()
                losseval.append(le)
                print(batchID)
                print(l)
                print(le)
        return batch, losstrain, losseval
    #--------------------------------------------------------------------------------
    def epochVal(model, dataLoader, optimizer, epochMax, classCount, loss):
        model.eval()
        lossVal = 0
        lossValNorm = 0
        with torch.no_grad():
            for i, (varInput, target) in enumerate(dataLoaderVal):
                target = target.cuda(non_blocking=True)
                varOutput = model(varInput)
                losstensor = loss(varOutput, target)
                lossVal += losstensor
                lossValNorm += 1
        outLoss = lossVal / lossValNorm
        return outLoss
    #--------------------------------------------------------------------------------
    #---- Computes area under ROC curve
    #---- dataGT - ground truth data
    #---- dataPRED - predicted data
    #---- classCount - number of classes
    def computeAUROC(dataGT, dataPRED, classCount):
        outAUROC = []
        datanpGT = dataGT.cpu().numpy()
        datanpPRED = dataPRED.cpu().numpy()
        for i in range(classCount):
            try:
                outAUROC.append(roc_auc_score(datanpGT[:, i], datanpPRED[:, i]))
            except ValueError:
                pass
        return outAUROC
    #--------------------------------------------------------------------------------
    def test(model, dataLoaderTest, nnClassCount, checkpoint, class_names):
        cudnn.benchmark = True
        if checkpoint != None and use_gpu:
            modelCheckpoint = torch.load(checkpoint)
            model.load_state_dict(modelCheckpoint['state_dict'])
        if use_gpu:
            outGT = torch.FloatTensor().cuda()
            outPRED = torch.FloatTensor().cuda()
        else:
            outGT = torch.FloatTensor()
            outPRED = torch.FloatTensor()
        model.eval()
        with torch.no_grad():
            for i, (input, target) in enumerate(dataLoaderTest):
                target = target.cuda()
                outGT = torch.cat((outGT, target), 0).cuda()
                bs, c, h, w = input.size()
                varInput = input.view(-1, c, h, w)
                out = model(varInput)
                outPRED = torch.cat((outPRED, out), 0)
        aurocIndividual = CheXpertTrainer.computeAUROC(outGT, outPRED, nnClassCount)
        aurocMean = np.array(aurocIndividual).mean()
        print('AUROC mean ', aurocMean)
        for i in range(0, len(aurocIndividual)):
            print(class_names[i], ' ', aurocIndividual[i])
        return outGT, outPRED
class DenseNet121(nn.Module):
    """Model modified.
    The architecture of our model is the same as standard DenseNet121
    except the classifier layer, which has an additional sigmoid function.
    """
    def __init__(self, out_size):
        super(DenseNet121, self).__init__()
        self.densenet121 = torchvision.models.densenet121(pretrained=True)
        num_ftrs = self.densenet121.classifier.in_features
        self.densenet121.classifier = nn.Sequential(
            nn.Linear(num_ftrs, out_size),
            nn.Sigmoid()
        )
    def forward(self, x):
        x = self.densenet121(x)
        return x
# initialize and load the model
model = DenseNet121(nnClassCount).cuda()
model = torch.nn.DataParallel(model).cuda()
timestampTime = time.strftime("%H%M%S")
timestampDate = time.strftime("%d%m%Y")
timestampLaunch = timestampDate + '-' + timestampTime
batch, losst, losse = CheXpertTrainer.train(model, dataLoaderTrain, dataLoaderVal, nnClassCount, trMaxEpoch, timestampLaunch, checkpoint = None)
print("Model trained")
It looks like you have adapted the training correctly for binary classification, but the prediction wasn't adapted: you are still treating it as if it were a multi-class prediction.
The output of your model (varOutput) has size (batch_size, 1), since there is only one class. The maximum across that dimension will therefore always be index 0, because that is the only class available; there is no separate class for 1.
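A quick illustration of that (with a made-up batch of 4):
import torch
out = torch.randn(4, 1)          # shaped like the binary model's output: (batch_size, 1)
_, preds = torch.max(out, 1)     # argmax over a dimension of size 1
print(preds)                     # tensor([0, 0, 0, 0]) -- always index 0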
This single class represents both cases (0 and 1), so you can consider it the probability of the sample being positive (1). To get the distinct value of either 0 or 1, you simply use a threshold of 0.5, so everything below that receives class 0 and everything above it class 1. This can easily be done with torch.round.
But you also have another problem: you're applying the sigmoid function twice in a row, once in the classifier (nn.Sigmoid()) and then again afterwards (torch.sigmoid(varOutput)). That is problematic because the first sigmoid already produces positive values, and sigmoid of any positive number is above sigmoid(0) = 0.5, hence all your probabilities end up over 0.5.
The outputs of your model are already the probabilities; the only thing left is to round them:
probs = model(varInput)
# The .squeeze(1) is to get rid of the singular class dimension
preds = torch.round(probs).squeeze(1)
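To see why the double sigmoid pushes everything above 0.5, here is a quick check with made-up values standing in for the classifier's (already sigmoided) outputs:
import torch
p = torch.tensor([0.01, 0.50, 0.99])  # nn.Sigmoid() outputs, all in (0, 1)
print(torch.sigmoid(p))               # tensor([0.5025, 0.6225, 0.7291]) -- all above 0.5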

Can I export a tensorflow summary to CSV?

Is there a way to extract scalar summaries to CSV (preferably from within tensorboard) from tfevents files?
Example code
The following code generates tfevent files in a summary_dir within the same directory. Suppose you let it run and you find something interesting. You want to get the raw data for further investigation. How would you do that?
#!/usr/bin/env python
"""A very simple MNIST classifier."""
import argparse
import sys
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
ce_with_logits = tf.nn.softmax_cross_entropy_with_logits
FLAGS = None
def inference(x):
    """
    Build the inference graph.
    Parameters
    ----------
    x : placeholder
    Returns
    -------
    Output tensor with the computed logits.
    """
    W = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))
    y = tf.matmul(x, W) + b
    return y

def loss(logits, labels):
    """
    Calculate the loss from the logits and the labels.
    Parameters
    ----------
    logits : Logits tensor, float - [batch_size, NUM_CLASSES].
    labels : Labels tensor, int32 - [batch_size]
    """
    cross_entropy = tf.reduce_mean(ce_with_logits(labels=labels,
                                                  logits=logits))
    return cross_entropy

def training(loss, learning_rate=0.5):
    """
    Set up the training Ops.
    Parameters
    ----------
    loss : Loss tensor, from loss().
    learning_rate : The learning rate to use for gradient descent.
    Returns
    -------
    train_op: The Op for training.
    """
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_step = optimizer.minimize(loss)
    return train_step

def main(_):
    # Import data
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])
    y = inference(x)
    # Define loss and optimizer
    y_ = tf.placeholder(tf.float32, [None, 10])
    loss_ = loss(logits=y, labels=y_)
    train_step = training(loss_)
    # Test trained model
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    with tf.name_scope('accuracy'):
        tf.summary.scalar('accuracy', accuracy)
    merged = tf.summary.merge_all()
    sess = tf.InteractiveSession()
    train_writer = tf.summary.FileWriter('summary_dir/train', sess.graph)
    test_writer = tf.summary.FileWriter('summary_dir/test', sess.graph)
    tf.global_variables_initializer().run()
    for train_step_i in range(100000):
        if train_step_i % 100 == 0:
            summary, acc = sess.run([merged, accuracy],
                                    feed_dict={x: mnist.test.images,
                                               y_: mnist.test.labels})
            test_writer.add_summary(summary, train_step_i)
            summary, acc = sess.run([merged, accuracy],
                                    feed_dict={x: mnist.train.images,
                                               y_: mnist.train.labels})
            train_writer.add_summary(summary, train_step_i)
        batch_xs, batch_ys = mnist.train.next_batch(100)
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
    print(sess.run(accuracy, feed_dict={x: mnist.test.images,
                                        y_: mnist.test.labels}))

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir',
                        type=str,
                        default='/tmp/tensorflow/mnist/input_data',
                        help='Directory for storing input data')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
While the answer here does what was asked from within TensorBoard, it only allows downloading a CSV for a single run of a single tag.
If you have, for example, 10 tags and 20 runs (which is not much at all), you would need to do the above step 200 times (that alone would probably take you more than an hour).
If you then, for some reason, wanted to actually do something with the data of all runs for a single tag, you would need to write some awkward CSV accumulation script or copy everything by hand (which would probably cost you more than a day).
Therefore I would like to add a solution that extracts one CSV file per tag, with all runs contained. Column headers are the run path names and row indices are the run step numbers.
import os
import numpy as np
import pandas as pd
from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
def tabulate_events(dpath):
    summary_iterators = [EventAccumulator(os.path.join(dpath, dname)).Reload() for dname in os.listdir(dpath)]
    tags = summary_iterators[0].Tags()['scalars']
    for it in summary_iterators:
        assert it.Tags()['scalars'] == tags
    out = defaultdict(list)
    steps = []
    for tag in tags:
        steps = [e.step for e in summary_iterators[0].Scalars(tag)]
        for events in zip(*[acc.Scalars(tag) for acc in summary_iterators]):
            assert len(set(e.step for e in events)) == 1
            out[tag].append([e.value for e in events])
    return out, steps

def to_csv(dpath):
    dirs = os.listdir(dpath)
    d, steps = tabulate_events(dpath)
    tags, values = zip(*d.items())
    np_values = np.array(values)
    for index, tag in enumerate(tags):
        df = pd.DataFrame(np_values[index], index=steps, columns=dirs)
        df.to_csv(get_file_path(dpath, tag))

def get_file_path(dpath, tag):
    file_name = tag.replace("/", "_") + '.csv'
    folder_path = os.path.join(dpath, 'csv')
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    return os.path.join(folder_path, file_name)

if __name__ == '__main__':
    path = "path_to_your_summaries"
    to_csv(path)
My solution builds upon: https://stackoverflow.com/a/48774926/2230045
EDIT:
I created a more sophisticated version and released it on GitHub: https://github.com/Spenhouet/tensorboard-aggregator
This version aggregates multiple tensorboard runs and is able to save the aggregates to a new tensorboard summary or as a .csv file.
Just check the "Data download links" option on the upper-left in TensorBoard, and then click on the "CSV" button that will appear under your scalar summary.
Here is my solution, which builds on the previous solutions but can scale up.
import os
import numpy as np
import pandas as pd
from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
def tabulate_events(dpath):
    final_out = {}
    for dname in os.listdir(dpath):
        print(f"Converting run {dname}", end="")
        ea = EventAccumulator(os.path.join(dpath, dname)).Reload()
        tags = ea.Tags()['scalars']
        out = {}
        for tag in tags:
            tag_values = []
            wall_time = []
            steps = []
            for event in ea.Scalars(tag):
                tag_values.append(event.value)
                wall_time.append(event.wall_time)
                steps.append(event.step)
            out[tag] = pd.DataFrame(data=dict(zip(steps, np.array([tag_values, wall_time]).transpose())),
                                    columns=steps, index=['value', 'wall_time'])
        if len(tags) > 0:
            df = pd.concat(out.values(), keys=out.keys())
            df.to_csv(f'{dname}.csv')
            print("- Done")
            final_out[dname] = df  # only record runs that actually had scalars
        else:
            print('- No scalars to write')
    return final_out
if __name__ == '__main__':
    path = "your/path/here"
    steps = tabulate_events(path)
    pd.concat(steps.values(), keys=steps.keys()).to_csv('all_result.csv')
Very minimal example:
import pandas as pd
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
log_dir = "lightning_logs/version_1"
event_accumulator = EventAccumulator(log_dir)
event_accumulator.Reload()
events = event_accumulator.Scalars("train_loss")
x = [x.step for x in events]
y = [x.value for x in events]
df = pd.DataFrame({"step": x, "train_loss": y})
df.to_csv("train_loss.csv")
print(df)
   step  train_loss
0     0  700.491516
1     1  163.593246
2     2  146.365448
3     3  153.830215
...
Plotting loss vs epochs example:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
log_dir = "lightning_logs/version_1"
y_key = "val_loss"
event_accumulator = EventAccumulator(log_dir)
event_accumulator.Reload()
steps = {x.step for x in event_accumulator.Scalars("epoch")}
x = list(range(len(steps)))
y = [x.value for x in event_accumulator.Scalars(y_key) if x.step in steps]
df = pd.DataFrame({"epoch": x, y_key: y})
df.to_csv(f"{y_key}.csv")
fig, ax = plt.subplots()
sns.lineplot(data=df, x="epoch", y=y_key)
fig.savefig("plot.png", dpi=300)
Just to add to Spen's answer, in case you want to export the data when you have varying numbers of steps: this will make one large CSV file. You might need to change the keys around for it to work for you.
import os
import glob
import numpy as np
import pandas as pd
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

listOutput = glob.glob("*/")
listDF = []
for tb_output_folder in listOutput:
    print(tb_output_folder)
    x = EventAccumulator(path=tb_output_folder)
    x.Reload()
    x.FirstEventTimestamp()
    keys = ['loss', 'mean_absolute_error', 'val_loss', 'val_mean_absolute_error']
    steps = [e.step for e in x.Scalars(keys[0])]
    wall_time = [e.wall_time for e in x.Scalars(keys[0])]
    n_steps = len(steps)
    listRun = [tb_output_folder] * n_steps
    data = np.zeros((n_steps, len(keys)))
    for i in range(len(keys)):
        data[:, i] = [e.value for e in x.Scalars(keys[i])]
    printOutDict = {keys[0]: data[:, 0], keys[1]: data[:, 1],
                    keys[2]: data[:, 2], keys[3]: data[:, 3]}
    printOutDict['Name'] = listRun
    DF = pd.DataFrame(data=printOutDict)
    listDF.append(DF)
df = pd.concat(listDF)
df.to_csv('Output.csv')