How can I replace load_digits() with an MNIST dataset that comes as a CSV file, in code that currently uses sklearn's load_digits()?

from sklearn.datasets import load_digits
from sklearn.preprocessing import MinMaxScaler
digits = load_digits() # sklearn's small 8x8 digits dataset (not the full MNIST)
X, y = digits.data, digits.target
scaler = MinMaxScaler()
scaler.fit(X)
X = scaler.transform(X)
X_train, X_test = X[:-config.LEN_TEST], X[-config.LEN_TEST:]
y_train, y_test = y[:-config.LEN_TEST], y[-config.LEN_TEST:]
X_train = X_train[:number_of_samples]
y_train = y_train[:number_of_samples]
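Assuming the CSV follows the common MNIST-in-CSV layout (first column is the label, the remaining 784 columns are pixel values -- check your file, this is an assumption), a minimal sketch of the replacement could look like this; the file name is a placeholder, and config.LEN_TEST / number_of_samples are the variables already used above:

import pandas as pd
from sklearn.preprocessing import MinMaxScaler

df = pd.read_csv("mnist_train.csv")            # placeholder path to your CSV
y = df.iloc[:, 0].to_numpy()                   # assumed: label in the first column
X = df.iloc[:, 1:].to_numpy(dtype="float32")   # assumed: 784 pixel columns follow

scaler = MinMaxScaler()
X = scaler.fit_transform(X)

X_train, X_test = X[:-config.LEN_TEST], X[-config.LEN_TEST:]
y_train, y_test = y[:-config.LEN_TEST], y[-config.LEN_TEST:]
X_train = X_train[:number_of_samples]
y_train = y_train[:number_of_samples]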

Related

Update all the weights in every layer via a simple Neural Network using PyTorch

I have the following neural network.
I'm unable to update every layer when calling backward().
Why are the weights in each layer not being learned?
from typing import Iterable
import os
import torch
from torch import nn
import torch.nn.functional as F
from torch.optim import SGD
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import seaborn as sns

class BasicNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.w00 = nn.Parameter(torch.tensor(1.7), requires_grad=True)
        self.b00 = nn.Parameter(torch.tensor(-0.85), requires_grad=True)
        self.w01 = nn.Parameter(torch.tensor(-40.8), requires_grad=True)
        self.w10 = nn.Parameter(torch.tensor(12.6), requires_grad=True)
        self.b10 = nn.Parameter(torch.tensor(0.0), requires_grad=True)
        self.w11 = nn.Parameter(torch.tensor(2.7), requires_grad=True)
        self.final_bias = nn.Parameter(torch.tensor(0.), requires_grad=True)

    def forward(self, input):
        input_to_top_relu = input * self.w00 + self.b00
        top_relu_output = F.relu(input_to_top_relu)
        scaled_top_relu_output = top_relu_output * self.w01
        input_to_bottom_relu = input * self.w10 + self.b10
        bottom_relu_output = F.relu(input_to_bottom_relu)
        scaled_bottom_relu_output = bottom_relu_output * self.w11
        input_to_final_relu = scaled_top_relu_output + scaled_bottom_relu_output + self.final_bias
        output = F.relu(input_to_final_relu)
        return output

inputs = torch.tensor([0., 0.5, 1.])
labels = torch.tensor([0., 1., 0.])

model = BasicNN()
optimizer = SGD(model.parameters(), lr=0.1)
print("Final bias, before optimization: " + str(model.final_bias.data) + "\n")

for epoch in range(100):
    total_loss = 0
    for iteration in range(len(inputs)):
        input_i = inputs[iteration]
        label_i = labels[iteration]
        output_i = model(input_i)
        loss = (output_i - label_i)**2
        loss.backward()
        total_loss += float(loss)
    if (total_loss < 0.0001):
        print("Num steps: " + str(epoch))
        break
    optimizer.step()
    optimizer.zero_grad()
    print("Total loss: " + str(total_loss))

print("Final bias, after optimization: " + str(model.final_bias.data))

How to use the 1st derivative of a network for training a data-driven physics model approximating a function? Using PyTorch

I am trying to reproduce the neural network proposed in this paper using PyTorch: A mechanics-informed artificial neural network approach in data driven constitutive modeling. The goal of the neural network is to approximate a nonlinear function that maps the input to the output, but using the derivative of the network as the actual output. That means that the network learns the integral of the function.
My approach to coding this with PyTorch is at the end of this question (it would be great if you could review that as well, as the mistake might be somewhere else and I don't see it).
The whole idea of the paper is as follows:
Input: x_0; Output: y
x_1 = activation(w_input * x_0 + b)
for l = 2, ..., N_l do
    x_l = activation(softplus(Q; alpha) * x_(l-1) + b_l + w_skip_l * x_0)
end for
y = softplus(Q, alpha) * x_(N_l-1) + f(x_0)
with the activation function being essentially the softplus function squared:
class LearnedSoftPlusSquared(torch.nn.Module):
    def __init__(self, init_beta=1.0, threshold=20):
        super().__init__()
        self.log_beta = torch.nn.Parameter(torch.tensor(float(init_beta)).log())
        self.threshold = 20
    def forward(self, x):
        beta = self.log_beta.exp()
        beta_squared = beta**2
        beta_squared_x = beta_squared * x
        return torch.where(beta_squared_x < 20, 0.5 * ((torch.log1p(beta_squared_x.exp()) / beta_squared)**2), x)
And the loss function (better representation in the linked paper):
$\text{Loss} = \sum_{\text{samples}} \sum_{k} \left( \frac{ \left.\partial \hat{y}/\partial x_k\right|_{x} - \left.\partial \hat{y}/\partial x_k\right|_{x=0} - y_k }{ \sigma_k } \right)^2$
In code:
def final_loss_func(gradients_output_wrt_input, gradients_corrector, target):
    sigma_k = torch.std(target, dim = 0)
    loss = torch.mean(torch.sum(((gradients_output_wrt_input - gradients_corrector - target)/sigma_k)**2, dim = 1))
    return loss
All in all this network has the parameters (alpha, A, Q_l, W_l, b_l, beta).
I can't really figure out my mistake, although I believe the problem lies in my implementation of the loss function, which uses the derivative of the network and a correction term (which is also based on the derivative of the network).
Right now, the loss starts out very high and decreases to a certain point, from which it rises again.
It is also not possible to overfit, even with very small examples. In my opinion that is caused by the correction term which is applied in the loss function.
My questions regarding this network are:
How does one implement the double differentiation properly?
I tried to get the gradient of the output with respect to the input by using autograd, same procedure for the correction term, enabling the option create_graph. Those quantities were then used in the loss function, which is then used with the backward() method. My code for the training routine looks like this:
X_corr = torch.zeros(1, input_dim, requires_grad = True).type(torch.FloatTensor)
Y_corr = torch.zeros_like(X_corr)

def final_loss_func(gradients_output_wrt_input, gradients_corrector, target):
    sigma_k = torch.std(target, dim = 0)
    loss = torch.mean(torch.sum(((gradients_output_wrt_input - gradients_corrector - target)/sigma_k)**2, dim = 1))
    return loss

model = MyModule()
learning_rate = 0.00001
optimizer = torch.optim.Adagrad(model.parameters(), lr=learning_rate)
epochs = 20000+1

for epoch in range(epochs):
    optimizer.zero_grad()
    pred = model(X)
    pred_corr = model(X_corr)
    X_gradients = torch.autograd.grad(pred, X, retain_graph=True, grad_outputs=torch.ones_like(pred), create_graph = True)[0]
    X_corr_gradients = torch.autograd.grad(pred_corr, X_corr, retain_graph=True, grad_outputs=torch.ones_like(pred_corr), create_graph = True)[0]
    X_corr_gradients_no_grad = X_corr_gradients.detach()
    loss = final_loss_func(X_gradients, X_corr_gradients_no_grad, Y)
    optimizer.step()
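For comparison, a minimal sketch of how the double differentiation is usually wired up: the first derivative is taken with torch.autograd.grad(..., create_graph=True) so that it stays part of the graph, and loss.backward() is called before optimizer.step() (in the full code further down, step() comes before backward(), so each update uses the gradients of the previous iteration). The names model, X, X_corr, Y and final_loss_func are the ones defined in this question:

optimizer = torch.optim.Adagrad(model.parameters(), lr=1e-5)
for epoch in range(epochs):
    optimizer.zero_grad()
    pred = model(X)
    pred_corr = model(X_corr)
    # first derivative of the output w.r.t. the input, kept differentiable
    X_gradients = torch.autograd.grad(pred, X,
                                      grad_outputs=torch.ones_like(pred),
                                      create_graph=True)[0]
    X_corr_gradients = torch.autograd.grad(pred_corr, X_corr,
                                           grad_outputs=torch.ones_like(pred_corr),
                                           create_graph=True)[0]
    # detached as in the original; drop .detach() if the corrector should also receive gradients
    loss = final_loss_func(X_gradients, X_corr_gradients.detach(), Y)
    loss.backward()   # the second differentiation happens here, through create_graph=True
    optimizer.step()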
Also: how do I add a convex function like $f(x) = x^T A^T A x$, which is proposed in the paper, to the output, while keeping the Matrix A as a learnable parameter?
The problem here is that the dimension of the input is $batchSize \times sampleDimension$, leading to $f(x)$ being a $batchSize \times batchSize$ matrix, although it must be a $batchSize \times 1$ vector. Applying $f(x)$ to each sample separately delivers the right dimension, but also makes it impossible to learn A as a parameter, because it makes the differentiation impossible. My code for that looks like this:
class ConvexFunction(torch.nn.Module):
    def __init__(self, input_size):
        super().__init__()
        A = torch.Tensor(input_size, input_size)
        self.A = torch.nn.Parameter(A)
        nn.init.orthogonal_(self.A)
    def forward(self, x):
        A_T = torch.transpose(self.A, dim0 = 0, dim1 = 1)
        A_T_A = torch.matmul(A_T, self.A)
        result = torch.zeros(x.size(dim = 0), 1)
        for i in range(x.size(dim = 0)):
            result[i] = x[i] @ A_T_A @ x[i]
        return result
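The quadratic form $f(x) = x^T A^T A x$ can also be evaluated for the whole batch at once, which removes the Python loop and keeps A differentiable; a sketch (assuming x has shape $batchSize \times input\_size$, same imports as above):

class ConvexFunctionBatched(torch.nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.A = torch.nn.Parameter(torch.empty(input_size, input_size))
        nn.init.orthogonal_(self.A)
    def forward(self, x):
        Ax = x @ self.A.T                          # (batch, input_size), row i is A @ x_i
        return (Ax ** 2).sum(dim=1, keepdim=True)  # row-wise ||A x||^2 = x^T A^T A x, shape (batch, 1)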
The whole code for the network:
import torch
import torch.nn as nn
import pandas as pd
torch.manual_seed(0)
df = pd.read_csv("Strain_Stress_Pairs_XL_3columns.csv", sep = ',', decimal='.')
x = df.iloc[:,0:3].values
y = df.iloc[:,3:7].values
x = x *9.091
X = torch.from_numpy(x).type(torch.FloatTensor)
Y = torch.from_numpy(y).type(torch.FloatTensor)
X.requires_grad_(True)
means = Y.mean(dim=1, keepdim=True)
stds = Y.std(dim=1, keepdim=True)
for i in range(stds.size(dim = 0)):
    if stds[i] < 1e-20:
        stds[i] = 1e-20
Y = (Y - means) / stds
X_corr = torch.zeros(1,3, requires_grad = True).type(torch.FloatTensor)
Y_corr = torch.zeros_like(X_corr)
class ConvexFunction(torch.nn.Module):
    def __init__(self, input_size):
        super().__init__()
        A = torch.Tensor(input_size, input_size)
        self.A = torch.nn.Parameter(A)
        nn.init.orthogonal_(self.A)
    def forward(self, x):
        A_T = torch.transpose(self.A, dim0 = 0, dim1 = 1)
        A_T_A = torch.matmul(A_T, self.A)
        result = torch.zeros(x.size(dim = 0), 1)
        for i in range(x.size(dim = 0)):
            result[i] = x[i] @ A_T_A @ x[i]
        return result
class LearnedSoftPlusSquared(torch.nn.Module):
    def __init__(self, init_beta=1.0, threshold=20):
        super().__init__()
        self.log_beta = torch.nn.Parameter(torch.tensor(float(init_beta)).log())
        self.threshold = 20
    def forward(self, x):
        beta = self.log_beta.exp()
        beta_squared = beta**2
        beta_squared_x = beta_squared * x
        return torch.where(beta_squared_x < 20, 0.5 * ((torch.log1p(beta_squared_x.exp()) / beta_squared)**2), x)
class SoftPlusLinear(nn.Module):
    def __init__(self, input_size, output_size, init_alpha=1.0, threshold=20):
        super().__init__()
        w = torch.Tensor(output_size, input_size)
        self.w = nn.Parameter(w)
        nn.init.orthogonal_(self.w)
        b = torch.Tensor(output_size).fill_(0)
        self.b = nn.Parameter(b)
        self.log_alpha = torch.nn.Parameter(torch.tensor(float(init_alpha)).log())
        self.threshold = 20
    def forward(self, x):
        alpha = self.log_alpha.exp()
        alpha = alpha**2
        alpha_weight = alpha * self.w
        w_new = torch.log1p(alpha_weight.exp()) / alpha
        return nn.functional.linear(x, w_new, bias=self.b)
class MyModule(nn.Module):
    def __init__(self):
        super().__init__()
        input_size = 3
        out_l1 = 9
        out_l2 = 9
        out_l3 = 6
        out_l4 = 3
        output_size = 1
        w_skip_1 = torch.Tensor(input_size, out_l2)
        self.w_skip_1 = nn.Parameter(w_skip_1)
        nn.init.orthogonal_(self.w_skip_1)
        w_skip_2 = torch.Tensor(input_size, out_l3)
        self.w_skip_2 = nn.Parameter(w_skip_2)
        nn.init.orthogonal_(self.w_skip_2)
        w_skip_3 = torch.Tensor(input_size, out_l4)
        self.w_skip_3 = nn.Parameter(w_skip_3)
        nn.init.orthogonal_(self.w_skip_3)
        matrix_conv = torch.Tensor(input_size, 1)
        self.matrix_conv = nn.Parameter(matrix_conv)
        nn.init.orthogonal_(self.matrix_conv)
        self.convex_layer = ConvexFunction(input_size)
        self.l1 = nn.Linear(input_size, out_l1)
        self.a1 = LearnedSoftPlusSquared()
        self.l2 = SoftPlusLinear(out_l1, out_l2)
        self.a2 = LearnedSoftPlusSquared()
        self.l3 = SoftPlusLinear(out_l2, out_l3)
        self.a3 = LearnedSoftPlusSquared()
        self.l4 = SoftPlusLinear(out_l3, output_size)
    def forward(self, x):
        x_in = x
        x = self.l1(x)
        x = self.a1(x)
        x = self.l2(x)
        x = torch.add(x, torch.mm(x_in, self.w_skip_1))
        x = self.a2(x)
        x = self.l3(x)
        x = torch.add(x, torch.mm(x_in, self.w_skip_2))
        x = self.a3(x)
        x = self.l4(x)  # + self.convex_layer(x_in)  convex function not working!
        return x
def final_loss_func(gradients_output_wrt_input, gradients_corrector, target):
    sigma_k = torch.std(target, dim = 0)
    loss = torch.mean(torch.sum(((gradients_output_wrt_input - gradients_corrector - target)/sigma_k)**2, dim = 1))
    return loss

model = MyModule()
learning_rate = 0.00001
optimizer = torch.optim.Adagrad(model.parameters(), lr=learning_rate)
epochs = 20000+1

for epoch in range(epochs):
    optimizer.zero_grad()
    pred = model(X)
    pred_corr = model(X_corr)
    X_gradients = torch.autograd.grad(pred, X, retain_graph=True, grad_outputs=torch.ones_like(pred), create_graph = True)[0]
    X_corr_gradients = torch.autograd.grad(pred_corr, X_corr, retain_graph=True, grad_outputs=torch.ones_like(pred_corr), create_graph = True)[0]
    X_corr_gradients_no_grad = X_corr_gradients.detach()
    loss = final_loss_func(X_gradients, X_corr_gradients_no_grad, Y)
    optimizer.step()
    loss.backward()
    print(loss)
Thank you for your time.

Traceback : File "lane_segmentation.py", line 132, in <module> X, y = train_generator.__getitem__(0) line 107, in __getitem__ _img

I was getting this error:
Traceback (most recent call last):
  File "/Users/dilipreddy/Downloads/Lane-Segmentation-master/lane_segmentation.py", line 132, in <module>
    X, y = train_generator.__getitem__(0)
  File "/Users/dilipreddy/Downloads/Lane-Segmentation-master/lane_segmentation.py", line 107, in __getitem__
    _img, _mask = self.__load__(id_name)
  File "/Users/dilipreddy/Downloads/Lane-Segmentation-master/lane_segmentation.py", line 80, in __load__
    id_name_actual, text, _ = id_name.split('.')
ValueError: not enough values to unpack (expected 3, got 2)
(base) dilipreddy@Dilips-MacBook-Pro Lane-Segmentation-master %
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import random
import sys
import tensorflow as tf
import keras
from keras import backend as K  # assumed import: K is used by dice_coef further down
from keras.layers import *
from keras.models import *
# Part 1 - Data Preprocessing
def get_mask(img_path, label_path):
    label_file = open(label_path, "r")
    if label_file.mode == 'r':
        contents = label_file.read()
    lines_text = contents.split('\n')
    x_coordinate, y_coordinate, lanes = [], [], []
    for line_text in lines_text:
        number_lines = line_text.split(" ")
        number_lines.pop()
        x = list([float(number_lines[i]) for i in range(len(number_lines)) if i % 2 == 0])
        y = list([float(number_lines[i]) for i in range(len(number_lines)) if i % 2 != 0])
        x_coordinate.append(x)
        y_coordinate.append(y)
        lanes.append(set(zip(x, y)))
    lanes.pop()
    img = cv2.imread(img_path)
    mask = np.zeros_like(img)
    # colors = [[255,0,0], [0,255,0], [0,0,255], [255,255,0]]
    colors = [[255, 255, 255], [255, 255, 255], [255, 255, 255], [255, 255, 255]]
    for i in range(len(lanes)):
        cv2.polylines(img, np.int32([list(lanes[i])]), isClosed=False, color=colors[i], thickness=10)
    label = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return label

img = get_mask("data/CULane/driver_161_90frame/06030819_0755.MP4/00000.jpg",
               "data/CULane/driver_161_90frame/06030819_0755.MP4/00000.lines.txt")
plt.imshow(img)
print(img.shape)
from tensorflow.keras.utils import Sequence
class DataGenerator2D(Sequence):
    """Generates data for Keras
    Sequence based data generator. Suitable for building data generator for training and prediction.
    """
    def __init__(self, base_path, img_size=256, batch_size=1, shuffle=True):
        self.base_path = base_path
        self.img_size = img_size
        self.id = os.listdir(os.path.join(base_path, "CULane"))
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Denotes the number of batches per epoch
        :return: number of batches per epoch
        """
        return int(np.ceil(len(self.id) / float(self.batch_size)))

    def __load__(self, id_name):
        id_name_actual, text, _ = id_name.split('.')
        image_path = os.path.join(self.base_path, "images", (id_name_actual + '.' + text + '.jpg'))
        label_path = os.path.join(self.base_path, "labels", (id_name_actual + '.' + text + '.lines.txt'))
        image = cv2.imread(image_path, 1)  # Reading Image in RGB format
        image = cv2.resize(image, (self.img_size, self.img_size))
        # image = cv2.resize(image, (int(img.shape[1]/2), int(img.shape[0]/2)))
        mask = get_mask(image_path, label_path)
        mask = cv2.resize(mask, (self.img_size, self.img_size))
        # mask = cv2.resize(mask, (int(img.shape[1]/2), int(img.shape[0]/2)))
        # Normalizing the image
        image = image / 255.0
        mask = mask / 255.0
        return image, mask

    def __getitem__(self, index):
        if (index + 1) * self.batch_size > len(self.id):
            file_batch = self.id[index * self.batch_size:]
        else:
            file_batch = self.id[index * self.batch_size:(index + 1) * self.batch_size]
        images, masks = [], []
        for id_name in file_batch:
            _img, _mask = self.__load__(id_name)
            images.append(_img)
            masks.append(_mask)
        images = np.array(images)
        masks = np.array(masks)
        return images, masks

    def on_epoch_end(self):
        """Updates indexes after each epoch
        """
        self.indexes = np.arange(len(self.id))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)
train_generator = DataGenerator2D(base_path='data', img_size=256, batch_size=64, shuffle=False)
X, y = train_generator.__getitem__(0)
print(X.shape, y.shape)
fig = plt.figure(figsize=(17, 8))
columns = 4
rows = 3
for i in range(1, columns*rows + 1):
    img = X[i-1]
    fig.add_subplot(rows, columns, i)
    plt.imshow(img)
plt.show()

fig = plt.figure(figsize=(17, 8))
columns = 4
rows = 3
for i in range(1, columns*rows + 1):
    img = y[i-1]
    fig.add_subplot(rows, columns, i)
    plt.imshow(img)
plt.show()
# Part 2 - Model
def dice_coef(y_true, y_pred, smooth=1):
    """
    Dice = (2*|X & Y|)/ (|X|+ |Y|)
         = 2*sum(|A*B|)/(sum(A^2)+sum(B^2))
    ref: https://arxiv.org/pdf/1606.04797v1.pdf
    """
    intersection = K.sum(K.abs(y_true * y_pred), axis=-1)
    return (2. * intersection + smooth) / (K.sum(K.square(y_true), -1) + K.sum(K.square(y_pred), -1) + smooth)

def dice_coef_loss(y_true, y_pred):
    return 1 - dice_coef(y_true, y_pred)

reconstructed_model = tf.keras.models.load_model("pretrained models/UNET-BN-20-0.081170.hdf5",
                                                 custom_objects={'dice_coef_loss': dice_coef_loss})
# Part 3 - Visualization
val_generator = DataGenerator2D('content/data/', img_size=256, batch_size=128, shuffle=True)
X, y = val_generator.__getitem__(10)
print(X.shape, y.shape)
plt.imshow(X[2])
predict = reconstructed_model.predict(X)
print(predict.shape)
img = cv2.cvtColor(predict[2], cv2.COLOR_GRAY2BGR)
plt.imshow(img)
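Regarding the ValueError itself: __load__ assumes every entry returned by os.listdir has three dot-separated parts (name.ext.jpg), but at least one entry has only two, so the unpacking fails. A defensive way to split the name is sketched below; the exact path construction afterwards depends on your actual file naming, which is assumed here:

import os

def split_id(id_name):
    # e.g. "06030819_0755.MP4.jpg" -> ("06030819_0755", "MP4"), "00000.jpg" -> ("00000", "")
    stem, _ext = os.path.splitext(id_name)
    parts = stem.split('.')
    id_name_actual = parts[0]
    text = parts[1] if len(parts) > 1 else ""   # middle token may be missing
    return id_name_actual, text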

plotly Scatter customdata are only NaN

I would like to display additional data while hovering over a curve created by go.Scatter. With the script below, the correct x and y values are shown in the popup, but x^2 and cos are always shown as NaN. I would appreciate any help.
import dash
import dash_html_components as html
import dash_core_components as dcc
import plotly.graph_objects as go
import numpy as np
x = np.mgrid[0.0:10.0:100j]
y = np.sin(x)
fig = go.Figure()
fig.add_trace(go.Scatter(x = x, y = y, line_width = 4,
                         customdata = [x**2, np.cos(x)],
                         hovertemplate = "<br>".join([
                             "x = %{x:,.1f}",
                             "y = %{y:,.1f}",
                             "x^2 = %{customdata[0]:,.1f}",
                             "cos = %{customdata[1]:,.1f}"
                         ])
))
app = dash.Dash()
app.layout = html.Div([dcc.Graph(figure=fig)])
app.run_server()
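The NaNs come from the shape of customdata: Plotly expects one entry (row) per data point, so the two extra arrays have to be stacked column-wise into an (n_points, 2) array rather than passed as a two-element list. The variant below does that with np.stack(..., axis=-1):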
import plotly.graph_objects as go
import numpy as np
x = np.mgrid[0.0:10.0:100j]
y = np.sin(x)
custom_data = np.stack((x**2, np.cos(x)), axis=-1)
fig = go.Figure()
fig.add_trace(go.Scatter(x = x, y = y, line_width = 4))
fig.update_traces(customdata=custom_data,
                  hovertemplate="x: %{x}<br>" +
                                "y: %{y}<br>" +
                                "x**2: %{customdata[0]: .1f}<br>" +
                                "cos: %{customdata[1]: .1f}")
fig.show()

Multiclass classification: Dimension out of range (expected to be in range of [-1, 0], but got 1)

When I use the cross-entropy loss function for multiclass text classification, I get this error:
Dimension out of range (expected to be in range of [-1, 0], but got 1)
This is my code:
def train(model, iterator):
    ...
    for batch in iterator:
        text, text_lengths = batch.Turn
        optimizer.zero_grad()
        predictions = model(text, text_lengths).squeeze(1)
        loss = criterion(predictions, batch.label)
        acc = categorical_accuracy(predictions, batch.label)
        ...
Dataset:
TEXT = data.Field(tokenize = 'spacy', include_lengths = True)
LABEL = data.LabelField(dtype = torch.long)
Forward:
def forward(self, text, text_lengths):
    embedded = self.embedding(text)
    packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths)
    packed_output, (hidden, cell) = self.rnn(packed_embedded)
    hidden = self.dropout(torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1))
    output = self.fc1(hidden)
    output = self.dropout(self.fc2(output))
    return output
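For what it's worth, this error usually means the tensor handed to nn.CrossEntropyLoss (or to an argmax/softmax over dim=1) is one-dimensional: the loss expects logits of shape [batch_size, num_classes] and integer targets of shape [batch_size], so make sure the final linear layer has num_classes outputs and drop the .squeeze(1) on the predictions. A tiny shape check with hypothetical sizes:

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
batch_size, num_classes = 4, 3                         # hypothetical sizes
logits = torch.randn(batch_size, num_classes)          # [batch, num_classes]
labels = torch.randint(0, num_classes, (batch_size,))  # [batch], dtype long
print(criterion(logits, labels))                       # works

# A 1-D tensor (e.g. logits[:, 0], or a squeezed [batch, 1] output) fed to the loss
# or to argmax(dim=1) raises
# "Dimension out of range (expected to be in range of [-1, 0], but got 1)".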