Losses are increasing in binary classification using the gradient descent optimization method

This is my program for binary classification using the gradient descent optimization method. I am not sure about my loss function; the error in my case is increasing when plotted.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

def sigmoid_activation(x):
    return 1.0 / (1 + np.exp(-x))

def predict(testX, W):
    preds = sigmoid_activation(np.dot(testX, W))
    # apply a step function to threshold (=0.5) the outputs to binary class labels
    # start your code here
    p = []
    for i in range(len(preds)):
        if preds[i] < 0.5:
            p.append(0)
        if preds[i] >= 0.5:
            p.append(1)
    return p
epochs = 50
alpha = 0.01
(X, y) = make_moons(n_samples=1000, noise=0.15)
y = y.reshape(y.shape[0], 1)
X = np.c_[X, np.ones((X.shape[0]))]
(trainX, testX, trainY, testY) = train_test_split(X, y, test_size=0.5, random_state=42)
print("[INFO] training...")
W = np.random.randn(X.shape[1], 1)
losses = []
for epoch in np.arange(0, epochs):
    # start your code here
    Z = np.dot(trainX, W)
    yhat = sigmoid_activation(Z)
    error = trainY - yhat
    loss = np.sum(error ** 2)
    losses.append(loss)
    gradient = trainX.T.dot(error) / trainX.shape[0]
    W = W - alpha * gradient  # moving in -ve direction
    # check to see if an update should be displayed
    if epoch == 0 or (epoch + 1) % 5 == 0:
        print("[INFO] epoch={}, loss={:.7f}".format(int(epoch + 1), loss))

# evaluate our model
print("[INFO] evaluating...")
preds = predict(testX, W)
print(classification_report(testY, preds))

# plot the (testing) classification data
plt.style.use("ggplot")
plt.figure()
plt.title("Data")
plt.scatter(testX[:, 0], testX[:, 1], marker="o", c=testY[:, 0], s=30)

# construct a figure that plots the loss over time
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, epochs), losses)
plt.title("Training Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.show()
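For comparison, here is a minimal sketch of the textbook logistic-regression update with a binary cross-entropy loss. This is not the assignment code above; the helper names and the sign convention (error defined as prediction minus target, so subtracting alpha * gradient moves downhill) are assumptions for illustration only.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def bce_loss(y, yhat, eps=1e-12):
    # average binary cross-entropy over the training set
    yhat = np.clip(yhat, eps, 1 - eps)
    return -np.mean(y * np.log(yhat) + (1 - y) * np.log(1 - yhat))

def gradient_step(trainX, trainY, W, alpha=0.01):
    yhat = sigmoid(trainX.dot(W))
    error = yhat - trainY                         # prediction minus target
    grad = trainX.T.dot(error) / trainX.shape[0]  # gradient of the BCE loss
    return W - alpha * grad                       # step in the descent direction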

Related

Why does the loss history go to zero with momentum?

I am trying to implement momentum for a linear regression model. I would like to save my loss at each step and plot it against the iterations, but the loss decreases in the first few epochs and then goes to zero. How do I solve this?
def compute_cost(self, X, y):
    loss = float(np.sum((X.dot(self.theta) - y) ** 2)) / 2
    return loss

def fitmomentum(self, X, y, momentum=0.9):
    X = self.add_ones(X.values)
    self.theta = np.zeros(X.shape[1])
    self.cost_history = np.zeros(self.epoch)
    self.momentum = momentum
    self.velocity = np.zeros(self.epoch)
    self.theta = self.momentum(g, self.velocity)
    for i in range(len(X.T @ (X @ self.theta - y))):
        self.velocity[i] = self.momentum * self.velocity[i] + self.lr * (X.T @ (X @ self.theta - y))[i]
        self.theta[i] += self.velocity[i]
        self.cost_history[i] = self.compute_cost(X, y)
    return self.theta, self.cost_history
This is what I see after printing the loss:
array([4.20078101e+13, 6.06945342e+13, 7.01728125e+17, ...,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00])
This is what the graph looks like:
[plot of epochs against loss history]
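For reference, a minimal sketch of a conventional momentum update for linear regression, where the velocity has the same shape as the parameter vector and is updated once per epoch. This is not the asker's class; the learning rate, epoch count and cost definition are assumptions for illustration only.

import numpy as np

def fit_momentum_sketch(X, y, lr=1e-3, epochs=100, beta=0.9):
    # gradient descent with momentum on the cost 0.5 * ||X theta - y||^2
    theta = np.zeros(X.shape[1])
    velocity = np.zeros_like(theta)       # same shape as theta, not (epochs,)
    cost_history = np.zeros(epochs)
    for t in range(epochs):
        grad = X.T @ (X @ theta - y)      # gradient of the squared-error cost
        velocity = beta * velocity - lr * grad
        theta = theta + velocity
        cost_history[t] = 0.5 * np.sum((X @ theta - y) ** 2)
    return theta, cost_history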

Poor results fine-tuning GoogLeNet, how to improve them?

I've trained GoogLeNet from scratch on the MNIST dataset. It achieved very good results (top-1 accuracy of 99% on the test set).
Now I want to do transfer learning in order to adapt it to the FashionMNIST dataset. For that I'm doing the following:
# Loading trained model on MNIST
googlenet = torch.load('googlenet-mnist.pth')

# Freeze the network
def freeze(net):
    for param in net.parameters():
        param.requires_grad = False
    return net

# Override all the Linear layers and initialize them
# (including the ones that produce auxiliary logits)
def forget_FC(net):
    net.aux1.fc1 = nn.Linear(in_features=net.aux1.fc1.in_features, out_features=net.aux1.fc1.out_features, bias=True)
    net.aux1.fc2 = nn.Linear(in_features=net.aux1.fc2.in_features, out_features=net.aux1.fc2.out_features, bias=True)
    net.aux2.fc1 = nn.Linear(in_features=net.aux2.fc1.in_features, out_features=net.aux2.fc1.out_features, bias=True)
    net.aux2.fc2 = nn.Linear(in_features=net.aux2.fc2.in_features, out_features=net.aux2.fc2.out_features, bias=True)
    # Override the classification layer
    net.fc = nn.Sequential(
        nn.Linear(num_in_features, num_in_features),
        nn.Linear(num_in_features, num_in_features),
        nn.Linear(num_in_features, 10))
    # Initialize weights of the auxiliary logits branches
    torch.nn.init.trunc_normal_(net.aux1.fc1.weight, mean=0.0, std=0.01, a=-2, b=2)
    torch.nn.init.trunc_normal_(net.aux1.fc2.weight, mean=0.0, std=0.01, a=-2, b=2)
    torch.nn.init.trunc_normal_(net.aux2.fc1.weight, mean=0.0, std=0.01, a=-2, b=2)
    torch.nn.init.trunc_normal_(net.aux2.fc2.weight, mean=0.0, std=0.01, a=-2, b=2)
    # Initialize weights of each Linear module in the classification layer
    for module in net.fc.modules():
        if isinstance(module, nn.Linear):
            torch.nn.init.trunc_normal_(module.weight, mean=0.0, std=0.01, a=-2, b=2)
    return net
# The training algorithm
def train(net, train_iter, test_iter, num_epochs, lr, device, plot_title, fine_tune=False):
    """Train a model with a GPU."""
    # def init_weights(m):
    #     if type(m) == nn.Linear or type(m) == nn.Conv2d:
    #         nn.init.xavier_uniform_(m.weight)
    # net.apply(init_weights)
    print('training on', device)
    progress = ""
    net.to(device)
    if fine_tune:
        optimizer = torch.optim.SGD(net.fc.parameters(), lr=lr, momentum=.9)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=.9)
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=1, verbose=True)
    loss = nn.CrossEntropyLoss()
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], title=plot_title, ylim=[0, 1], figsize=(5, 5),
                        legend=['train loss', 'train acc', 'val acc'])
    timer, num_batches = d2l.Timer(), len(train_iter)
    for epoch in range(num_epochs):
        # Sum of training loss, sum of training accuracy, sum of top-5 training accuracy, no. of examples
        metric = d2l.Accumulator(4)
        net.train()
        # Training
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            # Mini-batch inference
            y_hat = net(X)
            # Take into account the auxiliary logits (see link in cell above)
            if isinstance(y_hat, GoogLeNetOutputs):
                aux_logit1, aux_logit2, y_hat = y_hat
                l1 = loss(y_hat, y)
                l2 = loss(aux_logit1, y)
                l3 = loss(aux_logit2, y)
                l = l1 + .3 * (l2 + l3)
            else:
                l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            # Training accuracies
            with torch.no_grad():
                acc_1, acc_5 = accuracy(y_hat, y)
                metric.add(l * X.shape[0], acc_1, acc_5, X.shape[0])
            timer.stop()
            train_l = metric[0] / metric[3]
            train_acc_1 = metric[1] / metric[3]
            train_acc_5 = metric[2] / metric[3]
            if (i + 1) % (num_batches // 5) == 0 or i == num_batches - 1:
                animator.add(epoch + (i + 1) / num_batches,
                             (train_l, train_acc_1, None), plot_title)
        # Validation (validation loss is computed with the model in eval mode, is that correct?)
        val_l, test_acc_1, test_acc_5 = evaluate_accuracy_gpu(net, test_iter)
        scheduler.step(val_l)
        animator.add(epoch + 1, (None, None, test_acc_1), plot_title)
        # Un-comment to see memory consumption, modify batch size to see effects
        # print(os.popen('nvidia-smi').read())
        # break
        progress += f"----\nEpoch {epoch}/{num_epochs}\n\ttrain loss={train_l}[{train_acc_1}]\tval loss={val_l} [{test_acc_1}]\n----"
    print(progress)
    print(f'loss={train_l:.3f}, train=[1-acc {train_acc_1:.3f}, 5-acc {train_acc_5:.3f}] '
          f'test=[1-acc {test_acc_1:.3f}, 5-acc {test_acc_5:.3f}]')
    print(f'{metric[3] * num_epochs / timer.sum():.1f} examples/sec '
          f'on {str(device)}')
    print(f'total training time: {timer.sum()} seconds')
With this approach, 34% training accuracy is achieved. Honestly, I was expecting something closer to the results obtained on MNIST. What is wrong with my current approach?
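One common pattern for head-only fine-tuning (not necessarily the asker's intended setup) is to build the optimizer from only the parameters that still require gradients, so a frozen backbone cannot be updated by accident. A minimal sketch, assuming the freeze and forget_FC helpers defined above:

import torch

# Freeze the backbone first, then replace the head so the new
# Linear layers are created with requires_grad=True.
net = freeze(googlenet)
net = forget_FC(net)

# Optimize only the parameters that are still trainable (the new heads).
trainable_params = [p for p in net.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(trainable_params, lr=0.01, momentum=0.9)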

PyTorch - Loss for Object Localization

I am trying to perform an object localization task with MNIST, based on Andrew Ng's lecture here. I take the MNIST digits, randomly place them into a 90x90 image, and predict the digit and its center point. When I train, I get very poor results, and my question is about whether or not my loss function is set up correctly. I basically just take the cross-entropy for the digit, the MSE for the coordinates, and then add them all up. Is this correct? I don't get any errors, but the performance is just horrendous.
My dataset is defined as follows (it returns the label and the x, y coordinates of the center of the digit):
import numpy as np
from random import randrange
from torch.utils.data import Dataset

class CustomMnistDataset_OL(Dataset):
    def __init__(self, df, test=False):
        '''
        df is a pandas dataframe with 28x28 columns for each pixel value in MNIST
        '''
        self.df = df
        self.test = test

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if self.test:
            image = np.reshape(np.array(self.df.iloc[idx, :]), (28, 28)) / 255.
        else:
            image = np.reshape(np.array(self.df.iloc[idx, 1:]), (28, 28)) / 255.
        # create the new image
        new_img = np.zeros((90, 90))  # images will be 90x90
        # randomly select a bottom left corner to use for img
        x_min, y_min = randrange(90 - image.shape[0]), randrange(90 - image.shape[0])
        x_max, y_max = x_min + image.shape[0], y_min + image.shape[0]
        x_center = x_min + (x_max - x_min) / 2
        y_center = y_min + (y_max - x_min) / 2
        new_img[x_min:x_max, y_min:y_max] = image
        label = [int(self.df.iloc[idx, 0]), x_center, y_center]  # the label consists of the digit and the center of the number
        sample = {"image": new_img, "label": label}
        return sample['image'], sample['label']
My training function is set up as follows:
loss_fn = nn.CrossEntropyLoss()
loss_mse = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def train(dataloader, model, loss_fn, loss_mse, optimizer):
    model.train()  # very important... This turns the model back to training mode
    size = len(train_dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        X, y0, y1, y2 = X.to(device), y[0].to(device), y[1].to(device), y[2].to(device)
        pred = model(X.float())
        # DEFINE LOSS HERE -------
        loss = loss_fn(pred[0], y0) + loss_mse(pred[1], y1.float()) + loss_mse(pred[2], y2.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

PyTorch: Apply cross entropy loss with custom weight map

I am solving a multi-class segmentation problem using the U-Net architecture in PyTorch.
As specified in the U-Net paper, I am trying to implement custom weight maps to counter class imbalances.
Below is the operation I want to apply, the pixel-wise weight map from the U-Net paper:
w(x) = w_c(x) + w0 * exp(-(d1(x) + d2(x))^2 / (2 * sigma^2))
where d1 and d2 are the distances to the border of the nearest and second-nearest cells.
Also, I reduced batch_size to 1 so that I can remove that dimension while passing the target to the precompute_to_masks function.
I tried the below approach:
def precompute_for_image(masks):
    masks = masks.cpu()
    cls = masks.unique()
    res = torch.stack([torch.where(masks == cls_val, torch.tensor(1), torch.tensor(0)) for cls_val in cls])
    return res

def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    ###################
    # train the model #
    ###################
    model.train()
    for batch_idx, (data, target) in enumerate(final_train_loader):
        # move to GPU
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        temp_target = precompute_for_image(target)
        w = weight_map(temp_target)
        loss = criterion(output, target)
        loss = w * loss
        loss.backward()
        optimizer.step()
        train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
    return model
where weight_map is the function to calculate the weight mask, which I got from here.
The issue I am facing is that I get a memory error when I apply this method, even though I am using 61 GB of RAM and a Tesla V100 GPU.
I really think I am applying it in an incorrect way.
How do I do it?
I am omitting the non-essential details from the training loop.
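For reference, applying a per-pixel weight map to the cross-entropy generally requires an unreduced loss; otherwise multiplying a scalar loss by a map does not weight individual pixels. A minimal sketch (not the asker's exact code, and assuming the weight map has already been computed as an (H, W) or (B, H, W) tensor):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss(reduction='none')   # keep the per-pixel losses

def weighted_ce(output, target, weight_map):
    """output: (B, C, H, W) logits, target: (B, H, W) long, weight_map: (B, H, W) float."""
    per_pixel = criterion(output, target)            # shape (B, H, W)
    return (weight_map * per_pixel).mean()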
Below is my weight_map function:
import numpy as np
from skimage.segmentation import find_boundaries

w0 = 10
sigma = 5

def make_weight_map(masks):
    """
    Generate the weight maps as specified in the UNet paper
    for a set of binary masks.

    Parameters
    ----------
    masks: array-like
        A 3D array of shape (n_masks, image_height, image_width),
        where each slice of the matrix along the 0th axis represents one binary mask.

    Returns
    -------
    array-like
        A 2D array of shape (image_height, image_width)
    """
    nrows, ncols = masks.shape[1:]
    masks = (masks > 0).astype(int)
    distMap = np.zeros((nrows * ncols, masks.shape[0]))
    X1, Y1 = np.meshgrid(np.arange(nrows), np.arange(ncols))
    X1, Y1 = np.c_[X1.ravel(), Y1.ravel()].T
    for i, mask in enumerate(masks):
        # find the boundary of each mask,
        # compute the distance of each pixel from this boundary
        bounds = find_boundaries(mask, mode='inner')
        X2, Y2 = np.nonzero(bounds)
        xSum = (X2.reshape(-1, 1) - X1.reshape(1, -1)) ** 2
        ySum = (Y2.reshape(-1, 1) - Y1.reshape(1, -1)) ** 2
        distMap[:, i] = np.sqrt(xSum + ySum).min(axis=0)
    ix = np.arange(distMap.shape[0])
    if distMap.shape[1] == 1:
        d1 = distMap.ravel()
        border_loss_map = w0 * np.exp((-1 * (d1) ** 2) / (2 * (sigma ** 2)))
    else:
        if distMap.shape[1] == 2:
            d1_ix, d2_ix = np.argpartition(distMap, 1, axis=1)[:, :2].T
        else:
            d1_ix, d2_ix = np.argpartition(distMap, 2, axis=1)[:, :2].T
        d1 = distMap[ix, d1_ix]
        d2 = distMap[ix, d2_ix]
        border_loss_map = w0 * np.exp((-1 * (d1 + d2) ** 2) / (2 * (sigma ** 2)))
    xBLoss = np.zeros((nrows, ncols))
    xBLoss[X1, Y1] = border_loss_map
    # class weight map
    loss = np.zeros((nrows, ncols))
    w_1 = 1 - masks.sum() / loss.size
    w_0 = 1 - w_1
    loss[masks.sum(0) == 1] = w_1
    loss[masks.sum(0) == 0] = w_0
    ZZ = xBLoss + loss
    return ZZ
Traceback of the error-
MemoryError Traceback (most recent call last)
<ipython-input-30-f0a595b8de7e> in <module>
1 # train the model
2 model_scratch = train(20, final_train_loader, unet, optimizer,
----> 3 criterion, train_on_gpu, 'model_scratch.pt')
<ipython-input-29-b481b4f3120e> in train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path)
24 loss = criterion(output,target)
25 target.requires_grad = False
---> 26 w = make_weight_map(target)
27 loss = W*loss
28 loss.backward()
<ipython-input-5-e75a6281476f> in make_weight_map(masks)
33 X2, Y2 = np.nonzero(bounds)
34 xSum = (X2.reshape(-1, 1) - X1.reshape(1, -1)) ** 2
---> 35 ySum = (Y2.reshape(-1, 1) - Y1.reshape(1, -1)) ** 2
36 distMap[:, i] = np.sqrt(xSum + ySum).min(axis=0)
37 ix = np.arange(distMap.shape[0])
MemoryError:
Your final_train_loader provides you with the input image data and the expected pixel-wise labeling target. I assume (following PyTorch's conventions) that data is of shape B-3-H-W and of dtype=torch.float.
More importantly, target is of shape B-H-W and of dtype=torch.long.
On the other hand make_weight_map expects its input to be C-H-W (with C = number of classes, NOT batch size), of type numpy array.
Try providing make_weight_map the input mask as it expects it and see if you get similar errors.
I also recommend that you visualize the resulting weight map - to make sure your function does what you expect it to do.
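Following that suggestion, one way to convert a single B-H-W long target into the C-H-W binary masks that make_weight_map expects might look like the sketch below (assuming batch size 1 and that n_classes is known; the helper name is hypothetical):

import numpy as np
import torch

def target_to_masks(target, n_classes):
    """target: LongTensor of shape (1, H, W) -> numpy array of shape (C, H, W)."""
    t = target.squeeze(0).cpu().numpy()                         # (H, W) integer labels
    masks = np.stack([(t == c).astype(np.uint8) for c in range(n_classes)])
    return masks                                                # one binary mask per class

# usage sketch inside the training loop:
# w = make_weight_map(target_to_masks(target, n_classes))       # (H, W) numpy weight map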

TensorFlow: Using softmax in training, got W and b values always zero?

I tried to write TensorFlow code to train on samples for the first time, but it seems the weights W and b are always zero after every step of training.
The training data are very simple: 10000 samples (x, y) where y = -1 when 0 < x < 0.3, and y = 1 when x > 0.3. I imported these data from a CSV file.
The training data stored in the CSV file look as follows (there are 10000 rows in total):
0.487801884,1;
0.457740109,1;
0.092949029,-1;
0.704023173,1;
0.07851864,-1;
But when I run this code and print W and b at each step, I find W and b are always zero; it seems like they are not being trained. Training result:
W= [[ 0. 0.]]
b= [ 0. 0.]
Epoch: 0000000001 cost= 0.821999985 W= [[ 0. 0.]] b= [ 0. 0.]
Optimization Finished!
Accuracy: 1.0
I'm confused, could anyone help me find what the problem is? Thank you very much!
The code is attached here:
#coding=utf-8
import tensorflow as tf
import numpy
import os
import csv

# training data stored in a csv file
filename = open('D:\Program Files (x86)\logistic\sample.csv', 'r')
reader = csv.reader(filename)
t_X, t_Y = [], []
for i in reader:
    t_X.append(i[0])
    t_Y.append(i[1])
t_X = numpy.asarray(t_X)
t_Y = numpy.asarray(t_Y)
t_XT = numpy.transpose([t_X])
t_YT = numpy.transpose([t_Y])

# Parameters
learning_rate = 0.01
training_epochs = 1
batch_size = 50
display_step = 1

# Input
n_samples = t_X.shape[0]
#print "n_samples:",n_samples
x = tf.placeholder(tf.float32, [None, 1])
y = tf.placeholder(tf.float32, [None, 1])

# Weight
W = tf.Variable(tf.zeros([1, 2]))
b = tf.Variable(tf.zeros([2]))

# model
pred = tf.nn.softmax(tf.matmul(x, W) + b)
cost = tf.reduce_mean(tf.square(y - pred))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        avg_cost = 0
        total_batch = int(n_samples / batch_size)
        i = 0
        # read training data and transfer it into (m, n)
        for anc in range(total_batch):
            m = numpy.asarray([t_X[i], t_X[i+1], t_X[i+2], t_X[i+3], t_X[i+4]])
            n = numpy.asarray([t_Y[i], t_Y[i+1], t_Y[i+2], t_Y[i+3], t_Y[i+4]])
            m = numpy.transpose([m])
            n = numpy.transpose([n])
            _, c = sess.run([optimizer, cost], feed_dict={x: m, y: n})
            i = i + batch_size
            avg_cost += c / total_batch
        if (epoch + 1) % display_step == 0:
            print("Epoch:", '%010d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost),
                  "W=", sess.run(W), "b=", sess.run(b))
    print("Optimization Finished!")
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("Accuracy:", accuracy.eval(feed_dict={x: t_XT, y: t_YT}))
Yep, you could expect this problem with this weight initialisation:
#Weight
W = tf.Variable(tf.zeros([1, 2]))
b = tf.Variable(tf.zeros([2]))
Your weights should be initialised randomly ;)
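A sketch of what that might look like with the TF1 API used in the question (random normal initialisation instead of zeros; the stddev value here is just an assumption, not a recommendation from the answer):

# Weight: small random values instead of zeros
W = tf.Variable(tf.random_normal([1, 2], stddev=0.1))
b = tf.Variable(tf.zeros([2]))  # biases can usually stay at zero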