PyTorch: Confusion Matrix for Transfer Learning - deep-learning

I've been trying to plot a confusion matrix for the below code - check def train_alexnet(). But I keep getting this error:
IndexError: only integers, slices (`:`), ellipsis (`...`), None and long or byte Variables are valid indices (got float)
So, I tried converting my tensors to an integer tensor but then got the error:
ValueError: only one element tensors can be converted to Python scalars
Can someone suggest me what can be done to convert the tensors 'all_preds' and 'source_value' to tensors containing integer values? I found the torch no grad option but I am unaware as to how to use it because I'm new to pytorch.
Here's the link of the github repo that I'm trying to work with: https://github.com/syorami/DDC-transfer-learning/blob/master/DDC.py
from __future__ import print_function
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import warnings
warnings.filterwarnings('ignore')
import math
import model
import torch
import dataloader
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.metrics import confusion_matrix
from plotcm import plot_confusion_matrix
from torch import nn
from torch import optim
from torch.autograd import Variable
cuda = torch.cuda.is_available()
def step_decay(epoch, learning_rate):
# learning rate step decay
# :param epoch: current training epoch
# :param learning_rate: initial learning rate
# :return: learning rate after step decay
initial_lrate = learning_rate
drop = 0.8
epochs_drop = 10.0
lrate = initial_lrate * math.pow(drop, math.floor((1 + epoch) / epochs_drop))
return lrate
def train_alexnet(epoch, model, learning_rate, source_loader):
# train source on alexnet
# :param epoch: current training epoch
# :param model: defined alexnet
# :param learning_rate: initial learning rate
# :param source_loader: source loader
# :return:
log_interval = 10
LEARNING_RATE = step_decay(epoch, learning_rate)
print(f'Learning Rate: {LEARNING_RATE}')
optimizer = optim.SGD([
{'params': model.features.parameters()},
{'params': model.classifier.parameters()},
{'params': model.final_classifier.parameters(), 'lr': LEARNING_RATE}
], lr=LEARNING_RATE / 10, momentum=MOMENTUM, weight_decay=L2_DECAY)
# enter training mode
model.train()
iter_source = iter(source_loader)
num_iter = len(source_loader)
correct = 0
total_loss = 0
clf_criterion = nn.CrossEntropyLoss()
all_preds = torch.tensor([])
source_value = torch.tensor([])
for i in range(1, num_iter):
source_data, source_label = iter_source.next()
# print("source label: ", source_label)
if cuda:
source_data, source_label = source_data.cuda(), source_label.cuda()
source_data, source_label = Variable(source_data), Variable(source_label)
optimizer.zero_grad()
##
source_preds = model(source_data)
preds = source_preds.data.max(1, keepdim=True)[1]
correct += preds.eq(source_label.data.view_as(preds)).sum()
#prediction label
all_preds = torch.cat(
(all_preds, preds)
,dim=0
)
#actual label
source_value = torch.cat(
(source_value,source_label)
,dim=0
)
loss = clf_criterion(source_preds, source_label)
total_loss += loss
loss.backward()
optimizer.step()
if i % log_interval == 0:
print('Train Epoch {}: [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, i * len(source_data), len(source_loader) * BATCH_SIZE,
100. * i / len(source_loader), loss.item()))
total_loss /= len(source_loader)
acc_train = float(correct) * 100. / (len(source_loader) * BATCH_SIZE)
# print('all preds= ',int(all_preds))
# print("source value", int(source_value))
stacked = torch.stack(
(
source_value
,(all_preds.argmax(dim=1))
)
,dim=1
)
print("stacked",stacked)
cmt = torch.zeros(3
,3, dtype=torch.float64)
with torch.no_grad():
for p in stacked:
tl, pl = p.tolist()
cmt[tl, pl] = cmt[tl, pl] + 1
print("cmt: ",cmt)
print('{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
SOURCE_NAME, total_loss.item(), correct, len(source_loader.dataset), acc_train))
def test_alexnet(model, target_loader):
# test target data on fine-tuned alexnet
# :param model: trained alexnet on source data set
# :param target_loader: target dataloader
# :return: correct num
# enter evaluation mode
clf_criterion = nn.CrossEntropyLoss()
model.eval()
test_loss = 0
correct = 0
for data, target in target_test_loader:
if cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data, volatile=True), Variable(target)
target_preds = model(data)
test_loss += clf_criterion(target_preds, target) # sum up batch loss
pred = target_preds.data.max(1)[1] # get the index of the max log-probability
correct += pred.eq(target.data.view_as(pred)).cpu().sum()
stacked = torch.stack(
(
target
,target_preds.argmax(dim=1)
)
,dim=1
)
print("stacked target",stacked)
test_loss /= len(target_loader)
print('{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
TARGET_NAME, test_loss.item(), correct, len(target_loader.dataset),
100. * correct / len(target_loader.dataset)))
return correct
def compute_confusion_matrix(preds, y):
#round predictions to the closest integer
rounded_preds = torch.round(torch.sigmoid(preds))
return confusion_matrix(y, rounded_preds)
if __name__ == '__main__':
ROOT_PATH = './v1234_combined/pets'
SOURCE_NAME = 'v123'
TARGET_NAME = 'v4'
BATCH_SIZE = 15
TRAIN_EPOCHS = 1
learning_rate = 1e-2
L2_DECAY = 5e-4
MOMENTUM = 0.9
source_loader = dataloader.load_training(ROOT_PATH, SOURCE_NAME, BATCH_SIZE)
#target_train_loader = dataloader.load_training(ROOT_PATH, TARGET_NAME, BATCH_SIZE)
target_test_loader = dataloader.load_testing(ROOT_PATH, TARGET_NAME, BATCH_SIZE)
print('Load data complete')
alexnet = model.Alexnet_finetune(num_classes=3)
print('Construct model complete')
# load pretrained alexnet model
alexnet = model.load_pretrained_alexnet(alexnet)
print('Load pretrained alexnet parameters complete\n')
if cuda: alexnet.cuda()
for epoch in range(1, TRAIN_EPOCHS + 1):
print(f'Train Epoch {epoch}:')
train_alexnet(epoch, alexnet, learning_rate, source_loader)
correct = test_alexnet(alexnet, target_test_loader)
print(len(source_loader.dataset))

In oder to conver all elements of a tensor from floats to ints, you need to use .to():
all_preds_int = all_preds.to(torch.int64)
Note that it appears as if your all_preds are the predicted class probabilities and not the actual labels. You might need to torch.argmax along the appropriate dimension. (BTW, the output of argmax is int - no need to convert).

Related

System exit 1 error after obtaining NaN losses from finetuning Mask R-CNN in Pytorch

I am following this tutorial from Pytorch for Finetuning a pre-trained model on my own dataset. I have my annotation in the COCO format in a json file, so, I first implemented the dataloader as follows:
import torch
import json
from torch.utils.data import Dataset
from pycocotools.coco import COCO
from PIL import Image
import os
import numpy as np
from torchvision import transforms
import Config
import transforms as T
from torchvision.transforms import functional as F
class CustomDataset(Dataset):
def __init__(self, root, json_file, transform=None):
self.root = root
with open(json_file) as f:
self.data = json.load(f)
self.transform = transform
self.image_ids = [img["id"] for img in self.data["images"]]
self.imgs = list(sorted(os.listdir(os.path.join(root, "Images"))))
self.masks = list(sorted(os.listdir(os.path.join(root, "Masks"))))
def __getitem__(self, idx):
# Get image ID
img_id = self.image_ids[idx]
img = next(image for image in self.data["images"] if image["id"] == img_id)
img_path = os.path.join(self.root, "Images")
mask_path = os.path.join(self.root, "Masks")
# Load image
image = Image.open(os.path.join(img_path, img['file_name'])).convert("RGB")
# extract annotations from the json file
annotations = [ann for ann in self.data["annotations"] if ann["image_id"] == img_id]
# extract labels from annotations
labels = [ann["label"] for ann in annotations]
# convert labels to integers
labels = [label for label in labels]
labels = torch.as_tensor(labels, dtype=torch.int64)
# extract boxes and convert them to format [x1, y1, x2, y2]
boxes = [ann["bbox"] for ann in annotations]
boxes = [[bbox[0], bbox[1], bbox[2], bbox[3]] for bbox in boxes]
num_objects = len(boxes)
# read the mask and include the number of objects in the first dimension
mask = np.array(Image.open(os.path.join(mask_path, img['file_name'])).convert("L"))
# Check if mask is empty
if mask.size == 0:
mask = np.zeros((num_objects, 1, 1), dtype=np.uint8)
else:
mask = np.expand_dims(mask, axis=0)
mask = np.repeat(mask, num_objects, axis=0)
# convert the binary mask array to a torch tensor
mask = torch.as_tensor(mask, dtype=torch.uint8)
# suppose all instances are not crowd
iscrowd = torch.zeros((num_objects,), dtype=torch.int64)
# convert bboxes to tensors
boxes = torch.as_tensor(boxes, dtype=torch.float32)
# calculate the area of the bounding box
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
# convert id to tensor
image_id = torch.tensor([idx])
# create target dictionary
target = {}
target["boxes"] = boxes
target["labels"] = labels
target["masks"] = mask
target["image_id"] = image_id
target["area"] = area
target["iscrowd"] = iscrowd
# apply the transform if any
if self.transform is not None:
image, target = self.transform(image, target)
return image, target
def __len__(self):
return len(self.imgs)
and I am using this code for training:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from engine import train_one_epoch
import utils
import transforms as T
from dataloader import CustomDataset
import Config
import torch
import utils
from tqdm import tqdm
from torch.optim.lr_scheduler import StepLR
from torchvision.transforms import functional as F
def get_instance_segmentation_model(num_classes):
# load an instance segmentation model pre-trained on COCO
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
# get the number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# now get the number of input features for the mask classifier
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256
# and replace the mask predictor with a new one
model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
hidden_layer,
num_classes)
return model
def get_transform(train):
transforms = []
# converts the image, a PIL image, into a PyTorch Tensor
transforms.append(T.PILToTensor())
if train:
# during training, randomly flip the training images
# and ground-truth for data augmentation
transforms.append(T.RandomHorizontalFlip(0.5))
return T.Compose(transforms)
json_path = 'annotations.json'
# use our dataset and defined transformations
dataset = CustomDataset(root = Config.Dataset_dir, json_file=json_path, transform = get_transform(train=True))
# for image, target in dataset:
# print(image.shape)
# split the dataset in train and test set
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-500])
dataset_test = torch.utils.data.Subset(dataset, indices[-500:])
# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
dataset, batch_size=1, shuffle=True, num_workers=4,
collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
dataset_test, batch_size=1, shuffle=False, num_workers=4,
collate_fn=utils.collate_fn)
device = Config.DEVICE
# # our dataset has two classes only - background and person
num_classes = 2
# get the model using our helper function
model = get_instance_segmentation_model(num_classes)
# move model to the right device
model.to(device)
# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.1,
momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
step_size=3,
gamma=0.1)
# let's train it for 10 epochs
num_epochs = 10
for epoch in range(num_epochs):
# train for one epoch, printing every 10 iterations
train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
# update the learning rate
lr_scheduler.step()
# evaluate on the test dataset
evaluate(model, data_loader_test, device=device)
This training code is as stated in the tutorial is using some helper functions which can be accessed from here. I have run the training code and the training is working for the first 10 samples in the data, but then it gives the following error:
Epoch: [0] [ 0/2759] eta: 13:29:50 lr: 0.000200 loss: -136.8811 (-136.8811) loss_classifier: 0.9397 (0.9397) loss_box_reg: 0.0017 (0.0017) loss_mask: -137.9142 (-137.9142) loss_objectness: 0.0859 (0.0859) loss_rpn_box_reg: 0.0057 (0.0057) time: 17.6117 data: 10.0775
Loss is nan, stopping training
{'loss_classifier': tensor(nan, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(nan, grad_fn=<DivBackward0>), 'loss_mask': tensor(nan, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_objectness': tensor(nan, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(nan, grad_fn=<DivBackward0>)}
An exception has occurred, use %tb to see the full traceback.
SystemExit: 1
This error is raised from the engine.py train_one_epoch function, especially from this part of the function:
with torch.cuda.amp.autocast(enabled=scaler is not None):
loss_dict = model(images, targets)
losses = sum(loss for loss in loss_dict.values())
# reduce losses over all GPUs for logging purposes
loss_dict_reduced = utils.reduce_dict(loss_dict)
losses_reduced = sum(loss for loss in loss_dict_reduced.values())
loss_value = losses_reduced.item()
if not math.isfinite(loss_value):
print(f"Loss is {loss_value}, stopping training")
print(loss_dict_reduced)
sys.exit(1)
Which indicates that the losses returned after the first loop are NaN ... What could be wrong here please? I am running out of ideas and don't know what's going wrong anymore.

issue with arcface ( 0 accuracy)

Hello guys I've joined a university-level image recognition competition.
In the test, they will give two images (people face) and my model need to detect pair of the image is the same person or not
My model is resnet18 with IR block and SE block. and it will use Arcface loss.
I can use only the MS1M dataset with a total of 86876 classes
The problem is that loss is getting better, but accuracy is 0 and not changing.
Here's part of code I'm working on.
Train
def train_model(model, net, criterion, optimizer, scheduler, num_epochs=25):
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
for phase in ['train']:
if phase == 'train':
model.train() # Set model to training mode
running_loss = 0.0
running_corrects = 0
# Iterate over data.
for inputs, labels in notebook.tqdm(dataloader):
inputs = inputs.to(device)
labels = labels.to(device).long()
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'):
features = model(inputs)
outputs = net(features, labels)
_, preds = torch.max(outputs, 1)
loss = criterion(outputs, labels)
# backward + optimize only if in training phase
if phase == 'train':
loss.backward()
optimizer.step()
# statistics
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
if phase == 'train':
scheduler.step()
epoch_loss = running_loss / len(dataloader)
epoch_acc = running_corrects.double() / len(dataloader)
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
phase, epoch_loss, epoch_acc))
# deep copy the model
if phase == 'train' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict())
torch.save({'epoch': epoch,
'mode_state_dict': model.state_dict(),
'fc_state_dict': net.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'scheduler': scheduler.state_dict(), # HERE IS THE CHANGE
}, f'/content/drive/MyDrive/inha_data/training_saver/training_stat{epoch}.pth')
print(f'finished {epoch} and saved model_save_{epoch}.pt')
print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
time_elapsed // 60, time_elapsed % 60))
print('Best train Acc: {:4f}'.format(best_acc))
# load best model weights
model.load_state_dict(best_model_wts)
torch.save(model.state_dict(), 'model_save.pt')
return model
Parameters
train_dataset = MS1MDataset('train')
dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True,num_workers=4)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # 디바이스 설정
num_classes = 86876
# normal classifier
# net = nn.Sequential(nn.Linear(512, num_classes))
# Feature extractor backbone, input is 112x112 image output is 512 feature vector
model_ft = resnet18(True)
#set metric
metric_fc = metrics.ArcMarginProduct(512, num_classes, s = 30.0, m = 0.50, easy_margin = False)
metric_fc.to(device)
# net = net.to(device)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
optimizer_ft = torch.optim.Adam([{'params': model_ft.parameters()}, {'params': metric_fc.parameters()}],
lr=0.1)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=4, gamma=0.1)
Arcface
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
import math
class ArcMarginProduct(nn.Module):
r"""Implement of large margin arc distance: :
Args:
in_features: size of each input sample
out_features: size of each output sample
s: norm of input feature
m: margin
cos(theta + m)
"""
def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False):
super(ArcMarginProduct, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.s = s
self.m = m
self.weight = Parameter(torch.FloatTensor(out_features, in_features))
nn.init.xavier_uniform_(self.weight)
self.easy_margin = easy_margin
self.cos_m = math.cos(m)
self.sin_m = math.sin(m)
self.th = math.cos(math.pi - m)
self.mm = math.sin(math.pi - m) * m
def forward(self, input, label):
# --------------------------- cos(theta) & phi(theta) ---------------------------
cosine = F.linear(F.normalize(input), F.normalize(self.weight))
sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
phi = cosine * self.cos_m - sine * self.sin_m
if self.easy_margin:
phi = torch.where(cosine > 0, phi, cosine)
else:
phi = torch.where(cosine > self.th, phi, cosine - self.mm)
# --------------------------- convert label to one-hot ---------------------------
# one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
one_hot = torch.zeros(cosine.size(), device='cuda')
one_hot.scatter_(1, label.view(-1, 1).long(), 1)
# -------------torch.where(out_i = {x_i if condition_i else y_i) -------------
output = (one_hot * phi) + ((1.0 - one_hot) * cosine) # you can use torch.where if your torch.__version__ is 0.4
output *= self.s
# print(output)
return output
dataset
data_transforms = {
'train': transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.ColorJitter(brightness=0.125, contrast=0.125, saturation=0.125),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
}
#train_ms1_data = torchvision.datasets.ImageFolder('/content/drive/MyDrive/inha_data/train', transform = data_transforms)
class MS1MDataset(Dataset):
def __init__(self,split):
self.file_list = '/content/drive/MyDrive/inha_data/ID_List.txt'
self.images = []
self.labels = []
self.transformer = data_transforms['train']
with open(self.file_list) as f:
files = f.read().splitlines()
for i, fi in enumerate(files):
fi = fi.split()
image = "/content/" + fi[1]
label = int(fi[0])
self.images.append(image)
self.labels.append(label)
def __getitem__(self, index):
img = Image.open(self.images[index])
img = self.transformer(img)
label = self.labels[index]
return img, label
def __len__(self):
return len(self.images)
You can try to use a smaller m in ArcFace, even a minus value.

How to output the accuracy alongside with the loss when training the MNIST dataset after each epoch

from __future__ import print_function
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from tensorflow.examples.tutorials.mnist import input_data
import torch.optim as optim
import tensorflow.python.util.deprecation as deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False
import matplotlib.pyplot as plt
%matplotlib inline
from plot import plot_loss_and_acc
mnist = input_data.read_data_sets("MNIST_data", one_hot=False)
batch_size = 250
epoch_num = 10
lr = 0.0001
disp_freq = 20
def next_batch(train=True):
# Reads the next batch of MNIST images and labels and returns them
if train:
batch_img, batch_label = mnist.train.next_batch(batch_size)
else:
batch_img, batch_label = mnist.test.next_batch(batch_size)
batch_label = torch.from_numpy(batch_label).long() # convert the numpy array into torch tensor
batch_label = Variable(batch_label) # create a torch variable
batch_img = torch.from_numpy(batch_img).float() # convert the numpy array into torch tensor
batch_img = Variable(batch_img) # create a torch variable
return batch_img, batch_label
class MLP(nn.Module):
def __init__(self, n_features, n_classes):
super(MLP, self).__init__()
self.layer1 = nn.Linear(n_features, 128)
self.layer2 = nn.Linear(128, 128)
self.layer3 = nn.Linear(128, n_classes)
def forward(self, x, training=True):
# a neural network with 2 hidden layers
# x -> FC -> relu -> dropout -> FC -> relu -> dropout -> FC -> output
x = F.relu(self.layer1(x))
x = F.dropout(x, 0.5, training=training)
x = F.relu(self.layer2(x))
x = F.dropout(x, 0.5, training=training)
x = self.layer3(x)
return x
def predict(self, x):
# a function to predict the labels of a batch of inputs
x = F.softmax(self.forward(x, training=False))
return x
def accuracy(self, x, y):
# a function to calculate the accuracy of label prediction for a batch of inputs
# x: a batch of inputs
# y: the true labels associated with x
prediction = self.predict(x)
maxs, indices = torch.max(prediction, 1)
acc = 100 * torch.sum(torch.eq(indices.float(), y.float()).float())/y.size()[0]
print(acc.data)
return acc.data
# define the neural network (multilayer perceptron)
net = MLP(784, 10)
# calculate the number of batches per epoch
batch_per_ep = mnist.train.num_examples // batch_size
# define the loss (criterion) and create an optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=lr)
print(' ')
print("__________Training__________________")
xArray = []
yLoss = []
yAcc = []
for ep in range(epoch_num): # epochs loop
for batch_n in range(batch_per_ep): # batches loop
features, labels = next_batch()
# Reset gradients
optimizer.zero_grad()
# Forward pass
output = net(features)
loss = criterion(output, labels)
# Backward pass and updates
loss.backward() # calculate the gradients (backpropagation)
optimizer.step() # update the weights
if batch_n % disp_freq == 0:
print('epoch: {} - batch: {}/{} '.format(ep, batch_n, batch_per_ep))
xArray.append(ep)
yLoss.append(loss.data)
#yAcc.append(acc.data)
print('loss: ', loss.data)
print('__________________________________')
# test the accuracy on a batch of test data
features, labels = next_batch(train=False)
print("Result")
print('Test accuracy: ', net.accuracy(features, labels))
print('loss: ', loss.data)
accuracy = net.accuracy(features, labels)
#Loss Plot
# plotting the points
plt.plot(xArray, yLoss)
# naming the x axis
plt.xlabel('epoch')
# naming the y axis
plt.ylabel('loss')
# giving a title to my graph
plt.title('Loss Plot')
# function to show the plot
plt.show()
#Accuracy Plot
# plotting the points
plt.plot(xArray, yAcc)
# naming the x axis
plt.xlabel('epoch')
# naming the y axis
plt.ylabel(' accuracy')
# giving a title to my graph
plt.title('Accuracy Plot ')
# function to show the plot
plt.show()
I want to display the accuracy of my training dataset. I have managed to display and plot the loss but I didn't manage to do it for accuracy. I know I am missing 1 or 2 lines of code and I don't know how to do it.
I mean if I can display the accuracy alongside each epoch like the loss I can do the plotting myself.
Hi replace this code print('epoch: {} - batch: {}/{} '.format(ep, batch_n, batch_per_ep)) with
print('epoch: {} - batch: {}/{} - accuracy: {}'.format(ep, batch_n, batch_per_ep, net.accuracy(features,labels)))
Hope this helps.

Getting polynomial regression to overfit with TensorFlow

The Sklearn documentation contains an example of a polynomial regression which beautifully illustrates the idea of overfitting (link).
The third plot shows a 15th order polynomial that overfits the simulated data. I replicated this model in TensorFlow, but I cannot get it to overfit.
Even when tuning the learning rate and the numbers of learning epochs, I cannot get the model to overfit. What am I missing?
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
def true_fun(X):
return np.cos(1.5 * np.pi * X)
# Generate dataset
n_samples = 30
np.random.seed(0)
x_train = np.sort(np.random.rand(n_samples)) # Draw from uniform distribution
y_train = true_fun(x_train) + np.random.randn(n_samples) * 0.1
x_test = np.linspace(0, 1, 100)
y_true = true_fun(x_test)
# Helper function
def run_dir(base_dir, dirname='run'):
"Number log directories incrementally"
import os
import re
pattern = re.compile(dirname+'_(\d+)')
try:
previous_runs = os.listdir(base_dir)
except FileNotFoundError:
previous_runs = []
run_number = 0
for name in previous_runs:
match = pattern.search(name)
if match:
number = int(match.group(1))
if number > run_number:
run_number = number
run_number += 1
logdir = os.path.join(base_dir, dirname + '_%02d' % run_number)
return(logdir)
# Define the polynomial model
def model(X, w):
"""Polynomial model
param X: data
param y: coeficients in the polynomial regression
returns: Polynomial function Y(X, w)
"""
terms = []
for i in range(int(w.shape[0])):
term = tf.multiply(w[i], tf.pow(X, i))
terms.append(term)
return(tf.add_n(terms))
# Create the computation graph
order = 15
tf.reset_default_graph()
X = tf.placeholder("float")
Y = tf.placeholder("float")
w = tf.Variable([0.]*order, name="parameters")
lambda_reg = tf.placeholder('float', shape=[])
learning_rate_ph = tf.placeholder('float', shape=[])
y_model = model(X, w)
loss = tf.div(tf.reduce_mean(tf.square(Y-y_model)), 2) # Square error
loss_rg = tf.multiply(lambda_reg, tf.reduce_sum(tf.square(w))) # L2 pentalty
loss_total = tf.add(loss, loss_rg)
loss_hist1 = tf.summary.scalar('loss', loss)
loss_hist2 = tf.summary.scalar('loss_rg', loss_rg)
loss_hist3 = tf.summary.scalar('loss_total', loss_total)
summary = tf.summary.merge([loss_hist1, loss_hist2, loss_hist3])
train_op = tf.train.GradientDescentOptimizer(learning_rate_ph).minimize(loss_total)
init = tf.global_variables_initializer()
def train(sess, x_train, y_train, lambda_val=0, epochs=2000, learning_rate=0.01):
feed_dict={X: x_train, Y: y_train, lambda_reg: lambda_val, learning_rate_ph: learning_rate}
logdir = run_dir("logs/polynomial_regression2/")
writer = tf.summary.FileWriter(logdir)
sess.run(init)
for epoch in range(epochs):
_, summary_str = sess.run([train_op, summary], feed_dict=feed_dict)
writer.add_summary(summary_str, global_step=epoch)
final_cost, final_cost_rg, w_learned = sess.run([loss, loss_rg, w], feed_dict=feed_dict)
return final_cost, final_cost_rg, w_learned
def plot_test(w_learned, x_test, x_train, y_train):
y_learned = calculate_y(x_test, w_learned)
plt.scatter(x_train, y_train)
plt.plot(x_test, y_true, label="true function")
plt.plot(x_test, y_learned,'r', label="learned function")
#plt.title('$\lambda = {:03.2f}$'.format(lambda_values[i]))
plt.ylabel('y')
plt.xlabel('x')
plt.legend()
plt.show()
def calculate_y(x, w):
y = 0
for i in range(w.shape[0]):
y += w[i] * np.power(x, i)
return y
sess = tf.Session()
final_cost, final_cost_rg, w_learned = train(sess, x_train, y_train, lambda_val=0,
learning_rate=0.3, epochs=2000)
sess.close()
plot_test(w_learned, x_test, x_train, y_train)
I have same problem about this. When I do polynomial regression, I also can't overfit the data by using GD in Tensorflow.
Then I compare the coefficients(weights) of the model by using sklearn LinearRegression, I found when the polynomial degree is larger the coefficient of high order is very smaller(i.e. 1e-4), and the low order is relative large(i.e. 0.1).
That's mean when you using GD algorithm for searching the best value of weights, the high order coefficient become extreme sensitive about the value change, and the low order coefficient is not.
And I guess the best coefficient(overfit with data) of low order term is large, and of high order term is tiny. When you set large learning rate, it's impossible to find the right answer, and when you set tiny learning rate, you need lots of iterations.
It's obvious when you using GD algorithm with small data set to make overfit.

Simple Multilayer Perceptron model does not converge in TensorFlow

I am new to TensorFlow. Today I tried to implement my first model in TF but it returned strange results. I know that I am missing something here but I was not able to figure it out. Here is the story.
Model
I have a simple Multilayer Perceptron model with only a single hidden layer applied on MNIST databse. Layers are defined like [input(784) , hidden_layer(470) , output_layer(10)] with tanh as non-linearity for hidden layer and softmax as the loss for output layer. The optimizer I am using is Gradient Descent Algorithm with learning rate of 0.01. My mini batch size is 1 (I am training model with samples one by one).
My implementations :
First I implemented my model in C++ and got around 96% accuracy.Here is the repository : https://github.com/amin2ros/Artificog
I implemented the exact model in TensorFlow but surprisingly the model didn't converge at all. Here is the code.
Code:
import sys
import input_data
import matplotlib.pyplot as plt
from pylab import *
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
# Parameters
learning_rate = 0.1
training_epochs = 1
batch_size = 1
display_step = 1
# Network Parameters
n_hidden_1 = 470 # 1st layer num features
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])
# Create model
def multilayer_perceptron(_X, _weights, _biases):
layer_1 = tf.tanh(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1']))
return tf.matmul(layer_1, _weights['out']) + _biases['out']
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax(pred)) # Softmax loss
optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cost) #
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
m= 0
total_batch = int(mnist.train.num_examples/batch_size)
counter=0
#print 'count = ' , total_batch
#sys.stdin.read(1)
# Loop over all batches
for i in range(total_batch):
batch_xs, batch_ys = mnist.train.next_batch(batch_size)
label = tf.argmax(batch_ys,1).eval()[0]
counter+=1
sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
wrong_prediction = tf.not_equal(tf.argmax(pred, 1), tf.argmax(y, 1))
missed=tf.cast(wrong_prediction, "float")
m += missed.eval({x: batch_xs, y: batch_ys})[0]
print "Sample #", counter , " - Label : " , label , " - Prediction :" , tf.argmax(pred, 1).eval({x: batch_xs, y: batch_ys})[0] ,\
"- Missed = " , m , " - Error Rate = " , 100 * float(m)/counter
print "Optimization Finished!"
I am very curious why this happens. Any help is appreciated.
Edit:
As commented below definition of cost function was incorrect so it should be like
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred,y))
Now model converges :)