Computation of loss is taking a very long time on GPU in PyTorch

I've been trying to implement a DCGAN in PyTorch, but during training a single iteration of the training loop takes more than 7-8 minutes on a GPU on Google Colab. I can't figure out what is wrong in the code. I have tried several things to overcome this issue, but nothing seems to be working.
Here's my training loop, which takes more than 7-8 minutes per iteration:
device = torch.device("cuda:0")
dis = Discriminator().to(device)
Gen = Generator().to(device)
GAN_loss = nn.BCELoss().to(device)
D_optimizer = optim.Adam(dis.parameters(), lr = 0.0002, betas = (0.5, 0.999))
G_optimizer = optim.Adam(Gen.parameters(), lr = 0.0002, betas = (0.5, 0.999))

path = 'gdrive/My Drive/New_data/'
path2 = 'gdrive/My Drive/New_cropped/'
train_data_list = os.listdir(path)
train_data_len = len(train_data_list)
minibatch_size = 64
epochs = 10
G_losses = []
D_losses = []

final_itr = (train_data_len + minibatch_size - 1) // minibatch_size
data_list = [train_data_list[i * minibatch_size : (i + 1) * minibatch_size] for i in range(final_itr)]

for epoch in range(epochs):
    for count, data in enumerate(data_list):
        train_img = []
        sample_img = []
        for image in data:
            img_train = cv2.imread(path + image).T/255
            img_train = img_train.reshape(1, img_train.shape[0], img_train.shape[1], img_train.shape[2])
            img_sample = cv2.imread(path2 + image, 0).T/255
            img_sample = img_sample.reshape(1, 1, img_sample.shape[0], img_sample.shape[1])
            train_img.append(img_train)
            sample_img.append(img_sample)
            assert(img_sample.shape == (1, 1, 144, 144))

        train_image = Variable(torch.from_numpy(np.concatenate(train_img, axis = 0)).cuda())
        sample_image = Variable(torch.from_numpy(np.concatenate(sample_img, axis = 0)).cuda())
        label = torch.full((train_image.shape[0],), real_label, device=device)

        # Training the discriminator... minimizing -(log(D(x)) + log(1 - D(G(z))))
        dis.zero_grad()
        Gen.zero_grad()
        G_z = Gen(sample_image.detach())

        disc_real_out = dis(train_image.detach()).view(-1)
        error_real = GAN_loss(disc_real_out, label)
        error_real.backward()

        disc_fake_out = dis(G_z.detach()).view(-1)
        label.fill_(fake_label)
        error_fake = GAN_loss(disc_fake_out, label)
        error_fake.backward()

        total_disc_error = error_real + error_fake
        D_optimizer.step()

        # Training the generator... maximizing log(D(G(z)))
        D_G_z = dis(G_z.detach()).view(-1)
        label.fill_(real_label)
        error_gen = GAN_loss(D_G_z, label)
        error_gen.backward()
        G_optimizer.step()

        G_losses.append(error_gen.item())
        D_losses.append(total_disc_error.item())
        print("Discriminator Loss : ", total_disc_error.item(), "\t", "Generator Loss : ", error_gen.item())

Related

Model yields the same prediction for all images in the inference stage

I am using transfer learning (EfficientNet-b0) to train a model with the Adam optimizer and CrossEntropyLoss for an image classification task.
The model reaches 96% validation accuracy and 99% training accuracy, but inference fails: every image tested yields the same prediction.
I have tried lowering the learning rate to a very small value and making the batch size smaller.
What am I doing wrong?
Below are my training and inference code.
from efficientnet_pytorch import EfficientNet
from torch import nn
from torchvision import models

# using efficientnet model based transfer learning
class EffNet(nn.Module):
    def __init__(self, numClasses):
        self.numClasses = numClasses
        self.effNet = {0: models.efficientnet_b0(pretrained = False, num_classes= self.numClasses),
                       1: models.efficientnet_b1(pretrained = False, num_classes= self.numClasses),
                       2: models.efficientnet_b2(pretrained = False, num_classes= self.numClasses),
                       3: models.efficientnet_b3(pretrained = False, num_classes= self.numClasses),
                       4: models.efficientnet_b4(pretrained = False, num_classes= self.numClasses),
                       5: models.efficientnet_b5(pretrained = False, num_classes= self.numClasses),
                       6: models.efficientnet_b6(pretrained = False, num_classes= self.numClasses),
                       7: models.efficientnet_b7(pretrained = False, num_classes= self.numClasses)
                       }
        # self.effNet = {0: EfficientNet.from_name(model_name='efficientnet-b0', num_classes=self.numClasses),
        #                1: EfficientNet.from_name(model_name='efficientnet-b1', num_classes=self.numClasses),
        #                2: EfficientNet.from_name(model_name='efficientnet-b2', num_classes=self.numClasses),
        #                3: EfficientNet.from_name(model_name='efficientnet-b3', num_classes=self.numClasses),
        #                4: EfficientNet.from_name(model_name='efficientnet-b4', num_classes=self.numClasses),
        #                5: EfficientNet.from_name(model_name='efficientnet-b5', num_classes=self.numClasses),
        #                6: EfficientNet.from_name(model_name='efficientnet-b6', num_classes=self.numClasses),
        #                7: EfficientNet.from_name(model_name='efficientnet-b7', num_classes=self.numClasses)
        #                }

    def getEffnetClassification(self, num_layers, fine_tune):
        effNet = self.effNet[num_layers]
        if fine_tune:
            print('[INFO]: Fine-tuning all layers...')
            for params in effNet.parameters():
                params.requires_grad = True
        elif not fine_tune:
            print('[INFO]: Freezing hidden layers...')
            for params in effNet.parameters():
                params.requires_grad = False
        # Change the final classification head.
        n_features = effNet.classifier[1].in_features
        effNet.classifier = nn.Linear(in_features=n_features, out_features=self.numClasses)
        return effNet
from torchvision import models, transforms
import sys
from efficientNet import EffNet

class Network():
    def __init__(self, num_classes):
        self.numClasses = num_classes

    def getNetwork(self, network_name, num_layers=2, fine_tune=False):
        if "efficient" in network_name:
            #network = EfficientNet.from_name(network_name)
            nw = EffNet(self.numClasses)
            print("In EffNet")
            print(num_layers)
            print(self.numClasses)
            print(fine_tune)
            network = nw.getEffnetClassification(num_layers=num_layers, fine_tune=fine_tune)
        elif "resnet" in network_name:
            nw = resNet()
            network = nw.getResnetClassification(num_layers)
        else:
            try:
                method = getattr(models, network_name)
            except AttributeError:
                raise NotImplementedError("Pytorch does not implement `{}`".format(network_name))
            network = method(pretrained=False)
        return network
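For reference, a hypothetical usage sketch of the factory above (the module name and class count here are made up, not from the original post):

from network import Network   # assumed module name for the Network class above

factory = Network(num_classes=4)
model = factory.getNetwork("efficientnet", num_layers=0, fine_tune=False)
print(model.classifier)        # should now be a fresh Linear head with 4 outputs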
Training code:
print("Printing Phase")
print(phase)
torch.cuda.empty_cache()
gc.collect()
bestValLoss = float('inf')
bestAcc = 0
bestLoss = float('inf')
total_step = len(self.dataLoader[phase])
losses = list()
acc = list()
valLosses = list()
valAcc = list()
datadict = {}
if(patience!=None):
earlystop = EarlyStopping(patience = patience,verbose = True)
for epoch in range(self.epochs):
startTime = datetime.now()
print('Epoch {}/{}'.format(epoch+1, self.epochs))
print('-' * 10)
for phase in ['train', 'val']:
if phase == 'train':
self.model.train()
else:
self.model.eval()
running_loss = 0.0
running_corrects = 0
total=0
for batch_id, (imgName, inputs, labels) in enumerate(self.dataLoader[phase]): # Change Here
print("BatchID")
print(batch_id)
inputs = inputs.to(device)
labels = labels.to(device)
self.optimizer.zero_grad()
outputs = self.model(inputs)
#print(outputs)
#print(labels)
loss = self.criterion(outputs, labels.long())
##loss = self.criterion(outputs.float(), labels.float()) # This was changed only for BCEWithLogitsLoss. if you change Loss Type change to line above
if phase == 'train':
loss.backward()
self.optimizer.step()
_, preds = torch.max(outputs, 1)
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
total += labels.size(0)
epoch_loss = running_loss / len(self.dataLoader[phase])
epoch_acc = running_corrects.double() / total
print('{} loss: {:.4f}, acc: {:.4f}'.format(phase,
epoch_loss,
epoch_acc))
endTime = datetime.now()
print('{} loss: {:.4f}, acc: {:.4f}'.format(phase,
epoch_loss,
epoch_acc))
print('Time For Epoch {} :: {} seconds '.format(epoch, (endTime-startTime).total_seconds()))
losses.append(epoch_loss)
acc.append(epoch_acc.cpu().numpy().item())
print("Printing in Epoch")
print(losses)
print(acc)
if epoch_acc > bestAcc:
bestAcc = epoch_acc
bestModelWeights = copy.deepcopy(self.model.state_dict())
torch.save(bestModelWeights, self.path_model)
if epoch_loss < bestLoss:
bestLoss = epoch_loss
torch.save(self.model.state_dict(), self.path_model)
runningValAcc, runningValLoss = self.evalAndSave(bestValLoss, epoch)
valLosses.append(runningValLoss)
valAcc.append(runningValAcc.cpu().numpy().item())
print("Printing Val Data")
print(valLosses)
print(valAcc)
if runningValLoss < bestValLoss and patience:
earlystop(runningValLoss, self.model)
bestValLoss = runningValLoss
if earlystop.early_stop:
earlystop.save_checkpoint(runningValLoss, self.model)
print("Early Stopping")
break
print("Final Print in Trainer Before Return")
print("Training Losses")
print(losses)
print("Training Accuracy")
print(acc)
print("Validation Losses")
print(valLosses)
print("Validation Accuracy")
print(valAcc)
datadict["trainLoss"] = losses
datadict["trainAcc"] = acc
datadict["valLoss"] = valLosses
datadict["valAcc"] = valAcc
datadict["optimizer"] = self.optimizer
datadict["criterion"] = self.criterion
return self.model, datadict
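A side note, as a minimal sketch that is not part of the original post: the checkpoints above are saved with torch.save(self.model.state_dict(), ...), so at inference time the same architecture has to be rebuilt and the state_dict loaded back before evaluation. The class count and file name below are assumptions standing in for the real values of numClasses and self.path_model:

import torch
from efficientNet import EffNet   # the module imported in the question's code

model = EffNet(4).getEffnetClassification(num_layers=0, fine_tune=False)  # same head as used in training
state = torch.load("best_model.pth", map_location="cpu")                  # stands in for self.path_model
model.load_state_dict(state)
model.eval()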
Inference script:
with torch.no_grad():
    self.model.eval()
    #sm = nn.Softmax(dim = 1)
    for batch_id, (imgPaths, inputs, labels) in enumerate(self.dataLoader):
        for imgPath, image, label in zip(imgPaths, inputs, labels):
            output = None
            predictedClassName = None
            predictions = None
            topclass = None
            topk = None
            new_record = pd.DataFrame()
            image = image.to(device)
            label = label.to(device)
            print("Printing Label")
            print(label)
            image_tensor = image.unsqueeze_(0)
            output = self.model(image_tensor.cuda())
            #print(output)
            predictions = torch.exp(output.data)
            #_, predictions = torch.max(output.data, 1)
            #predictions = sm(output) #.softmax(output.data, dim=1)
            topk, topclass = torch.max(predictions, 1)
            print("Predictions")
            print(predictions)
            print("Checking")
            print(topk)
            print(topclass)
            correct += torch.sum(topk == label.long()).cpu().numpy()
            totalPredictions.extend(topk.cpu().numpy())
            #print("Predictions")
            #print(totalPredictions)
            totalGT.extend([label.cpu().numpy().item()])
            #print("Label")
            #print([label.cpu().numpy()])
            #print(label.cpu().numpy().item())
            #k = output_.item()==label.item()
            print("FileName")
            print(imgPath)
            className = self.dictClasses.get(label.item(), None)
            predictedClassName = self.dictClasses.get(topclass.item(), None)
            print("Predicted Class Names")
            print(predictedClassName)
            #print(predictedClassNameList)
            predictedClassNameList.append(predictedClassName)
            new_record = pd.DataFrame([[imgPath, topk.cpu().numpy().item(), predictedClassName]],
                                      columns=["FileName", "Confidence", "Classification"])
            dfInfoTable = pd.concat([dfInfoTable, new_record])
            cnt = cnt + 1
            if cnt % 100 == 0:
                path = validationFigureLoc + "\\" + "ValidationOuput.csv"
                dfInfoTable.to_csv(path, index=False, header=True)
                print(cnt)
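One thing worth double-checking in the inference loop above (an observation, not part of the original post): the network was trained with CrossEntropyLoss, so it outputs raw logits, and torch.exp(output) on logits is not a normalized probability; also, correct is incremented by comparing the confidence values (topk) with the labels rather than the predicted class indices (topclass). A minimal sketch of class prediction from logits:

import torch
import torch.nn.functional as F

def predict_batch(model, inputs):
    """Return (confidence, predicted class index) for a batch from a logits-producing model."""
    model.eval()
    with torch.no_grad():
        logits = model(inputs)               # raw scores, shape (N, num_classes)
        probs = F.softmax(logits, dim=1)     # normalized probabilities
        confidence, predicted = probs.max(dim=1)
    return confidence, predicted

# usage sketch: accuracy compares predicted class indices with the labels
# confidence, predicted = predict_batch(self.model, inputs.to(device))
# correct += (predicted == labels).sum().item()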

Incorporating a piecewise function in an ODE using Octave code

[Image: the differential equation, in which M_v is a piecewise function]
[Image: the piecewise definition of M_v]
I have coded the differential equation in Octave using the Runge-Kutta 4 (RK4) method. However, the result is not as desired, which I believe is because the piecewise function is coded incorrectly. Could anyone help me code the piecewise function correctly so that the differential equation can be solved with the RK4 method?
% vIRt code of the equations given in the manuscript
function x = pieceWise(s)
  x = zeros(size(s));
  ind = s > 0;
  x(ind) = s(ind);
endfunction

% Defines variables
global tau_s taur_a beta_r I_ext J_inter J_intra Jr_a I_0 lp;
tau_s = 10; taur_a = 83; beta_r = 0.0175; Jr_a = 172.97; J_inter = 81; J_intra = 32.4; I_0 = 0.29; I_ext = 20;

% step sizes
t0 = 0;
tfinal = 200;
h = 0.05;
n = ceil((tfinal-t0)/h)+1;

% initial conditions
s_r(1) = 0;
s_p(1) = 0.3556;
a_p(1) = 6.151;
a_r(1) = 0;
t(1) = 0;

% function handles
S_r = @(t,s_r,s_p,a_r,a_p) -(s_r/tau_s) + beta_r*pieceWise(I_ext-J_intra*s_r-J_inter*s_p-a_r-I_0);
A_r = @(t,s_r,s_p,a_r,a_p) (-a_r+Jr_a*beta_r*pieceWise(I_ext-J_intra*s_r-J_inter*s_p-a_r-I_0))/taur_a;
S_p = @(t,s_r,s_p,a_p,a_r) -(s_p/tau_s) + beta_r*pieceWise(I_ext-J_inter*s_r-J_intra*s_p-a_p-I_0);
A_p = @(t,s_r,s_p,a_p,a_r) (-a_p+Jr_a*beta_r*pieceWise((I_ext-J_inter*s_r-J_intra*s_p-a_p-I_0)))/taur_a;

for i = 1:n
  % updates time
  t(i+1) = t(i)+h;
  % updates S_r, A_r, S_p and A_p
  k1S_r = S_r(t(i), s_r(i), s_p(i), a_r(i), a_p(i));
  k1A_r = A_r(t(i), s_r(i), s_p(i), a_r(i), a_p(i));
  k1S_p = S_p(t(i), s_r(i), s_p(i), a_r(i), a_p(i));
  k1A_p = A_p(t(i), s_r(i), s_p(i), a_r(i), a_p(i));
  k2S_r = S_r(t(i)+h/2, s_r(i)+h/2*k1S_r, s_p(i)+h/2*k1S_p, a_r(i)+h/2*k1A_r, a_p(i)+h/2*k1A_p);
  k2A_r = A_r(t(i)+h/2, s_r(i)+h/2*k1S_r, s_p(i)+h/2*k1S_p, a_r(i)+h/2*k1A_r, a_p(i)+h/2*k1A_p);
  k2S_p = S_p(t(i)+h/2, s_r(i)+h/2*k1S_r, s_p(i)+h/2*k1S_p, a_r(i)+h/2*k1A_r, a_p(i)+h/2*k1A_p);
  k2A_p = A_p(t(i)+h/2, s_r(i)+h/2*k1S_r, s_p(i)+h/2*k1S_p, a_r(i)+h/2*k1A_r, a_p(i)+h/2*k1A_p);
  k3S_r = S_r(t(i)+h/2, s_r(i)+h/2*k2S_r, s_p(i)+h/2*k2S_p, a_r(i)+h/2*k2A_r, a_p(i)+h/2*k2A_p);
  k3A_r = A_r(t(i)+h/2, s_r(i)+h/2*k2S_r, s_p(i)+h/2*k2S_p, a_r(i)+h/2*k2A_r, a_p(i)+h/2*k2A_p);
  k3S_p = S_p(t(i)+h/2, s_r(i)+h/2*k2S_r, s_p(i)+h/2*k2S_p, a_r(i)+h/2*k2A_r, a_p(i)+h/2*k2A_p);
  k3A_p = A_p(t(i)+h/2, s_r(i)+h/2*k2S_r, s_p(i)+h/2*k2S_p, a_r(i)+h/2*k2A_r, a_p(i)+h/2*k2A_p);
  k4S_r = S_r(t(i)+h, s_r(i)+h*k3S_r, s_p(i)+h*k3S_p, a_r(i)+h*k3A_r, a_p(i)+h*k3A_p);
  k4A_r = A_r(t(i)+h, s_r(i)+h*k3S_r, s_p(i)+h*k3S_p, a_r(i)+h*k3A_r, a_p(i)+h*k3A_p);
  k4S_p = S_p(t(i)+h, s_r(i)+h*k3S_r, s_p(i)+h*k3S_p, a_r(i)+h*k3A_r, a_p(i)+h*k3A_p);
  k4A_p = A_p(t(i)+h, s_r(i)+h*k3S_r, s_p(i)+h*k3S_p, a_r(i)+h*k3A_r, a_p(i)+h*k3A_p);
  s_r(i+1) = s_r(i)+h/6*(k1S_r + 2*k2S_r + 2*k3S_r + k4S_r);
  s_p(i+1) = s_p(i)+h/6*(k1S_p + 2*k2S_p + 2*k3S_p + k4S_p);
  a_r(i+1) = a_r(i)+h/6*(k1A_r + 2*k2A_r + 2*k3A_r + k4A_r);
  a_p(i+1) = a_p(i)+h/6*(k1A_p + 2*k2A_p + 2*k3A_p + k4A_p);
end

M_r = beta_r*pieceWise((I_ext-J_intra*s_r-J_inter*s_p-a_r-I_0));
M_p = beta_r*pieceWise((I_ext-J_inter*s_r-J_intra*s_p-a_p-I_0));

% plots
##plot(t,M_p)
##hold on
##plot(t,M_r)
##plot(t,s_r)
##hold on
##plot(t,s_p)
##hold on
##plot(t,a_p)
##hold on
##plot(t,a_r)
##title('a_p and a_r')
##legend('s_r', 's_p', 'position', 'best')
##xlim([0,200])
drawnow;
%print -deps rk4.eps
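For what it's worth, the pieceWise function above is simply an elementwise rectifier: it returns s where s > 0 and 0 elsewhere. A small Python/NumPy sketch of the same behaviour, which can be handy for sanity-checking values outside Octave (the function name here is made up, not from the original post):

import numpy as np

def piece_wise(s):
    """Return s where s > 0 and 0 elsewhere (same behaviour as the Octave pieceWise)."""
    s = np.asarray(s, dtype=float)
    return np.where(s > 0, s, 0.0)

print(piece_wise([-1.5, 0.0, 2.3]))   # -> [0.  0.  2.3]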

How to Create Parametric Survival Learner for MLR in R

I am following the instructions (https://mlr.mlr-org.com/articles/tutorial/create_learner.html) to create a parametric survival learner to use with mlr. My code is below.
When I try makeLearner(id = "AFT", "surv.parametric"), I get the error
"dist is missing and no default is set", even though I already specified the default for dist in my code to be "weibull".
makeRLearner.surv.parametric = function() {
  makeRLearnerSurv(
    cl = "surv.parametric",
    package = "survival",
    par.set = makeParamSet(
      makeDiscreteLearnerParam(id = "dist", default = "weibull",
                               values = c("weibull", "exponential", "lognormal", "loglogistic"))
    ),
    properties = c("numerics", "factors", "weights", "prob", "rcens"),
    name = "Parametric Survival Model",
    short.name = "Parametric",
    note = "This is created based on MLR3 surv.parametric learner"
  )
}

trainLearner.surv.parametric = function(.learner, .task, .subset, .weights = NULL, ...) {
  f = getTaskFormula(.task)
  data = getTaskData(.task, subset = .subset)
  if (is.null(.weights)) {
    mod = survival::survreg(formula = f, data = data, ...)
  } else {
    mod = survival::survreg(formula = f, data = data, weights = .weights, ...)
  }
  mod
}

predictLearner.surv.parametric = function(.learner, .model, .newdata, ...) {
  survival::predict.survreg(.model$learner.model, newdata = .newdata, type = "response", ...)
}
Based on this, the prediction function needs to return linear predictors, so the type should be "lp", not "response". Also, mlr's cindex measure does not seem to be consistent with the output of survreg; based on this discussion, adding a minus sign seems to resolve the issue. So the prediction function would be as below.
predictLearner.surv.parametric = function(.learner, .model, .newdata, ...) {
  -predict(.model$learner.model, newdata = .newdata, type = "lp", ...)
}

PyTorch does not make initial weights random

I created a neural network that takes two 14x14-pixel grayscale images of digits (from the MNIST database) and returns 1 if the first digit is less than or equal to the second, and 0 otherwise. The code runs, but the initial weights are the same every time, when they should be random.
Forcing the initial weights to be random by adding the following line of code in the Net class does not help:
torch.nn.init.normal_(self.layer1.weight, mean=0.0, std=0.01)
Here is the code of the "main.py" file:
import os; os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import torch
import torch.nn as nn
from dlc_practical_prologue import *

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2*14*14, 32)
        #torch.nn.init.normal_(self.layer1.weight, mean=0.0, std=0.01)
        #self.layer2 = nn.Linear(100, 100)
        #self.layer3 = nn.Linear(100, 100)
        self.layer2 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.layer1(x))
        #x = torch.relu(self.layer2(x))
        #x = torch.relu(self.layer3(x))
        x = torch.sigmoid(self.layer2(x))
        return x

if __name__ == '__main__':
    # Data initialization
    N = 1000
    train_input, train_target, train_classes, _, _, _, = generate_pair_sets(N)
    _, _, _, test_input, test_target, test_classes = generate_pair_sets(N)
    train_input = train_input.view(-1, 2*14*14)
    test_input = test_input.view(-1, 2*14*14)
    train_target = train_target.view(-1, 1)
    test_target = test_target.view(-1, 1)

    # I convert the type to torch.float32
    train_input, train_target, train_classes, test_input, test_target, test_classes = \
        train_input.type(torch.float32), train_target.type(torch.float32), train_classes.type(torch.long), \
        test_input.type(torch.float32), test_target.type(torch.float32), test_classes.type(torch.long)

    # Create the neural network
    net = Net()

    # Training
    learning_rate = 0.01
    # Use MSELoss
    loss = nn.MSELoss()
    # Use Adam optimizer
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    EPOCHS = 50

    for param in net.parameters():
        print(param)

    for epoch in range(EPOCHS):
        target_predicted = net(train_input)
        l = loss(train_target, target_predicted)  # loss = nn.MSELoss()
        #l = loss(target_predicted, train_target)
        l.backward()
        optimizer.step()
        optimizer.zero_grad()
        #print(l)

    # Testing
    total = 1000
    correct = 0
    with torch.no_grad():
        correct = (test_target == net(test_input).round()).sum()
    print("Accuracy %.2f%%" % (correct / total * 100))
Here is the code for "dlc_practical_prologue.py":
import os; os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
import torch
from torchvision import datasets
import argparse
import os
import urllib

######################################################################

parser = argparse.ArgumentParser(description='DLC prologue file for practical sessions.')

parser.add_argument('--full',
                    action='store_true', default=False,
                    help = 'Use the full set, can take ages (default False)')

parser.add_argument('--tiny',
                    action='store_true', default=False,
                    help = 'Use a very small set for quick checks (default False)')

parser.add_argument('--seed',
                    type = int, default = 0,
                    help = 'Random seed (default 0, < 0 is no seeding)')

parser.add_argument('--cifar',
                    action='store_true', default=False,
                    help = 'Use the CIFAR data-set and not MNIST (default False)')

parser.add_argument('--data_dir',
                    type = str, default = None,
                    help = 'Where are the PyTorch data located (default $PYTORCH_DATA_DIR or \'./data\')')

# Timur's fix
parser.add_argument('-f', '--file',
                    help = 'quick hack for jupyter')

args = parser.parse_args()

if args.seed >= 0:
    torch.manual_seed(args.seed)

######################################################################
# The data

def convert_to_one_hot_labels(input, target):
    tmp = input.new_zeros(target.size(0), target.max() + 1)
    tmp.scatter_(1, target.view(-1, 1), 1.0)
    return tmp

def load_data(cifar = None, one_hot_labels = False, normalize = False, flatten = True):
    if args.data_dir is not None:
        data_dir = args.data_dir
    else:
        data_dir = os.environ.get('PYTORCH_DATA_DIR')
        if data_dir is None:
            data_dir = './data'

    if args.cifar or (cifar is not None and cifar):
        print('* Using CIFAR')
        cifar_train_set = datasets.CIFAR10(data_dir + '/cifar10/', train = True, download = True)
        cifar_test_set = datasets.CIFAR10(data_dir + '/cifar10/', train = False, download = True)
        train_input = torch.from_numpy(cifar_train_set.data)
        train_input = train_input.transpose(3, 1).transpose(2, 3).float()
        train_target = torch.tensor(cifar_train_set.targets, dtype = torch.int64)
        test_input = torch.from_numpy(cifar_test_set.data).float()
        test_input = test_input.transpose(3, 1).transpose(2, 3).float()
        test_target = torch.tensor(cifar_test_set.targets, dtype = torch.int64)
    else:
        print('* Using MNIST')
        ######################################################################
        # import torchvision
        # raw_folder = data_dir + '/mnist/raw/'
        # resources = [
        #     ("https://fleuret.org/dlc/data/train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
        #     ("https://fleuret.org/dlc/data/train-labels-idx1-ubyte.gz", "d53e105ee54ea40749a09fcbcd1e9432"),
        #     ("https://fleuret.org/dlc/data/t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"),
        #     ("https://fleuret.org/dlc/data/t10k-labels-idx1-ubyte.gz", "ec29112dd5afa0611ce80d1b7f02629c")
        # ]
        # os.makedirs(raw_folder, exist_ok=True)
        # # download files
        # for url, md5 in resources:
        #     filename = url.rpartition('/')[2]
        #     torchvision.datasets.utils.download_and_extract_archive(url, download_root=raw_folder, filename=filename, md5=md5)
        ######################################################################
        mnist_train_set = datasets.MNIST(data_dir + '/mnist/', train = True, download = True)
        mnist_test_set = datasets.MNIST(data_dir + '/mnist/', train = False, download = True)
        train_input = mnist_train_set.data.view(-1, 1, 28, 28).float()
        train_target = mnist_train_set.targets
        test_input = mnist_test_set.data.view(-1, 1, 28, 28).float()
        test_target = mnist_test_set.targets

    if flatten:
        train_input = train_input.clone().reshape(train_input.size(0), -1)
        test_input = test_input.clone().reshape(test_input.size(0), -1)

    if args.full:
        if args.tiny:
            raise ValueError('Cannot have both --full and --tiny')
    else:
        if args.tiny:
            print('** Reduce the data-set to the tiny setup')
            train_input = train_input.narrow(0, 0, 500)
            train_target = train_target.narrow(0, 0, 500)
            test_input = test_input.narrow(0, 0, 100)
            test_target = test_target.narrow(0, 0, 100)
        else:
            print('** Reduce the data-set (use --full for the full thing)')
            train_input = train_input.narrow(0, 0, 1000)
            train_target = train_target.narrow(0, 0, 1000)
            test_input = test_input.narrow(0, 0, 1000)
            test_target = test_target.narrow(0, 0, 1000)

    print('** Use {:d} train and {:d} test samples'.format(train_input.size(0), test_input.size(0)))

    if one_hot_labels:
        train_target = convert_to_one_hot_labels(train_input, train_target)
        test_target = convert_to_one_hot_labels(test_input, test_target)

    if normalize:
        mu, std = train_input.mean(), train_input.std()
        train_input.sub_(mu).div_(std)
        test_input.sub_(mu).div_(std)

    return train_input, train_target, test_input, test_target

######################################################################

def mnist_to_pairs(nb, input, target):
    input = torch.functional.F.avg_pool2d(input, kernel_size = 2)
    a = torch.randperm(input.size(0))
    a = a[:2 * nb].view(nb, 2)
    input = torch.cat((input[a[:, 0]], input[a[:, 1]]), 1)
    classes = target[a]
    target = (classes[:, 0] <= classes[:, 1]).long()
    return input, target, classes

######################################################################

def generate_pair_sets(nb):
    if args.data_dir is not None:
        data_dir = args.data_dir
    else:
        data_dir = os.environ.get('PYTORCH_DATA_DIR')
        if data_dir is None:
            data_dir = './data'

    train_set = datasets.MNIST(data_dir + '/mnist/', train = True, download = True)
    train_input = train_set.data.view(-1, 1, 28, 28).float()
    train_target = train_set.targets

    test_set = datasets.MNIST(data_dir + '/mnist/', train = False, download = True)
    test_input = test_set.data.view(-1, 1, 28, 28).float()
    test_target = test_set.targets

    return mnist_to_pairs(nb, train_input, train_target) + \
           mnist_to_pairs(nb, test_input, test_target)

######################################################################
Note that I had to add the following line of code to run the code on Windows 10; it is not necessary on Linux.
import os; os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
On Linux I also always get the same initial weights.
Please, can you help me?
Correct me if I'm wrong here, but the initial weights come out the same on every run because of the file you import. When you import dlc_practical_prologue.py, it contains this:
if args.seed >= 0:
    torch.manual_seed(args.seed)
which fires if the seed is >= 0 (the default is 0). Seeding PyTorch's global RNG at import time makes every subsequent weight initialization deterministic, so the layers get the same initial weights on each run. Check if this is the case.
The solution was to delete the following lines from "dlc_practical_prologue.py":
if args.seed >= 0:
    torch.manual_seed(args.seed)
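As a minimal sketch of an alternative (not from the original answer): since the prologue only seeds when args.seed >= 0, the file can also be left unchanged and the seeding either disabled from the command line or undone after the import.

# Option 1: disable seeding via the prologue's own flag (see its argparse help):
#     python main.py --seed -1
#
# Option 2: reseed the global RNG after the import, before constructing the network.
import torch
from dlc_practical_prologue import *   # importing this runs torch.manual_seed(0) by default

torch.seed()       # reseed from a non-deterministic source
# net = Net()      # weights created after this point differ between runs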

Keras: handling a large dataset that cannot fit into memory

I'm working on facial expression recognition and I'm using Keras. I've collected many datasets and applied data augmentation to the images; I now have about 500,000 images saved (as pixels) in a .csv file (same format as fer2013.csv).
This is the code I'm using:
def Zerocenter_ZCA_whitening_Global_Contrast_Normalize(list):
    Intonumpyarray = numpy.asarray(list)
    data = Intonumpyarray.reshape(img_width, img_height)
    data2 = ZeroCenter(data)
    data3 = zca_whitening(flatten_matrix(data2)).reshape(img_width, img_height)
    data4 = global_contrast_normalize(data3)
    data5 = numpy.rot90(data4, 3)
    return data5

def load_data():
    train_x = []
    train_y = []
    val_x = []
    val_y = []
    test_x = []
    test_y = []
    f = open('ALL.csv')
    csv_f = csv.reader(f)
    for row in csv_f:
        if str(row[2]) == "Training":
            temp_list_train = []
            for pixel in row[1].split():
                temp_list_train.append(int(pixel))
            data = Zerocenter_ZCA_whitening_Global_Contrast_Normalize(temp_list_train)
            train_y.append(int(row[0]))
            train_x.append(data.reshape(data_resh).tolist())
        elif str(row[2]) == "PublicTest":
            temp_list_validation = []
            for pixel in row[1].split():
                temp_list_validation.append(int(pixel))
            data = Zerocenter_ZCA_whitening_Global_Contrast_Normalize(temp_list_validation)
            val_y.append(int(row[0]))
            val_x.append(data.reshape(data_resh).tolist())
        elif str(row[2]) == "PrivateTest":
            temp_list_test = []
            for pixel in row[1].split():
                temp_list_test.append(int(pixel))
            data = Zerocenter_ZCA_whitening_Global_Contrast_Normalize(temp_list_test)
            test_y.append(int(row[0]))
            test_x.append(data.reshape(data_resh).tolist())
    return train_x, train_y, val_x, val_y, test_x, test_y
Then I load the data and feed it to the generator:
Train_x, Train_y, Val_x, Val_y, Test_x, Test_y = load_data()

Train_x = numpy.asarray(Train_x)
Train_x = Train_x.reshape(Train_x.shape[0], img_rows, img_cols)
Test_x = numpy.asarray(Test_x)
Test_x = Test_x.reshape(Test_x.shape[0], img_rows, img_cols)
Val_x = numpy.asarray(Val_x)
Val_x = Val_x.reshape(Val_x.shape[0], img_rows, img_cols)

Train_x = Train_x.reshape(Train_x.shape[0], img_rows, img_cols, 1)
Test_x = Test_x.reshape(Test_x.shape[0], img_rows, img_cols, 1)
Val_x = Val_x.reshape(Val_x.shape[0], img_rows, img_cols, 1)

Train_x = Train_x.astype('float32')
Test_x = Test_x.astype('float32')
Val_x = Val_x.astype('float32')

Train_y = np_utils.to_categorical(Train_y, nb_classes)
Test_y = np_utils.to_categorical(Test_y, nb_classes)
Val_y = np_utils.to_categorical(Val_y, nb_classes)

datagen = ImageDataGenerator(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    shear_range=0.03,
    zoom_range=0.03,
    vertical_flip=False)

datagen.fit(Train_x)

model.fit_generator(datagen.flow(Train_x, Train_y, batch_size=batch_size),
                    samples_per_epoch=Train_x.shape[0],
                    nb_epoch=nb_epoch,
                    validation_data=(Val_x, Val_y))
When I run the code, RAM usage grows and grows until the PC freezes (I have 16 GB). It gets stuck when load_data() is called. Is there any solution to this problem that fits my code?
Seems to be a duplicate of this question. Basically, you'll have to use fit_generator() instead of fit() and pass in a function that loads the data into your model one batch at a time instead of all at once.
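To make that concrete, here is a minimal sketch (not from the original answer) of a Python generator that reads ALL.csv in batches and yields preprocessed arrays, so the whole file never has to sit in RAM at once. The helper Zerocenter_ZCA_whitening_Global_Contrast_Normalize and the names img_rows, img_cols, nb_classes, batch_size are assumed from the question's code; the sample counts in the usage comment are placeholders:

import csv
import numpy
from keras.utils import np_utils

def csv_batch_generator(csv_path, usage="Training", batch_size=128):
    """Yield (x, y) batches read from the CSV on the fly."""
    while True:                                   # Keras generators must loop forever
        batch_x, batch_y = [], []
        with open(csv_path) as f:
            for row in csv.reader(f):
                if str(row[2]) != usage:
                    continue
                pixels = [int(p) for p in row[1].split()]
                data = Zerocenter_ZCA_whitening_Global_Contrast_Normalize(pixels)
                batch_x.append(data.reshape(img_rows, img_cols, 1))
                batch_y.append(int(row[0]))
                if len(batch_x) == batch_size:
                    yield (numpy.asarray(batch_x, dtype='float32'),
                           np_utils.to_categorical(batch_y, nb_classes))
                    batch_x, batch_y = [], []

# usage sketch with the old Keras 1 API used in the question:
# model.fit_generator(csv_batch_generator('ALL.csv', 'Training', batch_size),
#                     samples_per_epoch=train_sample_count,
#                     nb_epoch=nb_epoch,
#                     validation_data=csv_batch_generator('ALL.csv', 'PublicTest', batch_size),
#                     nb_val_samples=val_sample_count)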