Predicted Image id and box from SSD - deep-learning

How do I find the predicted image id and box from SSD? I am using this GitHub link. Here is the test function in which I want to save the image id and box:
def test(loader, net, criterion, device):
    net.eval()
    running_loss = 0.0
    running_regression_loss = 0.0
    running_classification_loss = 0.0
    num = 0
    for _, data in enumerate(loader):
        images, boxes, labels = data
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)
        num += 1
        with torch.no_grad():
            confidence, locations = net(images)
            regression_loss, classification_loss = criterion(confidence, locations, labels, boxes)
            loss = regression_loss + classification_loss
        running_loss += loss.item()
        running_regression_loss += regression_loss.item()
        running_classification_loss += classification_loss.item()
    return running_loss / num, running_regression_loss / num, running_classification_loss / num

Assume:
y = net(x)
detections = y.data
You can then print the detection info with the following:
# this will loop over predictions class by class
for i in range(detections.size(1)):
    # this will loop over each detection in the class
    for j in range(detections.size(2)):
        score = detections[0, i, j, 0]
        coords = detections[0, i, j, 1:]
        print(f"Class id: {i} \t Score: {score} \t Coords: {coords}")
See the demo for more details.
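If the goal is to save each image id together with its predicted boxes (as asked above), a minimal sketch along these lines could work. The [1, num_classes, top_k, 5] layout matches the loop above, but the 0.5 threshold and the idea that the caller supplies an image_id are my assumptions, not part of the linked repository:
def collect_detections(detections, image_id, score_threshold=0.5):
    """Return (image_id, class_id, score, box) tuples above a score cut-off.

    Assumes detections has shape [1, num_classes, top_k, 5], where the
    last axis holds (score, x1, y1, x2, y2) as in the loop above.
    """
    results = []
    for i in range(detections.size(1)):        # loop over classes
        for j in range(detections.size(2)):    # loop over detections per class
            score = float(detections[0, i, j, 0])
            if score < score_threshold:        # arbitrary cut-off, tune as needed
                continue
            box = detections[0, i, j, 1:].cpu().numpy()
            results.append((image_id, i, score, box))
    return results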

Related

Why am I facing OOM in dali/pytorch inference pipeline?

I have a trained ResNet-50 model and I am using it for inference on medical images in JPEG 2000 format. I am using a DALI pipeline to speed up the process, but I run into OOM after inferring 10 batches, even though I delete tensors after each batch.
Batch size: 32
Image size: 512 x 512
GPU: P100, 16 GB
@pipeline_def
def j2k_decode_pipeline():
    jpegs, labels = fn.readers.file(files=j2kfiles)
    images = fn.experimental.decoders.image(jpegs, device='mixed', output_type=types.RGB, dtype=DALIDataType.INT16)
    images = fn.resize(
        images,
        resize_x=IMAGE_SIZE,
        resize_y=IMAGE_SIZE,
        resize_z=3,
        interp_type=types.INTERP_LANCZOS3)
    return images, labels
def get_preds_jk2(max_batch_size):
    cnt = math.ceil(len(j2kfiles) / max_batch_size) - 1
    pipe = j2k_decode_pipeline(batch_size=max_batch_size, num_threads=2, device_id=0, debug=True)
    pipe.build()
    dali_iter = DALIGenericIterator(pipe, ['data', 'label'])

    model_weights_path = '/model-2/model_weights.pth'
    model = models.resnet50()
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 2)
    model.load_state_dict(torch.load(model_weights_path))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    preds, labels = [], []
    for i, data in enumerate(dali_iter):
        print('batch ', i)
        labels.append(data[0]['label'])
        # Testing correctness of labels
        imgs = data[0]['data']
        im_temp = imgs - imgs.min()
        im_temp = im_temp / im_temp.max()
        im_temp = im_temp * 255
        imgs_8bit = im_temp.type(torch.uint8).float()
        imgs_tensor = imgs_8bit.permute(0, 3, 1, 2)
        imgs_tensor = imgs_tensor.to(device)
        del imgs, im_temp, imgs_8bit
        gc.collect()
        torch.cuda.empty_cache()

        with torch.no_grad():
            outputs = model(imgs_tensor)
        output_probs = torch.nn.functional.softmax(outputs, dim=1).data.cpu().numpy()[:, 1]
        preds.append(output_probs)
        del imgs_tensor, outputs, output_probs
        gc.collect()
        torch.cuda.empty_cache()

        if i == cnt:
            print(i)
            break
    return preds, labels
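This question is shown without an accepted answer. One precaution worth trying, offered as an assumption rather than a confirmed diagnosis: if DALIGenericIterator hands back device tensors, the labels list above keeps one GPU tensor alive per batch, which del and empty_cache() cannot reclaim. Storing a host copy instead looks like this:
# Hedged suggestion: detach and copy labels to host memory before
# accumulating them, so no per-batch GPU buffers stay referenced
# by the Python list across iterations.
labels.append(data[0]['label'].detach().cpu())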

RuntimeError: shape '[128, -1]' is invalid for input of size 378 pytorch

I'm running a spiking neural network on data that has 21 features with a batch size of 128. I get the following error after many iterations of training (the error doesn't arise immediately!):
RuntimeError: shape '[128, -1]' is invalid for input of size 378
When I print the shapes of the tensors right before the failing call, I get the following:
Train
torch.Size([128, 21])
Test
torch.Size([128, 21])
This is my network:
class SpikingNeuralNetwork(nn.Module):
    """
    Parameters in SpikingNeuralNetwork class:
    1. number_inputs: Number of inputs to the SNN.
    2. number_hidden: Number of hidden neurons.
    3. number_outputs: Number of output classes.
    4. beta: Decay rate.
    """
    def __init__(self, number_inputs, number_hidden, number_outputs, beta):
        super().__init__()
        self.number_inputs = number_inputs
        self.number_hidden = number_hidden
        self.number_outputs = number_outputs
        self.beta = beta

        # Initialize layers
        self.fc1 = nn.Linear(self.number_inputs, self.number_hidden)  # Applies linear transformation to all input points
        self.lif1 = snn.Leaky(beta=self.beta)  # Integrates weighted input over time, emitting a spike if the threshold condition is met
        self.fc2 = nn.Linear(self.number_hidden, self.number_outputs)  # Applies linear transformation to the output spikes of lif1
        self.lif2 = snn.Leaky(beta=self.beta)  # Another spiking neuron, integrating the weighted spikes over time

    def forward(self, x):
        """
        Forward propagation of the SNN. This is only called once the input
        argument x is explicitly passed into net.
        :param x: input passed into the network
        :return: output of the final spiking neuron, stacked over time steps
        """
        num_steps = 25
        # Initialize hidden states at t = 0
        mem1 = self.lif1.init_leaky()
        mem2 = self.lif2.init_leaky()
        # Record the final layer
        spk2_rec = []
        mem2_rec = []
        for step in range(num_steps):
            cur1 = self.fc1(x)
            spk1, mem1 = self.lif1(cur1, mem1)
            cur2 = self.fc2(spk1)
            spk2, mem2 = self.lif2(cur2, mem2)
            spk2_rec.append(spk2)
            mem2_rec.append(mem2)
        return torch.stack(spk2_rec, dim=0), torch.stack(mem2_rec, dim=0)
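For orientation, here is a quick shape check of this network (a sketch; the hidden size and beta are arbitrary, and snntorch is assumed to be imported as snn). The same static input is repeated for num_steps = 25 steps, so the outputs gain a leading time dimension:
net = SpikingNeuralNetwork(number_inputs=21, number_hidden=64, number_outputs=100, beta=0.9)
x = torch.rand(128, 21)   # one full batch of 21-feature samples
spk_rec, mem_rec = net(x)
print(spk_rec.shape)      # torch.Size([25, 128, 100])
print(mem_rec.shape)      # torch.Size([25, 128, 100])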
This is my training loop:
def training_loop(net, train_loader, test_loader, dtype, device, optimizer):
    num_epochs = 1
    loss_history = []
    test_loss_history = []
    counter = 0
    # Temporal dynamics
    num_steps = 25
    # Outer training loop
    for epoch in range(num_epochs):
        iter_counter = 0
        train_batch = iter(train_loader)
        # Minibatch training loop
        for data, targets in train_batch:
            data = data.to(device)
            targets = targets.to(device)

            # Forward pass
            net.train()
            print("Train")
            print(data.size())
            spk_rec, mem_rec = net(data.view(batch_size, -1))

            # Initialize the loss and sum over time
            loss_val = torch.zeros((1), dtype=dtype, device=device)
            for step in range(num_steps):
                loss_val += loss_function(mem_rec[step], targets.long().flatten().to(device))

            # Gradient calculation and weight update
            optimizer.zero_grad()
            loss_val.backward()
            optimizer.step()

            # Store loss history for future plotting
            loss_history.append(loss_val.item())

            # Test set
            with torch.no_grad():
                net.eval()
                test_data, test_targets = next(iter(test_loader))
                test_data = test_data.to(device)
                test_targets = test_targets.to(device)

                # Test set forward pass
                print("Test")
                print(test_data.size())
                test_spk, test_mem = net(test_data.view(batch_size, -1))

                # Test set loss
                test_loss = torch.zeros((1), dtype=dtype, device=device)
                for step in range(num_steps):
                    test_loss += loss_function(test_mem[step], test_targets.long().flatten().to(device))
                test_loss_history.append(test_loss.item())

                # Print train/test loss and accuracy
                if counter % 50 == 0:
                    train_printer(epoch, iter_counter, counter, loss_history, data, targets, test_data, test_targets)
                counter = counter + 1
                iter_counter = iter_counter + 1
    return loss_history, test_loss_history
The error occurs on spk_rec, mem_rec = net(data.view(batch_size, -1)).
The code was adapted from https://snntorch.readthedocs.io/en/latest/tutorials/tutorial_5.html, where it was originally used for the MNIST dataset. However, I am not working with an image dataset; my dataset has 21 features and predicts a single target (with 100 classes). Based on some other forum answers, I tried changing data.view(batch_size, -1) and test_data.view(batch_size, -1) to data.view(batch_size, 21) and test_data.view(batch_size, 21), and my program is running through the training loop for now. Does anyone have suggestions for how I can run through training with no errors?
EDIT: I now get the error RuntimeError: shape '[128, 21]' is invalid for input of size 378 from spk_rec, mem_rec = net(data.view(batch_size, -1)).
Here are my DataLoaders:
train_loader = DataLoader(dataset = train, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(dataset = test, batch_size = batch_size, shuffle = True)
My batch size is 128.
Trying to run it myself to reproduce your problem, I found I was also missing a few pieces: the net parameters and the snn.Leaky import.
import torch
from torch import nn
from torch.utils.data import DataLoader
import snntorch as snn  # needed for snn.Leaky
class SpikingNeuralNetwork(nn.Module):
    """
    Parameters in SpikingNeuralNetwork class:
    1. number_inputs: Number of inputs to the SNN.
    2. number_hidden: Number of hidden neurons.
    3. number_outputs: Number of output classes.
    4. beta: Decay rate.
    """
    def __init__(self, number_inputs, number_hidden, number_outputs, beta):
        super().__init__()
        self.number_inputs = number_inputs
        self.number_hidden = number_hidden
        self.number_outputs = number_outputs
        self.beta = beta

        # Initialize layers
        self.fc1 = nn.Linear(self.number_inputs, self.number_hidden)  # Applies linear transformation to all input points
        self.lif1 = snn.Leaky(beta=self.beta)  # Integrates weighted input over time, emitting a spike if the threshold condition is met
        self.fc2 = nn.Linear(self.number_hidden, self.number_outputs)  # Applies linear transformation to the output spikes of lif1
        self.lif2 = snn.Leaky(beta=self.beta)  # Another spiking neuron, integrating the weighted spikes over time

    def forward(self, x):
        """
        Forward propagation of the SNN. This is only called once the input
        argument x is explicitly passed into net.
        :param x: input passed into the network
        :return: output of the final spiking neuron, stacked over time steps
        """
        num_steps = 25
        # Initialize hidden states at t = 0
        mem1 = self.lif1.init_leaky()
        mem2 = self.lif2.init_leaky()
        # Record the final layer
        spk2_rec = []
        mem2_rec = []
        for step in range(num_steps):
            cur1 = self.fc1(x)
            spk1, mem1 = self.lif1(cur1, mem1)
            cur2 = self.fc2(spk1)
            spk2, mem2 = self.lif2(cur2, mem2)
            spk2_rec.append(spk2)
            mem2_rec.append(mem2)
        return torch.stack(spk2_rec, dim=0), torch.stack(mem2_rec, dim=0)
batch_size = 2
train = torch.rand(128, 21)
test = torch.rand(128, 21)
train_loader = DataLoader(dataset=train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test, batch_size=batch_size, shuffle=True)
# the original line was missing required arguments; the hidden size and beta
# here are arbitrary values chosen just to get the repro running
net = SpikingNeuralNetwork(number_inputs=21, number_hidden=64, number_outputs=100, beta=0.9)
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)  # fixed: was nn.optim.Adam
def training_loop(net, train_loader, test_loader, dtype, device, optimizer):
    num_epochs = 1
    loss_history = []
    test_loss_history = []
    counter = 0
    # Temporal dynamics
    num_steps = 25
    # Outer training loop
    for epoch in range(num_epochs):
        iter_counter = 0
        train_batch = iter(train_loader)
        # Minibatch training loop
        for data, targets in train_batch:
            data = data.to(device)
            targets = targets.to(device)

            # Forward pass
            net.train()
            print("Train")
            print(data.size())
            spk_rec, mem_rec = net(data.view(batch_size, -1))

            # Initialize the loss and sum over time
            loss_val = torch.zeros((1), dtype=dtype, device=device)
            for step in range(num_steps):
                loss_val += loss_function(mem_rec[step], targets.long().flatten().to(device))

            # Gradient calculation and weight update
            optimizer.zero_grad()
            loss_val.backward()
            optimizer.step()

            # Store loss history for future plotting
            loss_history.append(loss_val.item())

            # Test set
            with torch.no_grad():
                net.eval()
                test_data, test_targets = next(iter(test_loader))
                test_data = test_data.to(device)
                test_targets = test_targets.to(device)

                # Test set forward pass
                print("Test")
                print(test_data.size())
                test_spk, test_mem = net(test_data.view(batch_size, -1))

                # Test set loss
                test_loss = torch.zeros((1), dtype=dtype, device=device)
                for step in range(num_steps):
                    test_loss += loss_function(test_mem[step], test_targets.long().flatten().to(device))
                test_loss_history.append(test_loss.item())

                # Print train/test loss and accuracy
                if counter % 50 == 0:
                    train_printer(epoch, iter_counter, counter, loss_history, data, targets, test_data, test_targets)
                counter = counter + 1
                iter_counter = iter_counter + 1
    return loss_history, test_loss_history
Your code works just fine on the MNIST dataset, so I think it might be a problem with how the DataLoader is being called. My guess is that the total dataset is not evenly divisible by your batch_size; the error message supports this, since 378 = 18 x 21, i.e. the final batch contains only 18 of your 21-feature samples. If this is true, then you have two options (see the sketch after this list):
Instead of spk_rec, mem_rec = net(data.view(batch_size, -1)), try spk_rec, mem_rec = net(data.flatten(1)), which preserves the first dimension of your data.
Alternatively, you may need to set drop_last=True in the DataLoader functions.
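A minimal sketch of both options, reusing the names from the question (flatten and drop_last are standard PyTorch; only the surrounding values are assumed):
# Option 1: flatten everything after the batch dimension, so the batch
# size is taken from the data itself rather than hard-coded.
spk_rec, mem_rec = net(data.flatten(1))

# Option 2: drop the last, smaller batch so every batch has exactly
# batch_size samples and view(batch_size, -1) stays valid.
train_loader = DataLoader(dataset=train, batch_size=batch_size, shuffle=True, drop_last=True)
test_loader = DataLoader(dataset=test, batch_size=batch_size, shuffle=True, drop_last=True)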

issue with arcface ( 0 accuracy)

Hello guys, I've joined a university-level image recognition competition. In the test, they will give two images (people's faces) and my model needs to detect whether the pair of images shows the same person or not.
My model is a resnet18 with IR blocks and SE blocks, and it uses the ArcFace loss.
I can use only the MS1M dataset, with a total of 86876 classes.
The problem is that the loss is improving, but the accuracy is 0 and not changing.
Here is the part of the code I'm working on.
Train
def train_model(model, net, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        for phase in ['train']:
            if phase == 'train':
                model.train()  # Set model to training mode
            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            for inputs, labels in notebook.tqdm(dataloader):
                inputs = inputs.to(device)
                labels = labels.to(device).long()
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward
                # track history only if in train
                with torch.set_grad_enabled(phase == 'train'):
                    features = model(inputs)
                    outputs = net(features, labels)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()
            epoch_loss = running_loss / len(dataloader)
            epoch_acc = running_corrects.double() / len(dataloader)
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            # deep copy the model
            if phase == 'train' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save({'epoch': epoch,
                            'mode_state_dict': model.state_dict(),
                            'fc_state_dict': net.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict(),
                            'scheduler': scheduler.state_dict(),  # HERE IS THE CHANGE
                            }, f'/content/drive/MyDrive/inha_data/training_saver/training_stat{epoch}.pth')
                print(f'finished {epoch} and saved model_save_{epoch}.pt')
        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best train Acc: {:4f}'.format(best_acc))
    # load best model weights
    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), 'model_save.pt')
    return model
Parameters
train_dataset = MS1MDataset('train')
dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # device setup
num_classes = 86876
# normal classifier
# net = nn.Sequential(nn.Linear(512, num_classes))
# Feature extractor backbone, input is a 112x112 image, output is a 512-d feature vector
model_ft = resnet18(True)
# set metric
metric_fc = metrics.ArcMarginProduct(512, num_classes, s=30.0, m=0.50, easy_margin=False)
metric_fc.to(device)
# net = net.to(device)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
optimizer_ft = torch.optim.Adam([{'params': model_ft.parameters()}, {'params': metric_fc.parameters()}], lr=0.1)
# Decay LR by a factor of 0.1 every 4 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=4, gamma=0.1)
Arcface
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
import math

class ArcMarginProduct(nn.Module):
    r"""Implementation of large margin arc distance:
    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        s: norm of input feature
        m: margin
        cos(theta + m)
    """
    def __init__(self, in_features, out_features, s=30.0, m=0.50, easy_margin=False):
        super(ArcMarginProduct, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.s = s
        self.m = m
        self.weight = Parameter(torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)
        self.easy_margin = easy_margin
        self.cos_m = math.cos(m)
        self.sin_m = math.sin(m)
        self.th = math.cos(math.pi - m)
        self.mm = math.sin(math.pi - m) * m

    def forward(self, input, label):
        # --------------------------- cos(theta) & phi(theta) ---------------------------
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where(cosine > self.th, phi, cosine - self.mm)
        # --------------------------- convert label to one-hot ---------------------------
        # one_hot = torch.zeros(cosine.size(), requires_grad=True, device='cuda')
        one_hot = torch.zeros(cosine.size(), device='cuda')
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        # ------------- torch.where(out_i = x_i if condition_i else y_i) -------------
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)  # you can use torch.where if your torch.__version__ is 0.4
        output *= self.s
        # print(output)
        return output
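As a quick sanity check of the module above (a sketch; it assumes a CUDA device, since the forward pass hard-codes device='cuda' for the one-hot tensor, and the batch of 4 is arbitrary):
margin = ArcMarginProduct(512, 86876, s=30.0, m=0.50).cuda()
feats = torch.randn(4, 512, device='cuda')             # a batch of 4 embeddings
labels = torch.randint(0, 86876, (4,), device='cuda')  # their class ids
logits = margin(feats, labels)
print(logits.shape)  # torch.Size([4, 86876]); feed this to CrossEntropyLoss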
dataset
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.125, contrast=0.125, saturation=0.125),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# train_ms1_data = torchvision.datasets.ImageFolder('/content/drive/MyDrive/inha_data/train', transform=data_transforms)

class MS1MDataset(Dataset):
    def __init__(self, split):
        self.file_list = '/content/drive/MyDrive/inha_data/ID_List.txt'
        self.images = []
        self.labels = []
        self.transformer = data_transforms['train']
        with open(self.file_list) as f:
            files = f.read().splitlines()
        for i, fi in enumerate(files):
            fi = fi.split()
            image = "/content/" + fi[1]
            label = int(fi[0])
            self.images.append(image)
            self.labels.append(label)

    def __getitem__(self, index):
        img = Image.open(self.images[index])
        img = self.transformer(img)
        label = self.labels[index]
        return img, label

    def __len__(self):
        return len(self.images)
You can try using a smaller m in ArcFace, even a negative value.
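For instance, reusing the constructor from the question (the 0.2 value is only an illustration to start from; tune it):
# Smaller margin than the usual 0.50; even a negative m relaxes the
# constraint on the target logit, which can help training get off the ground.
metric_fc = metrics.ArcMarginProduct(512, num_classes, s=30.0, m=0.2, easy_margin=False)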

Pytorch - Loss for Object Localization

I am trying to perform an object localization task with MNIST, based on Andrew Ng's lecture here. I take the MNIST digits, randomly place them into a 90x90 image, and predict the digit and its center point. When I train, I get very poor results, and my question is about whether or not my loss function is set up correctly: I basically just take the cross-entropy for the digit and the MSE for each coordinate, and add them all up. Is this correct? I don't get any errors, but the performance is just horrendous.
My dataset is defined as follows (it returns the label and the x and y coordinates of the center of the digit):
class CustomMnistDataset_OL(Dataset):
    def __init__(self, df, test=False):
        '''
        df is a pandas dataframe with 28x28 columns for each pixel value in MNIST
        '''
        self.df = df
        self.test = test

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if self.test:
            image = np.reshape(np.array(self.df.iloc[idx, :]), (28, 28)) / 255.
        else:
            image = np.reshape(np.array(self.df.iloc[idx, 1:]), (28, 28)) / 255.

        # create the new image
        new_img = np.zeros((90, 90))  # images will be 90x90

        # randomly select a bottom-left corner to use for the digit
        x_min, y_min = randrange(90 - image.shape[0]), randrange(90 - image.shape[0])
        x_max, y_max = x_min + image.shape[0], y_min + image.shape[0]
        x_center = x_min + (x_max - x_min) / 2
        y_center = y_min + (y_max - y_min) / 2  # fixed: was (y_max - x_min), a typo (harmless here since the digit is square)
        new_img[x_min:x_max, y_min:y_max] = image

        label = [int(self.df.iloc[idx, 0]), x_center, y_center]  # the label consists of the digit and the center of the number
        sample = {"image": new_img, "label": label}
        return sample['image'], sample['label']
My training function is set up as follows:
loss_fn = nn.CrossEntropyLoss()
loss_mse = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def train(dataloader, model, loss_fn, loss_mse, optimizer):
    model.train()  # very important... This turns the model back to training mode
    size = len(train_dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        X, y0, y1, y2 = X.to(device), y[0].to(device), y[1].to(device), y[2].to(device)
        pred = model(X.float())
        # DEFINE LOSS HERE -------
        loss = loss_fn(pred[0], y0) + loss_mse(pred[1], y1.float()) + loss_mse(pred[2], y2.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

Caffe's transformer.preprocessing takes too long to complete

I wrote a simple script to test a model using PyCaffe, but I noticed it is extremely slow, even on GPU! My test set has 82K samples of size 256x256, and when I run the code given below, it takes hours to complete.
I even used batches of images instead of individual ones, yet nothing changed. It has currently been running for the past 5 hours, and only 50K samples have been processed! What should I do to make it faster?
Can I avoid using transformer.preprocess entirely? If so, how?
Here is the snippet:
# run on gpu
caffe.set_mode_gpu()

# Extract mean from the mean image file
mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
f = open(args.mean, 'rb')
mean_blobproto_new.ParseFromString(f.read())
mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)
f.close()

predicted_lables = []
true_labels = []
misclassified = []
class_names = ['unsafe', 'safe']
count = 0
correct = 0
batch = []
plabe_ls = []
batch_size = 50
net1 = caffe.Net(args.proto, args.model, caffe.TEST)
transformer = caffe.io.Transformer({'data': net1.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))
transformer.set_mean('data', mean_image[0].mean(1).mean(1))
transformer.set_raw_scale('data', 255)
transformer.set_channel_swap('data', (2, 1, 0))
net1.blobs['data'].reshape(batch_size, 3, 224, 224)
data_blob_shape = net1.blobs['data'].data.shape
data_blob_shape = list(data_blob_shape)
i = 0
mu = np.array([104, 117, 123])  # imagenet mean

# check and see if it's lmdb or leveldb
if(args.db_type.lower() == 'lmdb'):
    lmdb_env = lmdb.open(args.db_path)
    lmdb_txn = lmdb_env.begin()
    lmdb_cursor = lmdb_txn.cursor()
    for key, value in lmdb_cursor:
        count += 1
        datum = caffe.proto.caffe_pb2.Datum()
        datum.ParseFromString(value)
        label = int(datum.label)
        image = caffe.io.datum_to_array(datum).astype(np.uint8)
        if(count % 5000 == 0):
            print('count: ', count)
        if(i < batch_size):
            i += 1
            inf = key, image, label
            batch.append(inf)
        if(i >= batch_size):
            # process n images
            ims = []
            for x in range(len(batch)):
                ims.append(transformer.preprocess('data', batch[x][1]))  # - mean_image[0].mean(1).mean(1)
            net1.blobs['data'].data[...] = ims[:]
            out_1 = net1.forward()
            plbl = np.asarray(out_1['pred'])
            plbl = plbl.argmax(axis=1)
            for j in range(len(batch)):
                if(plbl[j] == batch[j][2]):
                    correct += 1
                else:
                    misclassified.append(batch[j][0])
                predicted_lables.append(plbl[j])
                true_labels.append(batch[j][2])
            batch.clear()
            i = 0
Update:
By replacing
for x in range(len(batch)):
    ims.append(transformer.preprocess('data', batch[x][1]))
net1.blobs['data'].data[...] = ims[:]
with
for x in range(len(batch)):
    img = batch[x][1]
    ims.append(img[:, 0:224, 0:224])
82K samples were processed in less than a minute. The culprit is indeed the preprocess method, and I have no idea why it acts like this!
Anyway, I can't use the mean file this way. I tried
ims.append(img[:, 0:224, 0:224] - mean.mean(1).mean(1))
as well, but was faced with this error:
ValueError: operands could not be broadcast together with shapes (3,224,224) (3,)
I also need to find a better way of cropping the image. I don't know whether I should resize it back to 224, or use crops just like Caffe does.
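An aside on that ValueError (an editorial note, not from the original post): a per-channel mean of shape (3,) does not broadcast against a channels-first (3, 224, 224) array, but reshaping it to (3, 1, 1) does:
# Reshape the (3,) per-channel mean to (3, 1, 1) so NumPy broadcasts it
# across the H and W axes of a channels-first (3, 224, 224) image.
per_channel_mean = mean_image[0].mean(1).mean(1).reshape(3, 1, 1)
ims.append(img[:, 0:224, 0:224] - per_channel_mean)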
I finally made it! Here is the code that runs much faster:
predicted_lables = []
true_labels = []
misclassified = []
class_names = ['unsafe', 'safe']
count = 0
correct = 0
batch = []
plabe_ls = []
batch_size = 50
cropx = 224
cropy = 224
i = 0

# Extract mean from the mean image file
mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
f = open(args.mean, 'rb')
mean_blobproto_new.ParseFromString(f.read())
mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)
f.close()

caffe.set_mode_gpu()
net1 = caffe.Net(args.proto, args.model, caffe.TEST)
net1.blobs['data'].reshape(batch_size, 3, 224, 224)
data_blob_shape = net1.blobs['data'].data.shape

# check and see if it's lmdb or leveldb
if(args.db_type.lower() == 'lmdb'):
    lmdb_env = lmdb.open(args.db_path)
    lmdb_txn = lmdb_env.begin()
    lmdb_cursor = lmdb_txn.cursor()
    for key, value in lmdb_cursor:
        count += 1
        datum = caffe.proto.caffe_pb2.Datum()
        datum.ParseFromString(value)
        label = int(datum.label)
        image = caffe.io.datum_to_array(datum).astype(np.float32)
        # key, image, label
        # buffer n images
        if(count % 5000 == 0):
            print('{0} samples processed so far'.format(count))
        if(i < batch_size):
            i += 1
            inf = key, image, label
            batch.append(inf)
            # print(key)
        if(i >= batch_size):
            # process n images
            ims = []
            for x in range(len(batch)):
                img = batch[x][1]
                # img has c,h,w shape! it already went through transpose
                # and channel swap when it was saved into lmdb!
                # Method I: crop both the image and the mean file
                # ims.append(img[:, 0:224, 0:224] - mean_image[0][:, 0:224, 0:224])
                # Method II: resize the image to the desired size (crop size)
                # img = caffe.io.resize_image(img.transpose(2, 1, 0), (224, 224))
                # Method III: use a center crop, just like caffe does at test time
                # center crop
                c, w, h = img.shape
                startx = h // 2 - cropx // 2
                starty = w // 2 - cropy // 2
                img = img[:, startx:startx + cropx, starty:starty + cropy]
                # transpose the image so we can subtract the mean
                img = img.transpose(2, 1, 0)
                img -= mean_image[0].mean(1).mean(1)
                # transpose back to the original state
                img = img.transpose(2, 1, 0)
                ims.append(img)
            net1.blobs['data'].data[...] = ims[:]
            out_1 = net1.forward()
            plabe_ls = out_1['pred']
            plbl = np.asarray(plabe_ls)
            plbl = plbl.argmax(axis=1)
            for j in range(len(batch)):
                if(plbl[j] == batch[j][2]):
                    correct += 1
                else:
                    misclassified.append(batch[j][0])
                predicted_lables.append(plbl[j])
                true_labels.append(batch[j][2])
            batch.clear()
            i = 0
I'm not getting the exact same accuracy, but it's pretty close: 98.61% instead of 98.65% (I don't know what is causing the difference!).
Update
The reason transformer.preprocess took so long to complete is its resize_image() method. resize_image() needs the image to be in H,W,C form, whereas my images had already been transposed and channel-swapped (into c,w,h form) when they were saved into the lmdb dataset. This caused resize_image() to fall back to its slowest path, taking about 0.6 seconds per image.
Knowing this, transposing the image into the correct dimensions solves the issue, meaning I had to do:
ims.append(transformer.preprocess('data', img.transpose(2, 1, 0)))
Note that this is still slower than the cropping method above, but much, much faster than before!
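To summarize the takeaway as a small sketch (an illustration based on the c,w,h layout described above, not code from the post): check the array layout before handing it to Transformer.preprocess, and transpose only when needed:
def to_hwc(img):
    # Transformer.preprocess expects H,W,C input. Images read back from
    # this lmdb are c,w,h (already transposed and channel-swapped), so
    # move the channel axis last before preprocessing.
    if img.ndim == 3 and img.shape[0] == 3:  # heuristic: channels-first
        return img.transpose(2, 1, 0)        # c,w,h -> h,w,c
    return img

# ims.append(transformer.preprocess('data', to_hwc(img)))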