Caffe's transformer.preprocess takes too long to complete

I wrote a simple script to test a model using PyCaffe, but I noticed it is extremely slow, even on the GPU! My test set has 82K samples of size 256x256, and when I run the code given below, it takes hours to complete.
I even used batches of images instead of individual ones, yet nothing changed. It has now been running for the past 5 hours, and only 50K samples have been processed! What should I do to make it faster?
Can I completely avoid using transformer.preprocess? If so, how?
Here is the snippet:
#run on gpu
caffe.set_mode_gpu()
#Extract mean from the mean image file
mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
f = open(args.mean, 'rb')
mean_blobproto_new.ParseFromString(f.read())
mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)
f.close()

predicted_lables = []
true_labels = []
misclassified = []
class_names = ['unsafe', 'safe']
count = 0
correct = 0
batch = []
plabe_ls = []
batch_size = 50

net1 = caffe.Net(args.proto, args.model, caffe.TEST)
transformer = caffe.io.Transformer({'data': net1.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))
transformer.set_mean('data', mean_image[0].mean(1).mean(1))
transformer.set_raw_scale('data', 255)
transformer.set_channel_swap('data', (2,1,0))
net1.blobs['data'].reshape(batch_size, 3, 224, 224)
data_blob_shape = net1.blobs['data'].data.shape
data_blob_shape = list(data_blob_shape)
i = 0
mu = np.array([104, 117, 123])  #imagenet mean

#check and see if its lmdb or leveldb
if(args.db_type.lower() == 'lmdb'):
    lmdb_env = lmdb.open(args.db_path)
    lmdb_txn = lmdb_env.begin()
    lmdb_cursor = lmdb_txn.cursor()
    for key, value in lmdb_cursor:
        count += 1
        datum = caffe.proto.caffe_pb2.Datum()
        datum.ParseFromString(value)
        label = int(datum.label)
        image = caffe.io.datum_to_array(datum).astype(np.uint8)
        if(count % 5000 == 0):
            print('count: ', count)
        if(i < batch_size):
            i += 1
            inf = key, image, label
            batch.append(inf)
        if(i >= batch_size):
            #process n image
            ims = []
            for x in range(len(batch)):
                ims.append(transformer.preprocess('data', batch[x][1]))  # - mean_image[0].mean(1).mean(1)
            net1.blobs['data'].data[...] = ims[:]
            out_1 = net1.forward()
            plbl = np.asarray(out_1['pred'])
            plbl = plbl.argmax(axis=1)
            for j in range(len(batch)):
                if (plbl[j] == batch[j][2]):
                    correct += 1
                else:
                    misclassified.append(batch[j][0])
                predicted_lables.append(plbl[j])
                true_labels.append(batch[j][2])
            batch.clear()
            i = 0
Update:
By replacing
for x in range(len(batch)):
    ims.append(transformer.preprocess('data', batch[x][1]))
net1.blobs['data'].data[...] = ims[:]
with
for x in range(len(batch)):
    img = batch[x][1]
    ims.append(img[:, 0:224, 0:224])
82K samples were processed in less than a minute. The culprit is indeed the preprocess method, and I have no idea why it behaves like this!
Anyway, I can't use the mean file this way. I tried
ims.append(img[:,0:224,0:224] - mean.mean(1).mean(1))
as well, but got this error:
ValueError: operands could not be broadcast together with shapes (3,224,224) (3,)
I also need to find a better way of cropping the image. I don't know whether I need to resize it back to 224, or whether I should use crops just like Caffe does.
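As an aside, the broadcast error above comes from subtracting a (3,) vector from a (3, 224, 224) array; NumPy aligns trailing dimensions, so the per-channel mean has to be reshaped to (3, 1, 1) first. A minimal sketch, assuming img is a CHW array read from the LMDB and mean_image is the array loaded from the .binaryproto as above:
# per-channel mean, shape (3,)
channel_mean = mean_image[0].mean(1).mean(1)
# (3,224,224) - (3,) fails, but (3,224,224) - (3,1,1) broadcasts per channel
crop = img[:, 0:224, 0:224].astype(np.float32)
crop -= channel_mean[:, np.newaxis, np.newaxis]  # equivalently: channel_mean.reshape(3, 1, 1)
ims.append(crop)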

I finally made it! Here is the code, which runs much faster:
predicted_lables = []
true_labels = []
misclassified = []
class_names = ['unsafe', 'safe']
count = 0
correct = 0
batch = []
plabe_ls = []
batch_size = 50
cropx = 224
cropy = 224
i = 0

# Extract mean from the mean image file
mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
f = open(args.mean, 'rb')
mean_blobproto_new.ParseFromString(f.read())
mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)
f.close()

caffe.set_mode_gpu()
net1 = caffe.Net(args.proto, args.model, caffe.TEST)
net1.blobs['data'].reshape(batch_size, 3, 224, 224)
data_blob_shape = net1.blobs['data'].data.shape

#check and see if its lmdb or leveldb
if(args.db_type.lower() == 'lmdb'):
    lmdb_env = lmdb.open(args.db_path)
    lmdb_txn = lmdb_env.begin()
    lmdb_cursor = lmdb_txn.cursor()
    for key, value in lmdb_cursor:
        count += 1
        datum = caffe.proto.caffe_pb2.Datum()
        datum.ParseFromString(value)
        label = int(datum.label)
        image = caffe.io.datum_to_array(datum).astype(np.float32)
        #key,image,label
        #buffer n image
        if(count % 5000 == 0):
            print('{0} samples processed so far'.format(count))
        if(i < batch_size):
            i += 1
            inf = key, image, label
            batch.append(inf)
            #print(key)
        if(i >= batch_size):
            #process n image
            ims = []
            for x in range(len(batch)):
                img = batch[x][1]
                #img has c,h,w shape! it has already gone through transpose
                #and channel swap when it was saved into lmdb!
                #Method I: crop both the image and the mean file
                #ims.append(img[:,0:224,0:224] - mean_image[0][:,0:224,0:224])
                #Method II: resize the image to the desired size (crop size)
                #img = caffe.io.resize_image(img.transpose(2,1,0), (224, 224))
                #Method III: use a center crop, just like caffe does at test time
                #center crop
                c, w, h = img.shape
                startx = h//2 - cropx//2
                starty = w//2 - cropy//2
                img = img[:, startx:startx + cropx, starty:starty + cropy]
                #transpose the image so we can subtract the mean
                img = img.transpose(2, 1, 0)
                img -= mean_image[0].mean(1).mean(1)
                #transpose back to the original state
                img = img.transpose(2, 1, 0)
                ims.append(img)
            net1.blobs['data'].data[...] = ims[:]
            out_1 = net1.forward()
            plabe_ls = out_1['pred']
            plbl = np.asarray(plabe_ls)
            plbl = plbl.argmax(axis=1)
            for j in range(len(batch)):
                if (plbl[j] == batch[j][2]):
                    correct += 1
                else:
                    misclassified.append(batch[j][0])
                predicted_lables.append(plbl[j])
                true_labels.append(batch[j][2])
            batch.clear()
            i = 0
I'm not getting the exact accuracy, but it's pretty close (98.61% instead of 98.65%; I don't know what is causing this small difference).
Update
The reason transformer.preprocess took so long to complete was its resize_image() method. resize_image needs the image in H,W,C form, whereas in my case the images had already been transposed and channel-swapped (into c,w,h form) when they were written to the LMDB. This forced resize_image() to fall back to its slowest way of resizing the image, taking about 0.6 seconds per image.
Knowing this, transposing the image into the correct layout solves the issue, meaning I had to do:
ims.append(transformer.preprocess('data',img.transpose(2,1,0)))
Note that this is still slower than the manual method above, but it is much, much faster than before!
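If you want to verify the layout issue on your own data, a rough timing check makes the difference obvious. This is only a sketch, assuming transformer is set up as in the first snippet and image is one CHW array read from the LMDB:
import time

t0 = time.time()
transformer.preprocess('data', image)                      # CHW passed as if it were HWC (the slow path)
t1 = time.time()
transformer.preprocess('data', image.transpose(2, 1, 0))   # transposed back to HWC first
t2 = time.time()
print('CHW input: %.3f s, HWC input: %.3f s' % (t1 - t0, t2 - t1))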

Related

Google Colab RAM issue with semi-supervised CNN model training

I'm trying to train a binary classifier by transfer learning on EfficientNet. Since I have lots of unlabeled data, I use a semi-supervised method to generate "pseudo-labeled" data before the model goes through each epoch.
Since Colab has limited RAM, I delete some large variables (numpy arrays, datasets, dataloaders, ...) in each loop; however, the RAM usage still increases every epoch.
Below is my training loop, which consists of three main parts: semi-supervised pseudo-labeling, the training loop, and the validation loop.
I'm not sure which step causes the RAM to keep increasing each epoch.
(1) semi-supervised part
for epoch in range(n_epochs):
    print(f"[ Epoch | {epoch + 1:03d}/{n_epochs:03d} ]")
    if do_semi:
        model.eval()
        dataset_0 = []
        dataset_1 = []
        for img in pseudo_loader:
            with torch.no_grad():
                logits = model(img.to(device))
                probs = softmax(logits)
            # Filter the data and construct a new dataset.
            for i in range(len(probs)):
                p = probs[i].tolist()
                idx = p.index(max(p))
                if p[idx] >= threshold:
                    if idx == 0:
                        dataset_0.append(img[i].numpy().reshape(128, 128, 3))
                    else:
                        dataset_1.append(img[i].numpy().reshape(128, 128, 3))
        # stratified sampling with labels
        len_0, len_1 = len(dataset_0), len(dataset_1)
        print('label 0: ', len_0)
        print('label 1: ', len_1)
        # since there may be RAM memory error, restrict to 1000
        if len_0 > 1000:
            dataset_0 = random.sample(dataset_0, 1000)
        if len_1 > 1000:
            dataset_1 = random.sample(dataset_1, 1000)
        if len_0 == len_1:
            pseudo_x = np.array(dataset_0 + dataset_1)
            pseudo_y = ['0' for _ in range(len(dataset_0))] + ['1' for _ in range(len(dataset_1))]
        elif len_0 > len_1:
            dataset_0 = random.sample(dataset_0, len(dataset_1))
            pseudo_x = np.array(dataset_0 + dataset_1)
            pseudo_y = ['0' for _ in range(len(dataset_0))] + ['1' for _ in range(len(dataset_1))]
        else:
            dataset_1 = random.sample(dataset_1, len(dataset_0))
            pseudo_x = np.array(dataset_0 + dataset_1)
            pseudo_y = ['0' for _ in range(len(dataset_0))] + ['1' for _ in range(len(dataset_1))]
        if len(pseudo_x) != 0:
            new_dataset = CustomTensorDataset(pseudo_x, np.array(pseudo_y), 'pseudo')
        else:
            new_dataset = []
        # print how many pseudo label data added
        print('Total number of pseudo labeled data are added: ', len(new_dataset))
        # release RAM
        dataset_0 = None
        dataset_1 = None
        pseudo_x = None
        pseudo_y = None
        del dataset_0, dataset_1, pseudo_x, pseudo_y
        gc.collect()
        # Turn off the eval mode.
        model.train()
        concat_dataset = ConcatDataset([train_set, new_dataset])
        train_loader = DataLoader(concat_dataset, batch_size=batch_size, shuffle=True)
I'm quite sure the problem happens in the semi-supervised part, since the RAM usage does not increase when I don't apply it.
Thanks for your help!
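Since no single step has been isolated yet, one way to narrow it down is to log the process's resident memory at a few checkpoints inside the epoch. This is just a diagnostic sketch using psutil (available on Colab); the checkpoint labels are placeholders:
import os
import psutil

_proc = psutil.Process(os.getpid())

def log_ram(tag):
    # print resident set size in MB at a named checkpoint
    print(f'[{tag}] RSS: {_proc.memory_info().rss / 1024 ** 2:.1f} MB')

# example placement inside the epoch loop:
# log_ram('start of epoch')
# ... pseudo-labeling block ...
# log_ram('after pseudo-labeling')
# ... training / validation loops ...
# log_ram('end of epoch')
If the jump shows up around the ConcatDataset/DataLoader creation, lingering references to the previous epoch's concat_dataset, train_loader, or the numpy copies held in dataset_0/dataset_1 may be what keeps the memory from being reclaimed.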

Predicted Image id and box from SSD

How do I find the predicted image id and box from SSD? I am using this GitHub repo; here is the test function, from which I want to save the image id and box:
def test(loader, net, criterion, device):
    net.eval()
    running_loss = 0.0
    running_regression_loss = 0.0
    running_classification_loss = 0.0
    num = 0
    for _, data in enumerate(loader):
        images, boxes, labels = data
        images = images.to(device)
        boxes = boxes.to(device)
        labels = labels.to(device)
        num += 1
        with torch.no_grad():
            confidence, locations = net(images)
            regression_loss, classification_loss = criterion(confidence, locations, labels, boxes)
            loss = regression_loss + classification_loss
        running_loss += loss.item()
        running_regression_loss += regression_loss.item()
        running_classification_loss += classification_loss.item()
    return running_loss / num, running_regression_loss / num, running_classification_loss / num
Assume
y = net(x)
detections = y.data
You can print the detection info with the following:
# this will loop over predictions class by class
for i in range(detections.size(1)):
    # this will loop over each detection in the class
    for j in range(detections.size(2)):
        score = detections[0, i, j, 0]
        coords = detections[0, i, j, 1:]
        print(f"Class id: {i} \t Score: {score} \t Coords: {coords}")
See the demo for more details.
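If you want to save the ids and boxes rather than print them, you can collect them into a list keyed by the image's position in the batch. A sketch under the same assumptions as above (detections laid out as [batch, class, detection, 5] with the score first, and a placeholder confidence threshold of 0.5):
results = []
threshold = 0.5  # placeholder confidence cut-off
for img_idx in range(detections.size(0)):          # image within the batch
    for class_idx in range(detections.size(1)):    # class
        for det_idx in range(detections.size(2)):  # detection within the class
            score = detections[img_idx, class_idx, det_idx, 0].item()
            if score < threshold:
                continue
            box = detections[img_idx, class_idx, det_idx, 1:].tolist()
            results.append({'image_id': img_idx, 'class_id': class_idx,
                            'score': score, 'box': box})
Note that img_idx here is only the index within the current batch; if you need the dataset's image id, return it from your dataset/loader alongside images, boxes, labels and carry it into results.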

Is num_epochs limited in tensorflow's csv file reader string_input_producer()?

I have a dummy csv file (y=-x+1)
x,y
1,0
2,-1
3,-2
I feed that into a linear regression model. Since I have only a few examples, I want to iterate the training over that file about 1000 times, so I set num_epochs=1000.
However, it seems that TensorFlow limits this number. It works fine with num_epochs=5 or 10, but beyond 33 it is capped at 33 epochs. Is that true, or am I doing something wrong?
# model = W*x+b
...
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
# reading input from csv
filename_queue = tf.train.string_input_producer(["/tmp/testinput.csv"], num_epochs=1000)
reader = tf.TextLineReader(skip_header_lines=1)
...
col_x, col_label = tf.decode_csv(csv_row, record_defaults=record_defaults)
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    while True:
        try:
            input_x, input_y = sess.run([col_x, col_label])
            sess.run(train, feed_dict={x: input_x, y: input_y})
            ...
Side question, do I need to do:
input_x, input_y = sess.run([col_x, col_label])
sess.run(train, feed_dict={x:input_x, y:input_y})
I have tried sess.run(train, feed_dict={x: col_x, y: col_y}) directly to avoid the extra step, but it doesn't work (they are graph nodes, and feed_dict expects regular data).
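On the side question: col_x and col_label are graph nodes, so feed_dict cannot accept them directly. You either materialize them with sess.run first (as you do), or build the model directly on top of the pipeline tensors so that no feed_dict is needed at all. A sketch of the second option under TF 1.x, assuming W and b are the (elided) model variables:
# build the model on the queue's output tensors instead of placeholders
x_in = tf.cast(col_x, tf.float32)
y_in = tf.cast(col_label, tf.float32)
pred = W * x_in + b
loss = tf.reduce_mean(tf.square(pred - y_in))
train = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

# inside the session, each run dequeues one row and trains on it:
# sess.run(train)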
The following snippet works perfectly (with your input):
import tensorflow as tf

filename_queue = tf.train.string_input_producer(["/tmp/input.csv"], num_epochs=1000)
reader = tf.TextLineReader(skip_header_lines=1)
_, csv_row = reader.read(filename_queue)
col_x, col_label = tf.decode_csv(csv_row, record_defaults=[[0], [0]])

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    num = 0
    try:
        while True:
            sess.run([col_x, col_label])
            num += 1
    except:
        print(num)
Which gives the following output:
edb#lapelidb:/tmp$ python csv.py
3000
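One small refinement: the bare except above is in fact catching tf.errors.OutOfRangeError, which the queue raises once num_epochs passes over the file have been served. Catching it explicitly (and shutting the coordinator down) makes the termination condition visible; a sketch of the tail of the session block in the same style:
num = 0
try:
    while True:
        sess.run([col_x, col_label])
        num += 1
except tf.errors.OutOfRangeError:
    # raised after num_epochs * rows_per_file records have been dequeued
    print('done, %d records read' % num)
finally:
    coord.request_stop()
    coord.join(threads)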

Getting different accuracies using different Caffe classes (98.65 vs 98.1 vs 98.20)

When I train and then test my model using Caffe's command-line interface, I get e.g. 98.65%, whereas when I write my own code (given below) to calculate accuracy from the same pre-trained model with Caffe.Net, I get e.g. 98.1%.
Everything seems straightforward, and I have no idea what is causing the issue.
I also tried using Caffe.Classifier and its predict method, and got yet another, lower accuracy (98.20%!).
Here is the snippet of code I wrote:
import sys
import caffe
import numpy as np
import lmdb
import argparse
from collections import defaultdict
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import itertools
from sklearn.metrics import roc_curve, auc
import random

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--proto', help='path to the network prototxt file(deploy)', type=str, required=True)
    parser.add_argument('--model', help='path to your caffemodel file', type=str, required=True)
    parser.add_argument('--mean', help='path to the mean file(.binaryproto)', type=str, required=True)
    #group = parser.add_mutually_exclusive_group(required=True)
    parser.add_argument('--db_type', help='lmdb or leveldb', type=str, required=True)
    parser.add_argument('--db_path', help='path to your lmdb/leveldb dataset', type=str, required=True)
    args = parser.parse_args()

    predicted_lables = []
    true_labels = []
    misclassified = []
    class_names = ['unsafe', 'safe']
    count = 0
    correct = 0
    batch = []
    plabe_ls = []
    batch_size = 50
    cropx = 224
    cropy = 224
    i = 0
    multi_crop = False
    use_caffe_classifier = True

    caffe.set_mode_gpu()
    # Extract mean from the mean image file
    mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
    f = open(args.mean, 'rb')
    mean_blobproto_new.ParseFromString(f.read())
    mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)
    f.close()

    net = caffe.Classifier(args.proto, args.model,
                           mean=mean_image[0].mean(1).mean(1),
                           image_dims=(224, 224))
    net1 = caffe.Net(args.proto, args.model, caffe.TEST)
    net1.blobs['data'].reshape(batch_size, 3, 224, 224)
    data_blob_shape = net1.blobs['data'].data.shape

    #check and see if its lmdb or leveldb
    if(args.db_type.lower() == 'lmdb'):
        lmdb_env = lmdb.open(args.db_path)
        lmdb_txn = lmdb_env.begin()
        lmdb_cursor = lmdb_txn.cursor()
        for key, value in lmdb_cursor:
            count += 1
            datum = caffe.proto.caffe_pb2.Datum()
            datum.ParseFromString(value)
            label = int(datum.label)
            image = caffe.io.datum_to_array(datum).astype(np.float32)
            #key,image,label
            #buffer n image
            if(count % 5000 == 0):
                print('{0} samples processed so far'.format(count))
            if(i < batch_size):
                i += 1
                inf = key, image, label
                batch.append(inf)
                #print(key)
            if(i >= batch_size):
                #process n image
                ims = []
                for x in range(len(batch)):
                    img = batch[x][1]
                    #img has c,w,h shape! its already gone through transpose and channel swap when it was being saved into lmdb!
                    #Method III : use center crop just like caffe does in test time
                    if (use_caffe_classifier != True):
                        #center crop
                        c, w, h = img.shape
                        startx = h//2 - cropx//2
                        starty = w//2 - cropy//2
                        img = img[:, startx:startx + cropx, starty:starty + cropy]
                        #transpose the image so we can subtract from mean
                        img = img.transpose(2, 1, 0)
                        img -= mean_image[0].mean(1).mean(1)
                        #transpose back to the original state
                        img = img.transpose(2, 1, 0)
                        ims.append(img)
                    else:
                        ims.append(img.transpose(2, 1, 0))
                if (use_caffe_classifier != True):
                    net1.blobs['data'].data[...] = ims[:]
                    out_1 = net1.forward()
                    plabe_ls = out_1['pred']
                else:
                    out_1 = net.predict(np.asarray(ims), oversample=multi_crop)
                    plabe_ls = out_1
                plbl = np.asarray(plabe_ls)
                plbl = plbl.argmax(axis=1)
                for j in range(len(batch)):
                    if (plbl[j] == batch[j][2]):
                        correct += 1
                    else:
                        misclassified.append(batch[j][0])
                    predicted_lables.append(plbl[j])
                    true_labels.append(batch[j][2])
                batch.clear()
                i = 0
                sys.stdout.write("\rAccuracy: %.2f%%" % (100. * correct / count))
                sys.stdout.flush()
        print(", %i/%i corrects" % (correct, count))
What is causing this difference in accuracies?
More information:
I am using Python 3.5 on Windows.
I read images from an LMDB dataset.
The images are 256x256 and center-cropped to 224x224.
The model is fine-tuned from GoogLeNet.
For Caffe.Classifier's predict to work well, I had to change classify.py.
In training, I just use Caffe's defaults, such as random crops at training time and a center crop at test time.
Changes:
changed line 35 to:
self.transformer.set_transpose(in_, (2, 1, 0))
and line 99 to:
predictions = predictions.reshape((len(predictions) // 10, 10, -1))
1) First off, you need to revert line 35 (32?) of classify.py, self.transformer.set_transpose(in_, (2, 1, 0)), back to the original self.transformer.set_transpose(in_, (2, 0, 1)), so that it expects HWC and transposes internally to CHW for downstream processing.
2) Run your Classifier branch as it is. You're likely to get a bad result. Please check this. If so, it means the image database is not CWH as you've commented, but actually CHW. After you've confirmed this, change your Classifier branch: ims.append(img.transpose(2,1,0)) becomes ims.append(img.transpose(1,2,0)). Re-test your Classifier branch. The result should be 98.2% (go to Step 3) or 98.65% (try Step 4).
3) If your result in Step 2 is 98.2%, also undo your second change to classify.py. Theoretically, since your images have even height/width, // and / should make no difference. If it does make a difference, or crashes, something is seriously wrong with your image database -- your assumption about the image size is incorrect. You need to check these things. They could be off by a pixel or so, which could explain the slight discrepancies in accuracy.
4) If your result in Step 2 is 98.65%, then you need to make changes to the Caffe.Net branch of your code. The database images are CHW, so you need to make the first transpose img = img.transpose(1,2,0) and the second transpose, after mean subtraction, img = img.transpose(2,0,1). Then run your Caffe.Net branch. If you still get 98.1% as before, you should check that mean subtraction is performed correctly by your network.
In Steps (2) and (4), it's possible to get worse results, which means that the problem is likely a difference in mean subtraction for your trained Net vs your expectations in Python code. Check this.
About your 98.2% for the caffe.Classifier:
If you look at lines 78-80, the center crop is done along crop_dims, not img_dims. If you further look at line 42 of the caffe.Classifier constructor, crop_dims is never user-determined; it is taken from the size of the Net's input blobs. Lastly, if you look at line 70, img_dims is used to resize the images prior to center cropping. So what's happening with your setup is: the images first get resized to 224 x 224, then uselessly center-cropped to 224 x 224 (I assume this is the HxW of your Net). You will obviously get results poorer than 98.65%. What you need to do is change img_dims to (256, 256). That prevents the resizing; the crop size will be picked up automatically from your Net, and you should get your 98.65%.
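Put concretely, the Classifier construction and call would look something like this (a sketch reusing the variables from the question's snippet, and assuming the LMDB images do turn out to be C,H,W as Step 2 suggests, hence the transpose to H,W,C):
net = caffe.Classifier(args.proto, args.model,
                       mean=mean_image[0].mean(1).mean(1),
                       image_dims=(256, 256))  # resize target; the 224x224 crop comes from the net's input blob

# predict() expects a list of H x W x C arrays
out = net.predict([img.transpose(1, 2, 0) for _, img, _ in batch],
                  oversample=False)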

Constant RuntimeError - Python - Tkinter

My problem is that I get a RuntimeError every time I run my code. I am also unable to bind my "Repeat" button to the RUN function. I don't understand why the button won't work, why self.canvas.delete() doesn't work, and why the new frame doesn't open only when the puzzle has been solved (when all of the disks have been moved to a new pole) but instead opens straight away.
from Tkinter import *
import sys
import time

def Hanoi(n, src, dst, tmp, move):
    if n == 0:
        return n
    else:
        Hanoi(n-1, src, tmp, dst, move)
        move(src, dst)
        Hanoi(n-1, tmp, dst, src, move)

class hanoi():
    master = Tk()
    width = 2000
    height = 400
    time = 0.1
    big_sep = 30
    small_sep = 5

    def __init__(self):
        self.User_Input = input("Please enter the amount of disks")
        self.canvas = Canvas(hanoi.master, height=hanoi.height, width=hanoi.width)
        self.canvas.pack()
        self.poles = [[], [], []]
        self.Add_Objects()

    def Add_Objects(self):
        floor_size = 1000 - 2*hanoi.small_sep
        self.disk_height = (400 - 4*hanoi.big_sep) / self.User_Input
        self.disk_width = (floor_size - 4*hanoi.small_sep) / 3
        disk_h = self.disk_height
        disk_w = self.disk_width
        global disk_h
        global disk_w
        width = hanoi.width
        height = hanoi.height
        small_sep = hanoi.small_sep
        big_sep = hanoi.big_sep
        #create poles
        bar_pos = [4*small_sep + disk_w/2 + (disk_w+small_sep)*i for i in range(0, 3)]
        for x in bar_pos:
            print x
            self.canvas.create_rectangle(x-small_sep, big_sep, x+small_sep, 400-big_sep-1, fill='black')
        #create bases
        for x in bar_pos:
            print i
            self.canvas.create_rectangle(x-(13*small_sep), 370, x+(13*small_sep), 410, fill="brown")
        #create disks
        for i in range(self.User_Input):
            print 2*small_sep + 4*small_sep*i, height-big_sep-1-disk_h*i, 2*small_sep + disk_w - 4*small_sep*i, height-big_sep-1-disk_h*(i+1)
            disk = self.canvas.create_rectangle(2*small_sep + 4*small_sep*i, 400-big_sep-1-disk_h*i, 2*small_sep + disk_w - 4*small_sep*i, 400-big_sep-1-disk_h*(i+1), fill='blue')
            self.poles[0].append(disk)
            self.canvas.update()

    def Move_Object(self, origin, dest):
        print len(self.poles[dest]), self.User_Input - 1
        current_disk = self.poles[origin].pop()
        self.poles[dest].append(current_disk)
        x_distance = (dest - origin) * (disk_w)
        y_distance = ((len(self.poles[origin]) - len(self.poles[dest]) + 1) * disk_h)
        self.canvas.move(current_disk, x_distance, y_distance)
        self.canvas.update()
        time.sleep(hanoi.time)
        if len(self.poles[dest]) == self.User_Input:
            self.canvas.delete()
            new_frame = Frame(hanoi.master, width=300, height=300)
            new_frame.pack()
            print "Yayar"
            newb = Button(new_frame, text="Replay", )
            newb.place(relx=0.5, rely=0.5)
            newb.bind("<ButtonPress-1>", self.RUN)

    def RUN(self):
        Hanoi(self.User_Input, 0, 2, 1, self.Move_Object)

if __name__ == "__main__":
    print "hello"
    hanoi().RUN()
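No answer is shown here, but two of the symptoms map onto well-known Tkinter details, so here is a hedged sketch of just those parts (not a full fix). canvas.delete() with no arguments deletes nothing, you need the "all" tag; and a handler attached with bind() receives an event argument that RUN(self) does not accept, so wiring the button through command= is simpler:
# clear every item on the canvas ("all" is a built-in Tk tag)
self.canvas.delete("all")

# let the button call RUN directly; bind("<ButtonPress-1>", self.RUN) would
# pass an event object that RUN(self) cannot accept
newb = Button(new_frame, text="Replay", command=self.RUN)
newb.place(relx=0.5, rely=0.5)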