Read LevelDB (.ldb) features extracted using Caffe

I have used the following command for feature extraction:
./build/tools/extract_features.bin models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel examples/_temp/imagenet_val.prototxt fc7 examples/_temp/features 10 leveldb GPU 0
Then I used the following code to read the LevelDB features:
https://15519285443829437810.googlegroups.com/attach/b71d5c99c447fc2a/main.cpp?part=0.1&view=1&vt=ANaJVrHM26ydsY5Z2ognvhBaFtDzXnE_SiVf19DLkrNpf9Q34t5O4bJWy0nHH_HBnBAVx5wZusXd1joX93JBK0_r7XKEIc-5odz9_HPHV1RUo8MD3zNHgoY
Everything is OK and I have one .ldb file now.
How can I read this .ldb file? Can I convert it to .txt?

I have used the following code:
import caffe
import leveldb
import numpy as np
from caffe.proto import caffe_pb2

db = leveldb.LevelDB('/home/deep/rahim/caffe-master/examples/_temp/features')
datum = caffe_pb2.Datum()
for key, value in db.RangeIter():
    datum.ParseFromString(value)
    label = datum.label
    data = caffe.io.datum_to_array(datum)
    image = np.transpose(data, (1, 2, 0))
    np.save('feature.txt', image)  # np.save appends .npy, so this writes feature.txt.npy
Then I pass feature.txt.npy to the following code, which converts the .npy file to .txt:
import numpy as np
import os

def parseNPY(path, fileJustName):
    # load from the file
    inputFile = os.path.join(path, fileJustName + ".npy")
    matrices = np.load(inputFile)
    outputfile = os.path.join(path, fileJustName)
    for m in range(matrices.shape[0]):
        # file name for this matrix
        outFileFull = outputfile + "-" + str(m) + ".txt"
        # output matrix to a numbered file
        np.savetxt(outFileFull, matrices[m], fmt="%i", delimiter="\t")

mypath = "/home/deep/rahim/caffe-master/python/"
for path, dirs, filenames in os.walk(mypath):
    # translate all filenames.
    for filename in filenames:
        fileJustName, fileExtension = os.path.splitext(filename)
        if fileExtension == ".npy":
            print(os.path.join(path, fileJustName))
            parseNPY(path, fileJustName)
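If the goal is simply a text file, the intermediate .npy step can be skipped entirely. A minimal sketch (assuming the same database path, and that each value in the database is a serialized Datum holding one fc7 feature vector) that writes one feature vector per line:
import caffe
import leveldb
from caffe.proto import caffe_pb2

db = leveldb.LevelDB('/home/deep/rahim/caffe-master/examples/_temp/features')
datum = caffe_pb2.Datum()
with open('features.txt', 'w') as out:
    for key, value in db.RangeIter():
        datum.ParseFromString(value)
        # flatten the (C, H, W) feature array to a single row per image
        feat = caffe.io.datum_to_array(datum).flatten()
        out.write(' '.join(str(x) for x in feat) + '\n')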

Related

How can I convert a JSON file from the labelme interface to PNG or another image format?

When I label images with the labelme interface, the output is a JSON file, but I need the result in an image format such as PNG, BMP, or JPEG after labeling. Can anyone suggest any code?
import json
from PIL import Image

with open('your.json') as f:
    data = json.load(f)

# Load the file path from the json
imgpath = data['yourkey']

# Place the image path into the open method
img = Image.open(imgpath)
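The loaded image can then be written out in whatever format you need; Pillow infers the format from the file extension (the output name here is just a placeholder):
img.save('output.png')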
Based on the tutorial of the original repository, you can use labelme_json_to_dataset <<JSON_PATH>> -o <<OUTPUT_FOLDER_PATH>>.
To run it from Python / Jupyter, you can use:
import os

def labelme_json_to_dataset(json_path):
    os.system("labelme_json_to_dataset " + json_path + " -o " + json_path.replace(".", "_"))
If you need to do it for multiple images, just loop the function.
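For instance, a minimal sketch that loops it over every labelme JSON file in a folder (the folder path is a placeholder):
import glob

for json_path in glob.glob("annotations/*.json"):
    labelme_json_to_dataset(json_path)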
Based on the issue, the labelme_json_to_dataset behavior can be reimplemented using either labelme2voc.py or labelme2coco.py.
You could also use another implementation such as labelme2Datasets.
You can also implement your own variant of labelme_json_to_dataset using the labelme library. Basically, you use label_file = labelme.LabelFile(filename=filename) followed by img = labelme.utils.img_data_to_arr(label_file.imageData). An example of the process would look like this:
import labelme
import os
import os.path as osp
import glob
from PIL import Image

def labelme2images(input_dir, output_dir, force=False, save_img=False, new_size=False):
    """
    Extract the embedded images from labelme JSON files.
    new_size, if given, is a (new_size_width, new_size_height) tuple.
    """
    if save_img:
        # the original _makedirs helper is replaced with os.makedirs here;
        # force=True tolerates an already existing folder
        os.makedirs(osp.join(output_dir, "images"), exist_ok=force)
    if new_size:
        new_size_width, new_size_height = new_size
    print("Generating dataset")
    filenames = glob.glob(osp.join(input_dir, "*.json"))
    for filename in filenames:
        # base name
        base = osp.splitext(osp.basename(filename))[0]
        label_file = labelme.LabelFile(filename=filename)
        img = labelme.utils.img_data_to_arr(label_file.imageData)
        h, w = img.shape[0], img.shape[1]
        if save_img:
            if new_size:
                # PIL's resize expects a (width, height) tuple
                img_pil = Image.fromarray(img).resize((new_size_width, new_size_height))
            else:
                img_pil = Image.fromarray(img)
            img_pil.save(osp.join(output_dir, "images", base + ".jpg"))
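A hypothetical call, with placeholder folders and an optional resize:
labelme2images("annotations/", "dataset/", force=True, save_img=True, new_size=(640, 480))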

How to merge multiple JSON files read from S3, convert them to a single .csv, and store it in S3?

Input:
There are 5 part JSON files named test_part1.json, test_part2.json, test_part3.json, test_part4.json, test_part5.json in s3://test/json_files/data/.
Expected output:
A single CSV file.
Explanation: All of the JSON files have the same columns and the same structure; they are part files from the same source.
I want to merge/repartition all of them, convert them into a single CSV file, and store it in S3.
import pandas as pd
import boto3

# Boto3 client
client = boto3.client('s3')

bucket = 'test'
path = 'json_files/data/'
delimiter = '/'

json_files = client.list_objects(Bucket=bucket, Prefix=path, Delimiter=delimiter)
for obj in json_files['Contents']:
    # read each part file straight from S3 into a DataFrame
    obj = client.get_object(Bucket=bucket, Key=obj['Key'])
    df = pd.read_json(obj["Body"], lines=True)
    print(df)
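This reads each part file but does not yet merge or upload anything. A sketch of the remaining step, assuming all frames fit in memory and using a hypothetical output key merged/output.csv:
import io

frames = []
for obj in json_files['Contents']:
    body = client.get_object(Bucket=bucket, Key=obj['Key'])["Body"]
    frames.append(pd.read_json(body, lines=True))

# concatenate the part files and upload a single CSV back to S3
merged = pd.concat(frames, ignore_index=True)
buffer = io.StringIO()
merged.to_csv(buffer, index=False)
client.put_object(Bucket=bucket, Key='merged/output.csv', Body=buffer.getvalue().encode('utf-8'))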

How do I convert VGG JSON file annotations into YOLOv3 annotation format?

I am currently working on a deep learning model for plate detection using the YOLOv3 object detector. I used the VGG Image Annotator on 1470 images and exported the annotations in both JSON and CSV format:
VGG annotation in JSON format
VGG annotation in CSV format
As you can see, I used polygons and rectangles because some of the plates had awkward shapes. I tried converting them into YOLOv3-format annotations but am having trouble doing so.
Any help will be much appreciated.
The YOLOv3 format I know of stores annotations as
[classID, x_center, y_center, w, h]
where classID is an integer and the other four numbers are reals between 0 and 1: x_center and w are normalized by the image width (W), y_center and h by the image height (H). So to get [x_min, y_min, x_max, y_max], one needs to:
Correct the offset:
[x_0, y_0, x_1, y_1] = [x_center - w/2, y_center - h/2, x_center + w/2, y_center + h/2]
Apply the size:
[x_min, y_min, x_max, y_max] = [x_0 * W, y_0 * H, x_1 * W, y_1 * H]
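A minimal sketch of that conversion (function and variable names are illustrative):
def yolo_to_corners(x_center, y_center, w, h, img_w, img_h):
    # shift from center/size to corners, then scale to pixels
    x0, y0 = x_center - w / 2, y_center - h / 2
    x1, y1 = x_center + w / 2, y_center + h / 2
    return x0 * img_w, y0 * img_h, x1 * img_w, y1 * img_h

# e.g. a centered box covering half the image in each dimension
print(yolo_to_corners(0.5, 0.5, 0.5, 0.5, 640, 480))  # (160.0, 120.0, 480.0, 360.0)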
The following Python script allows you to convert JSON VGG file annotations into YOLOv3 annotation format.
from PIL import Image
from os import path, makedirs
import os
import re
import pandas as pd
import sys
import argparse

def get_parent_dir(n=1):
    """Returns the n-th parent directory of the current
    working directory."""
    current_path = os.path.dirname(os.path.abspath(__file__))
    for k in range(n):
        current_path = os.path.dirname(current_path)
    return current_path

sys.path.append(os.path.join(get_parent_dir(1), "Utils"))
from Convert_Format import convert_vott_csv_to_yolo

Data_Folder = os.path.join(get_parent_dir(1), "Data")
VoTT_Folder = os.path.join(
    Data_Folder, "Source_Images", "Training_Images", "vott-csv-export"
)
VoTT_csv = os.path.join(VoTT_Folder, "Annotations-export.csv")
YOLO_filename = os.path.join(VoTT_Folder, "data_train.txt")
model_folder = os.path.join(Data_Folder, "Model_Weights")
classes_filename = os.path.join(model_folder, "data_classes.txt")

if __name__ == "__main__":
    # suppress any inherited default values
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS)
    """
    Command line options
    """
    parser.add_argument(
        "--VoTT_Folder",
        type=str,
        default=VoTT_Folder,
        help="Absolute path to the exported files from the image tagging step with VoTT. Default is "
        + VoTT_Folder,
    )
    parser.add_argument(
        "--VoTT_csv",
        type=str,
        default=VoTT_csv,
        help="Absolute path to the *.csv file exported from VoTT. Default is "
        + VoTT_csv,
    )
    parser.add_argument(
        "--YOLO_filename",
        type=str,
        default=YOLO_filename,
        help="Absolute path to the file where the annotations in YOLO format should be saved. Default is "
        + YOLO_filename,
    )
    FLAGS = parser.parse_args()

    # Prepare the dataset for YOLO
    multi_df = pd.read_csv(FLAGS.VoTT_csv)
    labels = multi_df["label"].unique()
    labeldict = dict(zip(labels, range(len(labels))))
    multi_df.drop_duplicates(subset=None, keep="first", inplace=True)
    train_path = FLAGS.VoTT_Folder
    convert_vott_csv_to_yolo(
        multi_df, labeldict, path=train_path, target_name=FLAGS.YOLO_filename
    )

    # Make classes file
    file = open(classes_filename, "w")
    # Sort dict by values
    SortedLabelDict = sorted(labeldict.items(), key=lambda x: x[1])
    for elem in SortedLabelDict:
        file.write(elem[0] + "\n")
    file.close()
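Assuming the script is saved as, say, Convert_to_YOLO_format.py (the filename is illustrative), the defaults can be overridden on the command line:
python Convert_to_YOLO_format.py --VoTT_csv /path/to/Annotations-export.csv --YOLO_filename /path/to/data_train.txt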

Python: How to save *.dat files as *.csv files to a new folder

I have a folder with lots of *.dat files (which were created with the program IDL). I am able to take one single file, convert it to a *.csv file and save it in a different (already existing) folder:
import idlsave
import csv

input_file = idlsave.read("C:/Users/RAW/06211714.dat")
n = input_file["raw"]
with open("C:/Users/CSV/06211714.csv", "w", newline='') as f:
    writer = csv.writer(f)
    writer.writerows(n)
The line input_file = idlsave.read("C:/Users/RAW/06211714.dat") shows the following output:
Available variables: raw class ['numpy.recarray']
So, this works fine for just taking one file, but I am looking for a way to take all *.dat files at once and convert each of them to a *.csv file with their original name.
I was thinking of something like this, but it didn't work:
import glob

for filename in glob.glob("C:/Users/RAW/*.dat"):
    for element in filename:
        i = idlsave.read(element)
        n = i["raw"]
        with open("C:/Users/CSV/*.csv", "w", newline='') as f:
            writer = csv.writer(f)
            writer.writerows(n)
Can someone please give me some advice?
Thanks.
import csv
import idlsave
from os import listdir
from os.path import isfile, join, splitext

dat_folder = "/folder/to/dat/files/"
csv_folder = "/folder/to/save/new/csv/files/"

onlyfilenames = [f for f in listdir(dat_folder) if isfile(join(dat_folder, f))]
for fullfilename in onlyfilenames:
    file_name, file_extension = splitext(fullfilename)
    if file_extension == ".dat":
        input_file = idlsave.read(dat_folder + fullfilename)
        n = input_file["raw"]
        with open(join(csv_folder, file_name + ".csv"), "w", newline='') as f:
            writer = csv.writer(f)
            writer.writerows(n)
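The same loop can also be written with glob, closer to the question's attempt (a sketch, assuming the same folder layout as in the question):
import glob
import os

for dat_path in glob.glob("C:/Users/RAW/*.dat"):
    n = idlsave.read(dat_path)["raw"]
    base = os.path.splitext(os.path.basename(dat_path))[0]
    with open(os.path.join("C:/Users/CSV", base + ".csv"), "w", newline='') as f:
        csv.writer(f).writerows(n)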

Open JSON files in different directory - Python3, Windows, pathlib

I am trying to open JSON files located in a directory other than the current working directory (cwd). My setting: Python3.5 on Windows (using Anaconda).
from pathlib import *
import json

path = Path("C:/foo/bar")
filelist = []
for f in path.iterdir():
    filelist.append(f)
for file in filelist:
    with open(file.name) as data_file:
        data = json.load(data_file)
In this setting I have these values:
file >> C:\foo\bar\0001.json
file.name >> 0001.json
However, I get the following error message:
---> 13 with open(file.name) as data_file:
     14     data = json.load(data_file)
FileNotFoundError: [Errno 2] No such file or directory: '0001.json'
Here is what I tried so far:
Use .joinpath() to add the directory to the file name in the open command:
with open(path.joinpath(file.name)) as data_file:
    data = json.load(data_file)
TypeError: invalid file: WindowsPath('C:/foo/bar/0001.json')
Used .resolve(), as that works for me when loading CSV files into Pandas. It did not work here.
for file in filelist:
    j = Path(path, file.name).resolve()
    with open(j) as data_file:
        data = json.load(data_file)
Since I'm on Windows, wrote the path as (and yes, the file is in that directory):
path = Path("C:\\foo\\bar")  # resulted in the same FileNotFoundError as above
Instantiated path like this:
path = WindowsPath("C:/foo/bar")
# Same TypeError as above for both '\\' and '/'
The accepted answer has some redundancy: it re-collects the iterdir() generator into a list and mixes the plain open() with statement with pathlib.Path.
pathlib.Path is a great way to handle paths, especially for scripts that should work on both Linux and Windows.
# modules
from pathlib import Path
import json

# static values
JSON_SUFFIXES = [".json", ".js", ".other_suffix"]

folder_path = Path("C:/users/user/documents")
for file_path in folder_path.iterdir():
    if file_path.suffix in JSON_SUFFIXES:
        data = json.loads(file_path.read_bytes())
Just adding this modification for new users; pathlib.Path works with Python 3.
Complete solution; thanks @eryksun:
from pathlib import *
import json

path = Path("C:/foo/bar")
filelist = []
for f in path.iterdir():
    filelist.append(f)
for file in filelist:
    with open(str(file)) as data_file:
        data = json.load(data_file)
This line works as well:
with file.open() as data_file: