When I label images in the labelme interface, the output I get is a JSON file, but I need the result in an image format like PNG, BMP or JPEG after labeling. Can anyone suggest some code?
import json
from PIL import Image

with open('your.json') as f:
    data = json.load(f)

# Load the image file path from the JSON
imgpath = data['yourkey']

# Open the image from that path
img = Image.open(imgpath)
Based on the tutorial in the original repository, you can use labelme_json_to_dataset <<JSON_PATH>> -o <<OUTPUT_FOLDER_PATH>>.
To run it from Python / Jupyter, you can use:
import os

def labelme_json_to_dataset(json_path):
    os.system("labelme_json_to_dataset " + json_path + " -o " + json_path.replace(".", "_"))
If you need to do it for multiple images, just loop the function.
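For example, a minimal sketch of such a loop (the folder path is just a placeholder) could look like this:

import glob

# Convert every labelme JSON file found in a folder (path is an assumption)
for json_path in glob.glob("path/to/annotations/*.json"):
    labelme_json_to_dataset(json_path)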
Based on the issue, labelme_json_to_dataset behavior can be reimplemented by using either labelme2voc.py or labelme2coco.py.
You could also use another implementation such as labelme2Datasets.
You can also implement your own variant of labelme_json_to_dataset using the labelme library. Basically, you use label_file = labelme.LabelFile(filename=filename) followed by img = labelme.utils.img_data_to_arr(label_file.imageData). An example of such a process would look like this:
import glob
import os
import os.path as osp

import labelme
from PIL import Image


def labelme2images(input_dir, output_dir, force=False, save_img=False, new_size=False):
    """
    Extract the embedded image from each labelme JSON file.

    new_size, if given, is a (new_size_width, new_size_height) tuple.
    """
    if save_img:
        # _makedirs is a small helper (defined elsewhere) that creates the folder
        _makedirs(path=osp.join(output_dir, "images"), force=force)
    if new_size:
        new_size_width, new_size_height = new_size

    print("Generating dataset")

    filenames = glob.glob(osp.join(input_dir, "*.json"))
    for filename in filenames:
        # base name of the JSON file, reused as the image name
        base = osp.splitext(osp.basename(filename))[0]

        label_file = labelme.LabelFile(filename=filename)
        img = labelme.utils.img_data_to_arr(label_file.imageData)
        h, w = img.shape[0], img.shape[1]

        if save_img:
            if new_size:
                # PIL's resize expects a (width, height) tuple
                img_pil = Image.fromarray(img).resize((new_size_width, new_size_height))
            else:
                img_pil = Image.fromarray(img)
            img_pil.save(osp.join(output_dir, "images", base + ".jpg"))
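A hypothetical call (both paths and the size are placeholders) might look like:

labelme2images("path/to/json_dir", "path/to/output_dir", force=True, save_img=True, new_size=(640, 480))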
This line:
else:
    # add to this
    nutrients_totals_df = pd.read_json(total_nutrients_json, orient='split')
is throwing the error.
I write my json like:
nutrients_json = nutrients_df.to_json(date_format='iso', orient='split')
Then I stash it in a hidden div or dcc.Store in one callback and get it in another callback. How do I fix this error?
When I read JSON files that I've written with pandas, I use the function below and call it inside json.loads().
import json

def read_json_file_from_local(fullpath):
    """Read a JSON file from the local filesystem and return its text."""
    with open(fullpath, 'rb') as f:
        data = f.read().decode('utf-8')
    return data

df = json.loads(read_json_file_from_local(fullpath))
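Alternatively, since the string was produced with orient='split', pandas can rebuild the DataFrame directly from it. A minimal sketch (wrapping the string in StringIO, since passing literal JSON strings to read_json is deprecated in newer pandas versions):

import pandas as pd
from io import StringIO

nutrients_totals_df = pd.read_json(StringIO(total_nutrients_json), orient='split')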
I'm trying to get a unit test working that validates a function that reads credentials from a JSON-encoded file. Since the credentials themselves aren't fixed, the unit test needs to provide some and then test that they are correctly retrieved.
Here is the credentials function:
def read_credentials():
    basedir = os.path.dirname(__file__)
    with open(os.path.join(basedir, "authentication.json")) as f:
        data = json.load(f)
    return data["bot_name"], data["bot_password"]
and here is the test:
def test_credentials(self):
    with patch("builtins.open", mock_open(
        read_data='{"bot_name": "name", "bot_password": "password"}\n'
    )):
        name, password = shared.read_credentials()
    self.assertEqual(name, "name")
    self.assertEqual(password, "password")
However, when I run the test, the json code blows up with a decode error. Looking at the json code itself, I'm struggling to see why the mock test is failing because json.load(f) simply calls f.read() then calls json.loads().
Indeed, if I change my authentication function to the following, the unit test works:
def read_credentials():
    # Read the authentication file from the current directory and create a
    # HTTPBasicAuth object that can then be used for future calls.
    basedir = os.path.dirname(__file__)
    with open(os.path.join(basedir, "authentication.json")) as f:
        content = f.read()
    data = json.loads(content)
    return data["bot_name"], data["bot_password"]
I don't necessarily mind leaving my code in this form, but I'd like to understand if I've got something wrong in my test that would allow me to keep my function in its original form.
Stack trace:
Traceback (most recent call last):
File "test_shared.py", line 56, in test_credentials
shared.read_credentials()
File "shared.py", line 60, in read_credentials
data = json.loads(content)
File "/home/philip/.local/share/virtualenvs/atlassian-webhook-basic-3gOncDp4/lib/python3.6/site-packages/flask/json/__init__.py", line 205, in loads
return _json.loads(s, **kwargs)
File "/usr/lib/python3.6/json/__init__.py", line 367, in loads
return cls(**kw).decode(s)
File "/usr/lib/python3.6/json/decoder.py", line 339, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib/python3.6/json/decoder.py", line 357, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
I had the same issue and got around it by mocking json.load and builtins.open:
import json
from unittest.mock import patch, MagicMock

# I don't care about the actual open
p1 = patch("builtins.open", MagicMock())
m = MagicMock(side_effect=[{"foo": "bar"}])
p2 = patch("json.load", m)

with p1 as p_open:
    with p2 as p_json_load:
        f = open("filename")
        print(json.load(f))
Result:
{'foo': 'bar'}
I had the exact same issue and solved it. Full code below, first the function to test, then the test itself.
The original function I want to test loads a json file that is structured like a dictionary, and checks to see if there's a specific key-value pair in it:
def check_if_file_has_real_data(filepath):
    with open(filepath, "r") as f:
        data = json.load(f)
    if "fake" in data["the_data"]:
        return False
    else:
        return True
But I want to test this without loading any actual file, exactly as you describe. Here's how I solved it:
from my_module import check_if_file_has_real_data
import mock

@mock.patch("my_module.json.load")
@mock.patch("my_module.open")
def test_check_if_file_has_real_data(mock_open, mock_json_load):
    mock_json_load.return_value = dict({"the_data": "This is fake data"})
    assert check_if_file_has_real_data("filepath") == False

    mock_json_load.return_value = dict({"the_data": "This is real data"})
    assert check_if_file_has_real_data("filepath") == True
The mock_open object isn't called explicitly in the test function, but without that decorator and argument you get a file-path error: the with open(...) part of check_if_file_has_real_data would run with the real open function instead of the MagicMock that has been patched in.
Then you overwrite the response provided by the json.load mock with whatever you want to test.
I am writing records to a Kinesis Firehose stream that is eventually written to an S3 file by Amazon Kinesis Firehose.
My record object looks like
ItemPurchase {
String personId,
String itemId
}
The data written to S3 looks like:
{"personId":"p-111","itemId":"i-111"}{"personId":"p-222","itemId":"i-222"}{"personId":"p-333","itemId":"i-333"}
NO COMMA SEPARATION.
NO STARTING BRACKET, as in a JSON array:
[
NO ENDING BRACKET, as in a JSON array:
]
I want to read this data and get a list of ItemPurchase objects.
List<ItemPurchase> purchases = getPurchasesFromS3(IOUtils.toString(s3ObjectContent))
What is the correct way to read this data?
It boggles my mind that Amazon Firehose dumps JSON messages to S3 in this manner, and doesn't allow you to set a delimiter or anything.
Ultimately, the trick I found to deal with the problem was to process the text file using the JSON raw_decode method.
This will allow you to read a bunch of concatenated JSON records without any delimiters between them.
Python code:
import json

decoder = json.JSONDecoder()
with open('giant_kinesis_s3_text_file_with_concatenated_json_blobs.txt', 'r') as content_file:
    content = content_file.read()

    content_length = len(content)
    decode_index = 0

    while decode_index < content_length:
        try:
            obj, decode_index = decoder.raw_decode(content, decode_index)
            print("File index:", decode_index)
            print(obj)
        except json.JSONDecodeError as e:
            print("JSONDecodeError:", e)
            # Scan forward and keep trying to decode
            decode_index += 1
I also had the same problem; here is how I solved it.
Replace "}{" with "}\n{", then split the lines by "\n".

input_json_rdd.map(lambda x: re.sub("}{", "}\n{", x, flags=re.UNICODE)) \
    .flatMap(lambda line: line.split("\n"))

A nested JSON object has several "}" characters, so splitting lines by "}" doesn't solve the problem.
I've had the same issue.
It would have been better if AWS allowed us to set a delimiter but we can do it on our own.
In my use case, I've been listening on a stream of tweets, and once receiving a new tweet I immediately put it to Firehose.
This, of course, resulted in a 1-line file which could not be parsed.
So, to solve this, I have concatenated the tweet's JSON with a \n.
This, in turn, lets me use packages that can emit lines when reading the stream contents, and parse the file easily.
Hope this helps you.
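For reference, a minimal sketch of that producer-side change (the stream name and the use of boto3 are assumptions, not part of the original setup):

import json
import boto3

firehose = boto3.client("firehose")

def put_tweet(tweet):
    # Append a newline so each record lands on its own line in the S3 object
    firehose.put_record(
        DeliveryStreamName="my-tweet-stream",  # hypothetical stream name
        Record={"Data": (json.dumps(tweet) + "\n").encode("utf-8")},
    )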
I think the best way to tackle this is to first create a properly formatted JSON file containing well-separated JSON objects. In my case I added ',' to the events that were pushed into the Firehose. Then, after a file is saved in S3, all the files will contain JSON objects separated by some delimiter (a comma, in our case). Another thing that must be added are '[' and ']' at the beginning and end of the file. Then you have a proper JSON file containing multiple JSON objects, and parsing them becomes possible.
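If the brackets are not added at write time, they can be added on the reading side instead. A rough sketch under the assumption that each stored record already ends with a trailing comma (the file name is a placeholder):

import json

with open("firehose_output.txt") as f:
    body = f.read().strip()

# Drop the final trailing comma and wrap everything in brackets to get a valid JSON array
records = json.loads("[" + body.rstrip(",") + "]")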
If the input source for the Firehose is an Analytics application, this concatenated JSON without a delimiter is a known issue, as cited here. You should have a Lambda function, as shown here, that outputs the JSON objects on multiple lines.
I used a transformation Lambda to add a line break at the end of every record:

import base64
import copy

def lambda_handler(event, context):
    output = []
    for record in event['records']:
        # Decode from base64 (Firehose records are base64 encoded)
        payload = base64.b64decode(record['data'])

        # Read the JSON as UTF-8
        json_string = payload.decode("utf-8")

        # Add a line break
        output_json_with_line_break = json_string + "\n"

        # Encode the data again
        encoded_bytes = base64.b64encode(bytearray(output_json_with_line_break, 'utf-8'))
        encoded_string = str(encoded_bytes, 'utf-8')

        # Create a deep copy of the record and append it to the output with the transformed data
        output_record = copy.deepcopy(record)
        output_record['data'] = encoded_string
        output_record['result'] = 'Ok'
        output.append(output_record)

    print('Successfully processed {} records.'.format(len(event['records'])))

    return {'records': output}
Use this simple Python code:

import json

input_str = '''{"personId":"p-111","itemId":"i-111"}{"personId":"p-222","itemId":"i-222"}{"personId":"p-333","itemId":"i-333"}'''
data_str = "[{}]".format(input_str.replace("}{", "},{"))
data_json = json.loads(data_str)
And then (if you want) convert to Pandas.
import pandas as pd
df = pd.DataFrame().from_records(data_json)
print(df)
And this is the result:
itemId personId
0 i-111 p-111
1 i-222 p-222
2 i-333 p-333
If there's a way to change how the data is written, please separate all the records by a newline. That way you can read the data simply, line by line. If not, then build a scanner object that takes "}" as a delimiter and use the scanner to read. That would do the job.
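If the records do end up newline-separated, the reading side becomes a simple loop; a minimal sketch (the file name is a placeholder):

import json

purchases = []
with open("firehose_output.jsonl") as f:
    for line in f:
        line = line.strip()
        if line:
            purchases.append(json.loads(line))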
You can find each valid JSON object by counting the curly brackets. Assuming the file starts with a {, this Python snippet should work:
import json

def read_block(stream):
    open_brackets = 0
    block = ''
    while True:
        c = stream.read(1)
        if not c:
            break

        if c == '{':
            open_brackets += 1
        elif c == '}':
            open_brackets -= 1

        block += c

        if open_brackets == 0:
            yield block
            block = ''


if __name__ == "__main__":
    with open('firehose_json_blob', 'r') as f:
        for block in read_block(f):
            record = json.loads(block)
            print(record)
This problem can be solved with a JSON parser that consumes objects one at a time from a stream. The raw_decode method of the JSONDecoder exposes just such a parser, but I've written a library that makes it straightforward to do this with a one-liner.
from firehose_sipper import sip
for entry in sip(bucket=..., key=...):
do_something_with(entry)
I've added some more details in this blog post
In Spark, we had the same problem. We're using the following:
import re

from pyspark.sql.functions import *

@udf
def concatenated_json_to_array(text):
    final = "["
    separator = ""
    for part in text.split("}{"):
        final += separator + part
        separator = "}{" if re.search(r':\s*"([^"]|(\\"))*$', final) else "},{"
    return final + "]"

def read_concatenated_json(path, schema):
    return (spark.read
            .option("lineSep", None)
            .text(path)
            .withColumn("value", concatenated_json_to_array("value"))
            .withColumn("value", from_json("value", schema))
            .withColumn("value", explode("value"))
            .select("value.*"))
It works as follows:
Read the data as one string per file (no delimiters!)
Use a UDF to introduce the JSON array and split the JSON objects by introducing a comma. Note: be careful not to break any strings with }{ in them!
Parse the JSON with a schema into DataFrame fields.
Explode the array into separate rows.
Expand the value object into columns.
Use it like this:
from pyspark.sql.types import *
schema = ArrayType(
    StructType([
        StructField("type", StringType(), True),
        StructField("value", StructType([
            StructField("id", IntegerType(), True),
            StructField("joke", StringType(), True),
            StructField("categories", ArrayType(StringType()), True)
        ]), True)
    ])
)
path = '/mnt/my_bucket_name/messages/*/*/*/*/'
df = read_concatenated_json(path, schema)
I've written more details and considerations here: Parsing JSON data from S3 (Kinesis) with Spark. Do not just split by }{, as it can mess up your string data! For example: { "line": "a\"r}{t" }.
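To see why, here is a small illustration with a hypothetical record whose string value itself contains "}{": the naive replace also fires inside the string and silently corrupts the value.

import json

tricky = '{"line": "a\\"r}{t"}'
print(json.loads(tricky))    # {'line': 'a"r}{t'}  (the real value)

naive = "[" + tricky.replace("}{", "},{") + "]"
print(json.loads(naive))     # [{'line': 'a"r},{t'}]  (silently wrong)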
You can use the script below.
If the streamed data size does not exceed the buffer size you set, each S3 file ends up with one pair of brackets ([]) and commas between the records.
import base64

print('Loading function')


def lambda_handler(event, context):
    output = []

    for record in event['records']:
        print(record['recordId'])
        payload = base64.b64decode(record['data']).decode('utf-8') + ',\n'

        # Do custom processing on the payload here
        output_record = {
            'recordId': record['recordId'],
            'result': 'Ok',
            # decode to str so the returned event is JSON serializable
            'data': base64.b64encode(payload.encode('utf-8')).decode('utf-8')
        }
        output.append(output_record)

    last = len(event['records']) - 1
    print('Successfully processed {} records.'.format(len(event['records'])))

    # Prepend '[' to the first record and append ']' to the last one
    start = '[' + base64.b64decode(output[0]['data']).decode('utf-8')
    end = base64.b64decode(output[last]['data']).decode('utf-8') + ']'
    output[0]['data'] = base64.b64encode(start.encode('utf-8')).decode('utf-8')
    output[last]['data'] = base64.b64encode(end.encode('utf-8')).decode('utf-8')

    return {'records': output}
Using a JavaScript regex:
JSON.parse(`[${item.replace(/}\s*{/g, '},{')}]`);
I've found some data that someone is downloading into a JSON file (I think! - I'm a newb!). The file contains data on nearly 600 football players.
Here you can find the file
In the past, I have downloaded the json file and then used this code:
import csv
import json

json_data = open("file.json")
data = json.load(json_data)

f = csv.writer(open("fix_hists.csv", "wb+"))

arr = []

for i in data:
    fh = data[i]["fixture_history"]
    array = fh["all"]
    for j in array:
        try:
            j.insert(0, str(data[i]["first_name"]))
        except:
            j.insert(0, 'error')
        try:
            j.insert(1, data[i]["web_name"])
        except:
            j.insert(1, 'error')
        try:
            f.writerow(j)
        except:
            f.writerow(['error', 'error'])

json_data.close()
Sadly, when I do this now at the command prompt, I get the following error:
Traceback (most recent call last):
  File "fix_hist.py", line 12, in <module>
    fh = data[i]["fixture_history"]
TypeError: list indices must be integers, not str
Can this be fixed, or is there another way I can grab some of the data and convert it to .csv? Specifically the 'fixture_history', and then 'first_name', 'type_name', etc.
Thanks in advance for any help :)
Try this tool: http://www.convertcsv.com/json-to-csv.htm
You will need to configure a few things, but it should be easy enough.
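If you'd rather stay in Python, here is a rough sketch. It assumes the file is now a JSON list of player objects (which would explain the "list indices must be integers" error) and that each player still has first_name, web_name and a fixture_history dict with an "all" list; adjust the keys to whatever the file actually contains.

import csv
import json

with open("file.json") as jf:
    players = json.load(jf)  # assumed to be a list of player dicts

with open("fix_hists.csv", "w", newline="") as cf:
    writer = csv.writer(cf)
    for player in players:
        for fixture in player.get("fixture_history", {}).get("all", []):
            # Prepend the player's name columns to each fixture row
            writer.writerow([player.get("first_name", "error"),
                             player.get("web_name", "error")] + list(fixture))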