Parsing nested json to find value of an object - json

I am trying to find the value of the statement object in the JSON file below.
I am trying the code below, but it raises an error.
import cx_Oracle
import json
import pandas as pd
f = open('test.json')
records = json.load(f)
pd.json_normalize(records, record_path = [‘ddl’], meta =['op_type', 'op_ts','pos','xid'])
Above gives SyntaxError: invalid character '‘' (U+2018).
test.json is as follows:
[
{
"op_type": "DDL",
"op_ts": "2023-02-16T05:30:04.000Z",
"pos": "G-AQAAAMQNAAAAAAAAAAAAAAAAAAAGAAoAAA==8057790.6.13.989",
"xid": "0.6.13.989",
"ddl": {
"object": {
"catalog": "",
"schema": "TKGGU1",
"object": "SRCTAB2"
},
"statement": "create table srctab2"
}
}
]

You're using the wrong quotes:
pd.json_normalize(records, record_path = [‘ddl’], ...
^ ^
| |
Don't use the typographic quotes ‘ and ’; use the straight ASCII quote ' instead.
import cx_Oracle
import json
import pandas as pd
# Use a context manager so the file handle is closed once the JSON is parsed.
with open('test.json') as f:
    records = json.load(f)
# "ddl" is a nested object (a dict), not a list of records, so it cannot be
# used as record_path: json_normalize raises
# "TypeError: ... Must be list or null." for a non-list path.
# Normalizing the records directly flattens nested objects into dotted
# column names such as "ddl.statement" and "ddl.object.schema", next to the
# top-level columns 'op_type', 'op_ts', 'pos' and 'xid'.
df = pd.json_normalize(records)
print(df['ddl.statement'])

Related

JSON Webscraper returning empty array

I'm trying to parse a website using the following code:
import requests
r = requests.get('https://www.finn.no/realestate/homes/search.html?sort=PUBLISHED_DESC')
print(r.json())
However, it appears that it just returns an empty array.
I tried putting it in a dict and catching the response using
import sys, json
struct = {}
try:
dataform = str(r).strip("'<>() ").replace('\'', '\"')
struct = json.loads(dataform)
except:
print(repr(r))
print(sys.exc_info())
struct
And the code returns:
<Response [200]>
(<class 'json.decoder.JSONDecodeError'>, JSONDecodeError('Expecting value: line 1 column 1 (char 0)') ....
You're trying to treat an HTML document as JSON, which is why parsing fails. The page's JSON data is embedded inside a single <script> element, so you can use BeautifulSoup to locate it and the json module to parse it:
import json
import requests
from bs4 import BeautifulSoup
# Download the search page; the response body is HTML, not JSON.
r = requests.get(
"https://www.finn.no/realestate/homes/search.html?sort=PUBLISHED_DESC"
)
soup = BeautifulSoup(r.content, "html.parser")
# Locate the element with id "__NEXT_DATA__", which holds the embedded JSON.
# NOTE(review): select_one returns None when nothing matches, in which case
# the next line raises AttributeError -- confirm the element is always present.
data = soup.select_one("#__NEXT_DATA__")
# Parse the element's text content into Python objects.
data = json.loads(data.text)
# Pretty-print the parsed data with a 4-space indent:
print(json.dumps(data, indent=4))
Prints:
{
"props": {
"pageProps": {
"search": {
"docs": [
{
"type": "realestate",
"ad_id": 276867609,
"main_search_key": "SEARCH_ID_REALESTATE_NEWBUILDINGS",
"heading": "Unik anledning! \u00d8nsker du \u00e5 bo med \"leilighetsf\u00f8lelse\" rett ved bysentrum, og likevel ha plass til storfamilien?",
"location": "Fauchaldsgate 2, Gj\u00f8vik",
"image": {
"url": "https://images.finncdn.no/dynamic/default/2022/11/vertical-0/22/9/276/867/609_1157963344.jpg",
"path": "2022/11/vertical-0/22/9/276/867/609_1157963344.jpg",
"height": 1280,
"width": 1920,
"aspect_ratio": 1.5
},
...and so on.

Python - parsing JSON and f string

I am trying to write a small script that parses JSON files. I need to include multiple variables in the code, but I'm currently stuck because the f-string does not work as I expected. Here is some example code:
import json
test = 10
json_data = f'[{"ID": {test},"Name":"Pankaj","Role":"CEO"}]'
json_object = json.loads(json_data)
json_formatted_str = json.dumps(json_object, indent=2)
print(json_formatted_str)
The above code returns an error:
json_data = f'[{"ID": { {test} },"Name":"Pankaj","Role":"CEO"}]'
ValueError: Invalid format specifier
Could you please let me know how I can add variables to the JSON?
Thank you.
You need to escape the literal braces in the f-string by doubling them ({{ and }}):
import json
test = 10
# Inside an f-string, single braces delimit a replacement field such as
# {test}; literal braces that belong to the JSON text are written doubled.
json_data = f'[{{"ID": {test},"Name":"Pankaj","Role":"CEO"}}]'
# Parse the generated text to check that it is valid JSON.
json_object = json.loads(json_data)
# Serialize it again with a 2-space indent for readable output.
json_formatted_str = json.dumps(json_object, indent=2)
print(json_formatted_str)
Prints:
[
{
"ID": 10,
"Name": "Pankaj",
"Role": "CEO"
}
]

JSON list items to dataframe

I am using an API that has changed its spec and now my JSON feed is slightly different.
BEFORE:
{"code":2000,"message":"SUCCESS","data":
{"1":
{"id":1,
"name":"Amanda",
"score":"57.36%",
"average":"53.47%"
}
}
}
Then, I used something to this effect:
import json
import pandas as pd
jsonfile = 'file.json'
with open(jsonfile) as j:
data = json.load(j)
rows = [v for k, v in data["data"].items()]
df = pd.DataFrame(rows, columns=['id', 'name', 'score', 'average'])
Source AFTER:
{"status":"success","code":0,"data":
{"data":
[
{
"id":1,
"name":"Robert",
"score":"48.85%",
"average":"40.52%"
}
]
}
}
So I'm attempting to adjust using some of the resources:
Convert JSON list to pandas dataframe
JSON to pandas DataFrame
how to convert json data with list of list to dataframe using python pandas
What I've tried so far:
import json
import pandas as pd
from pandas import json_normalize
jsonfile = 'file.json'
with open(jsonfile) as j:
data = json.load(j)
df = json_normalize(data, ['data'])
I've also tried:
df = pd.DataFrame.from_records(data)
I get the following:
TypeError: {....} for path data. Must be list or null.
What am I missing here?

python3: Extract same type value from Json file

The following is my JSON data, in which the row values are Base64-encoded.
response={"response": [{"objcontent": [{"title": "Pressure","rowkeys": [
"lat",
"lon",
"Pressure"
],
"rowvalues": [
[
"WxsArK0NV0A=",
"uaQCWFxSM0A=",
"ncvggc7lcUA6MVVLnZiMQH6msaA+0yhANzLp2RsZhkBwobfXt9BXQKtxbnjV+IFARq3fVqOWiEBwyyvmt+V9QDGg7k8YUHpA4IZm9W/De0A="
],
[
"WxsArK0NV0A=",
"HqJT4w7RUkA=",
"BfPox4I5ikCLVYxUxWqIQIFwlJFA+IVAJeQ6gBLyhEBB0QlkoGiCQDOkvnAZUm1AkGbWKEgza0A+FCkwH4phQHwSRSY+iVRAKcvC4pRliEA="
],
[
"WxsArK0NV0A=",
"G5rYdw0NXkA=",
"C9dhhIVrg0B2hCvzOoKKQMrMWhll5o5AIujgxBB0ZkD8+EipfXx0QOXh0LLycH5ATdtxKqbtdkAw66X3l/VhQLqvZBbd13FAjKl2+8UUjUA="
],
[
"WxsArK0NV0A=",
"PTvsm55daEA=",
"W+wyHC12dUCrvSLM1d6BQMfay0ZjbYpAjnk4Ecc8dkDH35pL429xQPTOwkF6Z41Aci5JATkXjUBQ6Wjlp3RQQFlpNGmsNHpAFf0DUor+dUA="
]]}]}]}
I decoded the values and used them to draw a plot. The following is the code:
import base64
import struct
import numpy as np
import pylab as pl
for response_i in response['response']:
for row in response_i['objcontent'][0]['rowvalues']:
for item in row[:]:
decoded=base64.b64decode(item)
if len(decoded)<9:
a=struct.unpack('d',decoded)
else:
decoded=base64.b64decode(item)
a=struct.unpack('10d',decoded)
last=np.array(a)
pl.show(pl.plot(last))
But I would like to separate the values of each list. In 'rowkeys' there are 3 elements [ "lat", "lon", "Pressure"]; accordingly, there are 3 values in each list of 'rowvalues'.
My question is: how can I separate the different values in 'rowvalues' and group them under the matching entry of 'rowkeys'?
So, at the end, I am supposed to have 3 lists which include all the decoded values.
'lat': [WxsArK0NV0A=,WxsArK0NV0A=,WxsArK0NV0A=,WxsArK0NV0A=]
'lon': [uaQCWFxSM0A=,HqJT4w7RUkA=,G5rYdw0NXkA=,PTvsm55daEA=]
'pressure': [ncvggc7lcUA6MVVLnZiMQH6msaA+0yhANzLp2RsZhkBwobfXt9BXQKtxbnjV+IFARq3fVqOWiEBwyyvmt+V9QDGg7k8YUHpA4IZm9W/De0A=, BfPox4I5ikCLVYxUxWqIQIFwlJFA+IVAJeQ6gBLyhEBB0QlkoGiCQDOkvnAZUm1AkGbWKEgza0A+FCkwH4phQHwSRSY+iVRAKcvC4pRliEA=, C9dhhIVrg0B2hCvzOoKKQMrMWhll5o5AIujgxBB0ZkD8+EipfXx0QOXh0LLycH5ATdtxKqbtdkAw66X3l/VhQLqvZBbd13FAjKl2+8UUjUA=, W+wyHC12dUCrvSLM1d6BQMfay0ZjbYpAjnk4Ecc8dkDH35pL429xQPTOwkF6Z41Aci5JATkXjUBQ6Wjlp3RQQFlpNGmsNHpAFf0DUor+dUA=]
One approach would be to manually sort the data, like so:
from collections import defaultdict
from base64 import b64decode
import json
# Maps each column name from "rowkeys" to the list of its values, one per row.
d = defaultdict(list)
with open(json_file) as f:
    # NOTE(review): this assumes the file as a whole is Base64-encoded; if the
    # file is plain JSON and only the row values are encoded, use json.load(f)
    # here instead -- confirm against the real file.
    js = json.loads(b64decode(f.read()).decode())
# In the sample data both "response" and "objcontent" are lists, so take the
# first element of each to reach the object holding "rowkeys"/"rowvalues".
response = js['response'][0]['objcontent'][0]
for i, col_name in enumerate(response['rowkeys']):
    # Walk the actual rows; the i-th entry of every row belongs to col_name.
    for row_val in response['rowvalues']:
        d[col_name].append(row_val[i])
defaultdict automatically creates a new list when a key that didn't previously exist is accessed, which makes your code slightly sleeker.
Another option would be to use pandas.DataFrame and load data like so:
import pandas as pd
# NOTE(review): json_file must be the already-parsed JSON document (a dict)
# here, not a file path -- confirm against the caller.
# In the sample data both "response" and "objcontent" are lists, and the
# table lives under the keys "rowkeys"/"rowvalues" (no underscores).
response = json_file['response'][0]['objcontent'][0]
# One DataFrame row per entry of "rowvalues", one column per name in "rowkeys".
df = pd.DataFrame(response['rowvalues'], columns=response['rowkeys'])
The neat thing about pandas is, that it's quite expansive in its features; for example, you could plot your data using the previously created DataFrame like so:
df.plot()

Hello Getting parsing json file

I need some help parsing some JSON data.
My JSON file contains this:
{
"message": "{\"gender\":\"MADAME\",\"Polo\":\"POTA\",\"last_name\":\"pato\",\"email\":\"pato.pota#mailler.com\",\"subject\":\"toto claim\",\"sub_subject\":\"Claim insurance car\",\"question\":\"claim for red car\",\"store\":\"claiming for incident\"}",
"context": [
],
"level": 200,
"level_name": "INFO",
"channel": "mailer",
"datetime": {
"date": "2016-09-19 11:00:26.795353",
"timezone_type": 3
},
"extra": [
]
}
Python Code.
import os
import json
def Get_running_dir():
path = os.getcwd()
file = path + "\json_data.txt"
print(file)
with open(file, 'r') as f:
data = f.read()
data_json = json.loads(data)
print(data_json)
print(type(data_json))
Get_running_dir()
The output of print(type(data_json)) shows that this is a dict, right?
But once I call print(data_json['message']['gender']), I get:
<class 'dict'>
Traceback (most recent call last):
File "Extract_log.py", line 29, in <module>
Get_running_dir()
File "Extract_log.py", line 25, in Get_running_dir
print(data_json['message']['gender'])
TypeError: string indices must be integers
I need some help to parse this file please help me.
Thanking you in advance.
Regards,
I figured out how to work with the JSON today.
import os
import json
def Get_running_dir():
    """Print selected fields of the log record stored in json_data.txt.

    Reads ``json_data.txt`` from the current working directory and prints
    the file path, the raw ``message`` value, and the ``gender``,
    ``first_name`` and ``last_name`` fields of the decoded message.

    Raises:
        FileNotFoundError: if ``json_data.txt`` is not in the working directory.
        KeyError: if ``message``, ``gender`` or ``last_name`` is missing.
        json.JSONDecodeError: if the file or the ``message`` value is not
            valid JSON.
    """
    # os.path.join picks the platform's separator; the previous
    # "\json_data.txt" literal relied on the invalid escape "\j" and only
    # produced a usable path on Windows.
    file = os.path.join(os.getcwd(), "json_data.txt")
    print(file)
    with open(file, 'r') as f:
        data_json = json.load(f)
    # The "message" value is itself a JSON document stored as a string, so
    # indexing it with a key fails ("string indices must be integers").
    print(data_json['message'])
    # Decode the embedded document to get a dict whose keys can be accessed.
    msg = json.loads(data_json['message'])
    # The sample message has no "first_name" key, so read it with .get()
    # (prints None when absent) instead of raising KeyError.
    print(msg['gender'], msg.get('first_name'), msg['last_name'])