Get line number while parsing a JSON file

I am using the Google Gson library to parse JSON files, which are IPython notebook files. Is it possible to collect the line number where a JSON object or array starts or ends?
JsonReader reader = new JsonReader(new FileReader(notebookFile));
Gson gson = new GsonBuilder().create();
// Read file in stream mode
reader.beginObject();
while (reader.hasNext()) {
    String name = reader.nextName();
    if (name.equals("cells")) {
        // can we determine the line number of name?
        reader.beginArray();
        .....
    }
    ....
}
Part of a notebook:
"metadata": {
"name": "5-Scatterplots"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"store = pd.HDFStore('/Volumes/FreshBooks/data/store.h5')\n",
"may07 = store['may07']\n",
"may08 = store['may08']"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},

You can accomplish this in Python using https://pypi.org/project/json-cfg/
Here's a recursive strategy for printing the line number for each key and each value.
import jsoncfg
from jsoncfg.config_classes import ConfigJSONObject, ConfigJSONArray, ConfigJSONScalar

def recursivePrint(element):
    if isinstance(element, ConfigJSONObject):
        # Dictionary
        for key, value in element:
            print(f"key \"{key}\" at line {jsoncfg.node_location(element[key]).line}")
            recursivePrint(element[key])
    elif isinstance(element, ConfigJSONArray):
        # Array
        for item in element:
            recursivePrint(item)
    elif isinstance(element, ConfigJSONScalar):
        value = element()
        if isinstance(value, str):
            value = value.strip()
        print(f"value \"{value}\" at line {jsoncfg.node_location(element).line}")

parsed = jsoncfg.load_config("example.json")
recursivePrint(parsed)
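To try the traversal without a notebook file, here is a sketch on a tiny made-up input (the file name and contents are invented purely for illustration; the line numbers are whatever jsoncfg reports for them), reusing recursivePrint from above:
import jsoncfg

# Hypothetical input file just for trying out recursivePrint (not from the question).
with open("tiny.json", "w") as f:
    f.write('{\n  "cells": [\n    {"cell_type": "code"}\n  ],\n  "nbformat": 3\n}\n')

recursivePrint(jsoncfg.load_config("tiny.json"))
# Prints something like (line numbers as reported by jsoncfg):
#   key "cells" at line 2
#   key "cell_type" at line 3
#   value "code" at line 3
#   key "nbformat" at line 5
#   value "3" at line 5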

Full disclosure: I'm the maintainer of the package below.
There is now a new Python package that solves this use case: https://github.com/open-alchemy/json-source-map
Installation: pip install json_source_map
For example, in your case:
from json_source_map import calculate

source = '''
{
  "metadata": {
    "name": "5-Scatterplots"
  },
  "nbformat": 3,
  "nbformat_minor": 0,
  "worksheets": [
    {
      "cells": [
        {
          "cell_type": "code",
          "collapsed": false,
          "input": [
            "import pandas as pd\\n",
            "store = pd.HDFStore('/Volumes/FreshBooks/data/store.h5')\\n",
            "may07 = store['may07']\\n",
            "may08 = store['may08']"
          ],
          "language": "python",
          "metadata": {},
          "outputs": [],
          "prompt_number": 1
        }
      ]
    }
  ]
}
'''

print(calculate(source))
This prints:
{
    "": Entry(
        value_start=Location(line=1, column=0, position=1),
        value_end=Location(line=27, column=1, position=568),
        key_start=None,
        key_end=None,
    ),
    "/metadata": Entry(
        value_start=Location(line=2, column=14, position=17),
        value_end=Location(line=4, column=3, position=51),
        key_start=Location(line=2, column=2, position=5),
        key_end=Location(line=2, column=12, position=15),
    ),
    "/metadata/name": Entry(
        value_start=Location(line=3, column=12, position=31),
        value_end=Location(line=3, column=28, position=47),
        key_start=Location(line=3, column=4, position=23),
        key_end=Location(line=3, column=10, position=29),
    ),
    "/nbformat": Entry(
        value_start=Location(line=5, column=14, position=67),
        value_end=Location(line=5, column=15, position=68),
        key_start=Location(line=5, column=2, position=55),
        key_end=Location(line=5, column=12, position=65),
    ),
    "/nbformat_minor": Entry(
        value_start=Location(line=6, column=20, position=90),
        value_end=Location(line=6, column=21, position=91),
        key_start=Location(line=6, column=2, position=72),
        key_end=Location(line=6, column=18, position=88),
    ),
    "/worksheets": Entry(
        value_start=Location(line=7, column=16, position=109),
        value_end=Location(line=26, column=3, position=566),
        key_start=Location(line=7, column=2, position=95),
        key_end=Location(line=7, column=14, position=107),
    ),
    "/worksheets/0": Entry(
        value_start=Location(line=8, column=4, position=115),
        value_end=Location(line=25, column=5, position=562),
        key_start=None,
        key_end=None,
    ),
    "/worksheets/0/cells": Entry(
        value_start=Location(line=9, column=15, position=132),
        value_end=Location(line=24, column=7, position=556),
        key_start=Location(line=9, column=6, position=123),
        key_end=Location(line=9, column=13, position=130),
    ),
    "/worksheets/0/cells/0": Entry(
        value_start=Location(line=10, column=8, position=142),
        value_end=Location(line=23, column=9, position=548),
        key_start=None,
        key_end=None,
    ),
    "/worksheets/0/cells/0/cell_type": Entry(
        value_start=Location(line=11, column=23, position=167),
        value_end=Location(line=11, column=29, position=173),
        key_start=Location(line=11, column=10, position=154),
        key_end=Location(line=11, column=21, position=165),
    ),
    "/worksheets/0/cells/0/collapsed": Entry(
        value_start=Location(line=12, column=23, position=198),
        value_end=Location(line=12, column=28, position=203),
        key_start=Location(line=12, column=10, position=185),
        key_end=Location(line=12, column=21, position=196),
    ),
    "/worksheets/0/cells/0/input": Entry(
        value_start=Location(line=13, column=19, position=224),
        value_end=Location(line=18, column=11, position=425),
        key_start=Location(line=13, column=10, position=215),
        key_end=Location(line=13, column=17, position=222),
    ),
    "/worksheets/0/cells/0/input/0": Entry(
        value_start=Location(line=14, column=12, position=238),
        value_end=Location(line=14, column=35, position=261),
        key_start=None,
        key_end=None,
    ),
    "/worksheets/0/cells/0/input/1": Entry(
        value_start=Location(line=15, column=12, position=275),
        value_end=Location(line=15, column=72, position=335),
        key_start=None,
        key_end=None,
    ),
    "/worksheets/0/cells/0/input/2": Entry(
        value_start=Location(line=16, column=12, position=349),
        value_end=Location(line=16, column=38, position=375),
        key_start=None,
        key_end=None,
    ),
    "/worksheets/0/cells/0/input/3": Entry(
        value_start=Location(line=17, column=12, position=389),
        value_end=Location(line=17, column=36, position=413),
        key_start=None,
        key_end=None,
    ),
    "/worksheets/0/cells/0/language": Entry(
        value_start=Location(line=19, column=22, position=449),
        value_end=Location(line=19, column=30, position=457),
        key_start=Location(line=19, column=10, position=437),
        key_end=Location(line=19, column=20, position=447),
    ),
    "/worksheets/0/cells/0/metadata": Entry(
        value_start=Location(line=20, column=22, position=481),
        value_end=Location(line=20, column=24, position=483),
        key_start=Location(line=20, column=10, position=469),
        key_end=Location(line=20, column=20, position=479),
    ),
    "/worksheets/0/cells/0/outputs": Entry(
        value_start=Location(line=21, column=21, position=506),
        value_end=Location(line=21, column=23, position=508),
        key_start=Location(line=21, column=10, position=495),
        key_end=Location(line=21, column=19, position=504),
    ),
    "/worksheets/0/cells/0/prompt_number": Entry(
        value_start=Location(line=22, column=27, position=537),
        value_end=Location(line=22, column=28, position=538),
        key_start=Location(line=22, column=10, position=520),
        key_end=Location(line=22, column=25, position=535),
    ),
}
This tells you the line, column and character position for the start and end location for each value in the JSON document.
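Tying this back to the original question: the result is a plain dict keyed by JSON Pointer paths, so looking up where a particular key such as "cells" sits is an ordinary dictionary access. A minimal sketch, reusing source and calculate from above:
# Minimal sketch using the `source` string and `calculate` from above.
entries = calculate(source)

# Location of the "cells" key inside the first worksheet, addressed by JSON Pointer.
cells_entry = entries["/worksheets/0/cells"]
print(cells_entry.key_start.line)    # line where the "cells" key starts (9 in the output above)
print(cells_entry.value_start.line)  # line where its array value starts (also 9 here)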

Related

Dash: Get the post request query string parameters inside callback

I have a multi-page Dash application:
app.py
pages/
    page_one.py
    page_two.py
In app.py, I have
@callback(
    Output(component_id="page-content", component_property="children"),
    Input(component_id="url", component_property="pathname"),
)
def display_page(pathname):
    if pathname == "/one":
        return page_one.layout
    elif pathname == "/two":
        return page_two.layout
In page_one.py, I have a few callbacks. For example:
Callback-1
@callback(
    output=Output("paragraph_id", "children"),
    inputs=Input("button_id", "n_clicks"),
    background=True,
    running=[
        (Output("button_id", "disabled"), True, False),
        (Output("cancel_button_id", "disabled"), False, True),
        (
            Output("paragraph_id", "style"),
            {"visibility": "hidden"},
            {"visibility": "visible"},
        ),
        (
            Output("progress_bar", "style"),
            {"visibility": "visible"},
            {"visibility": "hidden"},
        ),
    ],
    cancel=Input("cancel_button_id", "n_clicks"),
    progress=[Output("progress_bar", "value"), Output("progress_bar", "max")],
    prevent_initial_call=True,
)
def update_progress_A(set_progress, n_clicks):
    # Here I want to access the post request query string parameters
    # some code for A
Callback-2
@callback(
    output=Output("paragraph_id", "children"),
    inputs=Input("button_id", "n_clicks"),
    background=True,
    running=[
        (Output("button_id", "disabled"), True, False),
        (Output("cancel_button_id", "disabled"), False, True),
        (
            Output("paragraph_id", "style"),
            {"visibility": "hidden"},
            {"visibility": "visible"},
        ),
        (
            Output("progress_bar", "style"),
            {"visibility": "visible"},
            {"visibility": "hidden"},
        ),
    ],
    cancel=Input("cancel_button_id", "n_clicks"),
    progress=[Output("progress_bar", "value"), Output("progress_bar", "max")],
    prevent_initial_call=True,
)
def update_progress_B(set_progress, n_clicks):
    # Here I want to access the post request query string parameters
    # some code for B
Is it possible to get the post request query string parameters inside the callback?
Please suggest.
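For the page's own query string (as opposed to the internal /_dash-update-component POST that shows up in the update below), one commonly used hook in Dash is the search property of dcc.Location. Whether that covers your case depends on which parameters you need; a minimal sketch, reusing the existing url component (the query-echo output id is a placeholder):
from urllib.parse import parse_qs

from dash import Input, Output, callback, html


@callback(
    Output("query-echo", "children"),  # hypothetical component id for demonstration
    Input("url", "search"),            # dcc.Location(id="url") already exists in app.py
)
def show_query_params(search):
    # search looks like "?user=abc&mode=full"; parse_qs gives {"user": ["abc"], "mode": ["full"]}
    params = parse_qs((search or "").lstrip("?"))
    return html.Pre(str(params))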
Update 1:
I have Docker services as below: redis, postgres, web, celery, nginx.
I have implemented the demo code from the Dash website:
@callback(
    output=Output("paragraph_id", "children"),
    inputs=Input("button_id", "n_clicks"),
    background=True,
    running=[
        (Output("button_id", "disabled"), True, False),
        (Output("cancel_button_id", "disabled"), False, True),
        (
            Output("paragraph_id", "style"),
            {"visibility": "hidden"},
            {"visibility": "visible"},
        ),
        (
            Output("progress_bar", "style"),
            {"visibility": "visible"},
            {"visibility": "hidden"},
        ),
    ],
    cancel=Input("cancel_button_id", "n_clicks"),
    progress=[Output("progress_bar", "value"), Output("progress_bar", "max")],
    prevent_initial_call=True,
)
def update_progress(set_progress, n_clicks):
    total = 5
    # print("INPUTS: ", callback_context.inputs)
    # print("INPUTS: ", callback_context.inputs_list)
    for i in range(total + 1):
        set_progress((str(i), str(total)))
        time.sleep(1)
    return f"Clicked {n_clicks} times"
When the services are up, I observed the following in the terminal:
For celery:
celery | [tasks]
celery | . long_callback_d0a7a49bfe3d30bfdd7a43d2a22db75123ff2fa9
After clicking the button:
celery | [2022-12-27 20:18:43,090: INFO/MainProcess] Task long_callback_d0a7a49bfe3d30bfdd7a43d2a22db75123ff2fa9[b663bb71-6962-44a3-ae2c-00eb7d8869bf] received
proxy | 172.18.0.1 - - [27/Dec/2022:20:18:44 +0000] "POST /_dash-update-component?cacheKey=6067e37a0e1f0c07b6f9ff0d7ef0b208ae2783ba&job=b663bb71-6962-44a3-ae2c-00eb7d8869bf
celery | [2022-12-27 20:18:49,119: INFO/ForkPoolWorker-2] Task long_callback_d0a7a49bfe3d30bfdd7a43d2a22db75123ff2fa9[b663bb71-6962-44a3-ae2c-00eb7d8869bf] succeeded in 6.027637779999964s: None
This is working fine.
I am trying to access the job parameter inside update_progress() so that I can pass it to result.AsyncResult(job), a Celery function, inside update_progress() to get the status and state of the running task, which I want to use to update the progress bar.
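For the Celery side, querying a task by id is the easy part once the id is available; getting that job value into the Dash callback is exactly what is being asked here. A minimal sketch, assuming celery_app is the Celery instance the background callback manager was built with and job_id is the task id string (both names are assumptions for illustration):
from celery.result import AsyncResult


def task_status(celery_app, job_id):
    # Look up the task by id on the same Celery app/backend.
    res = AsyncResult(job_id, app=celery_app)
    return {
        "state": res.state,                                     # e.g. "PENDING", "STARTED", "SUCCESS"
        "ready": res.ready(),                                   # True once the task has finished
        "successful": res.successful() if res.ready() else None,
    }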
Use cases:
Upload dataset: I'm using dcc.Upload() and the file is uploaded successfully, e.g. the user uploads data.csv.
Create train, test, dev dataset files: I'm using dbc.Button(); the user clicks it and all 3 files are created successfully [I have used sklearn], e.g. we get train.csv, test.csv and dev.csv.
Model training: I'm using dbc.Button(); the user clicks it and the model is generated and stored successfully, e.g. model.joblib.

Getting Dash long_callback details

I'm just beginning with Dash 2.0 from Plotly, mainly to take advantage of long callbacks. I'm trying to get the callback's id without success (i.e. the id I see in the worker when executing the long callback). I'm also struggling to get its state, ready(), successful(), etc.
What I've got so far:
@app.long_callback(
    output=Output("paragraph_id", "children"),
    inputs=Input("button_id", "n_clicks"),
    running=[
        (Output("button_id", "disabled"), True, False),
        (Output("cancel_button_id", "disabled"), False, True),
        (
            Output("paragraph_id", "style"),
            {"visibility": "hidden"},
            {"visibility": "visible"},
        ),
        (
            Output("progress_bar", "style"),
            {"visibility": "visible"},
            {"visibility": "hidden"},
        ),
    ],
    cancel=[Input("cancel_button_id", "n_clicks")],
    progress=[Output("progress_bar", "value"), Output("progress_bar", "max")],
    prevent_initial_call=True
)
def update_progress(set_progress, n_clicks):
    currentProgress = check_progress.delay()
    i = 0
    total = 15
    while currentProgress.ready() == False:
        time.sleep(1)
        print("currentProgress.STATE")
        print(currentProgress.state)
        set_progress((str(i + 1), str(total)))
        i += 1
    return [f"Clicked {n_clicks} times" + " " + currentProgress.id]


@celery_app.task(bind=True)
def check_progress(self):
    time.sleep(15)
    return
I can manage to get these when executing the Celery task check_progress(). How do I get the id of the update_progress() long callback?

Python: Parsing a JSON file into a list of dictionaries

I have the following JSON file of annotations, and here is a screenshot of it (the tree structure of the JSON file).
I want to parse it and extract the following info; here is the link from which I took this screenshot: Standard Dataset Dicts.
I tried to use this code, which is not working as expected:
def get_buildings_dicts(img_dir):
    json_file = os.path.join(img_dir, "annotations.json")
    with open(json_file) as f:
        imgs_anns = json.load(f)
    dataset_dicts = []
    for idx, v in enumerate(imgs_anns):
        record = {}
        filename = os.path.join(img_dir, v["imagePath"])
        height, width = cv2.imread(filename).shape[:2]
        record["file_name"] = filename
        record["image_id"] = idx
        record["height"] = height
        record["width"] = width
        annos = v["shapes"][idx]
        objs = []
        for anno in annos:
            # assert not anno["region_attributes"]
            anno = anno["shape_type"]
            px = anno["points"][0]
            py = anno["points"][1]
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
            poly = [p for x in poly for p in x]
            obj = {
                "bbox": [np.min(px), np.min(py), np.max(px), np.max(py)],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                "category_id": 0,
            }
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)
    return dataset_dicts
Here is the expected output of the final dict items:
{
  "file_name": "balloon/train/34020010494_e5cb88e1c4_k.jpg",
  "image_id": 0,
  "height": 1536,
  "width": 2048,
  "annotations": [
    {
      "bbox": [994, 619, 1445, 1166],
      "bbox_mode": <BoxMode.XYXY_ABS: 0>,
      "segmentation": [[1020.5, 963.5, 1000.5, 899.5, 994.5, 841.5, 1003.5, 787.5, 1023.5, 738.5, 1050.5, 700.5, 1089.5, 663.5, 1134.5, 638.5, 1190.5, 621.5, 1265.5, 619.5, 1321.5, 643.5, 1361.5, 672.5, 1403.5, 720.5, 1428.5, 765.5, 1442.5, 800.5, 1445.5, 860.5, 1441.5, 896.5, 1427.5, 942.5, 1400.5, 990.5, 1361.5, 1035.5, 1316.5, 1079.5, 1269.5, 1112.5, 1228.5, 1129.5, 1198.5, 1134.5, 1207.5, 1144.5, 1210.5, 1153.5, 1190.5, 1166.5, 1177.5, 1166.5, 1172.5, 1150.5, 1174.5, 1136.5, 1170.5, 1129.5, 1153.5, 1122.5, 1127.5, 1112.5, 1104.5, 1084.5, 1061.5, 1037.5, 1032.5, 989.5, 1020.5, 963.5]],
      "category_id": 0
    }
  ]
}
I think the only tricky part is dealing with the nested lists, but a handful of comprehensions can probably make life easier for us.
Try:
import json

new_images = []
with open("merged_file.json", "r") as file_in:
    for index, image in enumerate(json.load(file_in)):
        #height, width = cv2.imread(filename).shape[:2]
        height, width = 100, 100
        new_images.append({
            "image_id": index,
            "filename": image["imagePath"],
            "height": height,
            "width": width,
            "annotations": [
                {
                    "category_id": 0,
                    #"bbox_mode": BoxMode.XYXY_ABS,
                    "bbox_mode": 0,
                    "bbox": [
                        min(x for x, y in shape["points"]),
                        min(y for x, y in shape["points"]),
                        max(x for x, y in shape["points"]),
                        max(y for x, y in shape["points"])
                    ],
                    "segmentation": [coord for point in shape["points"] for coord in point]
                }
                for shape in image["shapes"]
            ],
        })

print(json.dumps(new_images, indent=2))
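To see the shape of the transformation without the real annotations file, here is a sketch with a tiny made-up merged_file.json (the contents are invented purely for illustration):
import json

# Invented minimal annotations record with just the fields the snippet above reads.
sample = [{"imagePath": "img1.jpg",
           "shapes": [{"points": [[10, 20], [30, 5], [15, 40]]}]}]

with open("merged_file.json", "w") as f:
    json.dump(sample, f)

# Running the snippet above on this file yields one image record whose annotation
# has bbox [10, 5, 30, 40] (min x, min y, max x, max y) and
# segmentation [10, 20, 30, 5, 15, 40] (the points flattened in order).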

How to increase my container width to accommodate more items

I am building a dashboard using Plotly Dash. I am using bootstrap.min.css, and I would like to increase the width of my container so that I can accommodate two graphs in a single row.
My second graph (a line graph) is wider, hence I am unable to align them in a single row.
I have attached a snapshot below.
Dash UI code:
# the style arguments for the sidebar. We use position:fixed and a fixed width
SIDEBAR_STYLE = {
    "top": 0,
    "left": 0,
    "bottom": 0,
    "width": "16rem",
    "padding": "2rem 1rem",
    "background-color": "#f8f9fa",
    "position": "fixed",
    "color": "#000",
}

# the styles for the main content position it to the right of the sidebar and
# add some padding.
CONTENT_STYLE = {
    "margin-left": "18rem",
    "margin-right": "2rem",
    "padding": "2rem 1rem",
}
sidebar = html.Div(
    [
        html.H2("Plate", className="display-4"),
        html.Hr(),
        html.P("A simple dashboard", className="lead"),
        dbc.Nav(
            [
                dbc.NavLink("Dashboard", href="/dashboard", id="page-1-link"),
                dbc.NavLink("Analytics", href="/page-2", id="page-2-link"),
                dbc.NavLink("Page 3", href="/page-3", id="page-3-link"),
                html.Hr(),
                dbc.NavLink("Logout", href="/logout", id="page-4-link"),
            ],
            vertical=True,
            pills=True,
        ),
    ],
    style=SIDEBAR_STYLE,
)

content = html.Div(id='page-content', className='container', style=CONTENT_STYLE)
app.layout = html.Div([dcc.Location(id="url"), sidebar, content])
app.config.suppress_callback_exceptions = True
# this callback uses the current pathname to set the active state of the
# corresponding nav link to true, allowing users to tell which page they are on
@app.callback(
    [Output(f"page-{i}-link", "active") for i in range(1, 4)],
    [Input("url", "pathname")],
)
def toggle_active_links(pathname):
    if pathname == "/" or pathname == "/dashboard":
        # Treat page 1 as the homepage / index
        return True, False, False
    return [pathname == f"/page-{i}" for i in range(1, 4)]
@app.callback(Output("page-content", "children"), [Input("url", "pathname")])
def render_page_content(pathname):
    if pathname in ["/", "/page-1", "/dashboard"]:
        dashBoard = html.Div([
            html.Div([
                dcc.DatePickerRange(
                    id='my-date-picker-range',
                    min_date_allowed=dt(minDate[0], minDate[1], minDate[2]),
                    max_date_allowed=dt(maxDate[0], maxDate[1], maxDate[2]),
                    initial_visible_month=dt(maxDate[0], maxDate[1], maxDate[2]),
                    start_date=dt(minDate[0], minDate[1], minDate[2]).date(),
                    end_date=dt(maxDate[0], maxDate[1], maxDate[2]).date()
                ),
                html.Button(id="date-button", children="Analyze", n_clicks=0,
                            className='btn btn-outline-success')
            ], className='row'),
            html.Div([
                html.Br(),
                html.Div([
                    html.H4(['Category Overview'], className='display-4'),
                    html.Br(),
                    html.Br(),
                ], className='row'),
                html.Div([
                    html.Div([
                        dcc.Graph(id='categoryPerformance',
                                  figure=dict(data=ge.returnCategoryOverviewBarGraph(df)[0],
                                              layout=ge.returnCategoryOverviewBarGraph(df)[1]))
                    ], className='col'),
                    html.Div([
                        dcc.Graph(id='categoryPerformanceTrend')
                    ], className='col')
                ], className='row'),
                html.Hr(),
                html.Div([
                    html.Div([
                        dcc.Dropdown(id='category-dd', options=category_items, value='Food')
                    ], className='col-6 col-md-4'),
                    html.Div([
                        dcc.Slider(id='headCount', min=5, max=20, step=5, value=5,
                                   marks={i: 'Count {}'.format(i) for i in range(5, 21, 5)})
                    ], className='col-12 col-sm-6 col-md-8')
                ], className='row'),
                html.Div([
                    html.Br(),
                    html.Br(),
                    html.Div([
                        dcc.Graph(id='idvlCategoryPerformanceBest')
                    ], className='col'),
                    html.Div([
                        dcc.Graph(id='idvlCategoryPerformanceLeast')
                    ], className='col')
                ], className='row')
            ])
        ], className='container')
        return dashBoard
I have zero knowledge of frontend/CSS; any help is much appreciated. Thanks!
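Not necessarily the fix for this exact layout, but a common pattern with dash-bootstrap-components is to switch to a fluid container and give each graph a half-width column so the row can hold both. A minimal sketch, reusing the graph ids and CONTENT_STYLE from above:
import dash_bootstrap_components as dbc
from dash import dcc

# A fluid container spans the full viewport width instead of Bootstrap's fixed
# .container breakpoints, leaving room for two side-by-side graphs.
content = dbc.Container(
    dbc.Row(
        [
            dbc.Col(dcc.Graph(id="categoryPerformance"), md=6),       # left half on md+ screens
            dbc.Col(dcc.Graph(id="categoryPerformanceTrend"), md=6),  # right half on md+ screens
        ]
    ),
    fluid=True,
    style=CONTENT_STYLE,  # reuse the margins/padding defined above
)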

Creating a JSON string from a loadings object in R

After performing a factor analysis the loadings object looks like this:
Loadings:
    Factor1 Factor2
IV1  0.844  -0.512
IV2          0.997
IV3         -0.235
IV4         -0.144
IV5  0.997

               Factor1 Factor2
SS loadings      1.719   1.333
Proportion Var   0.344   0.267
Cumulative Var   0.344   0.610
I can target the factors themselves using print(fit$loadings[,1:2]) to get the following:
       Factor1      Factor2
IV1 0.84352949 -0.512090197
IV2 0.01805673  0.997351400
IV3 0.05877499 -0.234710743
IV4 0.09088599 -0.144251843
IV5 0.99746785  0.008877643
I would like to create a json string that would look something like the following.
"loadings": {
"Factor1": {
"IV1": 0.84352949, "IV2":0.01805673, "IV3":0.05877499, "IV4": 0.09088599, "IV5": 0.99746785
},
"Factor2": {
"IV1": -0.512090197, "IV2": 0.997351400, "IV3": -0.234710743, "IV4": -0.144251843, "IV5": 0.008877643
}
}
I have tried accessing the individual properties using unclass(), hoping that I could then loop through and put them into a string, but have not had any luck (using loads <- loadings(fit) and names <- names(unclass(loads)), names shows up as NULL).
Just seconding @GSee's comment (+1) and @dickoa's answer (+1) with a closer example.
Creating some demo data for a reproducible example (you should also provide one in all your questions):
> fit <- princomp(~ ., data = USArrests, scale = FALSE)
Load RJSONIO/rjson packages:
> library(RJSONIO)
Transform your data to fit your needs:
> res <- list(loadings = apply(fit$loadings, 2, list))
Return JSON:
> cat(toJSON(res))
{
  "loadings": {
    "Comp.1": [
      {
        "Murder": -0.041704,
        "Assault": -0.99522,
        "UrbanPop": -0.046336,
        "Rape": -0.075156
      }
    ],
    "Comp.2": [
      {
        "Murder": 0.044822,
        "Assault": 0.05876,
        "UrbanPop": -0.97686,
        "Rape": -0.20072
      }
    ],
    "Comp.3": [
      {
        "Murder": 0.079891,
        "Assault": -0.06757,
        "UrbanPop": -0.20055,
        "Rape": 0.97408
      }
    ],
    "Comp.4": [
      {
        "Murder": 0.99492,
        "Assault": -0.038938,
        "UrbanPop": 0.058169,
        "Rape": -0.072325
      }
    ]
  }
}
You can do something along these lines:
require(RJSONIO) ## or require(rjson)
pca <- prcomp(~ ., data = USArrests, scale = FALSE)
export <- list(loadings = split(pca$rotation, rownames(pca$rotation)))
cat(toJSON(export))
## {
##   "loadings": {
##     "Assault": [ 0.99522, -0.05876, -0.06757, 0.038938 ],
##     "Murder": [ 0.041704, -0.044822, 0.079891, -0.99492 ],
##     "Rape": [ 0.075156, 0.20072, 0.97408, 0.072325 ],
##     "UrbanPop": [ 0.046336, 0.97686, -0.20055, -0.058169 ]
##   }
## }
If you want to export it:
cat(toJSON(export), file = "loadings.json")
If it doesn't really suit your needs, just modify the data structure (the export object) to produce the output you want.