Python Pandas dataframe to JSON file

My dataframe is:
import pandas as pd
from tabulate import tabulate
data0 = {'dir':[0,'','',90,'','','']}
data1 = {'dist':['0 to 1h','1h to 2h','2h to 3h','0 to 1h','1h to 2h','2h to 3h','> 3h']}
data2 = {'max':[-0.271, -0.17 , -0.034, -0.322, -0.208, -0.057, 0.018]}
data3 = {'min':[-0.441, -0.339, -0.203, -0.491, -0.378, -0.227, -0.151]}
df0 = pd.DataFrame(data0)
df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)
df3 = pd.DataFrame(data3)
pressure = []
pressure.append(df0)
pressure.append(df1)
pressure.append(df2)
pressure.append(df3)
df = pd.concat(pressure, axis=1)
print(tabulate(df, headers='keys', showindex=False))
It prints like this:
dir    dist        max     min
-----  --------  ------  ------
0      0 to 1h   -0.271  -0.441
       1h to 2h  -0.17   -0.339
       2h to 3h  -0.034  -0.203
90     0 to 1h   -0.322  -0.491
       1h to 2h  -0.208  -0.378
       2h to 3h  -0.057  -0.227
       > 3h       0.018  -0.151
I want the dataframe exported to a JSON file with the structure below (grouped by dir, one max/min pair per dist). How can I do this? Thank you.
{
"0": [
{
"max": {
"0 to 1h": -0.271
},
"min": {
"0 to 1h": -0.441
}
},
{
"max": {
"1h to 2h": -0.17
},
"min": {
"1h to 2h": -0.339
}
},
{
"max": {
"2h to 3h": -0.034
},
"min": {
"2h to 3h": -0.203
}
}
],
"90": [
{
"max": {
"0 to 1h": -0.322
},
"min": {
"0 to 1h": -0.491
}
},
{
"max": {
"1h to 2h": -0.208
},
"min": {
"1h to 2h": -0.378
}
},
{
"max": {
"2h to 3h": -0.057
},
"min": {
"2h to 3h": -0.227
}
},
{
"max": {
" > 3h ": 0.018
},
"min": {
" > 3h ": -0.151
}
}
]
}

Here is one way to do it:
import json

# Put each row's max/min values in a nested dict keyed by its "dist" label
df["items"] = df.apply(
    lambda x: {"max": {x["dist"]: x["max"]}, "min": {x["dist"]: x["min"]}}, axis=1
)
# Forward-fill the missing "dir" values
df["dir"] = df["dir"].replace("", pd.NA).ffill()
# Group the row dicts by "dir" and build the target mapping
data = {
    key: value["items"]
    for key, value in df[["dir", "items"]]
    .groupby("dir")
    .agg(list)
    .to_dict("index")
    .items()
}
with open("file.json", "w") as f:
    json.dump(data, f, indent=4)
In file.json:
{
"0": [
{
"max": {
"0 to 1h": -0.271
},
"min": {
"0 to 1h": -0.441
}
},
{
"max": {
"1h to 2h": -0.17
},
"min": {
"1h to 2h": -0.339
}
},
{
"max": {
"2h to 3h": -0.034
},
"min": {
"2h to 3h": -0.203
}
}
],
"90": [
{
"max": {
"0 to 1h": -0.322
},
"min": {
"0 to 1h": -0.491
}
},
{
"max": {
"1h to 2h": -0.208
},
"min": {
"1h to 2h": -0.378
}
},
{
"max": {
"2h to 3h": -0.057
},
"min": {
"2h to 3h": -0.227
}
},
{
"max": {
"> 3h": 0.018
},
"min": {
"> 3h": -0.151
}
}
]
}
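To see what the dict comprehension in the answer is unwrapping, here is a quick sketch of the intermediate shape produced by the groupby step (values abbreviated):
# Runs after the answer code above; "items" is the dict column built there.
intermediate = df[["dir", "items"]].groupby("dir").agg(list).to_dict("index")
# intermediate == {
#     0:  {"items": [<three max/min dicts>]},
#     90: {"items": [<four max/min dicts>]},
# }
# The comprehension keeps each "dir" key and unwraps its "items" list.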

Related

How to sort hash in ruby recursively

I have a huge hash/JSON in the format below (sample given).
How can we sort this result hash by the "total" key, in descending order?
NB: The last-level nested nodes do not have "response" and "total" keys; they have "metrics" as keys.
result = {
"account_id_1": {
"total": 1000,
"response": {
"location_1": {
"total": 300,
"response": {
"service_1": { "metrics": { "cost": 100} },
"service_2": { "metrics": { "cost": 100 } },
"service_3": { "metrics": { "cost": 100 } },
}
},
"location_2": {
"total": 500,
"response": {
"service_1": { "metrics": { "cost": 300 } },
"service_2": { "metrics": { "cost": 150 } },
"service_3": { "metrics": { "cost": 50 } },
}
},
"location_3": {
"total": 200,
"response": {
"service_1": { "metrics": { "cost": 75 } },
"service_2": { "metrics": { "cost": 75 } },
"service_3": { "metrics": { "cost": 50 } },
}
}
}
},
"account_id_2": {
"total": 2000,
"response": {
"location_1": {
"total": 300,
"response": {
"service_1": { "metrics": { "cost": 100 } },
"service_2": { "metrics": { "cost": 100 } },
"service_3": { "metrics": { "cost": 100 } },
}
},
"location_2": {
"total": 500,
"response": {
"service_1": { "metrics": { "cost": 300 } },
"service_2": { "metrics": { "cost": 150 } },
"service_3": { "metrics": { "cost": 50 } },
}
},
"location_3": {
"total": 1200,
"response": {
"service_1": { "metrics": { "cost": 1075 } },
"service_2": { "metrics": { "cost": 75 } },
"service_3": { "metrics": { "cost": 50 } },
}
}
}
}
}
Expected result:
result = {
"account_id_2": {
"total": 2000,
"response": {
"location_3": {
"total": 1200,
"response": {
"service_1": { "metrics": { "cost": 1075 } },
"service_2": { "metrics": { "cost": 75 } },
"service_3": { "metrics": { "cost": 50 } },
}
},
"location_2": {
"total": 500,
"response": {
"service_1": { "metrics": { "cost": 300 } },
"service_2": { "metrics": { "cost": 150 } },
"service_3": { "metrics": { "cost": 50 } },
}
},
"location_1": {
"total": 300,
"response": {
"service_1": { "metrics": { "cost": 100 } },
"service_2": { "metrics": { "cost": 100 } },
"service_3": { "metrics": { "cost": 100 } },
}
}
}
},
"account_id_1": {
"total": 1000,
"response": {
"location_2": {
"total": 500,
"response": {
"service_1": { "metrics": { "cost": 300 } },
"service_2": { "metrics": { "cost": 150 } },
"service_3": { "metrics": { "cost": 50 } },
}
},
"location_1": {
"total": 300,
"response": {
"service_1": { "metrics": { "cost": 100} },
"service_2": { "metrics": { "cost": 100 } },
"service_3": { "metrics": { "cost": 100 } },
}
},
"location_3": {
"total": 200,
"response": {
"service_1": { "metrics": { "cost": 75 } },
"service_2": { "metrics": { "cost": 75 } },
"service_3": { "metrics": { "cost": 50 } },
}
}
}
}
}
Personally, I would create a class to handle the custom sorting so that we can implement our own <=> (spaceship operator).
Something like this (working example: https://replit.com/#engineersmnky/ThankfulAutomaticRam#main.rb):
class AccountLocaleMetricSorter
  include Comparable

  def self.sort(h)
    h.map do |name, values|
      new(name: name, data: values)
    end.sort.reduce({}) { |m, o| m.merge(o.to_h) }
  end

  attr_reader :name, :data, :o_data

  def initialize(name:, data:)
    @name = name
    @o_data = data
    @data = parse_response(data)
  end

  def to_h
    return { name.to_sym => o_data } unless o_data.key?(:total)
    { name.to_sym => {
        total: o_data[:total],
        response: data.to_h
      }
    }
  end

  def <=>(other)
    if other.o_data.key?(:total) && o_data.key?(:total)
      other.o_data[:total] <=> o_data[:total]
    elsif other.o_data.key?(:metrics) && o_data.key?(:metrics)
      other.o_data.dig(:metrics, :cost) <=> o_data.dig(:metrics, :cost)
    else
      0
    end
  end

  private

  def parse_response(response)
    return response unless response.key?(:response)
    self.class.sort(response[:response])
  end
end
Usage as:
pp AccountLocaleMetricSorter.sort(result)
Output:
{:account_id_2=>
{:total=>2000,
:response=>
{:location_3=>
{:total=>1200,
:response=>
{:service_1=>{:metrics=>{:cost=>1075}},
:service_2=>{:metrics=>{:cost=>75}},
:service_3=>{:metrics=>{:cost=>50}}}},
:location_2=>
{:total=>500,
:response=>
{:service_1=>{:metrics=>{:cost=>300}},
:service_2=>{:metrics=>{:cost=>150}},
:service_3=>{:metrics=>{:cost=>50}}}},
:location_1=>
{:total=>300,
:response=>
{:service_1=>{:metrics=>{:cost=>100}},
:service_2=>{:metrics=>{:cost=>100}},
:service_3=>{:metrics=>{:cost=>100}}}}}},
:account_id_1=>
{:total=>1000,
:response=>
{:location_2=>
{:total=>500,
:response=>
{:service_1=>{:metrics=>{:cost=>300}},
:service_2=>{:metrics=>{:cost=>150}},
:service_3=>{:metrics=>{:cost=>50}}}},
:location_1=>
{:total=>300,
:response=>
{:service_1=>{:metrics=>{:cost=>100}},
:service_2=>{:metrics=>{:cost=>100}},
:service_3=>{:metrics=>{:cost=>100}}}},
:location_3=>
{:total=>200,
:response=>
{:service_1=>{:metrics=>{:cost=>75}},
:service_2=>{:metrics=>{:cost=>75}},
:service_3=>{:metrics=>{:cost=>50}}}}}}}
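For comparison, a rough recursive Python counterpart of the same idea (a sketch, assuming result has the same structure as a Python dict; unlike the class above, it leaves leaf service entries in their input order instead of ordering them by metrics cost):
def sort_recursively(d):
    # Order siblings by "total" descending; siblings without a "total"
    # (the leaf service entries) keep their original order.
    ordered = sorted(d.items(), key=lambda kv: -kv[1].get("total", 0))
    return {
        name: (
            {**node, "response": sort_recursively(node["response"])}
            if isinstance(node.get("response"), dict)
            else node
        )
        for name, node in ordered
    }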
Let's first simplify your example.
result = {
"account_id_1": {
"total": 1000,
"response": {
"location_1": { "total": 300, "response": 1 },
"location_2": { "total": 500, "response": 2 },
"location_3": { "total": 200, "response": 3 }
}
},
"account_id_2": {
"total": 2000,
"response": {
"location_1": { "total": 300, "response": 1 },
"location_2": { "total": 500, "response": 2 },
"location_3": { "total": 1200, "response": 3 }
}
}
}
In your example,
result[:account_id_1][:response][:location_1][:response]
#=> { "service_1": { "metrics": { "cost": 100} },
# "service_2": { "metrics": { "cost": 100 } },
# "service_3": { "metrics": { "cost": 100 } } }
This value, and others like it, will remain unchanged, so I've replaced it with a placeholder, an arbitrary integer:
result[:account_id_1][:response][:location_1][:response]
#=> 1
result[:account_id_1][:response][:location_2][:response]
#=> 2
and so on.
I found it useful to create a helper method:
def sort_by_total_desc(h)
h.sort_by { |_,g| -g[:total] }.to_h
end
The desired hash can then be computed as follows.
sort_by_total_desc(result.transform_values do |v|
v.merge(response: sort_by_total_desc(v[:response]))
end)
#=> {:account_id_2=>{
# :total=>2000,
# :response=>{
# :location_3=>{:total=>1200, :response=>3},
# :location_2=>{:total=>500, :response=>2},
# :location_1=>{:total=>300, :response=>1}
# }
# },
# :account_id_1=>{
# :total=>1000,
# :response=>{
# :location_2=>{:total=>500, :response=>2},
# :location_1=>{:total=>300, :response=>1},
# :location_3=>{:total=>200, :response=>3}
# }
# }
# }
Regarding the helper method, here is an example.
h = { "location_1": { "total": 300, "response": 1 },
"location_2": { "total": 500, "response": 2 },
"location_3": { "total": 200, "response": 3 } }
sort_by_total_desc(h)
#=> {:location_2=>{:total=>500, :response=>2},
# :location_1=>{:total=>300, :response=>1},
# :location_3=>{:total=>200, :response=>3}}
which requires two steps:
a = h.sort_by { |_,g| -g[:total] }
#=> [[:location_2, {:total=>500, :response=>2}],
# [:location_1, {:total=>300, :response=>1}],
# [:location_3, {:total=>200, :response=>3}]]
a.to_h
#=> as above
The main computation then has two steps. First, perform the "inner" sorting.
h = result.transform_values do |v|
v.merge(response: sort_by_total_desc(v[:response]))
end
#=> {:account_id_1=>{
# :total=>1000,
# :response=>{
# :location_2=>{:total=>500, :response=>2},
# :location_1=>{:total=>300, :response=>1},
# :location_3=>{:total=>200, :response=>3}
# }
# },
# :account_id_2=>{
# :total=>2000,
# :response=>{
# :location_3=>{:total=>1200, :response=>3},
# :location_2=>{:total=>500, :response=>2},
# :location_1=>{:total=>300, :response=>1}
# }
# }
# }
Then do the "outer" sorting to return the desired result:
sort_by_total_desc(h)
#=> as above
See Enumerable#sort_by, Hash#transform_values and Hash#merge.
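For readers following along in Python, the helper-plus-transform approach translates almost directly (a sketch, assuming result has the same structure as a Python dict; dicts preserve insertion order in Python 3.7+):
def sort_by_total_desc(h):
    # Sort a {name: {"total": ..., ...}} mapping by "total", descending.
    return dict(sorted(h.items(), key=lambda kv: -kv[1]["total"]))

sorted_result = sort_by_total_desc({
    name: {**acct, "response": sort_by_total_desc(acct["response"])}
    for name, acct in result.items()
})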

Append a Json file to an existing Jsonb field in a Postgres table and also update the json file if required

There is a customer table with a jsonb column named report that holds JSON documents.
The existing JSON in the report field is as follows:
{
"report": {
"operations-utilization-rightsizing": {
"default_settings": [{
"type": "%",
"radio": "false",
"range": {
"max": 100,
"min": 0
}
}]
}
}
}
Now I need to append or merge the JSON below into the report field of the customer table.
{
"operations-battery-critical-events": {
"default_settings": [{
"type": "%",
"radio": "false",
"range": {
"max": 100,
"min": 0
}
}]
}
}
I tried the following update statement
UPDATE customer
SET report = report || '{
"operations-battery-critical-events": {
"default_settings": [{
"type": "%",
"radio": "false",
"range": {
"max": 100,
"min": 0
}
}]
}
}' :: jsonb
WHERE report IS NOT NULL;
The output for the above SQL is,
{
"report": {
"operations-utilization-rightsizing": {
"default_settings": [{
"type": "%",
"radio": "false",
"range": {
"max": 100,
"min": 0
}
}]
}
},
"operations-battery-critical-events": {
"default_settings": [{
"type": "%",
"radio": "false",
"range": {
"max": 100,
"min": 0
}
}]
}
}
And the desired output should be as below,
{
"report": {
"operations-utilization-rightsizing": {
"default_settings": [{
"type": "%",
"radio": "false",
"range": {
"max": 100,
"min": 0
}
}]
},
"operations-battery-critical-events": {
"default_settings": [{
"type": "%",
"radio": "false",
"range": {
"max": 100,
"min": 0
}
}]
}
}
}
I'm new to JSON; please let me know if any further details are needed.
This should work: jsonb_set writes the new object at a path inside the top-level report key, instead of concatenating it beside that key. Adjust the path if your actual top-level key is named differently.
UPDATE customer
SET report = jsonb_set(report, '{report,operations-battery-critical-events}',
'{
"default_settings": [{
"type": "%",
"radio": "false",
"range": {
"max": 100,
"min": 0
}
}]
}'::jsonb)
WHERE report IS NOT NULL;
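The reason the original || attempt put the new key at the top level is that jsonb concatenation is a shallow merge, while jsonb_set writes at an explicit path. A rough Python analogy of the two behaviors (a sketch; the default_settings payloads are abbreviated to empty lists):
existing = {"report": {"operations-utilization-rightsizing": {"default_settings": []}}}
new_entry = {"operations-battery-critical-events": {"default_settings": []}}

# Like jsonb's || operator: a shallow merge, so the new key lands beside "report".
shallow = {**existing, **new_entry}

# Like jsonb_set with path '{report,...}': the new key lands inside "report".
nested = {**existing, "report": {**existing["report"], **new_entry}}
assert "operations-battery-critical-events" in nested["report"]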

jq parse for multiple values

I'm trying to output the displayName from JSON where the same secrule entry has both
"source": "0.0.0.0/0" and
"tcpOptions": { "destinationPortRange": { "min": 80 } }.
The result should display only
rule-1
Example JSON:
[
{
"displayName": "rule-1",
"secrule": [
{
"source": "0.0.0.0/0",
"tcpOptions": {
"destinationPortRange": {
"min": 80,
"max": 80
}
}
},
{
"source": "0.0.0.0/0",
"tcpOptions": {
"destinationPortRange": {
"min": 443,
"max": 443
}
}
}
]
},
{
"displayName": "rule-2",
"secrule": [
{
"source": "0.0.0.0/0",
"tcpOptions": {
"destinationPortRange": {
"min": 443,
"max": 443
}
}
},
{
"source": "20.0.0.0/0",
"tcpOptions": {
"destinationPortRange": {
"min": 80,
"max": 80
}
}
}
]
}
]
I have tried
jq -r '.[] | select(.secrule[].source == "0.0.0.0/0" and .secrule[].tcpOptions.destinationPortRange.min == 80) | .displayName' JSON | sort -u
But it displays both rules (which is incorrect)
rule-1
rule-2
You're expanding .secrule twice, so every combination of its elements gets checked: the source can match one element while the port matches another. Use any instead, which tests both conditions against the same element:
.[] | select(any(.secrule[]; .source=="0.0.0.0/0" and .tcpOptions.destinationPortRange.min==80)).displayName
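To see why this matters, here is the same selection as a minimal Python sketch (assuming the JSON is saved as rules.json, a hypothetical filename). any() tests each secrule element as a unit, so both conditions must hold on the same element, exactly like jq's any(.secrule[]; ...):
import json

with open("rules.json") as f:
    rules = json.load(f)

matches = [
    r["displayName"]
    for r in rules
    if any(
        s["source"] == "0.0.0.0/0"
        and s["tcpOptions"]["destinationPortRange"]["min"] == 80
        for s in r["secrule"]
    )
]
print(matches)  # ['rule-1']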

Elastic search to include boundary value time stamp

I am trying to include the boundary values in the response, but the buckets always snap to interval boundaries.
For example, if I ask for data from 10:20 to 10:42 at a 5-minute interval, it returns buckets for 10:20, 10:25, 10:30, 10:35, and 10:40, but the final partial span from 10:40 to 10:42 is never returned on its own. How can I do this? Here are the query and response.
Query
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [{
"range": {
"timestamp": {
"gte": 1486443000000,
"lte": 1486446240000
}
}
}, {
"term": {
"applicationId": "******"
}
}, {
"term": {
"hostId": "*******"
}
}]
}
}
}
},
"filter": {
"limit": {
"value": 0
}
},
"aggs": {
"time": {
"histogram": {
"field": "timestamp",
"interval": 300000,
"min_doc_count": 0,
"extended_bounds": {
"min": 1486443000000 ,
"max": 1486446240000
}
},
"aggs": {
"establishedConnections": {
"sum": {
"field": "establishedConnections"
}
}
}
}
},
"sort": {
"timestamp": {
"order": "desc"
}
}
}
Response
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 21,
"successful": 21,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
},
"aggregations": {
"time": {
"buckets": [
{
"key_as_string": "2017-02-07T04:50:00.000Z",
"key": 1486443000000,
"doc_count": 50,
"establishedConnections": {
"value": 13
}
},
{
"key_as_string": "2017-02-07T04:55:00.000Z",
"key": 1486443300000,
"doc_count": 50,
"establishedConnections": {
"value": 20
}
},
{
"key_as_string": "2017-02-07T05:00:00.000Z",
"key": 1486443600000,
"doc_count": 50,
"establishedConnections": {
"value": 7
}
},
{
"key_as_string": "2017-02-07T05:05:00.000Z",
"key": 1486443900000,
"doc_count": 50,
"establishedConnections": {
"value": 14
}
},
{
"key_as_string": "2017-02-07T05:10:00.000Z",
"key": 1486444200000,
"doc_count": 50,
"establishedConnections": {
"value": 13
}
},
{
"key_as_string": "2017-02-07T05:15:00.000Z",
"key": 1486444500000,
"doc_count": 50,
"establishedConnections": {
"value": 12
}
},
{
"key_as_string": "2017-02-07T05:20:00.000Z",
"key": 1486444800000,
"doc_count": 50,
"establishedConnections": {
"value": 9
}
},
{
"key_as_string": "2017-02-07T05:25:00.000Z",
"key": 1486445100000,
"doc_count": 50,
"establishedConnections": {
"value": 14
}
},
{
"key_as_string": "2017-02-07T05:30:00.000Z",
"key": 1486445400000,
"doc_count": 50,
"establishedConnections": {
"value": 19
}
},
{
"key_as_string": "2017-02-07T05:35:00.000Z",
"key": 1486445700000,
"doc_count": 50,
"establishedConnections": {
"value": 13
}
},
{
"key_as_string": "2017-02-07T05:40:00.000Z",
"key": 1486446000000,
"doc_count": 40,
"establishedConnections": {
"value": 8
}
}
]
}
}
}
The thing is, in the aggregations part of the query you asked for this:
"aggs": {
"time": {
"histogram": {
"field": "timestamp",
"interval": 300000,
"min_doc_count": 0,
"extended_bounds": {
"min": 1486443000000 ,
"max": 1486446240000
}
},
"aggs": {
"establishedConnections": {
"sum": {
"field": "establishedConnections"
}
}
}
}
}
and as the interval you specified 300000, which in milliseconds is exactly 5 minutes. That's why the partial interval ending at 10:42 is discarded, and all documents from it are placed under the 10:40 key.
To make this more formal:
When the aggregation executes, the time field of every document is evaluated and rounded down to its closest bucket. Here is the rounding function that is used:
bucket_key = Math.floor((value - offset) / interval) * interval + offset
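Plugging the example numbers into that rounding function shows why the 10:42 end time collapses into the 10:40 bucket (a quick Python sketch; offset defaults to 0):
import math

interval = 300000  # 5 minutes in milliseconds
offset = 0

def bucket_key(value):
    # Elasticsearch's histogram rounding: floor to the nearest bucket start.
    return math.floor((value - offset) / interval) * interval + offset

print(bucket_key(1486446240000))  # 1486446000000, i.e. the 05:40 bucket above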

How to parse Json response and truncate child nodes

This is the JSON response I am trying to parse:
{
"data": {
"Content": {
"id": 26,
"name": "Dashboard1"
},
"List": [
{
"ListContent": {
"id": 178,
"name": "Card-144"
},
"cards": [
{
"id": 1780,
"configuration": {
"id": 7178,
"name": "Emp"
}
}
]
},
{
"ListContent": {
"id": 179,
"name": "Card-14"
},
"cards": [
{
"id": 1798,
"configuration": {
"id": 1789,
"name": "RandomColumns"
}
}
]
},
{
"ListContent": {
"id": 180,
"name": "Card-1"
},
"cards": [
{
"id": 18080,
"configuration": {
"id": 1080,
"allow": true
}
}
]
},
{
"ListContent": {
"id": 181,
"name": "Card-14"
},
"cards": [
{
"id": 18081,
"configuration": {
"id": 1881,
"name": "Functions"
}
}
]
},
{
"ListContent": {
"id": 182,
"name": "Card-1443"
},
"cards": [
{
"id": 1782,
"configuration": {
"id": 1802,
"name": "Emp-O"
}
}
]
}
]
}
}
From the JSON, I need to extract the "id"s under the "ListContent" nodes and store them in an array, ignoring the "id"s under the "cards" child nodes.
Here is a groovy script I am trying to achieve this with,
import groovy.json.JsonSlurper

def CList = ""
def jsonRespData = context.expand( '${TestStep#Response#$.data.List}' )
def outputResp = new JsonSlurper().parseText(jsonRespData)
outputResp.id.each() {
    log.info(":" + it)
    CList = CList.concat(it.toString()).concat(',')
}
log.info(CList)
The array I am expecting is CList = [178,179,180,181,182], but I am currently getting null.
What is the correct Groovy to read only the "id" from "ListContent" and write it to an array?
Any help would be really appreciated. Thanks in advance.
You can just use the (implicit) spread operator like this:
def json = new groovy.json.JsonSlurper().parse('/tmp/x.json' as File)
//
def i = json.data.List.ListContent.id
assert i == [178, 179, 180, 181, 182]
// with explicit spread operator
def e = json.data.List*.ListContent*.id
assert e == [178, 179, 180, 181, 182]
def str = '''...''' // paste the JSON response from the question here
def outputResp = new groovy.json.JsonSlurper().parseText(str)
outputResp.data.List.collect { it.ListContent.id }
As you already have the List (from context.expand( '${TestStep#Response#$.data.List}' )), you can simply do:
outputResp.collect { it.ListContent.id }
Above returns an ArrayList.
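The same extraction in Python, for comparison (a sketch; body is a hypothetical variable holding the response JSON as a string):
import json

parsed = json.loads(body)
ids = [entry["ListContent"]["id"] for entry in parsed["data"]["List"]]
assert ids == [178, 179, 180, 181, 182]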