Parsing difficult JSON logs using Python - json

I have a json file as shown below
{
"body": {
"results": [
[
{
"field": "#timestamp",
"value": "2020-04-26 19:28:40.136"
},
{
"field": "#message",
"value": "Hi"
},
{
"field": "#time",
"value": "19:28:40,023"
},
{
"field": "#name",
"value": "Nitish"
},
{
"field": "#hobby",
"value": "Pool"
},
{
"field": "#duration",
"value": "19 mins"
}
],
[
{
"field": "#timestamp",
"value": "2020-04-26 19:28:40.136"
},
{
"field": "#message",
"value": "Hello"
},
{
"field": "#time",
"value": "19:28:40,023"
},
{
"field": "#name",
"value": "Amuri"
},
{
"field": "#Totalruns",
"value": "2"
},
{
"field": "#wickets",
"value": "10"
},
{
"field": "#hobby",
"value": "cricket"
},
{
"field": "#commentry",
"value": "ubbjb"
}
],
[
{
"field": "#timestamp",
"value": "2020-04-26 19:28:40.136"
},
{
"field": "#message",
"value": "how are you"
},
{
"field": "#time",
"value": "19:28:40,023"
},
{
"field": "#name",
"value": "Kit"
},
{
"field": "#Totalruns",
"value": "90"
},
{
"field": "#wickets",
"value": "1"
},
{
"field": "#hobby",
"value": "cricket"
},
{
"field": "#commentry",
"value": "jbunib"
}
]
]
}
}
I'm trying to parse #Totalruns and #wickets only if #hobby = cricket.
I'm able to get as far as detecting the cricket records, but I can't figure out how to get the #Totalruns and #wickets values from them.
Code I've tried:
import json
# Load the log file; the with-block closes the handle even if json.load raises.
with open('/Users/amurin/Documents/test.json', 'r') as f:
    data = json.load(f)

# data['body']['results'] is a list of records; each record is a list of
# {"field": ..., "value": ...} entries.
for result in data['body']['results']:
    for res in result:
        # Matches any entry whose value is 'cricket', whatever its field is.
        if res['value'] == 'cricket':
            print("Hello")
I need output as
wickets = 1 , Totalruns = 90
wickets = 10 , Totalruns = 2

Solution 1, keeping the structure as it is:
def sublist_is_valid(sublist):
    """Return True if the record has a "#hobby" entry whose value is "cricket".

    Args:
        sublist: one record, i.e. an iterable of dicts that normally carry
            a "field" key and a "value" key.

    Returns:
        bool: True when at least one entry is the "#hobby"/"cricket" pair,
        False otherwise (including for an empty record).
    """
    # .get() keeps an entry that lacks "field" or "value" from raising
    # KeyError; such an entry simply does not match.
    return any(
        d.get("field") == "#hobby" and d.get("value") == "cricket"
        for d in sublist
    )
def filter_fields(sublist, fields=("#wickets", "#Totalruns")):
    """Return the entries of a record whose "field" is one of ``fields``.

    Args:
        sublist: one record, i.e. an iterable of dicts that normally carry
            a "field" key and a "value" key.
        fields: field names to keep; defaults to the two score fields.

    Returns:
        list: the matching entry dicts, in the order they appear in the
        record (not in the order of ``fields``).
    """
    # .get() lets an entry without a "field" key be skipped instead of
    # raising KeyError.
    return [d for d in sublist if d.get("field") in fields]
# Keep only the cricket records, then reduce each one to its score entries.
cricket_records = filter(sublist_is_valid, data["body"]["results"])
filtered = list(map(filter_fields, cricket_records))
print(filtered)
Output:
[[{'field': '#Totalruns', 'value': '2'}, {'field': '#wickets', 'value': '10'}],
[{'field': '#Totalruns', 'value': '90'}, {'field': '#wickets', 'value': '1'}]]
Solution 2 (better IMO) is to use dictionaries right from the beginning:
def sublist_to_dict(sublist):
    """Flatten one record into a single ``{field: value}`` dict.

    Args:
        sublist: one record, i.e. an iterable of dicts that each carry a
            "field" key and a "value" key.

    Returns:
        dict: maps every entry's "field" to its "value". If a field name
        occurs more than once, the last occurrence wins.

    Raises:
        KeyError: if an entry lacks "field" or "value".
    """
    return {d["field"]: d["value"] for d in sublist}
# Turn every record into a {field: value} dict, keep the cricket ones, and
# project each of those onto the two score fields.
filtered = [
    {"#Totalruns": record["#Totalruns"], "#wickets": record["#wickets"]}
    for record in map(sublist_to_dict, data["body"]["results"])
    if record.get("#hobby") == "cricket"
]
print(filtered)
Output:
[{'#Totalruns': '2', '#wickets': '10'}, {'#Totalruns': '90', '#wickets': '1'}]

Related

building url query string using n1ql

using couchbase 5
I need to build a query string from this object
[
{
"_id": 190,
"querystring": [
{
"name": "p1",
"value": "val1"
},
{
"name": "p2",
"value": "val2"
}
]
}
]
the expected output should be
p1=val1&p2=val2
can anyone help here?
After a few attempts, I think I got closer to the solution I need.
[
{
"_id": 190,
"res": [
"company_id=$PREFIJO&",
"user_country=$COUNTRY&",
"offer_unique_code=$PIXEL&",
"pub_id=$PUBID&"
]
}
]
now, how can I convert "res" to a concatenated string of all the array elements?
/* Single object: the inner CONCAT2 builds "name=value" for each element of
   obj.querystring, and the outer CONCAT2 joins those strings with "&". */
WITH obj AS ({ "_id": 190, "querystring": [ { "name": "p1", "value": "val1" }, { "name": "p2", "value": "val2" } ] })
SELECT obj._id, CONCAT2("&", ARRAY CONCAT2("=",v.name,v.`value`) FOR v IN obj.querystring END) AS res;
Array of objects
/* Array of objects: FROM objs AS obj iterates over the array, and the same
   CONCAT2 expression is evaluated once per object. */
WITH objs AS ([{ "_id": 190, "querystring": [ { "name": "p1", "value": "val1" }, { "name": "p2", "value": "val2" } ] },
{ "_id": 191, "querystring": [ { "name": "p3", "value": "val1" }, { "name": "p4", "value": "val2" } ] }
])
SELECT obj._id, CONCAT2("&", ARRAY CONCAT2("=",v.name,v.`value`) FOR v IN obj.querystring END) AS res FROM objs AS obj ;
In older versions where CONCAT2() is not available, either fetch the array of strings (name=val) and join them in the application, or use the following technique. It assumes your names/values don't contain any of the character sequences being replaced.
/* Fallback without CONCAT2: ARRAY ... END builds an array of "name=value"
   strings, encode_json serializes it to JSON text, and the three nested
   replace() calls turn each "," separator into & and strip the leading ["
   and the trailing "].
   NOTE(review): this relies on encode_json emitting no whitespace between
   array elements - confirm on the target server version. */
WITH objs AS ([{ "_id": 190, "querystring": [ { "name": "p1", "value": "val1" }, { "name": "p2", "value": "val2" } ] },
{ "_id": 191, "querystring": [ { "name": "p3", "value": "val1" }, { "name": "p4", "value": "val2" } ] }
])
SELECT obj._id, replace(replace(replace(encode_json(ARRAY CONCAT(v.name,"=",v.`value`) FOR v IN obj.querystring END),"\",\"","&"),"[\"",""),"\"]","") AS res FROM objs AS obj ;
If a single document contains the ARRAY of objects, use UNNEST.
If a value is a number, convert it to a string using TO_STR() before the CONCAT operation.
https://docs.couchbase.com/server/current/n1ql/n1ql-language-reference/stringfun.html#fn-str-concat2

JQ: Add object to nested json with the same key names

I'm having trouble appending a new object to my JSON at config -> list -> key (vehicles) -> rows, and only for vehicles.
I'm trying it with jq: cat file.json | jq '.config.list[].rows[] += {"data":[{"key":"fort","value":"K"},{"key":"seat","value":"leon"}],"default":false}' But this replaces instead of appending. Is that because of the identical key names?
Object that needs to be added:
{
"data": [
{
"key": "bike",
"value": "yyy"
},
{
"key": "car",
"value": "xxx"
}
],
"default": false
}
Source Json:
{
"id": "1234",
"name": "CatList",
"config": {
"list": [
{
"key": "vehicles",
"rows": [
{
"data": [
{
"key": "bike",
"value": "yyy"
},
{
"key": "car",
"value": "xxx"
}
],
"default": false
}
]
},
{
"key": "boots",
"rows": []
}
],
"data": [
{
"key": "GROUPS",
"value": "false"
}
]
}
}
Wanted result:
{
"id": "1234",
"name": "CatList",
"config": {
"list": [
{
"key": "vehicles",
"rows": [
{
"data": [
{
"key": "bike",
"value": "yyy"
},
{
"key": "car",
"value": "xxx"
}
],
"default": false
},
{
"data": [ <-----
{ <-----
"key": "bike", <-----
"value": "yyy" <-----
}, <-----
{ <-----
"key": "car", <-----
"value": "xxx" <-----
} <-----
], <-----
"default": false <-----
}
]
},
{
"key": "boots",
"rows": []
}
],
"data": [
{
"key": "GROUPS",
"value": "false"
}
]
}
}
You were close
# Append one new row object to the "rows" array of the list entry whose key is "vehicles".
# map(.key=="vehicles")|index(true) yields the position of the first matching entry;
# wrapping the new object in [...] makes += concatenate arrays instead of merging objects.
jq '.config.list[.config.list|map(.key=="vehicles")|index(true)].rows += [{"data":[{"key":"fort","value":"K"},{"key":"seat","value":"leon"}],"default":false}]'
see https://stackoverflow.com/a/42248841/2235381

How to filter json file with specific tag values and get output into csv

I have created a JSON file whose output contains key-value pairs, but I would like to filter it further to keep only specific tags, and get the new output as a table in Excel (CSV) format.
aws resourcegroupstaggingapi get-resources --tags-per-page 100 --tag-filters Key=ProjectName,Values=Avengers > tag-filter.json
However, it returns the list of all the tags, not just "ProjectName". I would like to filter the output down to "ProjectName" plus 2 more tags with their values, but not all of them:
Actual results:
{
"ResourceTagMappingList": [
{
"ResourceARN": "arn:aws:app:us-east-1:XXXX/mesh/Avenger1",
"Tags": [
{
"Key": "ApplicationName",
"Value": "HULK"
},
{
"Key": "Owner",
"Value": "Mark Ruffalo"
},
{
"Key": "Costume",
"Value": "GREEN"
},
{
"Key": "Power",
"Value": "SMASH"
},
{
"Key": "ProjectName",
"Value": "Avengers"
}
]
},
{
"ResourceARN": "arn:aws:app:us-east-1:XXXX:mesh/Avenger2",
"Tags": [
{
"Key": "ApplicationName",
"Value": "IRON-MAN"
},
{
"Key": "Owner",
"Value": "Robert Downey Jr."
},
{
"Key": "Costume",
"Value": "RED"
},
{
"Key": "Power",
"Value": "SuperSonic"
},
{
"Key": "ProjectName",
"Value": "Avengers"
}
]
}
]
}
Expected Results:
{
"ResourceTagMappingList": [
{
"ResourceARN": "arn:aws:app:us-east-1:XXXX/mesh/Avenger1",
"Tags": [
{
"Key": "ApplicationName",
"Value": "HULK"
},
{
"Key": "Owner",
"Value": "Mark Ruffalo"
},
{
"Key": "ProjectName",
"Value": "Avengers"
}
]
},
{
"ResourceARN": "arn:aws:app:us-east-1:XXXX:mesh/Avenger2",
"Tags": [
{
"Key": "ApplicationName",
"Value": "IRON-MAN"
},
{
"Key": "Owner",
"Value": "Robert Downey Jr."
},
{
"Key": "ProjectName",
"Value": "Avengers"
}
]
}
]
}
To achieve the "expected" output given the "actual" output, you could use the following filter:
# For every resource, rewrite its Tags array in place (|=), keeping only the
# tag objects whose Key is one of the three listed names.
# NOTE(review): IN/1 is a builtin in jq 1.6 and later - confirm the installed version.
.ResourceTagMappingList[].Tags
|= map(select(.Key|IN("ApplicationName","Owner","ProjectName")))
To achieve the expected CSV, it would be helpful to know what you expect.

Sort complex JSON object by specific property

How can I sort the given JSON object by the property count? I want to sort the entire sub-object: the entry with the highest count value should come first, and so on.
{
"Resource": [
{
"details": [
{
"value": "3.70"
},
{
"value": "3.09"
}
],
"work": {
"count": 1
}
},
{
"details": [
{
"value": "4"
},
{
"value": "5"
}
],
"work": {
"count": 2
}
},
{
"details": [
{
"value": "5"
},
{
"value": "5"
}
],
"work": "null"
}
]
}
You can try this example to sort your data:
import json

data = {
    "data": {
        "Resource": [
            {
                "details": [{"value": "3.70"}, {"value": "3.09"}],
                "work": {"count": 1},
            },
            {"details": [{"value": "4"}, {"value": "5"}], "work": {"count": 2}},
        ]
    }
}


def work_count(resource):
    """Return the sort key for one Resource entry: its work["count"].

    Entries whose "work" is missing, is not an object (for example the
    string "null"), or has no numeric "count" get -infinity, so they are
    grouped at one end of the ordering instead of raising.
    """
    work = resource.get("work")
    count = work.get("count") if isinstance(work, dict) else None
    return count if isinstance(count, (int, float)) else float("-inf")


# sort by 'work'/'count' (ascending; pass reverse=True to sorted() to put
# the highest count first and the entries without a count last)
data["data"]["Resource"] = sorted(data["data"]["Resource"], key=work_count)

# sort by 'details'/'value' (the values are numeric strings, so compare as float)
for r in data["data"]["Resource"]:
    r["details"] = sorted(r["details"], key=lambda k: float(k["value"]))

# pretty print:
print(json.dumps(data, indent=4))
Prints:
{
"data": {
"Resource": [
{
"details": [
{
"value": "3.09"
},
{
"value": "3.70"
}
],
"work": {
"count": 1
}
},
{
"details": [
{
"value": "4"
},
{
"value": "5"
}
],
"work": {
"count": 2
}
}
]
}
}

Parse JSON with map of list

I am new to Scala and JSON parsing and need some help. I need to parse the complex JSON (below) to get the values of "name" under the "dimensions" key, i.e. I need PLATFORM and OS_VERSION.
I tried multiple options, but it is not working. Any help is appreciated
This is a snippet of the code I tried, but I am not able to proceed further in parsing the list. I believe the 'ANY' keyword is causing some mismatch / issues.
import org.json4s._
import org.json4s.jackson.JsonMethods._
// Implicit json4s formats, picked up by extract below.
implicit val formats = org.json4s.DefaultFormats
// Extract the whole document as an untyped map: every value has static type Any.
// presumably tmp holds the JSON text shown below - it is not defined in this snippet.
val mapJSON = parse(tmp).extract[Map[String, Any]]
println(mapJSON)
//for ((k,v) <- mapJSON) printf("key: %s, value: %s\n", k, v)
// The static type of this lookup is Any, so the dimension entries cannot be
// traversed from here without a cast.
val list_map = mapJSON("dimensions")
{
"uuid": "uuidddd",
"last_modified": 1559080222953,
"version": "2.6.1.0",
"name": "FULL_DAY_2_mand_date",
"is_draft": false,
"model_name": "FULL_DAY_1_may05",
"description": "",
"null_string": null,
"dimensions": [
{
"name": "PLATFORM",
"table": "tbl1",
"column": "PLATFORM",
"derived": null
},
{
"name": "OS_VERSION",
"table": "tbl1",
"column": "OS_VERSION",
"derived": null
}
],
"measures": [
{
"name": "_COUNT_",
"function": {
"expression": "COUNT",
"parameter": {
"type": "constant",
"value": "1"
},
"returntype": "bigint"
}
},
{
"name": "UU",
"function": {
"expression": "COUNT_DISTINCT",
"parameter": {
"type": "column",
"value": "tbl1.USER_ID"
},
"returntype": "hllc(12)"
}
},
{
"name": "CONT_SIZE",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "tbl1.SIZE"
},
"returntype": "bigint"
}
},
{
"name": "CONT_COUNT",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "tbl1.COUNT"
},
"returntype": "bigint"
}
}
],
"dictionaries": [],
"rowkey": {
"rowkey_columns": [
{
"column": "tbl1.OS_VERSION",
"encoding": "dict",
"encoding_version": 1,
"isShardBy": false
},
{
"column": "tbl1.PLATFORM",
"encoding": "dict",
"encoding_version": 1,
"isShardBy": false
},
{
"column": "tbl1.DEVICE_FAMILY",
"encoding": "dict",
"encoding_version": 1,
"isShardBy": false
}
]
},
"hbase_mapping": {
"column_family": [
{
"name": "F1",
"columns": [
{
"qualifier": "M",
"measure_refs": [
"_COUNT_",
"CONT_SIZE",
"CONT_COUNT"
]
}
]
},
{
"name": "F2",
"columns": [
{
"qualifier": "M",
"measure_refs": [
"UU"
]
}
]
}
]
},
"aggregation_groups": [
{
"includes": [
"tbl1.PLATFORM",
"tbl1.OS_VERSION"
],
"select_rule": {
"hierarchy_dims": [],
"mandatory_dims": [
"tbl1.DATE_HR"
],
"joint_dims": []
}
}
],
"signature": "ttrrs==",
"notify_list": [],
"status_need_notify": [
"ERROR",
"DISCARDED",
"SUCCEED"
],
"partition_date_start": 0,
"partition_date_end": 3153600000000,
"auto_merge_time_ranges": [
604800000,
2419200000
],
"volatile_range": 0,
"retention_range": 0,
"engine_type": 4,
"storage_type": 2,
"override_kylin_properties": {
"job.queuename": "root.production.P0",
"is-mandatory-only-valid": "true"
},
"cuboid_black_list": [],
"parent_forward": 3,
"mandatory_dimension_set_list": [],
"snapshot_table_desc_list": []
}
You need to make more specific classes for parsing the data, something like this:
// Typed model of one "dimensions" entry; only these three fields are declared.
case class Dimension(name: String, table: String, column: String)
// Typed model of the top-level document; only uuid and the dimensions list are declared.
case class AllData(uuid: String, dimensions: List[Dimension])

// Extract the parsed JSON straight into the typed model rather than Map[String, Any].
val data = parse(tmp).extract[AllData]
// Collect the name of every dimension, in list order.
val names = for (dimension <- data.dimensions) yield dimension.name