Aggregate and sum json data in python - json

I am new to python, using python3. I have json data like:
{
"message": {
"count": 46,
"limit": 1000,
"schools": [
{
"class": "1",
"class_id": "1c8***",
"charges": [
{
"cost": 10,
"breakdown": [
{
"books": "1",
"unitQuantity": "10"
}
]
}
],
"area": "maccau"
},
{
"class": "2",
"class_id": "1c3***",
"charges": [
{
"cost": 100,
"breakdown": [
{
"books": "1",
"unitQuantity": "100"
}
]
}
],
"area": "maccau"
},
{
"class": "1",
"class_id": "1c3***",
"charges": [
{
"cost": 10,
"breakdown": [
{
"books": "1",
"unitQuantity": "10"
}
]
}
],
"area": "maccau"
},
{
"class": "2",
"class_id": "1c8***",
"charges": [
{
"cost": 50,
"breakdown": [
{
"books": "1",
"unitQuantity": "50"
}
]
}
],
"area": "maccau"
}
],
"url": {
"link": "/"
}
}
}
I was able to use json.loads to load data and I am trying to get results like:
class Cost
1 20
2 150
I tried converting json to a dictionary:
item_dict = json.load(json_data)
Tried to get data out using for loop and checking if class = 1 and then summing up the cost. But I feel like that is not the best approach. Can someone please tell me what would be the best way of doing this?

Related

Sort complex JSON object by specific property

How can I sort the given JSON object with property count. I want to sort the entire sub-object. The higher the count value should come on the top an so on.
{
"Resource": [
{
"details": [
{
"value": "3.70"
},
{
"value": "3.09"
}
],
"work": {
"count": 1
}
},
{
"details": [
{
"value": "4"
},
{
"value": "5"
}
],
"work": {
"count": 2
},
{
"details": [
{
"value": "5"
},
{
"value": "5"
}
],
"work": "null"
}
]
}
You can try this example to sort your data:
data = {
"data": {
"Resource": [
{
"details": [{"value": "3.70"}, {"value": "3.09"}],
"work": {"count": 1},
},
{"details": [{"value": "4"}, {"value": "5"}], "work": {"count": 2}},
]
}
}
# sort by 'work'/'count'
data["data"]["Resource"] = sorted(
data["data"]["Resource"], key=lambda r: r["work"]["count"]
)
# sort by 'details'/'value'
for r in data["data"]["Resource"]:
r["details"] = sorted(r["details"], key=lambda k: float(k["value"]))
# pretty print:
import json
print(json.dumps(data, indent=4))
Prints:
{
"data": {
"Resource": [
{
"details": [
{
"value": "3.09"
},
{
"value": "3.70"
}
],
"work": {
"count": 1
}
},
{
"details": [
{
"value": "4"
},
{
"value": "5"
}
],
"work": {
"count": 2
}
}
]
}
}

read hyperopt parameters from json

I want to read hyperopt parameters from a JSON file.
My JSON file would be like:
[
{
"id": "121",
"model": [
{
"model_name": "power",
"estimator_type": [
{
"type": "Polynomial",
"degree": [2, 3, 4]
},
{
"type": "svm",
"C": [0, 1],
"kernel": [
{
"ktype": "linear"
},
{
"ktype": "RBF",
"width": [0, 1]
}
]
}
],
"cut_values": {
"qids": ["1234"]
}
},
{
"model_name": "speed",
"estimator_type": [
{
"type": "Polynomial",
"degree": ["quniform", 2, 3]
}
],
"cut_values": null
}
]
},
{
"id": "123",
"model": [
{
"model_name": "power",
"estimator_type": [
{
"type": "LinearRegression"
}
],
"cut_values": null
}
]
}
]
I have checked this post but with no success for more complex JSON like the one above.
I want to be able to create a space like 2.2 A Search Space Example: scikit-learn.

Merge objects in same array on single key

I have an array of JSON objects formatted as follows:
[
{
"id": 1,
"names": [
{
"name": "Bulbasaur",
"language": {
"name": "en",
"url": "http://myserver.com:8000/api/v2/language/9/"
}
},
],
},
{
"id": 1,
"types": [
{
"slot": 1,
"type": {
"name": "grass",
"url": "http://myserver.com:8000/api/v2/type/12/"
}
},
{
"slot": 2,
"type": {
"name": "poison",
"url": "http://myserver.com:8000/api/v2/type/4/"
}
}
]
},
{
"id": 2,
"names": [
{
"name": "Ivysaur",
"language": {
"name": "en",
"url": "http://myserver.com:8000/api/v2/language/9/"
}
},
],
},
{
"id": 2,
"types": [
{
"slot": 1,
"type": {
"name": "ice",
"url": "http://myserver.com:8000/api/v2/type/10/"
}
},
{
"slot": 2,
"type": {
"name": "electric",
"url": "http://myserver.com:8000/api/v2/type/8/"
}
}
]
},
{
"id": 3,
"names": [
{
"name": "Venusaur",
"language": {
"name": "en",
"url": "http://myserver.com:8000/api/v2/language/9/"
}
},
],
},
{
"id": 3,
"types": [
{
"slot": 1,
"type": {
"name": "ground",
"url": "http://myserver.com:8000/api/v2/type/2/"
}
},
{
"slot": 2,
"type": {
"name": "rock",
"url": "http://myserver.com:8000/api/v2/type/3/"
}
}
]
}
]
Note that these are pairs of separate objects that appear sequentially in a JSON array, with each pair sharing an id field. This pattern repeats several hundred times in the array. What I need to accomplish is to "merge" each id-sharing pair into one object. So, the resultant output would be
[
{
"id": 1,
"names": [
{
"name": "Bulbasaur",
"language": {
"name": "en",
"url": "http://myserver.com:8000/api/v2/language/9/"
}
},
],
"types": [
{
"slot": 1,
"type": {
"name": "grass",
"url": "http://myserver.com:8000/api/v2/type/12/"
}
},
{
"slot": 2,
"type": {
"name": "poison",
"url": "http://myserver.com:8000/api/v2/type/4/"
}
}
]
},
{
"id": 2,
"names": [
{
"name": "Ivysaur",
"language": {
"name": "en",
"url": "http://myserver.com:8000/api/v2/language/9/"
}
},
],
"types": [
{
"slot": 1,
"type": {
"name": "ice",
"url": "http://myserver.com:8000/api/v2/type/10/"
}
},
{
"slot": 2,
"type": {
"name": "electric",
"url": "http://myserver.com:8000/api/v2/type/8/"
}
}
]
},
{
"id": 3,
"names": [
{
"name": "Venusaur",
"language": {
"name": "en",
"url": "http://myserver.com:8000/api/v2/language/9/"
}
},
],
"types": [
{
"slot": 1,
"type": {
"name": "ground",
"url": "http://myserver.com:8000/api/v2/type/2/"
}
},
{
"slot": 2,
"type": {
"name": "rock",
"url": "http://myserver.com:8000/api/v2/type/3/"
}
}
]
}
]
I've gotten these objects to appear next to each other via the group_by(.id) command, but I'm at a loss as to how I should actually combine them. I'm very much still a novice with jq so I'm a bit overwhelmed with the amount of possible solutions.
[Note: The following assumes that the data shown in the Q have been corrected so that they are valid JSON.]
The merging you want can be achieved by object addition (x + y). For example, given the two JSON objects as shown in the question (i.e., as a stream), you could write:
jq -s '.[0] + .[1]'
However, since the question also indicates these objects are actually in an array, let's next consider the case of an array with two objects. In that case, you could simply write:
jq add
Finally, if you have an array of arrays each of which is an array of objects, you could use map(add). Since you don't have a very large array, you could simply write:
group_by(.id) | map(add)
Please note that jq defines object addition in a non-commutative way. Specifically, there is a bias towards the right-most key.

How to exclude specific fields from JSON using groovy

I would like to exclude the items which don't have productModel property in the below JSON. How can we achieve this in groovy
I tried using hasProperty but not worked for me as expected. If possible can I get some sample snippet
I tried below code - but didn't work as I expected.
response.getAt('myData').getAt('data').getAt('product').hasProperty('productModel').each { println "result ${it}" }
Any help would be really appreciated.
{
"myData": [{
"data": {
"product": {
"productId": "apple",
"productName": "iPhone",
"productModel": "6s"
},
"statusCode": "active",
"date": "2018-08-07T00:00:00.000Z"
},
"links": [{
"productUrl": "test"
},
{
"productImage": "test"
}
],
"info": {}
},
{
"data": {
"product": {
"productId": "apple",
"productName": "iPhone",
"productModel": "7"
},
"statusCode": "active",
"date": "2018-08-07T00:00:00.000Z"
},
"links": [{
"productUrl": "test"
},
{
"productImage": "test"
}
],
"info": {}
},
{
"data": {
"product": {
"productId": "apple",
"productName": "Macbook"
},
"statusCode": "active",
"date": "2018-08-07T00:00:00.000Z"
},
"links": [{
"productUrl": "test"
},
{
"productImage": "test"
}
],
"info": {}
}
],
"metadata": {
"count": 3,
"offset": 0
}
}
If you want to exclude specific fields from JSON object then you have to recreate it using filtered data. The crucial part takes these two lines (assuming that json variable in the below example stores your JSON as text):
def root = new JsonSlurper().parseText(json)
def myData = root.myData.findAll { it.data.product.containsKey('productModel') }
What happens here is we access root.myData list and we filter it using findAll(predicate) method and predicate in this case says that only objects that have key productModel in path data.product are accepted. This findAll() method does not mutate existing list and that is why we store the result in variable myData - after running this method we will end up with a list of size 2.
In next step you have to recreate the object you want to represent as a JSON:
def newJsonObject = [
myData: myData,
metadata: [
count: myData.size(),
offset: 0
]
]
println JsonOutput.prettyPrint(JsonOutput.toJson(newJsonObject))
In this part we create newJsonObject and in the end we convert it to a JSON representation.
Here is the full example:
import groovy.json.JsonOutput
import groovy.json.JsonSlurper
def json = '''{
"myData": [{
"data": {
"product": {
"productId": "apple",
"productName": "iPhone",
"productModel": "6s"
},
"statusCode": "active",
"date": "2018-08-07T00:00:00.000Z"
},
"links": [{
"productUrl": "test"
},
{
"productImage": "test"
}
],
"info": {}
},
{
"data": {
"product": {
"productId": "apple",
"productName": "iPhone",
"productModel": "7"
},
"statusCode": "active",
"date": "2018-08-07T00:00:00.000Z"
},
"links": [{
"productUrl": "test"
},
{
"productImage": "test"
}
],
"info": {}
},
{
"data": {
"product": {
"productId": "apple",
"productName": "Macbook"
},
"statusCode": "active",
"date": "2018-08-07T00:00:00.000Z"
},
"links": [{
"productUrl": "test"
},
{
"productImage": "test"
}
],
"info": {}
}
],
"metadata": {
"count": 3,
"offset": 0
}
}'''
def root = new JsonSlurper().parseText(json)
def myData = root.myData.findAll { it.data.product.containsKey('productModel') }
def newJsonObject = [
myData: myData,
metadata: [
count: myData.size(),
offset: 0
]
]
println JsonOutput.prettyPrint(JsonOutput.toJson(newJsonObject))
And here is the output it produces:
{
"myData": [
{
"data": {
"product": {
"productId": "apple",
"productName": "iPhone",
"productModel": "6s"
},
"statusCode": "active",
"date": "2018-08-07T00:00:00.000Z"
},
"links": [
{
"productUrl": "test"
},
{
"productImage": "test"
}
],
"info": {
}
},
{
"data": {
"product": {
"productId": "apple",
"productName": "iPhone",
"productModel": "7"
},
"statusCode": "active",
"date": "2018-08-07T00:00:00.000Z"
},
"links": [
{
"productUrl": "test"
},
{
"productImage": "test"
}
],
"info": {
}
}
],
"metadata": {
"count": 2,
"offset": 0
}
}

Aggregate two payloads in Mule ESB

My mule code is hitting two tables and getting some details.
First one is order details, which I am storing in a flow variable i.e order and another database is returning order item details which I am storing in orderitem variable.
I want to aggregate both the payload based on one condition. Every orderId has order items (which is stored in flowVars.orderitem) and map these order items to respective orderID.
flowVars.order value is as below
[{partnerId=e83185e9f33e4234ba9eaa81dba515ad, orderId=12345, orderDate=2017-02-28 16:41:41.0, id=22}, {partnerId=e83185e9f33e4234ba9eaa81dba515ad, orderId=123456, orderDate=2017-02-28 16:41:41.0, id=23}, {partnerId=e83185e9f33e4234ba9eaa81dba515ad, orderId=11111, orderDate=2017-02-28 16:41:41.0, id=24}, {partnerId=e83185e9f33e4234ba9eaa81dba515ad, orderId=321123, orderDate=2017-05-19 15:25:41.0, id=26}]
and flowVars.orderitem value is as below
[{productCode=ELT-LP-ICND1-020067, orderId=12345, quantity=10, id=14}, {productCode=ELT-IP-ICND1-1.0, orderId=12345, quantity=11, id=15}, {productCode=ELT-LP-ICND1-020067, orderId=123456, quantity=12, id=16}, {productCode=ELT-IP-ICND1-1.0, orderId=123456, quantity=13, id=17}, {productCode=ELT-LP-ICND1-020067, orderId=11111, quantity=14, id=18}, {productCode=ELT-IP-ICND1-1.0, orderId=11111, quantity=15, id=19}, {productCode=ELT-LP-ICND2-020067, orderId=321123, quantity=5, id=20}]
Expected Output
[
{
"orderId": "12345",
"orderDate": "2017-02-28T16:41:41",
"partnerId": "e83185e9f33e4234ba9eaa81dba515ad",
"orderItems": [
{
"productCode": "ELT-LP-ICND1-020067",
"quantity": "10"
},
{
"productCode": "ELT-IP-ICND1-1.0",
"quantity": "11"
}
]
},
{
"orderId": "123456",
"orderDate": "2017-02-28T16:41:41",
"partnerId": "e83185e9f33e4234ba9eaa81dba515ad",
"orderItems": [
{
"productCode": "ELT-LP-ICND1-020067",
"quantity": "12"
},
{
"productCode": "ELT-IP-ICND1-1.0",
"quantity": "13"
}
]
},
{
"orderId": "11111",
"orderDate": "2017-02-28T16:41:41",
"partnerId": "e83185e9f33e4234ba9eaa81dba515ad",
"orderItems": [
{
"productCode": "ELT-LP-ICND1-020067",
"quantity": "14"
},
{
"productCode": "ELT-IP-ICND1-1.0",
"quantity": "15"
}
]
},
{
"orderId": "321123",
"orderDate": "2017-05-19T15:25:41",
"partnerId": "e83185e9f33e4234ba9eaa81dba515ad",
"orderItems": [
{
"productCode": "ELT-LP-ICND1-020067",
"quantity": "5"
}
]
}
]
Here I need to show respective order item details of an order. So basically I need to combine both the payloads.
I tried using dataweave but not luck.
Dataweave code:
%dw 1.0
%output application/json
%var mergeddata = flowVars.orderitem groupBy $.orderId
---
flowVars.order map ((data,index) ->
{
orderid: data.orderId,
partnerid: data.partnerId,
orderdate: data.orderDate,
order: flowVars.orderitem default [] map ((data1 ,indexOf) ->
{
(productcode: data1.productCode) when (data1.orderId == data.orderId),
(quantity: data1.quantity) when (data1.orderId == data.orderId) ,
(id: data1.id) when (data1.orderId == data.orderId)
}
)})
And output after transformation:
{
"orderid": "12345",
"partnerid": "e83185e9f33e4234ba9eaa81dba515ad",
"orderdate": "2017-02-28T16:41:41",
"order": [
{
"productcode": "ELT-LP-ICND1-020067",
"quantity": 10,
"id": 14
},
{
"productcode": "ELT-IP-ICND1-1.0",
"quantity": 11,
"id": 15
},
{
},
{
},
{
},
{
},
{
}
]
},
{
"orderid": "123456",
"partnerid": "e83185e9f33e4234ba9eaa81dba515ad",
"orderdate": "2017-02-28T16:41:41",
"order": [
{
},
{
},
{
"productcode": "ELT-LP-ICND1-020067",
"quantity": 12,
"id": 16
},
{
"productcode": "ELT-IP-ICND1-1.0",
"quantity": 13,
"id": 17
},
{
},
{
},
{
}
]
},
{
"orderid": "11111",
"partnerid": "e83185e9f33e4234ba9eaa81dba515ad",
"orderdate": "2017-02-28T16:41:41",
"order": [
{
},
{
},
{
},
{
},
{
"productcode": "ELT-LP-ICND1-020067",
"quantity": 14,
"id": 18
},
{
"productcode": "ELT-IP-ICND1-1.0",
"quantity": 15,
"id": 19
},
{
}
]
},
{
"orderid": "321123",
"partnerid": "e83185e9f33e4234ba9eaa81dba515ad",
"orderdate": "2017-05-19T15:25:41",
"order": [
{
},
{
},
{
},
{
},
{
},
{
},
{
"productcode": "ELT-LP-ICND2-020067",
"quantity": 5,
"id": 20
}
]
}
]
As you can see that I am almost there and able to map order item details with respective orderId but still I am getting some blank values after transformation.
Can anyone help me to achieve expected output. Thanks in advance!!!
You need to filter the flowVars.orderitem map, rather than iterate it in full and only print values when the orderId matches.
order: ((flowVars.orderitem default []) filter (data.orderId == $.orderId)) map ((data1 ,indexOf) -> {
productcode: data1.productCode,
quantity: data1.quantity
id: data1.id
})
You can then remove all of those 'when' statements too.