How to search a JSON hash using a Regex?

I'm using an API that returns all macros. I am trying to return all the "macros" where "actions" contains a "value" matching my regexp pattern, which I will link below.
I've tried the code below and other methods, but it returns nil even for values that are present. Any tips appreciated.
macros["value"].select { |m| m['key'] == 'value' }.first['/^DE([0-9a-zA-Z]\s?){20}$/gm']
API result snippet:
jsObj
=> {"macros"=>
[{"url"=>"https://s/1900002708354.json",
"id"=>1900002708354,
"title"=>"Append Signature",
"active"=>true,
"updated_at"=>"2021-10-22T14:11:15Z",
"created_at"=>"2021-10-22T14:11:15Z",
"position"=>10001,
"description"=>"This macro appends a signature to the message ",
"actions"=>[{"field"=>"comment_value_html", "value"=>"<p>Mit besten Grüßen,</p><p>{{current_user.name}} [{{ticket.account}}] <br></p><p><br></p><p>{{dc.signature_email}}<br></p><p><br></p>"}],
"restriction"=>nil},
{"url"=>"949.json",
"id"=>59071949,
"title"=>"information",
"description"=>nil,
"actions"=>[{"field"=>"priority", "value"=>"low"}, {"field"=>"comment_value", "value"=>"DE89370400440532013000" "DE89 3704
0044 0532 0130 00"
"}],
"restriction"=>nil},
Desired Result:
{
"macros": [
{
"url": "x.json",
"id": 1900002708354,
"actions": [
{
"field": "comment_value_html",
"value": "DE89 3704 0044 0532 0130 00"
}
],
"restriction": null
},
{
"url": "x.json",
"id": 59071949,
"actions": [
{
"field": "priority",
"value": "low"
},
{
"field": "comment_value",
"value": "DE89 3704 0044 0532 0130 00
"
}
],
"restriction": null
},

Given that macros is the array stored under the "macros" key, you can use:
macros.select { |m| m["actions"].any? { |w| /\ADE(?:[0-9a-zA-Z]\s?){20}\z/.match?(w["value"]) } }
Here is a Ruby demo:
require 'json'
j = <<-DATA
{
"macros": [
{
"url": "x.json",
"id": 1900002708354,
"actions": [
{
"field": "comment_value_html",
"value": "DE11111111111222222220"
}
],
"restriction": null
},
{
"url": "x.json",
"id": 59071949,
"actions": [
{
"field": "priority",
"value": "low"
},
{
"field": "comment_value",
"value": "DE12345678901234567890"
}
],
"restriction": null
}
]}
DATA
jsObj = JSON.parse(j)
macros = jsObj['macros']
puts jsObj['macros'].select { |m| m["actions"].any? { |w| /\ADE(?:[0-9a-zA-Z]\s?){20}\z/.match?(w["value"]) } }
Output:
{"url"=>"x.json", "id"=>1900002708354, "actions"=>[{"field"=>"comment_value_html", "value"=>"DE11111111111222222220"}], "restriction"=>nil}
{"url"=>"x.json", "id"=>59071949, "actions"=>[{"field"=>"priority", "value"=>"low"}, {"field"=>"comment_value", "value"=>"DE12345678901234567890"}], "restriction"=>nil}
The main part, .select { |m| m["actions"].any? { |w| /\ADE(?:[0-9a-zA-Z]\s?){20}\z/.match?(w["value"]) } }, keeps every macro whose "actions" array contains at least one entry whose "value" matches the given regex. Note that the original attempt passed the pattern as a string hash key (first['/^DE([0-9a-zA-Z]\s?){20}$/gm']), so Ruby performed a literal key lookup rather than a regex match, which is why it returned nil; Ruby also anchors whole-string matches with \A and \z rather than the JavaScript-style /^...$/gm.


How to identify and explode a nested json file as columns of a dataframe?

I am reframing my question so that it is clearer.
My data looks like this:
{
"Research": {
"#xmlns": "http://www.xml.org/2013/2/XML",
"#language": "eng",
"#createDateTime": "2022-03-25T10:12:39Z",
"#researchID": "abcd",
"Product": {
"#productID": "abcd",
"StatusInfo": {
"#currentStatusIndicator": "Yes",
"#statusDateTime": "2022-03-25T12:18:41Z",
"#statusType": "Published"
},
"Source": {
"Organization": {
"#primaryIndicator": "Yes",
"#type": "SellSideFirm",
"OrganizationID": [
{
"#idType": "L1",
"#text": "D827C98E315F"
},
{
"#idType": "TR",
"#text": "3202"
},
{
"#idType": "TR",
"#text": "SZA"
}
],
"OrganizationName": {
"#nameType": "Legal",
"#text": "Citi"
},
"PersonGroup": {
"PersonGroupMember": {
"#primaryIndicator": "Yes",
"#sequence": "1",
"Person": {
"#personID": "tr56",
"FamilyName": "Wang",
"GivenName": "Bond",
"DisplayName": "Bond Wang",
"Biography": "Bond Wang is a",
"BiographyFormatted": "Bond Wang",
"PhotoResourceIdRef": "AS44556"
}
}
}
}
},
"Content": {
"Title": "Premier",
"Abstract": "None",
"Synopsis": "Premier’s solid 1H22 result .",
"Resource": [
{
"#language": "eng",
"#primaryIndicator": "Yes",
"#resourceID": "9553",
"Length": {
"#lengthUnit": "Pages",
"#text": "17"
},
"MIMEType": "text/html",
"URL": "https://www.DFKJG.com/rendition/eppublic"
},
{
"#language": "eng",
"#primaryIndicator": "No",
"#resourceID": "4809",
"Length": {
"#lengthUnit": "Pages",
"#text": "17"
},
"MIMEType": "ABS/pdf",
"Name": "asdf.pdf",
"Comments": "fr5.pdf"
},
{
"#language": "eng",
"#primaryIndicator": "No",
"#resourceID": "6d13a965723e",
"Length": {
"#lengthUnit": "Pages",
"#text": "17"
},
"MIMEType": "text/html",
"URL": "https://www.dfgdfg.com/"
},
{
"#primaryIndicator": "No",
"#resourceID": "709c7bdb1c99",
"MIMEType": "tyy/image",
"URL": "https://ir.ght.com"
},
{
"#primaryIndicator": "No",
"#resourceID": "gfjhgj",
"MIMEType": "gtty/image",
"URL": "https://ir.gtty.com"
}
]
},
"Context": {
"#external": "Yes",
"IssuerDetails": {
"Issuer": {
"#issuerType": "Corporate",
"#primaryIndicator": "Yes",
"SecurityDetails": {
"Security": {
"#estimateAction": "Revision",
"#primaryIndicator": "Yes",
"#targetPriceAction": "Increase",
"SecurityID": [
{
"#idType": "RIC",
"#idValue": "PMV.AX",
"#publisherDefinedValue": "RIC"
},
{
"#idType": "Bloomberg",
"#idValue": "PMV#AU"
},
{
"#idType": "SEDOL",
"#idValue": "6699781"
}
],
"SecurityName": "Premier Investments Ltd",
"AssetClass": {
"#assetClass": "Equity"
},
"AssetType": {
"#assetType": "Stock"
},
"SecurityType": {
"#securityType": "Common"
},
"Rating": {
"#rating": "NeutralSentiment",
"#ratingType": "Rating",
"#aspect": "Investment",
"#ratingDateTime": "2020-07-31T08:24:37Z",
"RatingEntity": {
"#ratingEntity": "PublisherDefined",
"PublisherDefinedValue": "Citi"
}
}
}
},
"IssuerID": {
"#idType": "PublisherDefined",
"#idValue": "PMV.AX",
"#publisherDefinedValue": "TICKER"
},
"IssuerName": {
"#nameType": "Legal",
"NameValue": "Premier Investments Ltd"
}
}
},
"ProductDetails": {
"#periodicalIndicator": "No",
"#publicationDateTime": "2022-03-25T12:18:41Z",
"ProductCategory": {
"#productCategory": "Report"
},
"ProductFocus": {
"#focus": "Issuer",
"#primaryIndicator": "Yes"
},
"EntitlementGroup": {
"Entitlement": [
{
"#includeExcludeIndicator": "Include",
"#primaryIndicator": "No",
"AudienceTypeEntitlement": {
"#audienceType": "PublisherDefined",
"#entitlementContext": "TR",
"#text": "20012"
}
},
{
"#includeExcludeIndicator": "Include",
"#primaryIndicator": "No",
"AudienceTypeEntitlement": {
"#audienceType": "PublisherDefined",
"#entitlementContext": "TR",
"#text": "2001"
}
}
]
}
},
"ProductClassifications": {
"Discipline": {
"#disciplineType": "Investment",
"#researchApproach": "Fundamental"
},
"Subject": {
"#publisherDefinedValue": "TREPS",
"#subjectValue": "PublisherDefined"
},
"Country": {
"#code": "AU",
"#primaryIndicator": "Yes"
},
"Region": {
"#primaryIndicator": "Yes",
"#emergingIndicator": "No",
"#regionType": "Australasia"
},
"AssetClass": {
"#assetClass": "Equity"
},
"AssetType": {
"#assetType": "Stock"
},
"SectorIndustry": [
{
"#classificationType": "GICS",
"#code": "25201040",
"#focusLevel": "Yes",
"#level": "4",
"#primaryIndicator": "Yes",
"Name": "Household Appliances"
},
{
"#classificationType": "GICS",
"#code": "25504020",
"#focusLevel": "Yes",
"#level": "4",
"#primaryIndicator": "Yes",
"Name": "Computer & Electronics Retail"
},
{
"#classificationType": "GICS",
"#code": "25504040",
"#focusLevel": "Yes",
"#level": "4",
"#primaryIndicator": "Yes",
"Name": "Specialty Stores"
},
{
"#classificationType": "GICS",
"#code": "25504030",
"#focusLevel": "Yes",
"#level": "4",
"#primaryIndicator": "Yes",
"Name": "Home Improvement Retail"
},
{
"#classificationType": "GICS",
"#code": "25201050",
"#focusLevel": "Yes",
"#level": "4",
"#primaryIndicator": "Yes",
"Name": "Housewares & Specialties"
}
]
}
}
}
}
}
I want to explode all of its elements into a dataframe.
The number of columns that have a list-like structure can also change.
Basically, we will not know whether the next input will have fewer or more columns to be exploded.
This is what I have tried so far, but it does not give the correct answer.
Also, I have hardcoded the column names, but the code should identify them and then explode.
import json

import pandas as pd
import xmltodict
from tabulate import tabulate

# parse the XML into a dict, then round-trip through JSON
parsed = xmltodict.parse("""xml data""")
json_str = json.dumps(parsed)
resp = json.loads(json_str)
print(resp)

df = pd.json_normalize(resp)
cols = ['Research.Product.Source.Organization.OrganizationID','Research.Product.Content.Resource','Research.Product.Context.IssuerDetails.Issuer.SecurityDetails.Security.SecurityID','Research.Product.Context.ProductDetails.EntitlementGroup.Entitlement','Research.Product.Context.ProductClassifications.SectorIndustry']

def explode_columns(df, cols):
    df_e = df.copy()
    for c in cols:
        df_e = df_e.explode(c, ignore_index=True)
    return df_e

df2 = explode_columns(df, cols)
print(tabulate(df2, headers="keys", tablefmt="psql"))
# df2.to_csv('dataframe.csv', header=True, index=False)
As suggested in the comments, you can define a helper function in pure Python to recursively flatten the nested values of your data.
So, with the json file you provided, here is one way to do it:
def flatten(data, new_data):
    """Recursive helper function.

    Args:
        data: nested dictionary.
        new_data: empty dictionary.

    Returns:
        Flattened dictionary.
    """
    for key, value in data.items():
        if isinstance(value, dict):
            flatten(value, new_data)
        if isinstance(value, (str, int, list)):
            new_data[key] = value
    return new_data
And then:
import json

import pandas as pd

with open("file.json") as f:
    content = json.load(f)

df = pd.DataFrame.from_dict(flatten(content, {}), orient="index").T
From here, you can deal with columns which contain lists of dictionaries with identical keys, but different values, by exploding them and repeating the other values, like this:
cols_with_lists = [col for col in df.columns if isinstance(df.loc[0, col], list)]

for col in cols_with_lists:
    temp_df = pd.concat(
        [pd.DataFrame(item, index=[i]) for i, item in enumerate(df.loc[0, col])],
        axis=0,
    )
    df = pd.concat([df.drop(columns=[col]), temp_df], axis=1).fillna(method="ffill")
So that, finally, the json file is entirely flattened:
print(df)
# Output
#xmlns #language ... #primaryIndicator Name
0 http://www.xml.org/2013/2/XML eng ... Yes Household Appliances
1 http://www.xml.org/2013/2/XML eng ... Yes Computer & Electronics Retail
2 http://www.xml.org/2013/2/XML eng ... Yes Specialty Stores
3 http://www.xml.org/2013/2/XML eng ... Yes Home Improvement Retail
4 http://www.xml.org/2013/2/XML eng ... Yes Housewares & Specialties
[5 rows x 73 columns]
A little hacky, but you can extract the columns that have a list type in them, then use reduce to recursively explode and normalize all columns until there are no more lists/objects.
I haven't tested this thoroughly, but it would look something like the following (a short usage sketch follows the code).
from functools import reduce

import pandas as pd

def full_explode_normalize(df):
    # Extract list columns
    explode_cols = [x for x in df.columns if isinstance(df.iloc[0][x], list)]
    if len(explode_cols) < 1:
        return df
    # Explode and normalize the list columns
    df = reduce(_explode, explode_cols, df)
    return df

def _explode(df, col):
    df = df.explode(col)
    if isinstance(df.iloc[0][col], list):
        df = _explode(df, col)
    elif isinstance(df.iloc[0][col], dict):  # dict check; isinstance(..., object) would always be true
        df_child = pd.json_normalize(df[col])
        # To prevent column name collision, add the parent column name as prefix.
        df_child.columns = [f'{col}.{x}' for x in df_child.columns]
        df = pd.concat([df.loc[:, ~df.columns.isin([col])].reset_index(drop=True), df_child], axis=1)
    return df
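For example, reusing the flat single-row frame built from the question's data, a usage sketch (here resp stands for the parsed dict from the xmltodict step above; as noted, treat this as untested):

df = pd.json_normalize(resp)
full_df = full_explode_normalize(df)
# every list column has been exploded and every nested object normalized
print(full_df.shape)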

Extract a subset of deeply embedded JSON and print only the key/value pairs I am interested in

I have a deeply embedded JSON file.
I want to extract and parse only the subset I am interested in; in my case, all content under the 'node' key.
How can I:
extract the subset of this JSON file that contains "edges[].node" (edges is the 'parent' key of node)
within the 'node' section, keep only the key/value pairs of
.url,
.headline.default (this one is a 'grandchild' of the 'node' key),
.firstPublished
I want to keep only the above 3 items inside the 'node' key.
How can I print out this slimmed-down version of the JSON file?
A nice-to-have option: can I still keep the structure/full path that leads from the JSON root down to the embedded 'node' subset I am interested in?
Here is the jqplay link with the full content of my JSON file.
The full content is also attached here:
{
"data": {
"legacyCollection": {
"longDescription": "The latest news, analysis and investigations from Europe.",
"section": {
"name": "world",
"url": "/section/world"
},
"collectionsPage": {
"stream": {
"pageInfo": {
"hasNextPage": true,
"__typename": "PageInfo"
},
"__typename": "AssetsConnection",
"edges": [
{
"node": {
"url": "https://www.nytimes.com/video/world/europe/100000008323381/icc-war-crimes-ukraine.html",
"firstPublished": "2022-04-27T23:28:33.241Z",
"headline": {
"default": "I.C.C. Joins Investigation of War Crimes in Ukraine",
"__typename": "CreativeWorkHeadline"
},
"summary": "Karim Khan, the chief prosecutor of the International Criminal Court, said that his organization would participate in a joint effort — with Ukraine, Poland and Lithuania — to investigate war crimes committed since Russia’s invasion.",
"promotionalMedia": {
"__typename": "Image",
"id": "SW1hZ2U6bnl0Oi8vaW1hZ2UvYTY3MTVhNDUtZDE0NS01OWZjLThkZWItNzYxMWViN2UyODhk"
},
"embedded": false
},
"__typename": "AssetsEdge"
},
{
"node": {
"__typename": "Article",
"url": "https://www.nytimes.com/2022/04/27/sports/soccer/chelsea-sale-roman-abramovich.html",
"firstPublished": "2022-04-27T19:42:17.000Z",
"typeOfMaterials": [
"News"
],
"archiveProperties": {
"lede": "",
"__typename": "ArticleArchiveProperties"
},
"headline": {
"default": "Endgame Nears in Bidding for Chelsea F.C.",
"__typename": "CreativeWorkHeadline"
},
"summary": "The American bank selling the English soccer team on behalf of its Russian owner could name its preferred suitor by the end of the week. But the drama isn’t over.",
"translations": []
},
"__typename": "AssetsEdge"
}
],
"totalCount": 52559
}
},
"sourceId": "100000004047788",
"tagline": "",
"__typename": "LegacyCollection"
}
}
}
Here is the command I have (jqplay demo):
.data.legacyCollection.collectionsPage.stream.edges[].node |= with_entries(select([.key] | inside(["default","url","firstPublished"])))
And here is the output I got
{
"data": {
"legacyCollection": {
"longDescription": "The latest news, analysis and investigations from Europe.",
"section": {
"name": "world",
"url": "/section/world"
},
"collectionsPage": {
"stream": {
"pageInfo": {
"hasNextPage": true,
"__typename": "PageInfo"
},
"__typename": "AssetsConnection",
"edges": [
{
"node": {
"url": "https://www.nytimes.com/video/world/europe/100000008323381/icc-war-crimes-ukraine.html",
"firstPublished": "2022-04-27T23:28:33.241Z"
},
"__typename": "AssetsEdge"
},
{
"node": {
"url": "https://www.nytimes.com/2022/04/27/sports/soccer/chelsea-sale-roman-abramovich.html",
"firstPublished": "2022-04-27T19:42:17.000Z"
},
"__typename": "AssetsEdge"
}
],
"totalCount": 52559
}
},
"sourceId": "100000004047788",
"tagline": "",
"__typename": "LegacyCollection"
}
}
}
Here is the output I expect to have
{
"data": {
"legacyCollection": {
"collectionsPage": {
"stream": {
"edges": [
{
"node": {
"url": "https://www.nytimes.com/video/world/europe/100000008323381/icc-war-crimes-ukraine.html",
"firstPublished": "2022-04-27T23:28:33.241Z"
}
},
{
"node": {
"url": "https://www.nytimes.com/2022/04/27/sports/soccer/chelsea-sale-roman-abramovich.html",
"firstPublished": "2022-04-27T19:42:17.000Z"
}
}
]
}
}
}
}
}
Here's a (somewhat) declarative solution:
(.data.legacyCollection.collectionsPage.stream.edges
 | map({node: (.node
               | {url,
                  firstPublished,
                  headline: {default: .headline.default}})})) as $edges
| {data: {
     legacyCollection: {
       collectionsPage: {
         stream: {
           $edges
         }
       }
     }
   }}
Here's one way to make the selection while ensuring that the structure is preserved. This solution may be of interest because
it can easily be adapted for use with jq's "--stream" option.
def array_startswith($head): .[: $head|length] == $head;

. as $in
| ["data", "legacyCollection", "collectionsPage", "stream", "edges"] as $head
| ($head|length) as $len
| reduce (paths
          | select(array_startswith($head) and .[1+$len] == "node")) as $p
    (null;
     if ((($p|length) == $len + 3) and ($p[-1] | IN("url", "firstPublished")))
        or ((($p|length) == $len + 4) and $p[-2:] == ["headline", "default"])
     then setpath($p; $in | getpath($p))
     else .
     end)

How to get the required JSON output from a complex nested JSON format?

My original file is in CSV format, which I have converted to a Python JSON array and then to a JSON string.
jsonfile
<class 'list'>
<class 'dict'>
[
{
"key": "timestamp",
"source": "eia007",
"turnover": "65million",
"url": "abc.com",
"record": "",
"loc.reg": "nord000",
"loc.count": "abs39i5",
"loc.town": "cold54",
"co.gdp": "nscrt77",
"co.pop.min": "min50",
"co.pop.max": "max75",
"co.rev": "",
"chain.system": "5t5t5",
"chain.type": "765ef",
"chain.strat": "",
}
]
I would like to get the output as below:
{
"timestamp001": {
"key": "timestamp001",
"phNo": "ner007",
"turnover": "65million",
"url": "abc.com",
"record": "",
"loc": {
"reg": "nord000",
"count": "abs39i5",
"town": "cold54"
},
"co": {
"form": "nscrt77",
"pop": {
"min": "min50",
"max": "max75"
},
"rev: ""
},
"chain":{
"system": "5t5t5",
"type": "765ef",
"strat": ""
}
...
}
...
}
]
I have tried different options, including enumerate, but cannot get the required output. Please help me with this. Thanks in advance.
You can use something like this to create the nested dict:
import json

def unflatten(somedict):
    unflattened = {}
    for key, value in somedict.items():
        splitkey = key.split(".")
        print(f"doing {key} {value} {splitkey}")
        # subdict is the dict that goes deeper in the nested structure
        subdict = unflattened
        for subkey in splitkey[:-1]:
            # if this is the first time we see this key, add it
            if subkey not in subdict:
                subdict[subkey] = {}
            # shift the subdict a level deeper
            subdict = subdict[subkey]
        # add the value
        subdict[splitkey[-1]] = value
    return unflattened
data = {
    "key": "timestamp",
    "source": "eia007",
    "turnover": "65million",
    "url": "abc.com",
    "record": "",
    "loc.reg": "nord000",
    "loc.count": "abs39i5",
    "loc.town": "cold54",
    "co.gdp": "nscrt77",
    "co.pop.min": "min50",
    "co.pop.max": "max75",
    "co.rev": "",
    "chain.system": "5t5t5",
    "chain.type": "765ef",
    "chain.strat": "",
}

unflattened = unflatten(data)
print(json.dumps(unflattened, indent=4))
Which produces:
{
    "key": "timestamp",
    "source": "eia007",
    "turnover": "65million",
    "url": "abc.com",
    "record": "",
    "loc": {
        "reg": "nord000",
        "count": "abs39i5",
        "town": "cold54"
    },
    "co": {
        "gdp": "nscrt77",
        "pop": {
            "min": "min50",
            "max": "max75"
        },
        "rev": ""
    },
    "chain": {
        "system": "5t5t5",
        "type": "765ef",
        "strat": ""
    }
}
Cheers!
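The desired output in the question also keys each record by its "key" value at the top level. Assuming the CSV rows arrive as a list of flat dicts like the one shown in the question (the records name below is illustrative), that outer level can be added with a small wrapper:

# sketch: records is the list of flat dicts from the question
records = [data]
nested = {rec["key"]: unflatten(rec) for rec in records}
print(json.dumps(nested, indent=4))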

Parse Complex JSON -- Map

I need to parse the complex JSON (below) in Scala to get the values of "expression" and "value" under the "measures" key, i.e. I need List(COUNT, COUNT_DISTINCT, ...) and List(1, tbl1.USER_ID, ...).
I tried multiple options, but it is not working. Any help is appreciated.
{
"uuid": "uuidddd",
"last_modified": 1559080222953,
"version": "2.6.1.0",
"name": "FULL_DAY_2_mand_date",
"is_draft": false,
"model_name": "FULL_DAY_1_may05",
"description": "",
"null_string": null,
"dimensions": [
{
"name": "PLATFORM",
"table": "tbl1",
"column": "PLATFORM",
"derived": null
},
{
"name": "OS_VERSION",
"table": "tbl1",
"column": "OS_VERSION",
"derived": null
}
],
"measures": [
{
"name": "_COUNT_",
"function": {
"expression": "COUNT",
"parameter": {
"type": "constant",
"value": "1"
},
"returntype": "bigint"
}
},
{
"name": "UU",
"function": {
"expression": "COUNT_DISTINCT",
"parameter": {
"type": "column",
"value": "tbl1.USER_ID"
},
"returntype": "hllc(12)"
}
},
{
"name": "CONT_SIZE",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "tbl1.SIZE"
},
"returntype": "bigint"
}
},
{
"name": "CONT_COUNT",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "tbl1.COUNT"
},
"returntype": "bigint"
}
}
],
"dictionaries": [],
"rowkey": {
"rowkey_columns": [
{
"column": "tbl1.OS_VERSION",
"encoding": "dict",
"encoding_version": 1,
"isShardBy": false
},
{
"column": "tbl1.PLATFORM",
"encoding": "dict",
"encoding_version": 1,
"isShardBy": false
},
{
"column": "tbl1.DEVICE_FAMILY",
"encoding": "dict",
"encoding_version": 1,
"isShardBy": false
}
]
},
"hbase_mapping": {
"column_family": [
{
"name": "F1",
"columns": [
{
"qualifier": "M",
"measure_refs": [
"_COUNT_",
"CONT_SIZE",
"CONT_COUNT"
]
}
]
},
{
"name": "F2",
"columns": [
{
"qualifier": "M",
"measure_refs": [
"UU"
]
}
]
}
]
},
"aggregation_groups": [
{
"includes": [
"tbl1.PLATFORM",
"tbl1.OS_VERSION"
],
"select_rule": {
"hierarchy_dims": [],
"mandatory_dims": [
"tbl1.DATE_HR"
],
"joint_dims": []
}
}
],
"signature": "ttrrs==",
"notify_list": [],
"status_need_notify": [
"ERROR",
"DISCARDED",
"SUCCEED"
],
"partition_date_start": 0,
"partition_date_end": 3153600000000,
"auto_merge_time_ranges": [
604800000,
2419200000
],
"volatile_range": 0,
"retention_range": 0,
"engine_type": 4,
"storage_type": 2,
"override_kylin_properties": {
"job.queuename": "root.production.P0",
"is-mandatory-only-valid": "true"
},
"cuboid_black_list": [],
"parent_forward": 3,
"mandatory_dimension_set_list": [],
"snapshot_table_desc_list": []
}
This is a snippet of the code I tried; it gives me an empty list.
import org.json4s._
import org.json4s.jackson.JsonMethods._

implicit val formats = org.json4s.DefaultFormats

case class Function(
  expression: String,
  parameter: Parameter,
  returntype: String
)

case class Parameter(
  `type`: String,
  value: String
)

case class Measures(
  name: String,
  function: Function
)

case class AllMeasuresData(uuid: String, measure: List[Measures])

val data = parse(tmp).extract[AllMeasuresData]
val names = data.measure.map(_.name)
println(names)
There are a couple of typos in your ADT. Here is what you need:
case class Function(
  expression: String,
  parameter: Parameter,
  returntype: String
)

case class Parameter(
  `type`: String,
  value: String
)

case class Measures(
  name: String,
  function: Function
)

case class AllMeasuresData(uuid: String, measures: List[Measures])
There is also an extra trailing comma in the JSON; here is the corrected version:
{
"uuid":"uuidddd",
"last_modified":1559080222953,
"version":"2.6.1.0",
"name":"FULL_DAY_2_mand_date",
"is_draft":false,
"model_name":"FULL_DAY_1_may05",
"description":"",
"null_string":null,
"dimensions":[
{
"name":"PLATFORM",
"table":"tbl1",
"column":"PLATFORM",
"derived":null
},
{
"name":"OS_VERSION",
"table":"tbl1",
"column":"OS_VERSION",
"derived":null
} // There was an extra trailing comma here
],
"measures":[
{
"name":"_COUNT_",
"function":{
"expression":"COUNT",
"parameter":{
"type":"constant",
"value":"1"
},
"returntype":"bigint"
}
},
{
"name":"UU",
"function":{
"expression":"COUNT_DISTINCT",
"parameter":{
"type":"column",
"value":"tbl1.USER_ID"
},
"returntype":"hllc(12)"
}
},
{
"name":"CONT_SIZE",
"function":{
"expression":"SUM",
"parameter":{
"type":"column",
"value":"tbl1.SIZE"
},
"returntype":"bigint"
}
},
{
"name":"CONT_COUNT",
"function":{
"expression":"SUM",
"parameter":{
"type":"column",
"value":"tbl1.COUNT"
},
"returntype":"bigint"
}
}
],
"dictionaries":[
],
"rowkey":{
"rowkey_columns":[
{
"column":"tbl1.OS_VERSION",
"encoding":"dict",
"encoding_version":1,
"isShardBy":false
},
{
"column":"tbl1.PLATFORM",
"encoding":"dict",
"encoding_version":1,
"isShardBy":false
},
{
"column":"tbl1.DEVICE_FAMILY",
"encoding":"dict",
"encoding_version":1,
"isShardBy":false
}
]
},
"hbase_mapping":{
"column_family":[
{
"name":"F1",
"columns":[
{
"qualifier":"M",
"measure_refs":[
"_COUNT_",
"CONT_SIZE",
"CONT_COUNT"
]
}
]
},
{
"name":"F2",
"columns":[
{
"qualifier":"M",
"measure_refs":[
"UU"
]
}
]
}
]
},
"aggregation_groups":[
{
"includes":[
"tbl1.PLATFORM",
"tbl1.OS_VERSION"
],
"select_rule":{
"hierarchy_dims":[
],
"mandatory_dims":[
"tbl1.DATE_HR"
],
"joint_dims":[
]
}
}
],
"signature":"ttrrs==",
"notify_list":[
],
"status_need_notify":[
"ERROR",
"DISCARDED",
"SUCCEED"
],
"partition_date_start":0,
"partition_date_end":3153600000000,
"auto_merge_time_ranges":[
604800000,
2419200000
],
"volatile_range":0,
"retention_range":0,
"engine_type":4,
"storage_type":2,
"override_kylin_properties":{
"job.queuename":"root.production.P0",
"is-mandatory-only-valid":"true"
},
"cuboid_black_list":[
],
"parent_forward":3,
"mandatory_dimension_set_list":[
],
"snapshot_table_desc_list":[
]
}
Now you can run:
val data = parse(tmp).extract[AllMeasuresData]
val names = data.measures.map(_.name)
println(names)
// Displays
// List(_COUNT_, UU, CONT_SIZE, CONT_COUNT)
Your Parameter class does not match the JSON because you have used type1 rather than type as the field name. Use backticks to use "type" as a field name even though it is a reserved word:
case class Parameter(
  `type`: String,
  value: String
)
You also need to change the Function class as it has returntype1 rather than returntype:
case class Function(
  expression: String,
  parameter: Parameter,
  returntype: String
)
The names of the fields in Scala must exactly match the names of the fields in the JSON. Extra fields in the JSON are ignored, but all the fields in the Scala must have matching fields in the JSON. If there are optional fields in the JSON then the Scala field type should be Option[...].

Read JSON File for Records using Linq

Following is my JSON file. I have to get the fields mentioned for each page and for each type as a comma-separated string. Please help with how to proceed using LINQ.
Example: if I want the type "CUSTOMIZEDFIELDS" defined for "page1", I have to get the output as the comma-separated string ProjectID,EmployeeID,EmployeeName,HasExpiration, etc. (a sketch of the lookup follows the JSON below).
{
"Fields": {
"Pages": {
"Page": {
"-Name": "page1",
"Type": [
{
"-TypeID": "CUSTOMIZEDFIELDS",
"Field": [
"ProjectID",
"EmployeeID",
"EmployeeName",
"HasExpiration",
"EndDate",
"OTStrategy",
"Division",
"AddTimesheets",
"SubmitTimesheets",
"ManagerTimesheetApprovalRequired",
"OTAllowed",
"AddExpenses",
"SubmitExpenses",
"ManagerExpenseApprovalRequired",
"SendApprovalEmails"
]
},
{
"-TypeID": "CFDATASET",
"Field": [
"ProjectID",
"EmployeeID",
"EmployeeName",
"HasExpiration",
"EndDate",
"OTStrategy",
"Division",
"AddTimesheets",
"SubmitTimesheets",
"ManagerTimesheetApprovalRequired",
"OTAllowed",
"AddExpenses",
"SubmitExpenses",
"ManagerExpenseApprovalRequired",
"SendApprovalEmails"
]
},
{
"-TypeID": "CFDATASETCAPTION",
"Field": [
"ProjectID",
"EmployeeID",
"EmployeeName",
"HasExpiration",
"EndDate",
"OTStrategy",
"Division",
"AddTimesheets",
"SubmitTimesheets",
"ManagerTimesheetApprovalRequired",
"OTAllowed",
"AddExpenses",
"SubmitExpenses",
"ManagerExpenseApprovalRequired",
"SendApprovalEmails"
]
}
]
}
}
}
}
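As a sketch of the required lookup, shown here in Python rather than LINQ (the file name file.json and the hard-coded page/type names are assumptions taken from the snippet above), the shape of the traversal is: pick the Page whose "-Name" matches, find the Type entry whose "-TypeID" matches, and join its "Field" list with commas:

import json

# sketch of the lookup; "file.json" and the literals below are assumptions
with open("file.json") as f:
    doc = json.load(f)

page = doc["Fields"]["Pages"]["Page"]  # this file holds a single page object
if page["-Name"] == "page1":
    for entry in page["Type"]:
        if entry["-TypeID"] == "CUSTOMIZEDFIELDS":
            # comma-separated field list, as requested
            print(",".join(entry["Field"]))

An equivalent LINQ query over the parsed JSON would follow the same path.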