How to parse nested Json in SQL Server

How to parse nested Json in SQL Server - json

I retrieved JSON from an API (part of the JSON file is shown at the bottom). I was hoping to parse the JSON and store it in a SQL table. With the following SQL query, only 1 row was returned. How can I return all rows, with the column headers NAME, JobNum, Water and Sewer? I tried a WHILE loop using a variable to replace the [0] after $.items, but it did not seem to work. I wasn't sure whether the structure of the JSON file works with CROSS APPLY.
-- Load a JSON file into a variable, validate it, and project columns from it.
-- T-SQL local variables must be prefixed with '@' ('#' is the temporary-table prefix
-- and is not accepted by DECLARE).
DECLARE @MondayComApi VARCHAR(MAX)

-- Read the whole file as a single value.
SELECT @MondayComApi = BULKCOLUMN
FROM OPENROWSET(BULK'D:/temp/a.json', SINGLE_BLOB) JSON

IF (ISJSON(@MondayComApi) = 1)
BEGIN
    PRINT 'JSON File is valid';

    -- The rowset is built from the elements of $.data.boards, and every column path
    -- is pinned to items[0], so only the first item of each board is projected.
    SELECT NAME, JobNum, Water, Sewer
    FROM OPENJSON(@MondayComApi, '$.data.boards')
    WITH (
        NAME VARCHAR(100) '$.items[0].name',
        JobNum VARCHAR(100) '$.items[0].column_values[0].text',
        Water VARCHAR(100) '$.items[0].column_values[1].text',
        Sewer VARCHAR(100) '$.items[0].column_values[2].text'
    )
END
ELSE
BEGIN
    PRINT 'JSON File is invalid';
END
The following was part of the JSON - I reduced content of "items" to shorten length:
{
"data": {
"boards": [
{
"items": [
{
"name": "Holmes Project",
"column_values": [
{
"title": "Job",
"text": "D1210"
},
{
"title": "Water",
"text": "YES"
},
{
"title": "Sewer",
"text": "YES"
}
]
},
{
"name": "Lake Short Project)",
"column_values": [
{
"title": "Job",
"text": "D1014"
},
{
"title": "Water",
"text": "YES"
},
{
"title": "Sewer",
"text": "YES"
}
]
},
{
"name": "Chase Project",
"column_values": [
{
"title": "Job",
"text": "D2101"
},
{
"title": "Water",
"text": "NO"
},
{
"title": "Sewer",
"text": "YES"
}
]
},
{
"name": "Juanita Project",
"column_values": [
{
"title": "Job",
"text": "D1102"
},
{
"title": "Water",
"text": "YES"
},
{
"title": "Sewer",
"text": "YES"
}
]
},
{
"name": "Lowry Project",
"column_values": [
{
"title": "Job",
"text": "D1014"
},
{
"title": "Water",
"text": "YES"
},
{
"title": "Sewer",
"text": "YES"
}
]
}
]
}
]
},
"account_id": 5687438790
}

I moved more of the JSON path out of WITH and into OPENJSON:
-- One row per element of the first board's items array: the array path is given to
-- OPENJSON, and the WITH paths are relative to each item.
-- column_values are addressed by position (0 = Job, 1 = Water, 2 = Sewer in the sample).
SELECT NAME, JobNum, Water, Sewer
FROM
    OPENJSON(@MondayComApi, '$.data.boards[0].items')
    WITH (
        NAME VARCHAR(100) '$.name',
        JobNum VARCHAR(100) '$.column_values[0].text',
        Water VARCHAR(100) '$.column_values[1].text',
        Sewer VARCHAR(100) '$.column_values[2].text'
    )

Firstly, SINGLE_BLOB should be SINGLE_CLOB if it is ANSI or UTF-8 data.
Next, to break out a JSON array into separate rows, you need OPENJSON without a schema, then you use OPENJSON again on each row, where key column contains the index, and value contains the object.
If boards always contains only one object in its array, you can remove the second OPENJSON and change the path of the first to $.data.boards[0].items
Then we can break out the column_values and pivot them back up into a single row.
-- One output row per item: walk boards -> items, then pivot each item's
-- column_values (title/text pairs) into JobNum / Water / Sewer columns.
-- @j is the variable holding the JSON text (declared outside this snippet).
SELECT
    NAME = JSON_VALUE(item.value, '$.name'),
    cv.JobNum,
    cv.Water,
    cv.Sewer
FROM OPENJSON(@j, '$.data.boards') boards
CROSS APPLY OPENJSON(boards.value, '$.items') item
CROSS APPLY (
    -- Conditional aggregation: each CASE yields NULL for non-matching titles and
    -- MIN skips NULLs, so each column receives the text of its matching title.
    SELECT
        MIN(CASE WHEN title = 'Job' THEN [text] END) JobNum,
        MIN(CASE WHEN title = 'Water' THEN [text] END) Water,
        MIN(CASE WHEN title = 'Sewer' THEN [text] END) Sewer
    FROM OPENJSON(item.value, '$.column_values')
    WITH (
        title varchar(100),
        [text] varchar(100)
    ) column_values
) cv

Related

How to use SQL FOR JSON PATH - dot notation for Custom JSON output

I'm trying to output a SQL query results in a custom JSON format.
I've tried several dot notation formats (I believe necessary) to get the
desired format.
The table has test data like
Status = 'Test Status'
Type = 'Test Type'
Code = 'Test Code'
What I've tried;
-- Attempted query: each alias uses FOR JSON PATH dot notation, so the text before the
-- dot becomes a property name and the text after it becomes a nested property.
-- ROOT('fieldval') wraps the resulting array in a "fieldval" property.
SELECT
[Status] AS [id:950 .VALUE]
,[Type] AS [id:951 .VALUE]
,[Code] AS [id:952 .VALUE]
FROM MyTable
FOR JSON PATH, ROOT('fieldval')
Which gets me close with this;
{
"fieldval": [
{
"id:950 ": {
"VALUE": "Test Status"
},
"id:951 ": {
"VALUE": "Test Type"
},
"id:952 ": {
"VALUE": "Test Code"
}
}
]
}
But I need it in this format
{
"type": "CustomJSON",
"fieldval": [
{
"id": "950",
"value": "Test Status",
"fieldName": "Status"
},
{
"id": "951",
"value": "Test Type",
"fieldName": "Type"
},
{
"id": "952",
"value": "Test Code",
"fieldName": "Code"
}
]
}
What do I need to add/change? Thanks

You need to use JSON_QUERY() to add arrays of data to the outer JSON, e.g.:
-- Sample table: one row per entry of the "fieldval" array.
CREATE TABLE dbo.Example (
    ExampleID        NVARCHAR(3),   -- text on purpose: the required JSON has strings here, not numbers
    ExampleValue     NVARCHAR(11),
    ExampleFieldName NVARCHAR(6)
);

INSERT INTO dbo.Example (ExampleID, ExampleValue, ExampleFieldName)
VALUES
    (N'950', N'Test Status', N'Status'),
    (N'951', N'Test Type', N'Type'),
    (N'952', N'Test Code', N'Code');

-- The inner FOR JSON PATH builds the array of field objects; JSON_QUERY adds that
-- array to the outer JSON. WITHOUT_ARRAY_WRAPPER makes the outer result a single
-- object instead of a one-element array.
SELECT
    N'CustomJSON' AS [type],
    JSON_QUERY((
        SELECT
            ex.ExampleID        AS [id],
            ex.ExampleValue     AS [value],
            ex.ExampleFieldName AS [fieldName]
        FROM dbo.Example AS ex
        FOR JSON PATH
    )) AS [fieldval]
FOR JSON PATH, WITHOUT_ARRAY_WRAPPER;
Which yields the desired result:
{
"type": "CustomJSON",
"fieldval": [
{
"id": "950",
"value": "Test Status",
"fieldName": "Status"
},
{
"id": "951",
"value": "Test Type",
"fieldName": "Type"
},
{
"id": "952",
"value": "Test Code",
"fieldName": "Code"
}
]
}

Null objects while using Coalesce and duplicate values while joining

[
{
"permissions": [
{
"name": "CREATE",
"id": 1
},
{
"name": "DELETE",
"id": 4
}
],
"roles": [
{
"name": "ADMIN",
"permission": [
{
"name": "CREATE",
"id": 1
},
{
"name": "UPDATE",
"id": 2
},
{
"name": "GET",
"id": 3
},
{
"name": "DELETE",
"id": 4
}
],
"id": 1
},
{
"name": "ADMIN",
"permission": [
{
"name": "CREATE",
"id": 1
},
{
"name": "UPDATE",
"id": 2
},
{
"name": "GET",
"id": 3
},
{
"name": "DELETE",
"id": 4
}
],
"id": 1
}
],
"id": 1,
"username": "raj#100"
},
{
"permissions": [
{
"name": null,
"id": null
}
],
"roles": [
{
"name": "USER",
"permission": [
{
"name": "GET",
"id": 3
}
],
"id": 3
}
],
"id": 2,
"username": "ram145"
}
]
As you can see from the above output, the ADMIN role is repeated twice in roles for the first user. The second user has no permissions, so he should have an empty permissions array, but instead the output contains a permission object with all of its values null.
This is the jooq statement which is executed :
/**
 * Fetches users as JSON objects, each carrying its aggregated roles (with the
 * permissions of each role) and its directly assigned permissions.
 *
 * <p>USER is left-joined to USER_ROLE/ROLE and to USER_PERMISSION/PERMISSION and the
 * result is grouped by USER.ID.
 * NOTE(review): joining two independent to-many relationships multiplies their rows
 * before aggregation, which can repeat entries in the aggregated arrays.
 *
 * @param role       when non-null, adds the filter {@code ROLE.NAME = role}
 * @param permission when non-null, adds the filter {@code PERMISSION.NAME = permission}
 * @return the fetched records converted with {@code into(JSONObject.class)}
 */
public Object findAllUsers(String role, String permission) {
    SelectOnConditionStep<Record1<JSON>> query = dslContext.select(
        jsonObject(
            key("id").value(USER.ID),
            key("fullName").value(USER.FULL_NAME),
            key("username").value(USER.USERNAME),
            key("email").value(USER.EMAIL),
            key("mobile").value(USER.MOBILE),
            key("isActive").value(USER.IS_ACTIVE),
            key("lastLoggedIn").value(USER.LAST_LOGGED_IN),
            key("profileImage").value(USER.PROFILE_IMAGE),
            key("roles").value(
                coalesce(
                    jsonArrayAgg(
                        jsonObject(
                            key("id").value(ROLE.ID),
                            key("name").value(ROLE.NAME),
                            // Permissions of the current role, built by a correlated subquery.
                            key("permission").value(
                                coalesce(
                                    select(
                                        jsonArrayAgg(
                                            jsonObject(
                                                key("id").value(PERMISSION.ID),
                                                key("name").value(PERMISSION.NAME)
                                            )
                                        )
                                    ).from(ROLE_PERMISSION)
                                     .leftJoin(PERMISSION)
                                     .on(PERMISSION.ID.eq(ROLE_PERMISSION.PERMISSION_ID))
                                     .where(ROLE_PERMISSION.ROLE_ID.eq(ROLE.ID))
                                     .orderBy(PERMISSION.NAME.asc()),
                                    jsonArray()
                                )
                            )
                        )
                    ),
                    jsonArray()
                )
            ),
            key("permissions").value(
                coalesce(
                    jsonArrayAgg(
                        jsonObject(
                            key("id").value(PERMISSION.ID),
                            key("name").value(PERMISSION.NAME)
                        )
                    ),
                    jsonArray()
                )
            )
        )
    ).from(USER)
     .leftJoin(USER_ROLE).on(USER.ID.eq(USER_ROLE.USER_ID))
     .leftJoin(ROLE).on(USER_ROLE.ROLE_ID.eq(ROLE.ID))
     .leftJoin(USER_PERMISSION).on(USER.ID.eq(USER_PERMISSION.USER_ID))
     .leftJoin(PERMISSION).on(USER_PERMISSION.PERMISSION_ID.eq(PERMISSION.ID));

    if (role != null) {
        query.where(ROLE.NAME.eq(role));
    }
    if (permission != null) {
        // Filter on the permission argument, not on role.
        query.where(PERMISSION.NAME.eq(permission));
    }

    return query.groupBy(USER.ID)
        .orderBy(USER.ID.asc())
        .fetch().into(JSONObject.class);
}
Is there any way to fix this problem?

Why the duplicates?
Your join graph creates a cartesian product between the two "nested collections" ROLE and PERMISSION. You can't remove that cartesian product with GROUP BY alone, that works only if you join a single to-many relationship.
Instead, you can write subqueries like this (you already did this correctly for the ROLE_PERMISSION relationship):
// One JSON object per user. Each nested collection (roles, role permissions, user
// permissions) is produced by its own correlated subquery instead of a join on the
// outer query, so the collections cannot multiply each other's rows.
dslContext.select(jsonObject(
    key("id").value(USER.ID),
    key("username").value(USER.USERNAME),
    key("roles").value(coalesce(field(
        select(jsonArrayAgg(jsonObject(
            key("id").value(ROLE.ID),
            key("name").value(ROLE.NAME),
            key("permission").value(coalesce(field(
                select(coalesce(jsonArrayAgg(jsonObject(
                    key("id").value(PERMISSION.ID),
                    key("name").value(PERMISSION.NAME)
                )), jsonArray()))
                .from(ROLE_PERMISSION)
                .join(PERMISSION)
                .on(PERMISSION.ID.eq(ROLE_PERMISSION.PERMISSION_ID))
                .where(ROLE_PERMISSION.ROLE_ID.eq(ROLE.ID))
                .orderBy(PERMISSION.NAME.asc())
            ), jsonArray()))
        )))
        .from(USER_ROLE)
        .join(ROLE)
        .on(USER_ROLE.ROLE_ID.eq(ROLE.ID))
        .where(USER_ROLE.USER_ID.eq(USER.ID))
    ), jsonArray())),
    key("permissions").value(coalesce(field(
        // The inner coalesce takes the empty-array fallback as its second argument,
        // mirroring the role-permission subquery above.
        select(coalesce(jsonArrayAgg(jsonObject(
            key("id").value(PERMISSION.ID),
            key("name").value(PERMISSION.NAME)
        )), jsonArray()))
        .from(USER_PERMISSION)
        .join(PERMISSION)
        .on(USER_PERMISSION.PERMISSION_ID.eq(PERMISSION.ID))
        .where(USER_PERMISSION.USER_ID.eq(USER.ID))
    ), jsonArray()))
))
.from(USER)
.orderBy(USER.ID.asc())
.fetch().into(JSONObject.class);
Join vs semi join
After you edited your question to become a slightly different question, the point you were trying to make is that you want to filter the USER table by some ROLE or PERMISSION that they must have. You can't achieve this with JOIN alone (unless you're happy with the duplicates). The answer I gave doesn't change. If you're joining multiple to-many relationships, you'll get cartesian products.
So, instead, why not semi join them? Either with jOOQ's synthetic SEMI JOIN syntax, or manually using EXISTS or IN, e.g.
// Optional filters expressed as correlated EXISTS (semi join) predicates, so that
// filtering never duplicates user rows. Each subquery is tied to the outer USER row;
// without that correlation EXISTS would hold for every user as soon as any user
// had the requested role or permission.
.where(role != null
    ? exists(selectOne()
        .from(USER_ROLE)
        .where(USER_ROLE.USER_ID.eq(USER.ID))
        .and(USER_ROLE.role().NAME.eq(role))
    )
    : noCondition()
)
.and(permission != null
    ? exists(selectOne()
        .from(USER_PERMISSION)
        .where(USER_PERMISSION.USER_ID.eq(USER.ID))
        .and(USER_PERMISSION.permission().NAME.eq(permission))
    )
    : noCondition()
)
This is using the implicit join syntax, which is optional, but I think it does simplify your query.

Excel JSON VBA Parsing - Determining if an array is empty

I am trying to parse a JSON response. I cannot use the VBA-JSON library. I need to check to see if a nested array is empty or null. I keep getting this error:
Example JSON:
{
"gardenAssets": [],
"gardenAssetsAlertCount": 0,
"gardenAssetsCount": 0,
"gardenAssetsErrorCount": 0,
"locationsSummaries": [
{
"locations": [
{
"auditOrder": "102",
"code": "POT 102",
"name": "POT 102",
"type": "ProcessingLocation",
"gardenAssets": [
{
"annotation": "Pallets",
"broker": {
"code": "TMTO",
"isOwner": null,
"name": null
},
"datetimeOfArrivalIngarden": 1622754283.937,
"id": "crusaf",
"isSealable": true,
"load": null,
"mastergardenCode": null,
"name": null,
"owner": {
"code": "SUN",
"isOwner": null,
"name": null
}
}
]
},
{
"auditOrder": "103",
"code": "POT 103",
"description": "POT 103",
"id": "110746",
"name": "POT 103",
"type": "ProcessingLocation",
"gardenAssets": []
},
{
"auditOrder": "104",
"code": "POT 104",
"name": "POT 104",
"gardenAssets": [
{
"annotation": "Soil",
"broker": {
"code": "OTHR",
"isOwner": null,
"name": null
},
"datetimeOfArrivalIngarden": 1622571699.767,
"id": "arserana",
"isSealable": true,
"load": null,
"mastergardenCode": null,
"name": null,
"owner": {
"code": "WTR",
"isOwner": null,
"name": null
}
}
]
},
{
"auditOrder": "111",
"code": "POT 111",
"name": "POT 111",
"type": "ProcessingLocation",
"gardenAssets": [
{
"annotation": null,
"broker": {
"code": "CLD",
"isOwner": null,
"name": null
},
"datetimeOfArrivalIngarden": 1622746446.932,
"id": "Bacrea",
"isSealable": true,
"load": null,
"mastergardenCode": null,
"name": null,
"owner": {
"code": "ICE",
"isOwner": null,
"name": null
},
"status": "EMPTY",
"type": "JUNK",
"unavailable": false,
"visitId": "1003768526"
}
]
}
],
"logingarden": true,
"mastergardenCodes": [],
"gardenCode": "FUN5"
}
],
"offsitegardens": [],
"gardenAssetsInTransit": []}
Code:
Option Explicit
' Attempt to detect, per location, whether its gardenAssets array has content.
' S hosts a JScript engine through the ScriptControl COM object; Ks receives the
' locations array of the first locationsSummaries entry.
Dim S as Object, k, Ks as Object
Set S = CreateObject("ScriptControl")
S.Language = "JScript"
' JScript helper k(a): returns the property names of a, each formatted as ['name'].
S.addcode "function k(a){var k=[];for(var b in a){k.push('[\'' + b + '\']');}return k;}"
' Evaluate the HTTP response text as JScript and keep the result in J.
' NOTE(review): this executes the response body as script - confirm the source is trusted.
S.Eval ("var J = " & http.ResponseText)
S.Eval ("var L = J.locationsSummaries['0'].locations")
Set Ks = S.Eval("J.locationsSummaries['0'].locations")
For Each K In Ks
' NOTE(review): K.gardenAssets is passed to Eval as a value rather than as script text,
' and an empty array is presumably not Null, so this test may not detect emptiness - confirm.
If Not IsNull(S.Eval(K.gardenAssets)) = True Then
' Writes the marker into the first free row of column A.
Sheet1.Cells(Rows.Count, 1).End(xlUp).Offset(1) = "Assets"
End If
Next K
I need to pull different information out of the JSON depending on if there are any gardenAssets. But I can't seem to check to see if the array is empty or not.

You can use the length property in JScript.
' Writes one sheet row per garden asset: column A = location code, column B = asset id.
' Array sizes come from the JScript length property, so a location whose
' gardenAssets array is empty simply produces no rows.
Dim S As Object
Dim locIdx As Integer, assetIdx As Integer, outRow As Long
Dim locCount As Integer, assetCount As Integer

' First free row below the existing data in column A.
outRow = Sheet1.Cells(Rows.Count, 1).End(xlUp).Offset(1).Row

Set S = CreateObject("ScriptControl")
S.Language = "JScript"
S.eval "var J = " & http.ResponseText
S.eval "var A = J.locationsSummaries['0'].locations"

locCount = S.eval("A.length")
For locIdx = 0 To locCount - 1
    ' Bind the current location to the JScript variable L.
    S.eval "var L = A[" & locIdx & "]"
    assetCount = S.eval("L.gardenAssets.length")
    For assetIdx = 0 To assetCount - 1
        Sheet1.Cells(outRow, 1) = S.eval("L.code")
        Sheet1.Cells(outRow, 2) = S.eval("L.gardenAssets[" & assetIdx & "].id")
        outRow = outRow + 1
    Next assetIdx
Next locIdx

The example JSON isn't valid. The last member of an object or the last element of an array shouldn't have a comma after it. So where you have:
"broker": {
"code": "TMTO",
"isOwner": null,
"name": null,
}
There shouldn't be a comma after "name": null - there are multiple other errors like this in the example JSON.
You can use an online JSON validator (like this one) to detect these errors. You would ideally want to fix the system that is generating this invalid JSON rather than trying to correct the issues yourself during processing

Snowflake - Querying Nested JSON

I need some help querying this JSON file I've ingested into a temp table in Snowflake. So, I've created a JSON_DATA variant column and plan to query and do a COPY INTO another table, but my query isn't working yet... I feel I'm close (possibly?)
JSON layout:
{
"nextPage": "01",
"page": "0",
"status": "ok",
"transactions": [
{
"id": "65985",
"recordTp": "vendorbill",
"values": {
"account": [
{
"text": "14500 Deferred Expenses",
"value": "249"
}
],
"account.number": "1450",
"account.type": [
{
"text": "Deferred Expense",
"value": "DeferExpense"
}
],
"amount": "51733",
"classnohierarchy": [
{
"text": "901 Corporate",
"value": "139"
}
],
"currency": [
{
"text": "Canadian Dollar",
"value": "3"
}
],
"customer.altname": "V Sties expenses (Tor)",
"customer.custate": "12/31/2019",
"customer.custentient": "ada Inc.",
"customer.custendate": "1/1/2019",
"customer.entyid": "PR781",
"departmentnohierarchy": [
{
"text": "8rity",
"value": "37"
}
],
"fxamount": "689",
"location": [
{
"text": "Othad Projects",
"value": "48"
}
],
"postingperiod": [
{
"text": "Jan 2020",
"value": "1"
}
],
"subsidiary.custrecord_region": [
{
"text": "CANADA",
"value": "3"
}
],
"subsidiarynohierarchy": [
{
"text": "ada Inc.",
"value": "25"
}
]
}
},
I've been able to query the values that are not (deeply) nested but I need help getting, for example, the values from 'classnohierarchy', to get both the 'text' and 'value' I tried:
transactions.value:"values".classnohierarchy.text::string as class_txt,
transactions.value:"values".classnohierarchy.value::string as class_val,
but it's returning NULL values.
Below is my entire query:
-- Flattens JSON_DATA:transactions so that each array element is exposed as
-- transactions.value, then projects fields of that element and of its "values" object.
-- NOTE(review): in the sample JSON, account, "account.type", classnohierarchy, currency,
-- departmentnohierarchy, location, postingperiod, "subsidiary.custrecord_region" and
-- subsidiarynohierarchy are arrays of {text, value} objects; the paths below read
-- .text / .value on them without an element index.
-- NOTE(review): the sample JSON key is "recordTp", while this query reads recordType.
-- The ** markers are the author's emphasis around the two columns in question, not SQL.
SELECT
JSON_DATA:status::string as connection_status,
transactions.value:id::string as id,
transactions.value:recordType::string as record_type,
transactions.value:"values"::variant as trans_val,
transactions.value:"values".account as acc,
transactions.value:"values".account.text as text,
transactions.value:"values".account.value as val,
transactions.value:"values"."account.number"::string as acc_num,
transactions.value:"values"."account.type".text::string as acc_type_txt,
transactions.value:"values"."account.type".value::string as acc_type_val,
transactions.value:"values".amount::string as amount,
**transactions.value:"values".classnohierarchy.text::string as class_txt,
transactions.value:"values".classnohierarchy.value::string as class_val,**
transactions.value:"values".currency.text::string as currency_text,
transactions.value:"values".currency.value::string as currency_val,
transactions.value:"values"."customer.altname"::string as customer_project_name,
transactions.value:"values"."customer.custate"::string as customer_end_date,
transactions.value:"values"."customer.custentient"::string as customer_end_client,
transactions.value:"values"."customer.custendate"::string as customer_start_date,
transactions.value:"values"."customer.entyid"::string as customer_project_id,
transactions.value:"values".departmentnohierarchy.text::string as department_name,
transactions.value:"values".departmentnohierarchy.value::string as department_value,
transactions.value:"values".fxamount::string as fx_amount,
transactions.value:"values".location.text::string as product_name,
-- NOTE(review): the next two columns share the alias postingperiod.
transactions.value:"values".postingperiod.text::string as postingperiod,
transactions.value:"values".postingperiod.value::string as postingperiod,
transactions.value:"values"."subsidiary.custrecord_region".text::string as region_name,
transactions.value:"values"."subsidiary.custrecord_region".value::string as region_value,
transactions.value:"values".subsidiarynohierarchy.text::string as entity_name,
transactions.value:"values".subsidiarynohierarchy.value::string as entity_value,
FROM MY_TABLE,
LATERAL FLATTEN (JSON_DATA:transactions) as transactions
and here's a picture of whats showing in Snowflake:
SNOWFLAKE_SCREENSHOT

departmentnohierarchy is an array. you need to mention the index as below.
-- departmentnohierarchy is an array of {text, value} objects, so an element index is
-- required before a field can be read; [0] selects the first element.
-- The name comes from .text and the identifier from .value.
select *,
    transactions.VALUE:"values".departmentnohierarchy[0].text::text as department_name,
    transactions.VALUE:"values".departmentnohierarchy[0].value::text as department_value
FROM jsont1,
LATERAL FLATTEN (JSON_DATA:transactions) as transactions

AWS Athena - Querying JSON - Searching for Values

I have nested JSON files on S3 and am trying to query them with Athena.
However, I am having problems to query the nested JSON values.
My JSON file looks like this:
{
"id": "17842007980192959",
"acount_id": "17841401243773780",
"stats": [
{
"name": "engagement",
"period": "lifetime",
"values": [
{
"value": 374
}
],
"title": "Engagement",
"description": "Total number of likes and comments on the media object",
"id": "17842007980192959/insights/engagement/lifetime"
},
{
"name": "impressions",
"period": "lifetime",
"values": [
{
"value": 11125
}
],
"title": "Impressions",
"description": "Total number of times the media object has been seen",
"id": "17842007980192959/insights/impressions/lifetime"
},
{
"name": "reach",
"period": "lifetime",
"values": [
{
"value": 8223
}
],
"title": "Reach",
"description": "Total number of unique accounts that have seen the media object",
"id": "17842007980192959/insights/reach/lifetime"
},
{
"name": "saved",
"period": "lifetime",
"values": [
{
"value": 0
}
],
"title": "Saved",
"description": "Total number of unique accounts that have saved the media object",
"id": "17842007980192959/insights/saved/lifetime"
}
],
"import_date": "2017-12-04"
}
What I'm trying to do is to query the "stats" field value where name=impressions.
So ideally something like:
SELECT id, account_id, stats.values.value WHERE stats.name='engagement'
AWS example: https://docs.aws.amazon.com/athena/latest/ug/searching-for-values.html
Any help would be appreciated.

You can query the JSON with the following table definition:
-- External table over the JSON files: stats is an array of structs, and each stat
-- carries its own "values" array of single-field structs.
CREATE EXTERNAL TABLE test(
    id string,
    acount_id string,
    stats array<
        struct<
            name:string,
            period:string,
            values:array<
                struct<value:string>>,
            title:string
        >
    >
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION 's3://bucket/';
Now, the value column is available through the following unnesting:
-- Expand stats into one row per stat, expand each stat's "values" array into one row
-- per value, and keep only the rows of the 'engagement' stat.
SELECT
    id,
    acount_id,
    stat.name,
    x.value
FROM test
CROSS JOIN UNNEST(test.stats) AS st(stat)
CROSS JOIN UNNEST(stat."values") AS valx(x)
WHERE stat.name = 'engagement';

We Keep Coding

html mysql json google-apps-script actionscript-3 ms-access google-chrome google-maps reporting-services sql-server-2008

How to parse nested Json in SQL Server - json

Related

How to use SQL FOR JSON PATH - dot notation for Custom JSON output

Null objects while using Coalesce and duplicate values while joining

Excel JSON VBA Parsing - Determining if an array is empty

Snowflake - Querying Nested JSON

AWS Athena - Querying JSON - Searching for Values

Categories

Resources