Parse Nested JSON into SQL Table

I am trying to get a JSON file parsed into a usable format so I can insert it into a SQL table.
The JSON file I have is heavily nested (and I can't get the vendor to change it at this point), and it uses the same key names at different levels.
I have used the following code to start off, but it is the multiple sections, and potentially multiple accounts etc., that have me stumped. I know I will probably need to iterate through them somehow, but I'm just not sure where to begin.
DECLARE @JSON VARCHAR(MAX)
SELECT @JSON = BulkColumn
FROM OPENROWSET
(BULK 'C:\Users\joshu\Downloads\Cashflow.JSON', SINGLE_CLOB)
AS j
If (ISJSON(@JSON)=1)
Select * FROM OPENJSON (@JSON,'$.data')
with
(
[id] nvarchar(50),
[title] nvarchar(50),
[sections] nvarchar(max) as json
) data_Structure
cross apply openjson(data_structure.sections, '$')
with (
[income] nvarchar(max) as json
) data2
--Income is one type, there should be a loop here
cross apply openjson(data2.income, '$')
That gets me down the tree. The last data points that I want to collect are based on this bit of code:
DECLARE @JSON VARCHAR(MAX)
SELECT @JSON = BulkColumn
FROM OPENROWSET
(BULK 'C:\Users\joshu\Downloads\Cashflow.JSON', SINGLE_CLOB)
AS j
If (ISJSON(@JSON)=1)
Select * FROM OPENJSON (@JSON,'$.data.sections.income.sections.tracker_1193.sections.tracker_1193_income.rows."b5cfd1ce-bb7f-4f5c-a6b4-12b469ff0b9d".data."2017-06"')
with
(
[date] nvarchar(50),
[value] decimal(18,2))
A sample of the JSON is here
"data": {
"id": "cashflow",
"title": "Cashflow Report",
"sections": {
"income": {
"id": "income",
"title": "Income",
"sections": {
"tracker_1193": {
"id": "tracker_1193",
"title": "xxxxxxx",
"sections": {
"tracker_1193_income": {
"id": "tracker_1193_income",
"title": "Income",
"sections": null,
"rows": {
"b5cfd1ce-bb7f-4f5c-a6b4-12b469ff0b9d": {
"account_id": "b5cfd1ce-bb7f-4f5c-a6b4-12b469ff0b9d",
"account_name": "Bobby Calf Sales",
"data": {
"2017-06": {
"date": "2017-06",
"value": 0
},
"2017-09": {
"date": "2017-09",
"value": 4801.36
},
"2017-12": {
"date": "2017-12",
"value": 1997.33
Now the fun part.
The income section is the most complicated:
The first "section" in the JSON data is one of 13.
The second "section" is variable; there could be 1, or 12.
The third "section" is fixed to 3 (income, costs, gross-profit).
The rest is more straightforward:
{
  "data": {
    "id": "cashflow",
    "title": "Cashflow Report",
    "sections": {
      //other data ahead of this
      "operating_expenses": {
        "id": "operating_expenses",
        "title": "Operating Expenses",
        "sections": {
          "operating_expenses_animal_health_animal": {
            "id": "operating_expenses_animal_health_animal",
            "title": "Animal Health",
            "sections": null,
            "rows": {
              "0de82545-be93-4fb5-9d20-fa076af48e40": {
                "account_id": "0de82545-be93-4fb5-9d20-fa076af48e40",
                "account_name": "Animal Health - Minerals",
                "data": {
                  "2019-07": {
                    "date": "2019-07",
                    "value": 5827.93
                  }
                }
              },
              "9ba329a6-f77e-4779-9d79-28dd20465b9c": {
                "account_id": "9ba329a6-f77e-4779-9d79-28dd20465b9c",
                "account_name": "Animal Health - Other",
                "data": {
                  "2019-07": {
                    "date": "2019-07",
                    "value": 663.73
                  }
                }
              },
              "4f406965-3355-4968-a5ba-519d9706f329": {
                "account_id": "4f406965-3355-4968-a5ba-519d9706f329",
                "account_name": "Animal Health - Treatments",
                "data": {
                  "2019-07": {
                    "date": "2019-07",
                    "value": 8670.1
                  }
                }
              },
              "79c8ab89-22a2-4c5c-b591-0a3d95a4a95b": {
                "account_id": "79c8ab89-22a2-4c5c-b591-0a3d95a4a95b",
                "account_name": "Animal Health - Vet",
                "data": {
                  "2019-07": {
                    "date": "2019-07",
                    "value": 7645.18
                  }
                }
              }
            },
            "totals": {
              "2019-07": {
                "date": "2019-07",
                "value": 22806.94
              }
            }
          },
Because of the nature of this data, I haven't sorted my SQL table structure yet, but I am imagining it to be something along the lines of the below:
That's about it. I am stuck and need some help/guidance, so anything you can do to assist is greatly appreciated.

I've made an attempt with your JSON to traverse its data without having to define explicit keys. This only handles the "Income" portion; however, it should get you moving in the right direction to extract your data into SQL Server. Note that because "rows" can have multiple values, some data is duplicated.
DECLARE @data nvarchar(MAX) = '{"data":{"id":"cashflow","title":"Cashflow Report","sections":{"income":{"id":"income","title":"Income","sections":{"tracker_1193":{"id":"tracker_1193","title":"xxxxxxx","sections":{"tracker_1193_income":{"id":"tracker_1193_income","title":"Income","sections":null,"rows":{"b5cfd1ce-bb7f-4f5c-a6b4-12b469ff0b9d":{"account_id":"b5cfd1ce-bb7f-4f5c-a6b4-12b469ff0b9d","account_name":"Bobby Calf Sales","data":{"2017-06":{"date":"2017-06","value":0},"2017-09":{"date":"2017-09","value":4801.36},"2017-12":{"date":"2017-12","value":1997.33}}}}}}}}}}}}';
SELECT
section_id,
section_title,
tracker_id,
tracker_title,
string_id,
string_title,
account_id,
account_title,
[period],
period_value
FROM OPENJSON ( @data, '$.data.sections.income' )
WITH (
section_id varchar(50) '$.id',
section_title varchar(50) '$.title',
sections nvarchar(max) '$.sections' AS JSON
) AS dat
OUTER APPLY (
SELECT * FROM OPENJSON ( dat.sections ) AS a
CROSS APPLY (
SELECT * FROM OPENJSON ( a.value )
WITH (
tracker_id varchar(50) '$.id',
tracker_title varchar(50) '$.title',
tracker_income nvarchar(max) '$.sections' AS json
)
) AS b
CROSS APPLY (
SELECT [key] AS income_key, [value] AS income_value FROM OPENJSON ( b.tracker_income )
) AS c
CROSS APPLY (
SELECT * FROM OPENJSON ( c.income_value )
WITH (
string_id varchar(50) '$.id',
string_title varchar(50) '$.title',
income_rows nvarchar(max) '$.rows' AS json
)
) AS d
CROSS APPLY (
SELECT [key] AS account_key, [value] AS account_value FROM OPENJSON ( d.income_rows )
) e
CROSS APPLY (
SELECT * FROM OPENJSON ( e.account_value )
WITH (
account_id varchar(255) '$.account_id',
account_title varchar(50) '$.account_name',
account_data nvarchar(max) '$.data' AS json
)
) f
CROSS APPLY (
SELECT [key] AS [period], JSON_VALUE ( [value], '$.value' ) AS period_value FROM OPENJSON ( f.account_data )
) g
) AS Income;
Returns
+------------+---------------+--------------+---------------+---------------------+--------------+--------------------------------------+------------------+---------+--------------+
| section_id | section_title | tracker_id | tracker_title | string_id | string_title | account_id | account_title | period | period_value |
+------------+---------------+--------------+---------------+---------------------+--------------+--------------------------------------+------------------+---------+--------------+
| income | Income | tracker_1193 | xxxxxxx | tracker_1193_income | Income | b5cfd1ce-bb7f-4f5c-a6b4-12b469ff0b9d | Bobby Calf Sales | 2017-06 | 0 |
| income | Income | tracker_1193 | xxxxxxx | tracker_1193_income | Income | b5cfd1ce-bb7f-4f5c-a6b4-12b469ff0b9d | Bobby Calf Sales | 2017-09 | 4801.36 |
| income | Income | tracker_1193 | xxxxxxx | tracker_1193_income | Income | b5cfd1ce-bb7f-4f5c-a6b4-12b469ff0b9d | Bobby Calf Sales | 2017-12 | 1997.33 |
+------------+---------------+--------------+---------------+---------------------+--------------+--------------------------------------+------------------+---------+--------------+
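Once the SELECT is returning what you need, loading it into SQL Server is just a matter of prefixing it with an INSERT. A minimal sketch of a staging table (the table name and data types here are assumptions; adjust to suit your data):
CREATE TABLE dbo.CashflowStaging (
    section_id    varchar(50),
    section_title varchar(50),
    tracker_id    varchar(50),
    tracker_title varchar(50),
    string_id     varchar(50),
    string_title  varchar(50),
    account_id    varchar(255),
    account_title varchar(50),
    [period]      varchar(7),     -- e.g. '2017-06'
    period_value  decimal(18, 2)
);
Then prefix the query above with INSERT INTO dbo.CashflowStaging (section_id, section_title, tracker_id, tracker_title, string_id, string_title, account_id, account_title, [period], period_value) and run it unchanged.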

Related

SQL Server - parse json with multiple objects

I'm parsing a JSON dataset within SQL Server and it works great with a single object, but falls down when multiple data objects are present. I assume it's because of the CROSS APPLY statements.
Within the JSON dataset there are only 4 records, but my current SQL is returning 16 (4 duplicate sets, as there are 4 CROSS APPLY statements), and I'm not sure how to get around this.
JSON
{
"type": "test",
"user": {
"last_update": "2022-06-19T14:13:07.707502+00:00",
"user_id": "12345"
},
"data": [
{
"metadata": {
"start_time": "2022-06-19T00:00:00+01:00",
"end_time": "2022-06-20T00:00:00+01:00"
},
"distance_data": {
"steps": 9299,
"distance_meters": 7704.0
}
},
{
"metadata": {
"start_time": "2022-06-17T00:00:00+01:00",
"end_time": "2022-06-18T00:00:00+01:00"
},
"distance_data": {
"steps": 2546,
"distance_meters": 2143.0
}
},
{
"metadata": {
"start_time": "2022-06-16T00:00:00+01:00",
"end_time": "2022-06-17T00:00:00+01:00"
},
"distance_data": {
"steps": 4969,
"distance_meters": 4192.0
}
},
{
"metadata": {
"start_time": "2022-06-18T00:00:00+01:00",
"end_time": "2022-06-19T00:00:00+01:00"
},
"distance_data": {
"steps": 6769,
"distance_meters": 5698.0
}
}
]
}
SQL statement
SELECT
distance_meters, steps, cast(left(start_time,10) as date) startDate
FROM
OPENJSON ( @json )
WITH (
jType nvarchar(50) N'$.type',
jUser char(36) N'$.user.user_id',
data nvarchar(max) as JSON
) as a
CROSS APPLY
OPENJSON(a.data)
WITH
(
distance_data nvarchar(max) as json
) as b
CROSS APPLY
OPENJSON (b.distance_data)
WITH
(
distance_meters float,
steps int
) as c
CROSS APPLY
OPENJSON (a.data)
WITH
(
metadata nvarchar(max) as json
) as d
CROSS APPLY
OPENJSON (d.metadata)
WITH
(
start_time nvarchar(25),
end_time nvarchar(25)
) as e
ORDER BY startDate ASC;
I think you need a single APPLY operator:
SELECT j1.jType, j1.jUser, j2.*
FROM OPENJSON(@json) WITH (
jType nvarchar(50) N'$.type',
jUser char(36) N'$.user.user_id',
data nvarchar(max) as JSON
) AS j1
CROSS APPLY OPENJSON(j1.data) WITH (
start_time nvarchar(25) '$.metadata.start_time',
end_time nvarchar(25) '$.metadata.end_time',
steps numeric(10, 0) '$.distance_data.steps',
distance_meters numeric(10, 1) '$.distance_data.distance_meters'
) j2
Result:
jType | jUser | start_time                | end_time                  | steps | distance_meters
------+-------+---------------------------+---------------------------+-------+----------------
test  | 12345 | 2022-06-19T00:00:00+01:00 | 2022-06-20T00:00:00+01:00 | 9299  | 7704.0
test  | 12345 | 2022-06-17T00:00:00+01:00 | 2022-06-18T00:00:00+01:00 | 2546  | 2143.0
test  | 12345 | 2022-06-16T00:00:00+01:00 | 2022-06-17T00:00:00+01:00 | 4969  | 4192.0
test  | 12345 | 2022-06-18T00:00:00+01:00 | 2022-06-19T00:00:00+01:00 | 6769  | 5698.0
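If you also want the computed startDate column and the ordering from your original query, they drop straight onto the same single-APPLY shape; a minimal sketch using the @json variable from the question:
SELECT
    j1.jType,
    j1.jUser,
    j2.steps,
    j2.distance_meters,
    CAST(LEFT(j2.start_time, 10) AS date) AS startDate
FROM OPENJSON(@json) WITH (
    jType nvarchar(50) N'$.type',
    jUser char(36) N'$.user.user_id',
    data nvarchar(max) AS JSON
) AS j1
CROSS APPLY OPENJSON(j1.data) WITH (
    start_time nvarchar(25) '$.metadata.start_time',
    end_time nvarchar(25) '$.metadata.end_time',
    steps numeric(10, 0) '$.distance_data.steps',
    distance_meters numeric(10, 1) '$.distance_data.distance_meters'
) AS j2
ORDER BY startDate ASC;
Because the metadata and distance_data paths are pulled in a single OPENJSON over the data array, each array element produces exactly one row, so there is no Cartesian duplication.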

Azure Synapse Analytics Json Flatten

I'm new to Azure Synapse and currently have the following problem:
I get JSON that looks like the following:
{
"2022-02-01":[
{
"shiftId": ,
"employeeId": ,
"duration": ""
},
{
"shiftId": ,
"employeeId": ,
"duration": ""
}
],
"2022-02-02": [
{
"shiftId": ,
"employeeId": ,
"duration": ""
}
],
"2022-02-03": [
{
"shiftId": ,
"employeeId": ,
"duration": ""
},
{
"shiftId": ,
"employeeId": ,
"duration": ""
}
],
"2022-02-4": []
}
Now I would like to convert this so that I can get it into a view. I have already tried a Data Flow with "array of documents", but I get an error:
"Malformed records are detected in schema inference. Parse Mode: FAILFAST"
I want something like:
date       | shiftId | employeeId | duration
-----------+---------+------------+---------
2022-02-01 | 1234    | 345345     | 420
2022-02-01 | 2345    | 345345     | 124
2022-02-02 | 5345    | 123567     | 424
2022-02-03 | 5675    | 987542     | 123
2022-02-03 | 9456    | 234466     | 754
Azure Synapse Analytics dedicated SQL pools are actually very capable with JSON, supporting OPENJSON and JSON_VALUE, so you could just use a stored procedure with the JSON as a parameter. A simple example:
SELECT
k.[key] AS [shiftDate],
JSON_VALUE( d.[value], '$.shiftId' ) shiftId,
JSON_VALUE( d.[value], '$.employeeId' ) employeeId,
JSON_VALUE( d.[value], '$.duration' ) duration
FROM OPENJSON( @json, '$' ) k
CROSS APPLY OPENJSON( k.value, '$' ) d;
The full code:
DECLARE @json NVARCHAR(MAX) = '{
"2022-02-01": [
{
"shiftId": 1234,
"employeeId": 345345,
"duration": 420
},
{
"shiftId": 2345,
"employeeId": 345345,
"duration": 124
}
],
"2022-02-02": [
{
"shiftId": 5345,
"employeeId": 123567,
"duration": 424
}
],
"2022-02-03": [
{
"shiftId": 5675,
"employeeId": 987542,
"duration": 123
},
{
"shiftId": 9456,
"employeeId": 234466,
"duration": 754
}
]
}'
SELECT
k.[key] AS [shiftDate],
JSON_VALUE( d.[value], '$.shiftId' ) shiftId,
JSON_VALUE( d.[value], '$.employeeId' ) employeeId,
JSON_VALUE( d.[value], '$.duration' ) duration
FROM OPENJSON( @json, '$' ) k
CROSS APPLY OPENJSON( k.value, '$' ) d;
The results match the desired output above.
You could use a Synapse Notebook or Mapping Data Flows if you wanted something more dynamic.
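If you do go the stored procedure route, the wrapper is just the same query with the JSON passed in as a parameter; a minimal sketch (the procedure name is only illustrative):
CREATE PROCEDURE dbo.FlattenShifts
    @json NVARCHAR(MAX)
AS
BEGIN
    -- One row per shift object, keyed by the date that owns the array
    SELECT
        k.[key] AS [shiftDate],
        JSON_VALUE( d.[value], '$.shiftId' )    AS shiftId,
        JSON_VALUE( d.[value], '$.employeeId' ) AS employeeId,
        JSON_VALUE( d.[value], '$.duration' )   AS duration
    FROM OPENJSON( @json, '$' ) k
    CROSS APPLY OPENJSON( k.[value], '$' ) d;
END
You would then call it with EXEC dbo.FlattenShifts @json = '...your payload...'; and read the rows from the result set.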

JSON_QUERY to do a "Select Many"

I have a JSON variable that looks like this (the real one is more complex):
DECLARE @myJson VARCHAR(3000) = '{
"CustomerId": "123456",
"Orders": [{
"OrderId": "852",
"OrderManifests": [{
"ShippedProductId": 884,
"ProductId": 884
}, {
"ShippedProductId": 951,
"ProductId": 2564
}
]
}, {
"OrderId": "5681",
"OrderManifests": [{
"ShippedProductId": 198,
"ProductId": 4681
}, {
"ShippedProductId": 8188,
"ProductId": 8188
}, {
"ShippedProductId": 144,
"ProductId": 8487
}
]
}
]
}'
In the end, I need to know if any of the ShippedProductId values match their corresponding ProductId (in the same JSON object).
I started in by trying to get a list of all the OrderManifests. But while this will get me the array of orders:
SELECT JSON_QUERY(@myJson, '$.Orders')
I can't seem to find a way to get a list of all the OrderManifests across all the entries in the Orders array. This does not work:
SELECT JSON_QUERY(@myJson, '$.Orders.OrderManifests')
Is there a way to do a Select Many kind of query to get all the OrderManifests in the Orders array?
Use OPENJSON and CROSS APPLY to drill down into your objects.
This should do it for you:
SELECT j.CustomerId,o.OrderId, m.ShippedProductId, m.ProductId
FROM OPENJSON(@myJson)
WITH (
CustomerId NVARCHAR(1000),
Orders NVARCHAR(MAX) AS JSON
) j
CROSS APPLY OPENJSON(j.Orders)
WITH (
OrderId NVARCHAR(1000),
OrderManifests NVARCHAR(MAX) AS JSON
) o
CROSS APPLY OPENJSON(o.OrderManifests)
WITH (
ShippedProductId INT,
ProductId int
) m
WHERE m.ShippedProductId = m.ProductId;
This query returns:
CustomerId | OrderId | ShippedProductId | ProductId
------------+-----------+-------------------+-------------
123456 | 852 | 884 | 884
------------+-----------+-------------------+-------------
123456 | 5681 | 8188 | 8188
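If all you ultimately need is a yes/no flag for "does any ShippedProductId match its ProductId", the same drill-down can sit inside an EXISTS; a quick sketch against the @myJson variable from the question:
SELECT CASE WHEN EXISTS (
    SELECT 1
    FROM OPENJSON(@myJson, '$.Orders')
    WITH ( OrderManifests NVARCHAR(MAX) AS JSON ) o
    CROSS APPLY OPENJSON(o.OrderManifests)
    WITH ( ShippedProductId INT, ProductId INT ) m
    WHERE m.ShippedProductId = m.ProductId
) THEN 1 ELSE 0 END AS AnyShippedMatchesProduct;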

Converting a Dictionary in MySql 8

I have a JSON structure stored in a MySql table. Now, months later, I have a requirement to join to pieces found deep in the bowels of this JSON string.
{
  "id": "000689ba-9891-4d82-ad13-a7b96dc08ec4",
  "type": "emp",
  "firstName": "Brooke",
  "facilities": {
    "0001": [{
      "id": 1,
      "code": "125",
      "name": "DSL",
      "type": "MGMTSupport",
      "payRate": 18
    }],
    "0002": [
      {
        "id": 1,
        "code": "100",
        "name": "Server",
        "type": "FOH",
        "payRate": 8
      }, {
        "id": 2,
        "code": "320",
        "name": "NCFOHTrainer",
        "type": "NCHourlyTraining",
        "payRate": 14.5
      }
    ]
  },
  "permissions": ["read:availability", "..."],
  "primaryJobCode": "150",
  "primaryPayRate": 9,
  "primaryFacility": "0260"
}
The big question is: how do I shape this as a query in MySQL when the facilities do not follow a single key/value pattern? I.e. the key of each entry is the facilityId, so I cannot use a fixed path like '$.0001', and the dictionary value is an array, so how do I path into that correctly?
select id as EmployeeId
, companyId as cpkEmployeeId
, json_table( `data`
, '$.facilities[*]' COLUMNS( facilityId VARCHAR(10) PATH '$.????'
, NESTED PATH '??? $[*] ???' COLUMNS ( code VARCHAR(10) PATH '$.code'
, payRate DECIMAL(8,4) PATH '$.payRate') facilities
from employee
;
Yea - the above does not work. Any help appreciated.
Desired output?
[Other columns from the table] plus facilityId, code & payrate.
A single row in the native table could produce something like:
id | companyId | facilityId | code | payRate
--------+-----------+------------+------+---------
1 | 324337 | 0001 | 125 | 18.0000
1 | 324337 | 0002 | 100 | 8.0000
1 | 324337 | 0002 | 320 | 14.5000
WITH
cte AS (
SELECT test.id,
test.value,
jsontable.facilityId,
CONCAT('$.facilities."', jsontable.facilityId, '"') path
FROM test
CROSS JOIN JSON_TABLE(JSON_KEYS(test.value, '$.facilities'),
'$[*]' COLUMNS (facilityId CHAR(4) PATH '$')) jsontable
)
SELECT cte.id,
cte.facilityId,
jsontable.code,
jsontable.payRate
FROM cte
CROSS JOIN JSON_TABLE(JSON_EXTRACT(cte.value, cte.path),
'$[*]' COLUMNS (code CHAR(3) PATH '$.code',
payRate DECIMAL(6, 4) PATH '$.payRate')) jsontable
https://dbfiddle.uk/?rdbms=mysql_8.0&fiddle=18ab7b6f181b61fb53f88a6de6e049be
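Adapted to the employee table from the question, the same two-step pattern would look something like the sketch below (this assumes the JSON sits in the `data` column and that id and companyId are ordinary columns, as described in the question):
WITH cte AS (
    -- Step 1: explode the facility keys (the dictionary keys) into rows
    SELECT e.id,
           e.companyId,
           e.`data` AS doc,
           k.facilityId,
           CONCAT('$.facilities."', k.facilityId, '"') AS path
    FROM employee e
    CROSS JOIN JSON_TABLE(JSON_KEYS(e.`data`, '$.facilities'),
         '$[*]' COLUMNS (facilityId VARCHAR(10) PATH '$')) k
)
-- Step 2: use the per-row path to explode each facility's array of jobs
SELECT cte.id,
       cte.companyId,
       cte.facilityId,
       f.code,
       f.payRate
FROM cte
CROSS JOIN JSON_TABLE(JSON_EXTRACT(cte.doc, cte.path),
     '$[*]' COLUMNS (code VARCHAR(10) PATH '$.code',
                     payRate DECIMAL(8, 4) PATH '$.payRate')) f;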

How to add a key-value pair in the JSON root node and convert it into a table using SQL Server

I have a table people that has four columns: Name, TagName, Value, Location.
I want to convert the TagName and Value into JSON, with the Name and Location columns as the root node (Name & Location are the same for multiple records).
The output I need is:
{
"{"Name":"EMP1","Location":"mumbai"}": [
{
"TagName": "1",
"Value": "844.17769999999996"
},
{
"TagName": "abc",
"Value": "837.43679999999995"
},
{
"TagName": "pqr",
"Value": "0"
},
{
"TagName": "XYZ",
"Value": "1049.2429999999999"
}
]
}
Please check the query below, in which I am trying to create the JSON string using JSON PATH, but I am stuck on the root node.
SELECT TagName
,Value
FROM dbo.people
FOR JSON PATH, ROOT('')---
When I convert the above JSON back into tabular format, the required output is:
Name | Location | TagName | Value
-----+----------+---------+-------------------
EMP1 | Mumbai   | 1       | 844.17769999999996
EMP1 | Mumbai   | abc     | 837.43679999999995
.....
Your expected output is not valid JSON, but you are probably looking for something like this:
Table:
CREATE TABLE People (
[Name] varchar(10),
[Location] varchar(50),
[TagName] varchar(3),
[Value] numeric(20, 14)
)
INSERT INTO People ([Name], [Location], [TagName], [Value])
VALUES
('EMP1', 'Mumbai', '1', 844.17769999999996),
('EMP1', 'Mumbai', 'abc', 837.43679999999995),
('EMP2', 'Mumbai', 'abc', 837.43679999999995)
Statement:
SELECT DISTINCT p.[Name], p.[Location], c.Items
FROM People p
CROSS APPLY (
SELECT [TagName], [Value]
FROM People
WHERE [Name] = p.[Name] AND [Location] = p.[Location]
FOR JSON AUTO
) c (Items)
FOR JSON PATH
Result:
[
{
"Name":"EMP1",
"Location":"Mumbai",
"Items":[
{
"TagName":"1",
"Value":844.17769999999996
},
{
"TagName":"abc",
"Value":837.43679999999995
}
]
},
{
"Name":"EMP2",
"Location":"Mumbai",
"Items":[
{
"TagName":"abc",
"Value":837.43679999999995
}
]
}
]
If you want to parse the generated JSON, you need to use OPENJSON() twice:
Generated JSON:
DECLARE @json varchar(max) = N'[
{
"Name":"EMP1",
"Location":"Mumbai",
"Items":[
{
"TagName":"1",
"Value":844.17769999999996
},
{
"TagName":"abc",
"Value":837.43679999999995
}
]
},
{
"Name":"EMP2",
"Location":"Mumbai",
"Items":[
{
"TagName":"abc",
"Value":837.43679999999995
}
]
}
]'
Statement:
SELECT j1.Name, j1.Location, j2.TagName, j2.Value
FROM OPENJSON(@json) WITH (
[Name] varchar(10) '$.Name',
[Location] varchar(50) '$.Location',
[Items] nvarchar(max) '$.Items' AS JSON
) j1
OUTER APPLY OPENJSON(j1.Items) WITH (
[TagName] varchar(3) '$.TagName',
[Value] numeric(20, 14) '$.Value'
) j2