Convert flat SQL rows into nested JSON array using FOR JSON - json

So, I have a simple view that looks like this:
Name | Type | Product | QuantitySold
------------------------------------------------------
Walmart | Big Store | Gummy Bears | 10
Walmart | Big Store | Toothbrush | 6
Target | Small Store | Toothbrush | 2
Without using nested queries, and using SQL Server's FOR JSON clause, can this easily be converted to the following JSON?
[
{
"Type": "Big Store",
"Stores": [
{
"Name": "Walmart",
"Products": [
{
"Name": "Gummy Bears",
"QuantitySold": 10
},
{
"Name": "Toothbrush",
"QuantitySold": 6
}
]
}
]
},
{
"Type": "Small Store",
"Stores": [
{
"Name": "Target",
"Products": [
{
"Name": "Toothbrush",
"QuantitySold": 2
}
]
}
]
}
]
Essentially: group by Type, then by store, then list the line items. My attempt so far is below; I'm not sure how to properly group the rows.
-- Asker's attempt: three correlated levels over MyTable (m1 -> m2 -> m3), each
-- inner level serialized with FOR JSON PATH as the Stores / Products property.
-- NOTE(review): every level is correlated on id, so nothing is grouped by Type
-- or by store; the sample view also shows no id or Store column — confirm schema.
-- NOTE(review): the parentheses look unbalanced (one extra ")" on the last line).
SELECT Type, (
SELECT Store,
(SELECT Product,QuantitySold from MyTable m3 where m3.id=m2.id for json path) as Products
FROM MyTable m2 where m1.ID = m2.ID for json path) as Stores
) as Types FROM MyTable m1

You can try something like this:
-- Sample data: one row per (store, product), mirroring the flat view.
-- Table variables must be named with the @ sigil.
DECLARE @Data TABLE (
    Name VARCHAR(20), Type VARCHAR(20), Product VARCHAR(20), QuantitySold INT
);
INSERT INTO @Data ( Name, Type, Product, QuantitySold ) VALUES
    ( 'Walmart', 'Big Store', 'Gummy Bears', 10 ),
    ( 'Walmart', 'Big Store', 'Toothbrush', 6 ),
    ( 'Target', 'Small Store', 'Toothbrush', 2 );

-- Outer level: one object per distinct Type.
SELECT DISTINCT
    t.[Type],
    Stores
FROM @Data AS t
OUTER APPLY (
    SELECT (
        -- Middle level: one object per distinct store of this Type.
        SELECT DISTINCT x.[Name], Products
        FROM @Data AS x
        OUTER APPLY (
            SELECT (
                -- Inner level: the products of this store. Correlate on Type
                -- as well as Name so that a store name appearing under two
                -- types does not pull in the other type's products.
                SELECT n.Product AS [Name], n.QuantitySold
                FROM @Data AS n
                WHERE n.[Name] = x.[Name]
                  AND n.[Type] = x.[Type]
                ORDER BY n.Product
                FOR JSON PATH
            ) AS Products
        ) AS p
        WHERE x.[Type] = t.[Type]
        ORDER BY x.[Name]
        FOR JSON PATH
    ) AS Stores
) AS Stores
ORDER BY [Type]
FOR JSON PATH;
Returns
[{
"Type": "Big Store",
"Stores": [{
"Name": "Walmart",
"Products": [{
"Name": "Gummy Bears",
"QuantitySold": 10
}, {
"Name": "Toothbrush",
"QuantitySold": 6
}]
}]
}, {
"Type": "Small Store",
"Stores": [{
"Name": "Target",
"Products": [{
"Name": "Toothbrush",
"QuantitySold": 2
}]
}]
}]

If you had a normalized data structure, you could use another approach.
-- Let's assume that Types are stored like this
-- (table variables must be named with the @ sigil).
DECLARE @Types TABLE (
    id int,
    Type nvarchar(20)
);
INSERT INTO @Types (id, Type) VALUES (1, N'Big Store'), (2, N'Small Store');

-- Stores in a separate table
DECLARE @Stores TABLE (
    id int,
    Name nvarchar(10),
    TypeId int
);
INSERT INTO @Stores (id, Name, TypeId) VALUES (1, N'Walmart', 1), (2, N'Target', 2),
    (3, N'Tesco', 2); -- I added one more just for fun

-- Products table
DECLARE @Products TABLE (
    id int,
    Name nvarchar(20)
);
INSERT INTO @Products (id, Name) VALUES (1, N'Gummy Bears'), (2, N'Toothbrush'),
    (3, N'Milk'), (4, N'Ball'); -- Added some here

-- And here come the sales
DECLARE @Sales TABLE (
    StoreId int,
    ProductId int,
    QuantitySold int
);
INSERT INTO @Sales (StoreId, ProductId, QuantitySold) VALUES (1, 1, 10), (1, 2, 6), (2, 2, 2),
    (3, 4, 15), (3, 3, 7); -- I added a few more

-- Now we can join the tables and get the result that you need.
-- FOR JSON AUTO nests by source-table alias in SELECT-list order:
-- [Type] -> Stores -> Products. Do not reorder the columns.
SELECT Type = [Type].Type,
       Name = Stores.Name,
       Name = Products.Product,
       QuantitySold = Products.QuantitySold
FROM (
    SELECT s.StoreId,
           p.Name AS Product,
           s.QuantitySold
    FROM @Sales AS s
    INNER JOIN @Products AS p
        ON p.id = s.ProductId
) AS Products
INNER JOIN @Stores AS Stores
    ON Stores.id = Products.StoreId
INNER JOIN @Types AS [Type]
    ON Stores.TypeId = [Type].id
-- Ordering by product as well makes the array order deterministic.
ORDER BY [Type].Type, Stores.Name, Products.Product
FOR JSON AUTO;
Output:
[
{
"Type": "Big Store",
"Stores": [
{
"Name": "Walmart",
"Products": [
{
"Name": "Gummy Bears",
"QuantitySold": 10
},
{
"Name": "Toothbrush",
"QuantitySold": 6
}
]
}
]
},
{
"Type": "Small Store",
"Stores": [
{
"Name": "Target",
"Products": [
{
"Name": "Toothbrush",
"QuantitySold": 2
}
]
},
{
"Name": "Tesco",
"Products": [
{
"Name": "Ball",
"QuantitySold": 15
},
{
"Name": "Milk",
"QuantitySold": 7
}
]
}
]
}
]

Related

How to return result of a join into a single property in a Postgres query?

Suppose the following,
-- Fixture: parent table A and child table B; three B rows point at A.id = 1.
CREATE SCHEMA IF NOT EXISTS my_schema;

CREATE TABLE IF NOT EXISTS my_schema.my_table_a (
    id serial PRIMARY KEY
);

CREATE TABLE IF NOT EXISTS my_schema.my_table_b (
    id            serial PRIMARY KEY,
    my_table_a_id BIGINT NOT NULL REFERENCES my_schema.my_table_a (id)
);

INSERT INTO my_schema.my_table_a (id)
VALUES (1);

INSERT INTO my_schema.my_table_b (id, my_table_a_id)
VALUES (1, 1), (2, 1), (3, 1);
If I run the following query,
-- One output row per (A, B) pair; the whole B row is returned as column "tb".
SELECT
    ta.*,
    tb AS tb
FROM my_schema.my_table_a AS ta
LEFT JOIN my_schema.my_table_b AS tb
    ON tb.my_table_a_id = ta.id;
Then the result is,
[
{
"id": 1,
"tb": {
"id": 1,
"my_table_a_id": 1
}
},
{
"id": 1,
"tb": {
"id": 2,
"my_table_a_id": 1
}
},
{
"id": 1,
"tb": {
"id": 3,
"my_table_a_id": 1
}
}
]
How can I get it to work like this:
[
{
"id": 1,
"tb": [
{
"id": 1,
"my_table_a_id": 1
},
{
"id": 2,
"my_table_a_id": 1
},
{
"id": 3,
"my_table_a_id": 1
}
]
}
]
-- One row per A, with all matching B rows collected into an array.
-- The join predicate is required: without it, every B row is paired with
-- every A row, which only looks right while A has a single row.
SELECT
    ta.*,
    COALESCE(
        -- FILTER drops the all-NULL row that the LEFT JOIN yields for an A
        -- without children, so such rows get an empty array, not {NULL}.
        ARRAY_AGG(tb ORDER BY tb.id) FILTER (WHERE tb.id IS NOT NULL),
        '{}'
    ) AS tb
FROM my_schema.my_table_a AS ta
LEFT JOIN my_schema.my_table_b AS tb
    ON tb.my_table_a_id = ta.id
GROUP BY ta.id
ORDER BY ta.id;
Example https://www.db-fiddle.com/f/5i97YZ6FMRY48pZaJ255EJ/0

query json document SQL Server

I'm trying to read data from a JSON document stored in Azure Data Lake Storage from an Azure SQL database using the query below. I tried a couple of approaches, but nothing seems to bring the data back. For instance, using '$.data[0].AccID' in OPENJSON brings back only the first record of each array, and I'm not sure how to bring back all the data.
json:
[
{
"data": [
{
"AccID": 1234,
"CustID": 456,
"Total": 1234.1234,
"OrderDate": "2022-12-01"
},
{
"AccID": 5678,
"CustID": 890,
"Total": 5678.5678,
"OrderDate": "2022-12-01"
}
],
"count": 2
},
{
"data": [
{
"AccID": 1234,
"CustID": 456,
"Total": 100.0,
"OrderDate": "2021-12-01"
},
{
"AccID": 5678,
"CustID": 890,
"Total": 200.0,
"OrderDate": "2021-12-01"
},
{
"AccID": 8900,
"CustID": 235,
"Total": 300.0,
"OrderDate": "2021-12-01"
}
],
"count": 3
}
]
Query:
-- Asker's attempt: bulk-read the JSON file as one CLOB and shred it with OPENJSON.
-- NOTE(review): the OPENROWSET result is aliased [data], yet the APPLY below
-- references X.BulkColumn — no alias X is defined in this statement.
-- NOTE(review): the sample document shows no "value" property (it is an array of
-- objects with "data" and "count"), so '$.value' presumably matches nothing.
SELECT *
FROM OPENROWSET (
BULK 'blobpath/file.json',
DATA_SOURCE = 'adls',
SINGLE_CLOB
) AS [data]
CROSS APPLY OPENJSON (X.BulkColumn, '$.value')
WITH (
AccID int,
CustID int,
Total float,
OrderDate date)
I created storage account and uploaded Json file into container,
reference image.
input .json:
[
{
"data": [
{
"AccID": 1234,
"CustID": 456,
"Total": 1234.1234,
"OrderDate": "2022-12-01"
},
{
"AccID": 5678,
"CustID": 890,
"Total": 5678.5678,
"OrderDate": "2022-12-01"
}
],
"count": 2
},
{
"data": [
{
"AccID": 1234,
"CustID": 456,
"Total": 100.0,
"OrderDate": "2021-12-01"
},
{
"AccID": 5678,
"CustID": 890,
"Total": 200.0,
"OrderDate": "2021-12-01"
},
{
"AccID": 8900,
"CustID": 235,
"Total": 300.0,
"OrderDate": "2021-12-01"
}
],
"count": 3
}
]
I generated a SAS token and created a master key and a data source. Then I created a table in SQL with the following columns:
-- Target table for the shredded JSON rows.
-- float precision must be between 1 and 53; plain float is float(53).
CREATE TABLE data1 (
    ACCID     varchar(100),
    CustID    varchar(100),
    Total     float,
    OrderDate date,
    [count]   int   -- value of the enclosing object's "count" property
);
Insert data into that table using below code:
-- Load every element of every "data" array in a single statement.
-- Level 1 (g): one row per top-level object, exposing its "data" array as a
--              JSON fragment together with its "count".
-- Level 2 (d): one row per element of that "data" array.
-- Each array is expanded in full, so the arrays may have any length and no
-- all-NULL rows are produced for indexes that do not exist.
INSERT INTO data1 (ACCID, CustID, Total, OrderDate, [count])
SELECT
    d.AccID,
    d.CustID,
    d.Total,
    d.OrderDate,
    g.[count]
FROM OPENROWSET(
    BULK 'jsonfile path',
    DATA_SOURCE = 'your data source',   -- options are comma-separated
    SINGLE_CLOB
) AS DataFile
CROSS APPLY OPENJSON(DataFile.BulkColumn)
    WITH (
        [data]  nvarchar(max) '$.data' AS JSON,
        [count] int           '$.count'
    ) AS g
CROSS APPLY OPENJSON(g.[data])
    WITH (
        AccID     varchar(100) '$.AccID',
        CustID    varchar(100) '$.CustID',
        Total     float        '$.Total',
        OrderDate date         '$.OrderDate'
    ) AS d;
Data is inserted successfully.
I retrieved the data of table I got as below
I deleted the Null values row using below code
Image for reference:
-- Remove rows for which no account id was extracted.
DELETE FROM data1
WHERE ACCID IS NULL;
I then retrieved the table's data, ordered by count to match the JSON, using the code below:
-- Read back all loaded rows, grouped visually by their source object's count.
SELECT ACCID, CustID, Total, OrderDate, [count]
FROM data1
ORDER BY [count];
Output:
In this way I retrieve all the data from Json document.

Creating JSON Nested Level Arrays with Query

I'm trying to recreate a JSON output that is about 5 levels deep. I can recreate 2 levels deep, but after that, I'm stumped. It has worked using FOR JSON AUTO, but I need control over the naming and how they appear, so I need to be able to use FOR JSON PATH.
Below is sample code that would allow for a 3-level-deep output. I'm hoping that whatever is needed to reach 3 levels will show me how to go to 5 levels.
-- Adjacency-list tree: each row points at its parent; ParentID = 0 marks a root.
CREATE TABLE [FamilyTree] (
    [ID]       INT          NOT NULL,
    [Name]     VARCHAR(250) NOT NULL,
    [ParentID] INT          NOT NULL
) ON [PRIMARY]
GO
-- Two roots (1, 2), one child each (3, 4), then their grandchildren (5-9).
INSERT [FamilyTree] ([ID], [Name], [ParentID])
VALUES
    (1, N'Person1', 0),
    (2, N'Person2', 0),
    (3, N'Person3', 1),
    (4, N'Person4', 2),
    (5, N'Person5', 3),
    (6, N'Person6', 3),
    (7, N'Person7', 4),
    (8, N'Person8', 4),
    (9, N'Person9', 4)
Using this query:
-- Roots (ParentID = 0) with their direct children nested as a JSON array.
SELECT
    parent.Name AS [name],
    children = (
        SELECT child.Name
        FROM FamilyTree AS child
        WHERE child.ParentID = parent.ID
        FOR JSON PATH
    )
FROM FamilyTree AS parent
WHERE parent.ParentID = 0
FOR JSON PATH
I can get the following output:
[
{
"name": "Person1",
"children": [
{
"Name": "Person3"
}
]
},
{
"name": "Person2",
"children": [
{
"Name": "Person4"
}
]
}
]
but I want to be able to get it to go even further to look like this:
[
{
"name": "Person1",
"children": [
{
"Name": "Person3",
"children": [
{
"name": "Person5"
},
{
"name": "Person6"
}
]
}
]
},
{
"name": "Person2",
"children": [
{
"name": "Person4",
"children": [
{
"name": "Person7"
},
{
"name": "Person8"
},
{
"name": "Person9"
}
]
}
]
}
]
You can just keep nesting subqueries if you want
-- Five-level tree built by nesting one CROSS APPLY per level.
-- Each applied subquery selects the rows whose ParentID equals the ID of the
-- row one level up, serializes them with FOR JSON PATH, and exposes that JSON
-- as a single column named "children" via the alias ftN(children).
-- Root rows are selected with ParentID IS NULL (not 0).
SELECT
ft1.ID,
ft1.Name,
ft2.children
FROM FamilyTree ft1
CROSS APPLY (
-- Level 2: children of ft1
SELECT
ft2.ID,
ft2.Name,
ft3.children
FROM FamilyTree ft2
CROSS APPLY (
-- Level 3: children of ft2
SELECT
ft3.ID,
ft3.Name,
ft4.children
FROM FamilyTree ft3
CROSS APPLY (
-- Level 4: children of ft3
SELECT
ft4.ID,
ft4.Name,
ft5.children
FROM FamilyTree ft4
CROSS APPLY (
-- Level 5 (deepest): children of ft4; selects no further "children" column
SELECT
ft5.ID,
ft5.Name
FROM FamilyTree ft5
WHERE ft5.ParentID = ft4.ID
FOR JSON PATH
) ft5(children)
WHERE ft4.ParentID = ft3.ID
FOR JSON PATH
) ft4(children)
WHERE ft3.ParentID = ft2.ID
FOR JSON PATH
) ft3(children)
WHERE ft2.ParentID = ft1.ID
FOR JSON PATH
) ft2(children)
WHERE ft1.ParentID IS NULL
FOR JSON PATH;
db<>fiddle
To do this recursively, or for that matter to prevent duplication of code, you cannot use a Table Valued Function. You can only do this with a scalar UDF (oh the horror!).
-- Returns the JSON array of all FamilyTree rows whose ParentID matches
-- @ParentID (NULL selects the roots), each with its own descendants nested
-- under "children" by calling this function recursively.
--   @ParentID : ID of the parent row, or NULL for root rows.
--   Returns   : nvarchar(max) JSON text, or NULL when no row matches.
CREATE OR ALTER FUNCTION dbo.GetJson (@ParentID int)
RETURNS nvarchar(max)
AS
BEGIN
    RETURN (
        SELECT
            ft.ID,
            ft.Name,
            -- A scalar function returns plain text; JSON_QUERY marks it as
            -- JSON so FOR JSON nests it instead of escaping it as a string.
            children = JSON_QUERY(dbo.GetJson(ft.ID))
        FROM FamilyTree ft
        -- INTERSECT treats NULLs as equal, giving a NULL-safe comparison.
        WHERE EXISTS (SELECT ft.ParentID INTERSECT SELECT @ParentID)
        FOR JSON PATH
    );
END;
GO
-- CREATE FUNCTION must be the only statement in its batch, hence the GO above.
SELECT dbo.GetJson(NULL);
db<>fiddle
Note that in both of these examples, root nodes have a ParentID of NULL not 0. This is the correct way to do it, as there is no 0 row.
Try this; maybe it's useful:
-- Same sample tree held in a table variable (named with the @ sigil).
-- ParentID = 0 marks a root row.
DECLARE @FamilyTree TABLE (
    [ID]       INT          NOT NULL,
    [Name]     VARCHAR(250) NOT NULL,
    [ParentID] INT          NOT NULL
);
INSERT @FamilyTree ([ID], [Name], [ParentID])
VALUES
    (1, N'Person1', 0),
    (2, N'Person2', 0),
    (3, N'Person3', 1),
    (4, N'Person4', 2),
    (5, N'Person5', 3),
    (6, N'Person6', 3),
    (7, N'Person7', 4),
    (8, N'Person8', 4),
    (9, N'Person9', 4);

-- One self-join per tree level. FOR JSON AUTO nests by join alias in
-- SELECT-list order, so the aliases "children" and "grandchild" become the
-- property names of the nested arrays.
SELECT
    FT1.Name AS [name],
    children.Name,
    grandchild.Name AS grandchild
FROM @FamilyTree AS FT1
INNER JOIN (SELECT ID, Name, ParentID FROM @FamilyTree) AS children
    ON children.ParentID = FT1.ID
INNER JOIN (SELECT ID, Name, ParentID FROM @FamilyTree) AS grandchild
    ON grandchild.ParentID = children.ID
WHERE FT1.ParentID = 0
-- Keep the rows of one parent adjacent and the array order deterministic.
ORDER BY FT1.ID, children.ID, grandchild.ID
FOR JSON AUTO
For each additional level, you can add another INNER JOIN.
This is the result:
[
{
"name": "Person1",
"children": [
{
"Name": "Person3",
"grandchild": [
{
"grandchild": "Person5"
},
{
"grandchild": "Person6"
}
]
}
]
},
{
"name": "Person2",
"children": [
{
"Name": "Person4",
"grandchild": [
{
"grandchild": "Person7"
},
{
"grandchild": "Person8"
},
{
"grandchild": "Person9"
}
]
}
]
}
]

JSON_QUERY to do a "Select Many"

I have a JSON variable that looks like this (the real one is more complex):
-- Sample document: a customer with orders, each holding an OrderManifests array.
-- Variables take the @ sigil; NVARCHAR(MAX) avoids truncating a larger document.
DECLARE @myJson NVARCHAR(MAX) = N'{
"CustomerId": "123456",
"Orders": [{
"OrderId": "852",
"OrderManifests": [{
"ShippedProductId": 884,
"ProductId": 884
}, {
"ShippedProductId": 951,
"ProductId": 2564
}
]
}, {
"OrderId": "5681",
"OrderManifests": [{
"ShippedProductId": 198,
"ProductId": 4681
}, {
"ShippedProductId": 8188,
"ProductId": 8188
}, {
"ShippedProductId": 144,
"ProductId": 8487
}
]
}
]
}'
In the end, I need to know if any of the ShippedProductId values match their corresponding ProductId (in the same JSON object).
I started in by trying to get a list of all the OrderManifests. But while this will get me the array of orders:
-- Returns the whole Orders array as a JSON fragment.
SELECT JSON_QUERY(@myJson, '$.Orders')
I can't seem to find a way to get a list of all the OrderManifests across all the entries in the Orders array. This does not work:
-- Asker's attempt: Orders is an array, and this path does not index into it.
SELECT JSON_QUERY(@myJson, '$.Orders.OrderManifests')
Is there a way to do a Select Many kind of query to get all the OrderManifests in the Orders array?
Use OPENJSON and CROSS APPLY to drill down into your objects.
This should do it for you:
-- Flatten customer -> orders -> manifests, one OPENJSON per nesting level,
-- and keep only manifests whose shipped product is the ordered product.
SELECT
    j.CustomerId,
    o.OrderId,
    m.ShippedProductId,
    m.ProductId
FROM OPENJSON(@myJson)
    WITH (
        CustomerId NVARCHAR(1000),
        Orders     NVARCHAR(MAX) AS JSON   -- keep the array as JSON for the next level
    ) AS j
CROSS APPLY OPENJSON(j.Orders)
    WITH (
        OrderId        NVARCHAR(1000),
        OrderManifests NVARCHAR(MAX) AS JSON
    ) AS o
CROSS APPLY OPENJSON(o.OrderManifests)
    WITH (
        ShippedProductId INT,
        ProductId        INT
    ) AS m
WHERE m.ShippedProductId = m.ProductId;
This query returns:
CustomerId | OrderId | ShippedProductId | ProductId
------------+-----------+-------------------+-------------
123456 | 852 | 884 | 884
------------+-----------+-------------------+-------------
123456 | 5681 | 8188 | 8188

Recursively generate JSON tree from hierarchical table in Postgres and jOOQ

I have a hierarchical table in Postgres database, e.g. category. The structure is simple like this:
id
parent_id
name
1
null
A
2
null
B
3
1
A1
4
3
A1a
5
3
A1b
6
2
B1
7
2
B2
What I need to get from this table is a recursive, arbitrarily deep tree structure like this:
[
{
"id": 1,
"name": "A",
"children": [
{
"id": 3,
"name": "A1",
"children": [
{
"id": 4,
"name": "A1a",
"children": []
},
{
"id": 5,
"name": "A1b",
"children": []
}
]
}
]
},
{
"id": 2,
"name": "B",
"children": [
{
"id": 6,
"name": "B1",
"children": []
},
{
"id": 7,
"name": "B2",
"children": []
}
]
}
]
Is it possible with unknown depth using combination of WITH RECURSIVE and json_build_array() or some other solution?
I found an answer to this question in this excellent blog post here, as I was wondering how to generalise over this problem in jOOQ. It would be useful if jOOQ could materialise arbitrary recursive object trees in a generic way: https://github.com/jOOQ/jOOQ/issues/12341
In the meantime, use this SQL statement, which was inspired by the above blog post, with a few modifications. Translate to jOOQ if you must, though you might as well store this as a view:
-- Builds a nested JSON tree from an adjacency list (id, parent_id, name).
WITH RECURSIVE
-- d1: inline sample data standing in for the category table.
d1 (id, parent_id, name) as (
values
(1, null, 'A'),
(2, null, 'B'),
(3, 1, 'A1'),
(4, 3, 'A1a'),
(5, 3, 'A1b'),
(6, 2, 'B1'),
(7, 2, 'B2')
),
-- d2: walks down from the roots (parent_id IS NULL), tagging each row with
-- its depth; roots are level 0.
d2 AS (
SELECT d1.*, 0 AS level
FROM d1
WHERE parent_id IS NULL
UNION ALL
SELECT d1.*, d2.level + 1
FROM d1
JOIN d2 ON d2.id = d1.parent_id
),
-- d3: builds the tree bottom-up. The non-recursive term takes the rows at the
-- maximum level and gives them an empty children array.
d3 AS (
SELECT d2.*, jsonb_build_array() children
FROM d2
WHERE level = (SELECT max(level) FROM d2)
UNION (
-- Recursive step: attach each already-built d3 row to its parent in d2,
-- stripping the helper keys 'level' and 'parent_id' from the child JSON,
-- and aggregate the children per parent.
SELECT (branch_parent).*, jsonb_agg(branch_child)
FROM (
SELECT
branch_parent,
to_jsonb(branch_child) - 'level' - 'parent_id' AS branch_child
FROM d2 branch_parent
JOIN d3 branch_child ON branch_child.parent_id = branch_parent.id
) branch
GROUP BY branch.branch_parent
UNION
-- Rows that are nobody's parent also get an empty children array.
-- The IS NOT NULL filter keeps NOT IN from being defeated by NULL parent_ids.
SELECT d2.*, jsonb_build_array()
FROM d2
WHERE d2.id NOT IN (
SELECT parent_id FROM d2 WHERE parent_id IS NOT NULL
)
)
)
-- Final result: the level-0 rows, without the helper keys, aggregated into
-- one pretty-printed JSON array.
SELECT jsonb_pretty(jsonb_agg(to_jsonb(d3) - 'level' - 'parent_id')) AS tree
FROM d3
WHERE level = 0;
dbfiddle. Again, read the linked blog post for an explanation of how this works