Snowflake - Object Construct - Json Value - json

I currently have a table in snowflake as below
Address Zip State
123 St. 94143 CA
3432 St. 93059 TX
I want to create a JSON block as below:
{
"Address" : 123 St.,
"Zip" : 93059,
"State" : CA
},
{
"Address" : 3432 St.,
"Zip" : 94143,
"State" : TX
}
I have the code below:
-- Build one JSON object per row from the three source columns.
SELECT
    OBJECT_CONSTRUCT(
        'Address', src.Address,
        'Zip',     src.Zip,
        'State',   src.State
    ) AS json_value
FROM example_table_above AS src;
The above sometimes returns each key/value pair as its own record instead of one object per row.
For example, it returns just this, with no real pattern or separation by record:
"JSON_VALUE"
{"Address":adsf}
{"Address":"Triang St"}
{"Zip":949}
{"State":CA}

This is a subtle point, but the JSON block you want is not valid. In order to make it valid, it needs to be wrapped with an outer array:
[
{
"Address": "123 St.",
"State": "CA",
"Zip": 94143
},
{
"Address": "3432 St.",
"State": "TX",
"Zip": 93059
}
]
To get Snowflake to construct a JSON document like that, simply wrap the OBJECT_CONSTRUCT function with ARRAY_AGG like this:
-- Collect the per-row objects into one JSON array.
SELECT
    ARRAY_AGG(
        OBJECT_CONSTRUCT('Address', address, 'Zip', zip, 'State', state)
    )
FROM T1;

Can you provide a bit more detail on what you need to do with this large JSON object(s)? Are you looking to output it from Snowflake as result set or to a file, or do you want to insert it back into another table in Snowflake?
With the unconstrained array_agg, all qualifying rows from your query are grouped into a single object, and if your table exceeds the 16777216-byte array/object limit, as in your case, you get the array size error you're seeing.
Array_Agg and Object_Agg are aggregate functions, so if there is a grouping you can use to keep each Array/Object within the size limit you could use that?
e.g. grouping by STATE reduces each JSON object by ~1/50th in size.
-- One array of address objects per state instead of one array for the
-- whole table.
SELECT
    src.State,
    ARRAY_AGG(
        OBJECT_CONSTRUCT(
            'Address', src.Address,
            'Zip',     src.Zip,
            'State',   src.State
        )
    ) AS json_value
FROM T AS src
GROUP BY src.State;
How to unload single large JSON Object > 16777216 Bytes
Create some chunky test data
-- Generate 400000 rows of random address / zip / state test data.
CREATE OR REPLACE TABLE T AS
SELECT
    -- "<number 0-998> <random upper-case string, 0-19 chars> <street suffix>"
    (
        MOD(ABS(RANDOM()), 999)
        || ' '
        || UPPER(RANDSTR(ABS(MOD(RANDOM(), 20)), RANDOM()))
        || ' '
        || ARRAY_CONSTRUCT('Rd.', 'St.', 'Av.', 'Way', 'Dr.', 'Cl.')[MOD(ABS(RANDOM()), 6)]
    ) AS ADDRESS,
    -- last five characters of a random integer
    RIGHT(RANDOM(), 5) AS ZIP,
    -- one of the 59 two-letter codes, picked at random
    ARRAY_CONSTRUCT(
        'AL', 'AK', 'AS', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'DC',
        'FM', 'FL', 'GA', 'GU', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS',
        'KY', 'LA', 'ME', 'MH', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO',
        'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'MP',
        'OH', 'OK', 'OR', 'PW', 'PA', 'PR', 'RI', 'SC', 'SD', 'TN',
        'TX', 'UT', 'VT', 'VI', 'VA', 'WA', 'WV', 'WI', 'WY'
    )[MOD(ABS(RANDOM()), 59)]::VARCHAR AS STATE
FROM TABLE(GENERATOR(ROWCOUNT => 400000));
Create a view to construct JSON data as multi-line text
-- Renders table T as the lines of one JSON array: an opening '[' (RID 0),
-- one object per row (RID 1..n, every row after the first prefixed with a
-- comma) and a closing ']' (RID n+1).
CREATE OR REPLACE VIEW V AS
WITH
    -- total row count, used to number the closing bracket line
    total AS (
        SELECT COUNT(*) AS row_count
        FROM T
    ),
    -- each table row as JSON text with a sequence number
    numbered AS (
        SELECT
            ROW_NUMBER() OVER (ORDER BY NULL) AS RID,
            (OBJECT_CONSTRUCT(*))::VARCHAR    AS row_obj
        FROM T
    ),
    lines AS (
        SELECT 0 AS RID, '['::VARCHAR AS JSON_LINE
        UNION ALL
        SELECT RID, IFF(RID = 1, row_obj, ',' || row_obj) AS JSON_LINE
        FROM numbered
        UNION ALL
        SELECT row_count + 1 AS RID, ']'::VARCHAR AS JSON_LINE
        FROM total
    )
SELECT RID, JSON_LINE
FROM lines
ORDER BY RID;
Check the view output.
Select the 10 first and last records, including the array '[' ']' rows
-- Show only the head and tail of the view (everything outside 11..399989).
SELECT RID, JSON_LINE
FROM V
WHERE NOT (RID > 10 AND RID < 399990);
Create CSV stage to unload into
Note: file_format options to ensure escape characters aren't introduced
-- Internal stage whose file format is plain, uncompressed CSV.
CREATE OR REPLACE STAGE T_STAGE_CSV
    FILE_FORMAT = (
        TYPE = 'CSV'
        SKIP_HEADER = 1
        ESCAPE = ' '
        COMPRESSION = NONE
    );
Copy data from the View into the Stage.
-- Unload the JSON lines from view V into the stage.
-- Stage references are written with a leading '@'.
COPY INTO @TEST.TEST.T_STAGE_CSV
FROM (SELECT JSON_LINE FROM V);
Check stage as output may get split across more than one file, so you will need to concatenate the files together externally of Snowflake!
-- List the files written to the stage (stage references start with '@').
LIST @TEST.TEST.T_STAGE_CSV;
Pull files to your client.
-- Download the staged files to the local directory (stage references start with '@').
GET @TEST.TEST.T_STAGE_CSV/ file:////Users/fieldy/Documents/_sql_code/data;
Concatenate the files together externally
e.g.
# Concatenate every downloaded file matching data* into a single output file.
cat /Users/fieldy/Documents/_sql_code/data/data* > /Users/fieldy/Documents/_sql_code/datadata.json

Related

How do I create a nested json object from xml data in oracle

I am trying to parse xml in my oracle DB to return individual json object. I have different xpath. I would like to extract data from oracle xml in a nested json format. I have multiple applicants in my xml data, I would like to extract the data as a nested json format to know what data belong to an applicant
Result format example
{
"loanApplication": {
"applicantGroup": [
{
"applicant": {
"birthDate": "1-1-1",
"maritalStatusDd": "3",
"languagePreferenceDd": "0",
"assetTypeDd": [1, 6],
"asset": [1500, 60000],
"Liabilities": [500, 600, 400],
"sumOfAsset": 61500,
"sumOfliabilities": 1500
}
},
{
"applicant": {
"birthDate": "2-2-2",
"maritalStatusDd": "0",
"languagePreferenceDd": "0",
"assetTypeDd": [2, 6, 9],
"asset": [5000, 20000, 100],
"Liabilities": [500, 600, 400],
"sumOfAsset": 25100,
"sumOfliabilities": 1500
}
}
]
}
}
sample data from xml
WITH t( xml ) AS
(
SELECT XMLType('<loanApplication xmlns="http://www.abcdef.com/Schema/FCX/1"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<applicantGroup>
<applicantGroupTypeDd>0</applicantGroupTypeDd>
<applicant>
<asset>
<assetDescription>neweg</assetDescription>
<assetTypeDd>1</assetTypeDd>
<assetValue>1500.0</assetValue>
</asset>
<asset>
<assetDescription>RayM</assetDescription>
<assetTypeDd>6</assetTypeDd>
<assetValue>60000</assetValue>
</asset>
<liability>
<broker>
<liabilityAmount>9999999.8</liabilityAmount>
<liabilityDescription>CI</liabilityDescription>
<liabilityTypeDd>2</liabilityTypeDd>
</broker>
</liability>
<employmentHistory>
<income>
<annualIncomeAmount>150000.0</annualIncomeAmount>
<incomeAmount>150000.0</incomeAmount>
<incomeDescription>income description</incomeDescription>
<incomePeriodDd>0</incomePeriodDd>
<incomeTypeDd>6</incomeTypeDd>
</income>
</employmentHistory>
</applicant>
</applicantGroup>
<applicantGroup>
<applicantGroupTypeDd>1</applicantGroupTypeDd>
<applicant>
<asset>
<assetDescription>neweg</assetDescription>
<assetTypeDd>2</assetTypeDd>
<assetValue>5000.0</assetValue>
</asset>
<asset>
<assetDescription>Bay</assetDescription>
<assetTypeDd>6</assetTypeDd>
<assetValue>20000</assetValue>
</asset>
<asset>
<assetDescription>TDC</assetDescription>
<assetTypeDd>9</assetTypeDd>
<assetValue>100</assetValue>
</asset>
<liability>
<broker>
<liabilityAmount>9999999.8</liabilityAmount>
<liabilityDescription>CI</liabilityDescription>
<liabilityTypeDd>2</liabilityTypeDd>
</broker>
</liability>
<liability>
<broker>
<liabilityAmount>9999999.8</liabilityAmount>
<liabilityDescription>CI</liabilityDescription>
<liabilityTypeDd>2</liabilityTypeDd>
</broker>
</liability>
<employmentHistory>
<income>
<annualIncomeAmount>150000.0</annualIncomeAmount>
<incomeAmount>150000.0</incomeAmount>
<incomeDescription>income description</incomeDescription>
<incomePeriodDd>0</incomePeriodDd>
<incomeTypeDd>6</incomeTypeDd>
</income>
</employmentHistory>
</applicant>
</applicantGroup>
</loanApplication>')
FROM dual
)
-- Asker's attempt: emits one JSON object per <asset> element (the driving
-- XMLTABLE "y" below), each carrying that asset's value plus two arrays.
SELECT JSON_OBJECT (
KEY 'Assets' value y.Assets
-- The XPath starts at /loanApplication, so this array collects assetTypeDd
-- from every applicant in the document, not only the current one.
,KEY 'assetType' VALUE (SELECT JSON_ARRAYAGG( val) FROM
XMLTABLE(XMLNAMESPACES(DEFAULT 'http://www.abcdef.com/Schema/FCX/1'),'/loanApplication/applicantGroup/applicant/asset/assetTypeDd'
PASSING (EXTRACT(xml, '/loanApplication', 'xmlns="http://www.abcdef.com/Schema/FCX/1"') ) COLUMNS val INT PATH './text()') )
-- NOTE(review): the sample XML's <broker> elements contain liabilityAmount,
-- liabilityDescription and liabilityTypeDd but no BorrowerLiabilityType, so
-- this path presumably matches nothing for that sample -- confirm the
-- intended element name.
,KEY 'liability' VALUE (SELECT JSON_ARRAYAGG( val) FROM
XMLTABLE(XMLNAMESPACES(DEFAULT 'http://www.abcdef.com/Schema/FCX/1'),'/loanApplication/applicantGroup/applicant/liability/broker/BorrowerLiabilityType'
PASSING (EXTRACT(xml, '/loanApplication', 'xmlns="http://www.abcdef.com/Schema/FCX/1"') ) COLUMNS val INT PATH './text()') )
) applicant
FROM t,
-- One row per asset element; Assets is that asset's assetValue read as INT.
XMLTABLE(XMLNAMESPACES(DEFAULT 'http://www.abcdef.com/Schema/FCX/1'), '/loanApplication/applicantGroup/applicant/asset'
PASSING xml
COLUMNS
Assets INT PATH 'assetValue') y
You can start by grouping by applicantGroupTypeDd in order to separate the innermost objects (applicant), and then apply the suitable function (JSON_OBJECT or JSON_ARRAYAGG) at each level up to the topmost one, such as
-- Builds {"loanApplication": {"applicantGroup": [ {"applicant": {...}}, ... ]}}.
-- Rows are grouped per applicantGroupTypeDd; the inner aggregates build each
-- applicant object and the outer JSON_ARRAYAGG collects them into one array.
WITH t(xml) AS (
    SELECT XMLType(<yourXMLvalue>)
    FROM dual
)
SELECT
    JSON_OBJECT(
        KEY 'loanApplication' VALUE
        JSON_OBJECT(
            KEY 'applicantGroup' VALUE
            JSON_ARRAYAGG(
                JSON_OBJECT(
                    KEY 'applicant' VALUE
                    JSON_OBJECT(
                        KEY 'assetTypeDd' VALUE JSON_ARRAYAGG(assetTypeDd),
                        KEY 'asset'       VALUE JSON_ARRAYAGG(assetValue),
                        KEY 'sumOfAsset'  VALUE SUM(assetValue)
                    )
                )
            )
        )
    )
FROM
    t,
    -- one row per applicantGroup, carrying its asset elements as XML
    XMLTABLE(
        XMLNAMESPACES(DEFAULT 'http://www.abcdef.com/Schema/FCX/1'),
        '/loanApplication/applicantGroup'
        PASSING t.xml
        COLUMNS
            applicantGroupTypeDd INT     PATH 'applicantGroupTypeDd',
            asset                XMLTYPE PATH 'applicant/asset'
    ) grp,
    -- one row per asset of that group
    XMLTABLE(
        XMLNAMESPACES(DEFAULT 'http://www.abcdef.com/Schema/FCX/1'),
        'asset'
        PASSING grp.asset
        COLUMNS
            assetTypeDd INT PATH 'assetTypeDd',
            assetValue  INT PATH 'assetValue'
    ) ast
GROUP BY applicantGroupTypeDd
Demo

MySQL JSON Array Column Count By Value

For example: create a JSON Array:
-- Sample array of objects with dynamic keys.
-- All string literals use single quotes: double-quoted text is parsed as an
-- identifier when the ANSI_QUOTES SQL mode is enabled.
SELECT JSON_ARRAY(
    JSON_OBJECT('check1', 'false', 'check2', 'false'),
    JSON_OBJECT('check2', 'true',  'check3', 'true'),
    JSON_OBJECT('check3', 'false', 'check4', 'false')
) AS c1;
[{"check1": "false", "check2": "false"}, {"check2": "true", "check3": "true"}, {"check3": "false", "check4": "false"}]
The key is dynamic in each object. How to count how many value is false for each object once only?
I am using JSON_SEARCH(array, 'all', 'false'); It returns
["$[0].check1", "$[0].check2", "$[2].check3", "$[2].check4"]
I would like to get something like
[$[0], $[1]]
to get length is 2.
Thanks.
-- Counts how many array elements contain the value 'false' at least once.
-- The original ended the statement with ';' and then carried a dangling
-- GROUP BY line; that line is removed, since grouping by the element would
-- also have produced one row per element instead of a single total.
WITH
-- source data
cte1 AS (
    SELECT JSON_ARRAY(
        JSON_OBJECT('check1', 'false', 'check2', 'false'),
        JSON_OBJECT('check2', 'true',  'check3', 'true'),
        JSON_OBJECT('check3', 'false', 'check4', 'false')
    ) AS c1
),
-- paths of every match, e.g. ["$[0].check1", "$[0].check2", ...]
cte2 AS (
    SELECT JSON_SEARCH(c1, 'all', 'false') AS c1
    FROM cte1
)
-- keep only the "$[n]" prefix of each path and count the distinct prefixes
SELECT COUNT(DISTINCT SUBSTRING_INDEX(jsontable.value, '.', 1)) AS cnt
FROM cte2
CROSS JOIN JSON_TABLE(
    CAST(cte2.c1 AS JSON),
    '$[*]' COLUMNS (value VARCHAR(254) PATH '$')
) AS jsontable;

how to maintain order of elements using snowflake object_construct() function instead of sorting by the keys?

Following snowflake query returns the JSON structure but output is sorted by the keys. How not to sort by the keys but retains the order? Is there any parameter setting that needs to be set?
-- Build an object from four literal key/value pairs.
SELECT
    OBJECT_CONSTRUCT(
        'entity',          'XYZ',
        'allowed',         'Yes',
        'currency',        'USD',
        'statement_month', 'July, 2020'
    )
Output: --it sorts by the keys
{
"allowed": "Yes",
"currency": "USD",
"entity": "XYZ",
"statement_month": "July, 2020"
}
Expected Output: --same order as specified
{
"entity": "XYZ",
"allowed": "Yes",
"currency": "USD",
"statement_month": "July, 2020"
}
JSON is an unordered collection of name and values. Order cannot be guaranteed in JSON.
The constructed object does not necessarily preserve the original order of the key-value pairs.
You can do it like as below
-- Read the keys back as separate columns, which fixes the column order
-- regardless of how the object itself orders its keys.
WITH src AS (
    SELECT
        OBJECT_CONSTRUCT(
            'entity',          'XYZ',
            'allowed',         'Yes',
            'currency',        'USD',
            'statement_month', 'July, 2020'
        ) AS mytable
)
SELECT
    mytable:entity::string          AS entity,
    mytable:allowed::string         AS allowed,
    mytable:currency::string        AS currency,
    mytable:statement_month::string AS statement_month
FROM src;
Unfortunately, no
Usage notes:
https://docs.snowflake.com/en/sql-reference/functions/object_construct.html#usage-notes
The constructed object does not necessarily preserve the original order of the key-value pairs.
same for PARSE_JSON Usage notes:
https://docs.snowflake.com/en/sql-reference/functions/parse_json.html#usage-notes
The order of the key-value pairs in the string produced by TO_JSON is not predictable.
The order was found to be maintained when using object_construct(*):
-- Build the object from a one-row relation via OBJECT_CONSTRUCT(*).
WITH base AS (
    SELECT
        'XYZ'        AS "entity",
        'Yes'        AS "allowed",
        'USD'        AS "currency",
        'July, 2020' AS "statement_month"
)
SELECT OBJECT_CONSTRUCT(*)
FROM base;

Correctly return column as JSON Array in MySQL after using CONCAT, GROUP_CONCAT and JSON_OBJECT

I'm using MySQL in a Node.JS API, so I need to get data from the database as JSON objects/arrays.
I'm trying to get a JSON Array nested within the result JSON as one of the values, so this is my current query:
-- One row per location. The nested SELECT assembles the location's air
-- conditioners into a JSON-looking array by string concatenation, so the
-- column is returned as a string (the behaviour the question is about).
-- The stray trailing backtick after the table alias has been removed; it
-- made the statement invalid.
SELECT
    l.id AS id, l.description AS description, l.parent AS parent,
    (
        SELECT CONCAT(
            '[',
            GROUP_CONCAT(
                JSON_OBJECT(
                    'id', a.id, 'description', a.description,
                    'ip', a.ip, 'lastmovementdetected', a.lastmovementdetected
                )
            ),
            ']'
        )
        FROM airconditioners AS a WHERE location = l.id
    ) AS airconditioners
FROM locations AS l;
However, this is the query result (actual output is an array of these JSON objects):
{
"id": 1,
"description": "Meu quarto",
"parent": 0,
"airconditioners": "[{\"id\": 1, \"ip\": \"192.168.137.96\", \"description\": \"Ar-condicionado\", \"lastmovementdetected\": null},{\"id\": 2, \"ip\": \"192.168.0.1\", \"description\": \"Ar-condicionado\", \"lastmovementdetected\": null},{\"id\": 3, \"ip\": \"192.168.0.1\", \"description\": \"Ar-condicionado\", \"lastmovementdetected\": null}]"
}
SQL is returning the JSON Array as a String and it's also escaping the double quotes from within the JSON.
This is the expected return:
"id": 1,
"description": "Meu quarto",
"parent": 0,
"airconditioners": [
{
"id":1,
"ip":"192.168.137.96",
"description":"Ar-condicionado",
"lastmovementdetected":null
},
{
"id":2,
"ip":"192.168.0.1",
"description":"Ar-condicionado",
"lastmovementdetected":null
},
{
"id":3,
"ip":"192.168.0.1",
"description":"Ar-condicionado",
"lastmovementdetected":null
}
]
Can this be done using a SQL query only? Or I'll have to treat the result before sending the response on the API?
I've tried surrounding the column with a CAST((SELECT...) AS JSON) AS airconditioners and also putting JSON_UNQUOTE() in many places, with no success whatsoever.
EDIT
I couldn't get to a conclusion whether MySQL is compatible with what I want or not. But, for instance, I'm using the following Javascript code to work around it:
// Turn each row's `airconditioners` JSON string into a real array in place.
for (const key of Object.keys(result)) {
    const row = result[key];
    // Loose equality on purpose: a null or undefined field becomes an empty array.
    row.airconditioners = row.airconditioners == null
        ? []
        : JSON.parse(row.airconditioners);
}
use JSON_EXTRACT then get result as you expect
-- One row per location, with its air conditioners as a JSON-typed array.
--  * JSON_ARRAYAGG builds the array natively. The CONCAT/GROUP_CONCAT form
--    is silently truncated at group_concat_max_len, which produces invalid
--    JSON for longer lists.
--  * JSON_ARRAYAGG yields NULL when a location has no air conditioners, so
--    COALESCE substitutes an empty array.
--  * The stray trailing backtick after the table alias has been removed.
-- Requires a server version that provides JSON_ARRAYAGG.
SELECT
    l.id          AS id,
    l.description AS description,
    l.parent      AS parent,
    (
        SELECT COALESCE(
            JSON_ARRAYAGG(
                JSON_OBJECT(
                    'id', a.id, 'description', a.description,
                    'ip', a.ip, 'lastmovementdetected', a.lastmovementdetected
                )
            ),
            JSON_ARRAY()
        )
        FROM airconditioners AS a
        -- NOTE(review): assumes `location` is a column of airconditioners -- confirm
        WHERE a.location = l.id
    ) AS airconditioners
FROM locations AS l;

Generate nested JSON with counting in PostgreSQL

I created a simple database (in latest stable postgresql), like this:
-- Four tables: a -> b -> c (self-referencing through parent_c_id) -> d.
CREATE TABLE table_a (
    id   int PRIMARY KEY NOT NULL,
    name char(10)
);
CREATE TABLE table_b (
    id          int PRIMARY KEY NOT NULL,
    name        char(10),
    parent_a_id int
);
CREATE TABLE table_c (
    id          int PRIMARY KEY NOT NULL,
    name        char(10),
    parent_a_id int,
    parent_b_id int,
    parent_c_id int,
    c_number    int
);
CREATE TABLE table_d (
    id          int PRIMARY KEY NOT NULL,
    name        char(10),
    parent_c_id int,
    d_number    int
);
with some example data like this:
-- Sample data. String values are single-quoted: in PostgreSQL double quotes
-- delimit identifiers, so values(1, "a") fails with 'column "a" does not exist'.
INSERT INTO table_a (id, name) VALUES (1, 'a');
INSERT INTO table_b (id, name, parent_a_id) VALUES (1, 'b', 1);
INSERT INTO table_c (id, name, parent_a_id, parent_b_id, parent_c_id, c_number) VALUES
    (1, 'c1',     1, 1, NULL, 1),
    (2, 'c1.1',   1, 1, 1,    5),
    (3, 'c1.1.1', 1, 1, 2,    2),
    (4, 'c1.2',   1, 1, 1,    8),
    (5, 'c2',     1, 1, NULL, 4);
INSERT INTO table_d (id, name, parent_c_id, d_number) VALUES
    (1, 'c1_d1',     1, 5),
    (2, 'c1.1_d1',   2, 6),
    (3, 'c1.1_d2',   2, 1),
    (4, 'c1.1.1_d1', 3, 2),
    (5, 'c2_d1',     5, 4),
    (6, 'c2_d2',     5, 3),
    (7, 'c2_d3',     5, 7);
Now I want to generate json like this: http://codebeautify.org/jsonviewer/cb9bc2a1
With relation rules:
table_a has many table_b
table_b has one table_a and has many table_c (select only where table_c_id is null)
table_c has one table_a and has one table_b and has many table_c (children) and has one table_c (parent)
and counting rules:
table_c has d_numbers_sum (sum of d_number in table_d and sum of d_numbers_sum in table_c relation )
table_b has d_numbers_sum (sum of d_numbers_sum in table_c relation )
table_a has d_numbers_sum (sum of d_numbers_sum in table_b relation )
table_c has real_c_number (if has children_c then sum of real_c_number in table_c relation else c_number)
table_b has real_c_number_sum (sum of real_c_number in table_c relation )
table_a has real_c_number_sum (sum of real_c_number_sum in table_b relation )
Is it possible to generate that JSON with that rules in pure postgresql code?
Is it possible to create a shortcut (for example a view or function) for this, like:
select * from my_shourtcat where id = ?;
or without an id (generating a JSON array):
select * from my_shourtcat;
Can you show me an example with a description (how to generate the nested JSON and do the counting), so I could use similar, but more complex, relations in my app?
EDIT:
I wrote something interesting, but it's not 100% nested hash - here all leaf has own tree and result is an array of these trees I need to deep merge that array to create array of unique trees:
-- Asker's attempt: start from the leaves of table_c and walk upwards,
-- wrapping the rows gathered so far as "children" of their parent row.
with recursive j as (
-- Non-recursive term: rows that no other row names as parent_c_id (leaves).
SELECT c.*, json '[]' children -- at max level, there are only leaves
FROM test.table_c c
WHERE (select count(1) from test.table_c where parent_c_id = c.id) = 0
UNION ALL
-- a little hack, because PostgreSQL doesn't like aggregated recursive terms
-- (the join against j happens in subquery v; the aggregation is applied
-- to v's output, grouped by the whole parent row)
SELECT (c).*, array_to_json(array_agg(j)) children
FROM (
SELECT c, j
FROM j
JOIN test.table_c c ON j.parent_c_id = c.id
) v
GROUP BY v.c
)
-- Keep only the rows without a parent and aggregate them into one JSON array.
SELECT json_agg(row_to_json(j)) json_tree FROM j WHERE parent_c_id is null;
The answer consists of two parts. First to rig up a basic json structure, and then to build up nested json objects from self-referencing column in table_c.
UPDATE: I rewrote example/part 2 as a pure sql solution, and added that code as example 3.
I also added a plsql function that encapsulates almost all code, that takes the name of a view as input to produce the nested json. See example 4.
All code requires Postgres 9.5.
The first code sets up a json object with most joins except for the nested children in table_c. The counting part is mostly left out.
In the second code example I wrote a "merge" function in pure plpgsql, which should solve the nested json problem. This solution requires only PG9.5 and no extensions, since plpgsql is built in.
As an alternative, I found one other solution, which requires plv8 to be installed and does the deep merge in JavaScript.
Creating nested json is not trivial to do in pure sql, where the challenge is to merge the separate json trees we can get from a recursive CTE.
Code example 1
Creating the query as a view makes it easy to reuse the query to either return a json array of all objects from table_a, or return only one object with a given id.
I made some small changes to the data model and data. The code for a self-contained example follows:
-- TABLES
DROP SCHEMA IF EXISTS TEST CASCADE;
CREATE SCHEMA test;

-- The name columns use text instead of char(10) to avoid blank padding.
-- For most databases text is the best choice.
-- Source: https://www.depesz.com/2010/03/02/charx-vs-varcharx-vs-varchar-vs-text/
CREATE TABLE test.table_a (
    id   int PRIMARY KEY NOT NULL,
    name text
);
CREATE TABLE test.table_b (
    id          int PRIMARY KEY NOT NULL,
    name        text,
    parent_a_id int
);
CREATE TABLE test.table_c (
    id          int PRIMARY KEY NOT NULL,
    name        text,
    parent_a_id int,
    parent_b_id int,
    parent_c_id int,
    c_number    int
);
CREATE TABLE test.table_d (
    id          int PRIMARY KEY NOT NULL,
    name        text,
    parent_c_id int,
    d_number    int
);

-- DATA
INSERT INTO test.table_a (id, name) VALUES (1, 'a');
-- Changed: parent_a_id=1 (instead of null)
INSERT INTO test.table_b (id, name, parent_a_id) VALUES (1, 'b', 1);
INSERT INTO test.table_c (id, name, parent_a_id, parent_b_id, parent_c_id, c_number) VALUES
    (1, 'c1',     1, 1, NULL, 1),
    (2, 'c1.1',   1, 1, 1,    5),
    (3, 'c1.1.1', 1, 1, 2,    2),
    (4, 'c1.2',   1, 1, 1,    8),
    (5, 'c2',     1, 1, NULL, 4);
INSERT INTO test.table_d (id, name, parent_c_id, d_number) VALUES
    (1, 'c1_d1',     1, 5),
    (2, 'c1.1_d1',   2, 6),
    (3, 'c1.1_d2',   2, 1),
    (4, 'c1.1.1_d1', 3, 2),
    (5, 'c2_d1',     5, 4),
    (6, 'c2_d2',     5, 3),
    (7, 'c2_d3',     5, 7);
-- One row per table_a record: its id plus a nested JSON document
-- a -> children_b -> children_c -> children_d.
-- Most *_sum fields are placeholders (null). The root d_numbers_sum is the
-- sum over the whole of test.table_d, not only the rows below this root.
-- children_c lists every table_c row attached to the table_b row, including
-- rows that themselves have a parent_c_id.
-- The statement is terminated with ';' so the statements after it parse.
CREATE OR REPLACE VIEW json_objects AS
-- Root object
SELECT ta.id, json_build_object(
    'id', ta.id,
    'name', ta.name,
    'd_numbers_sum', (SELECT sum(d_number) FROM test.table_d),
    'real_c_number_sum', null,
    'children_b', (
        -- table_b
        SELECT json_agg(json_build_object(
            'id', tb.id,
            'name', tb.name,
            'd_numbers_sum', null,
            'real_c_number_sum', null,
            'children_c', (
                -- table_c
                SELECT json_agg(json_build_object(
                    'id', tc.id,
                    'name', tc.name,
                    'd_numbers_sum', null,
                    'real_c_number_sum', null,
                    'children_d', (
                        -- table_d
                        SELECT json_agg(json_build_object(
                            'id', td.id,
                            'name', td.name,
                            'd_numbers_sum', null,
                            'real_c_number_sum', null
                        ))
                        FROM test.table_d td
                        WHERE td.parent_c_id = tc.id
                    )
                ))
                FROM test.table_c tc
                WHERE tc.parent_b_id = tb.id
            )
        ))
        FROM test.table_b tb
        WHERE tb.parent_a_id = ta.id
    )
) AS object
FROM test.table_a ta;
-- All root objects as a single JSON array
SELECT json_agg(jo.object)
FROM json_objects AS jo;
-- Only the root object with the given id
SELECT jo.object
FROM json_objects AS jo
WHERE jo.id = 1;
Code example 2
Here we map the data from table_c so we can insert it directly into a recursive CTE from the documentation, for readability and educational purposes.
Then prepares the data as input to the "merge" function. For simplicity I just aggregated the rows into a big json object. The performance should be ok.
We can choose to get the parent object, or only its children as an (json)array in the third function parameter.
Which node to get the children for is specified in the last query in the last lines of the example. This query can be used all places where we need the children for a table_c node.
I did test this on a more complex example and it looks like I sorted out most rough edges.
The three parts of the CTE (graph, search_graph and filtered_graph) can be refactored into one for performance, since CTE's are optimization fences for the database planner, but I kept this version for readability and debugging.
This example utilizes jsonb instead of json, see the documentation.
The reason for using jsonb here is not having to reparse the json each time we manipulate it in the function. When the function is done, the result is casted back to json so it can be inserted directly into the code in example 1.
--DROP VIEW test.tree_path_list_v CASCADE;
-- For every table_c node, lists the node together with each ancestor it can
-- reach by following parent_c_id. Columns:
--   id        node where the walk started (path[1])
--   parent_id that node's direct parent (path[2]; NULL when depth is 1)
--   level     number of nodes on the path from id up to start_id
--   start_id  node at which the walk currently stands
--   name      name of node id
--   path      ids visited so far, starting with id
--   json      node id's attributes as jsonb
CREATE OR REPLACE VIEW test.tree_path_list_v AS
WITH RECURSIVE
-- Source rows renamed to the (id, link) shape used by the recursive query.
graph AS (
    SELECT
        tc.id          AS id,
        tc.parent_c_id AS link,
        tc.name,
        jsonb_build_object(
            'id', tc.id,
            'name', tc.name,
            'parent_c_id', tc.parent_c_id,
            'parent_a_id', tc.parent_a_id,
            'parent_b_id', tc.parent_b_id
        ) AS data
    FROM test.table_c AS tc
),
-- Recursive walk adapted from the PostgreSQL documentation:
-- http://www.postgresql.org/docs/current/static/queries-with.html
search_graph(id, link, data, depth, path, cycle) AS (
    SELECT g.id, g.link, g.data, 1, ARRAY[g.id], false
    FROM graph AS g
    UNION ALL
    SELECT
        g.id, g.link, g.data,
        sg.depth + 1,
        sg.path || g.id,
        g.id = ANY(sg.path)
    FROM search_graph AS sg
    INNER JOIN graph AS g ON g.id = sg.link
    WHERE NOT sg.cycle
),
-- Shape the rows for use as input to test.create_jsonb_tree.
filtered_graph AS (
    SELECT
        sg.path[1]    AS id,
        sg.path[2]    AS parent_id,
        sg.depth      AS level,
        sg.id         AS start_id,
        d.name,
        sg.path,
        d.data::jsonb AS json
    FROM search_graph AS sg
    INNER JOIN graph AS d ON d.id = sg.path[1]
    ORDER BY level DESC
)
SELECT id, parent_id, level, start_id, name, path, json
FROM filtered_graph
;
-- Returns a jsonb object with all children merged into their parents.
-- Parameter 1 "_tree_path_list": jsonb array of rows from the view
--     "test.tree_path_list_v" (keys id, parent_id, level, json). Rows must be
--     ordered deepest level first, because a node is moved under its parent
--     at the moment its own row is processed.
-- Parameter 2 "_children_keyname": key under which children arrays are stored.
-- Parameter 3 "_get_only_children": when true, return only the children array
--     of the level-1 node instead of the node itself.
-- Returns an empty array when the input has no level-1 row. When only the
-- children are requested and the node has none, an empty array is returned
-- as well (previously NULL).
CREATE OR REPLACE FUNCTION test.create_jsonb_tree(_tree_path_list jsonb, _children_keyname text DEFAULT 'children', _get_only_children boolean DEFAULT false)
RETURNS jsonb AS
$$
DECLARE
    node_map        jsonb := jsonb_build_object();  -- id (as text) -> node data
    node_result     jsonb := jsonb_build_array();
    parent_children jsonb := jsonb_build_array();
    node            jsonb;
    relation        jsonb;
BEGIN
    -- Pass 1: index every node's data by its id.
    FOR node IN SELECT * FROM jsonb_array_elements(_tree_path_list)
    LOOP
        RAISE NOTICE 'Input (per row): %', node;
        node_map := jsonb_set(node_map, ARRAY[node->>'id'], node->'json');
    END LOOP;

    -- Pass 2: move each non-root node into its parent's children array.
    FOR relation IN SELECT * FROM jsonb_array_elements(_tree_path_list)
    LOOP
        IF (relation->>'level')::int > 1 THEN
            parent_children :=
                COALESCE(node_map->(relation->>'parent_id')->_children_keyname, jsonb_build_array())
                || jsonb_build_array(node_map->(relation->>'id'));
            node_map := jsonb_set(node_map, ARRAY[relation->>'parent_id', _children_keyname], parent_children);
            -- the node now lives inside its parent; drop the top-level copy
            node_map := node_map - (relation->>'id');
        ELSIF _get_only_children THEN
            -- a node without children has no children key: fall back to []
            node_result := COALESCE(node_map->(relation->>'id')->_children_keyname, jsonb_build_array());
        ELSE
            node_result := node_map->(relation->>'id');
        END IF;
    END LOOP;

    RETURN node_result;
END;
$$ LANGUAGE plpgsql
;
-- Aggregate the view rows for one start node into a single jsonb array and
-- build the nested tree from it. The aggregate carries its own ORDER BY:
-- the function needs the deepest rows first, and aggregate input order is
-- only defined when it is requested inside the aggregate call.
SELECT test.create_jsonb_tree(
    (
        SELECT jsonb_agg(
                   jsonb_build_object(
                       'id', v.id,
                       'parent_id', v.parent_id,
                       'level', v.level,
                       'name', v.name,
                       'json', v.json
                   )
                   ORDER BY v.level DESC, v.id ASC
               )
        FROM test.tree_path_list_v AS v
        WHERE v.start_id = 1 -- Which node to get children for
    ),
    'children'::text,
    true
)::json
;
Output for example 2
[
{
"id": 2,
"name": "c1.1",
"children": [
{
"id": 3,
"name": "c1.1.1",
"parent_a_id": 1,
"parent_b_id": 1,
"parent_c_id": 2
}
],
"parent_a_id": 1,
"parent_b_id": 1,
"parent_c_id": 1
},
{
"id": 4,
"name": "c1.2",
"parent_a_id": 1,
"parent_b_id": 1,
"parent_c_id": 1
}
]
Code example 3: pure sql nested json solution
I rewrote the nested-json code to pure sql, and put it into an SQL function so we can reuse the code by parameterizing the start_ids (as an array)
I have not benchmarked the code yet, and it does not necessarily perform better than the sql+plpgsql solution. I had to (ab)use CTEs to loop through the result the same way I do in plpgsql to add nodes to their parents. The solution for "merging" is essentially procedural even though it is pure sql.
--DROP VIEW test.source_data_v CASCADE;
-- Map your data (in this view) so it can be used directly in the recursive CTE.
-- Columns: id, parent_id, name (only there to make debugging easier) and
-- data (the node's jsonb payload, without a children key).
CREATE OR REPLACE VIEW test.source_data_v AS
SELECT
    tc.id          AS id,
    tc.parent_c_id AS parent_id,
    tc.name        AS name,
    jsonb_build_object(
        'id', tc.id,
        'name', tc.name,
        'parent_id', tc.parent_c_id
    ) AS data
FROM test.table_c AS tc
;
-- Inspect the mapped rows
SELECT id, parent_id, name, data
FROM test.source_data_v;
--DROP VIEW test.tree_path_list_v CASCADE;
-- Returns a jsonb array with one nested tree per start node.
-- $1: ids of the start nodes in test.source_data_v.
-- Each node's data is copied into a map keyed by id; the rows are then
-- processed one at a time (deepest first) and every node whose parent is in
-- the map is moved into that parent's "children" array.
--
-- Rows are numbered with an explicit window ORDER BY. The previous
-- row_number() OVER () left the numbering order undefined, while the merge
-- loop only works when children are processed before their parents.
CREATE OR REPLACE FUNCTION test.get_nested_object(bigint[])
RETURNS jsonb
AS $$
WITH RECURSIVE
-- Walk from every node up through its parents, remembering the path.
search_graph(id, parent_id, data, depth, path, cycle) AS (
    SELECT g.id, g.parent_id, g.data, 1,
        ARRAY[g.id],
        false
    FROM test.source_data_v g
    UNION ALL
    SELECT g.id, g.parent_id, g.data, sg.depth + 1,
        path || g.id,
        g.id = ANY(path)
    FROM test.source_data_v g, search_graph sg
    WHERE g.id = sg.parent_id AND NOT cycle
),
-- One row per node that reaches a requested start node; level 1 is the
-- start node itself.
transformed_result_graph AS (
    SELECT
        sg.path[1] AS id,
        d.parent_id,
        sg.depth AS level,
        sg.id AS start_id,
        d.name,
        sg.path,
        -- debugging aid: the path rendered with node names
        (SELECT string_agg(t.name, ' ') FROM (SELECT unnest(sg.path::int[]) AS id) a INNER JOIN test.source_data_v t USING (id)) AS named_path,
        d.data
    FROM search_graph sg
    INNER JOIN test.source_data_v d ON d.id = sg.path[1]
    WHERE sg.id = ANY($1) -- Parameterized input for start nodes
),
-- Row 0 carries the map of all nodes; rows 1..n are the nodes, deepest first.
sorted_paths AS (
    -- empty branch that only fixes the column names and types
    SELECT null::int AS rownum, *
    FROM transformed_result_graph WHERE false
    UNION ALL
    SELECT
        0, null, null, null, null, null, null, null,
        (SELECT jsonb_object_agg(id::text, data) FROM transformed_result_graph)
    UNION ALL
    SELECT
        row_number() OVER (ORDER BY c.level DESC, c.start_id ASC, c.id ASC) AS rownum,
        c.*
    FROM transformed_result_graph c
),
-- Step through sorted_paths in rownum order; "result" is the evolving map.
build_tree_loop (rownum, level, id, parent_id, data, named_path, result) AS (
    SELECT
        rownum, level, id, parent_id, data,
        named_path,
        data -- First row has the complete node map
    FROM sorted_paths
    WHERE rownum = 0
    UNION ALL
    SELECT
        c.rownum, c.level, c.id, c.parent_id, c.data,
        c.named_path,
        CASE
            -- no parent, or parent is not part of this result: leave the map as is
            WHEN (c.parent_id IS NULL) OR (prev.result->(c.parent_id::text) IS NULL)
                THEN prev.result
            ELSE jsonb_set(
                prev.result - (c.id::text), -- remove node and add it as child
                ARRAY[c.parent_id::text, 'children'],
                COALESCE(prev.result->(c.parent_id::text)->'children', jsonb_build_array())
                    || COALESCE(prev.result->(c.id::text), jsonb_build_object('msg','ERROR')), -- add node as child (and create empty children array if not exist)
                true -- add key (children) if not exists
            )
        END AS result
    FROM sorted_paths c -- Join each row in "sorted_paths" with the previous row from the CTE.
    INNER JOIN build_tree_loop prev ON c.rownum = prev.rownum + 1
),
-- The map after the last step holds only the nested start nodes.
nested_start_nodes AS (
    SELECT jsonb_agg(q.value) AS result
    FROM jsonb_each((SELECT result FROM build_tree_loop ORDER BY rownum DESC LIMIT 1)) q
)
-- "Main" query
SELECT result FROM nested_start_nodes
$$ LANGUAGE sql STABLE;
-- END of sql function
-- Nested tree for start node 1
SELECT test.get_nested_object('{1}'::bigint[]);
Output:
Unfortunately, jsonb does not preserve key order, so the "children" key comes first, making the tree harder to read.
[
{
"children": [
{
"children": [
{
"id": 3,
"name": "c1.1.1",
"parent_id": 2
}
],
"id": 2,
"name": "c1.1",
"parent_id": 1
},
{
"id": 4,
"name": "c1.2",
"parent_id": 1
}
],
"id": 1,
"name": "c1",
"parent_id": null
}
]
Code example 4
Another variant: I put everything into a plsql function. The dynamic query inside the function takes the name of any view/table as parameter, which contains columns id+parent_id+data+name. It also takes an array of ids for where to start. When using the function in a query you can aggregate a set of ids to an array as input. (array_agg etc).
The function is not "transparent", so it is harder to optimize indexes and such. With the "_debug" parameter set to true the function will output the raw generated sql as a notice, so you can explain analyze the query.
/*
Builds a nested jsonb tree from any view/table that exposes a parent/child
relation.

Parameters:
  _ids               Array of ids. Specify where to start recursion down the tree.
  _view              Name of a view/table with the source data. It must contain the columns:
                       id(int/bigint)
                       parent_id(int/bigint)
                       data(jsonb)  The data for each node, without the children key, which is added in this func.
                       name(text)   Only used for debugging purposes, can be empty string.
  _children_keyname  What key to use for children arrays.
  _no_root           Exclude the root node, only returning the children array.
  _debug             When true, the generated SQL and the intermediate node map
                     are emitted as NOTICEs.

Returns the nested node (or its children array when _no_root is true), or an
empty array when no row matches. When several ids are passed, only the start
node processed last is returned.

Changes compared to the earlier version:
  - the rows are aggregated with an explicit ORDER BY (deepest first), which
    the merge loop depends on;
  - the node-map dump is only raised when _debug is true;
  - the view name is substituted through format() placeholders.
*/
CREATE OR REPLACE FUNCTION test.get_nested_jsonb(_ids bigint[], _view regclass, _children_keyname text DEFAULT 'children', _no_root boolean DEFAULT false, _debug boolean DEFAULT false)
RETURNS jsonb AS $func$
DECLARE
    dynamic_sql     text := '';
    tree_path_list  jsonb;
    node_map        jsonb := jsonb_build_object();  -- id (as text) -> node data
    node_result     jsonb := jsonb_build_array();
    parent_children jsonb := jsonb_build_array();
    node            jsonb;
    relation        jsonb;
BEGIN
    -- _view is a regclass, so its text form is already a validly quoted
    -- relation name; $1 is bound to _ids at EXECUTE time.
    dynamic_sql := format(
        $q$
        WITH RECURSIVE
        search_graph(id, parent_id, depth, path, cycle) AS (
            SELECT g.id, g.parent_id, 1,
                ARRAY[g.id],
                false
            FROM %1$s g
            UNION ALL
            SELECT g.id, g.parent_id, sg.depth + 1,
                path || g.id,
                g.id = ANY(path)
            FROM %1$s g, search_graph sg
            WHERE g.id = sg.parent_id AND NOT cycle
        ),
        graph_by_id AS (
            SELECT
                sg.path[1] AS id, d.parent_id, sg.depth, sg.id AS start_id, d.name, sg.path,
                d.data
            FROM search_graph sg
            INNER JOIN %1$s d ON d.id = sg.path[1] -- Join in data for the current node
            WHERE sg.id = ANY($1) -- Start nodes. To debug the raw sql, replace $1 with an array of ids: ARRAY[1]
        )
        SELECT jsonb_agg(
                   (SELECT x FROM (SELECT id, parent_id, depth, name, data) x)
                   ORDER BY depth DESC, start_id ASC, id ASC
               )
        FROM graph_by_id
        $q$,
        _view
    );

    IF _debug THEN
        RAISE NOTICE 'Dump of raw dynamic SQL. Remember to replace $1 with ARRAY[id1,id2]: %', dynamic_sql;
    END IF;

    EXECUTE dynamic_sql INTO tree_path_list USING _ids;

    -- Create a node map (id as key)
    FOR node IN SELECT * FROM jsonb_array_elements(tree_path_list)
    LOOP
        node := jsonb_set(node, ARRAY['data', _children_keyname], jsonb_build_array()); -- add children key to all nodes
        node_map := jsonb_set(node_map, ARRAY[node->>'id'], node->'data');
    END LOOP;

    IF _debug THEN
        RAISE NOTICE 'dump: %', node_map;
    END IF;

    -- Loop sorted list, add nodes to node map from leaves and up
    FOR relation IN SELECT * FROM jsonb_array_elements(tree_path_list)
    LOOP
        IF (relation->>'depth')::int > 1 THEN
            parent_children :=
                COALESCE(node_map->(relation->>'parent_id')->_children_keyname, jsonb_build_array())
                || jsonb_build_array(node_map->(relation->>'id'));
            node_map := jsonb_set(node_map, ARRAY[relation->>'parent_id', _children_keyname], parent_children);
            -- the node now lives inside its parent; drop the top-level copy
            node_map := node_map - (relation->>'id');
        ELSIF _no_root THEN
            node_result := node_map->(relation->>'id')->_children_keyname;
        ELSE
            node_result := node_map->(relation->>'id');
        END IF;
    END LOOP;

    RETURN node_result;
END;
$func$ LANGUAGE plpgsql STABLE;
-- Test the function on the view 'test.source_data_v', starting from id=1,
-- with the debug flag switched on.
SELECT test.get_nested_jsonb(
    _ids              => ARRAY[1]::bigint[],
    _view             => 'test.source_data_v'::regclass,
    _children_keyname => 'children',
    _no_root          => false,
    _debug            => true
);