Recursive parent child JSON Postgresql - json

I have the following recursive PostgreSQL table.
CREATE TABLE public."data" (
id int4 NULL,
parentid int4 NULL,
menu varchar NULL
);
I am trying to create a JSON object from the table.
[
{
"menu": "home",
"children": [
{
"menu": "home 1",
"children": []
},
{
"menu": "home 2",
"children": [
{
"menu": "home 2.1",
"children": []
}
]
}
]
},
{
"menu": "config",
"children": []
}
]
How could I create a hierarchal object like this?

First you should use the jsonb format instead of the json format in postgres, see the documentation here :
In general, most applications should prefer to store JSON data as
jsonb, unless there are quite specialized needs, such as legacy
assumptions about ordering of object keys..
Then, assuming your table is a set of (id, menu) tuples and parentid is the id of the parent of this tuple, you can try :
CREATE VIEW parent_children (parent, children, root, cond) AS
( SELECT jsonb_build_object('menu', p.menu, 'children', '[]' :: jsonb) :: text AS parent
, jsonb_agg(jsonb_build_object('menu', c.menu, 'children', '[]' :: jsonb)) :: text AS children
, array[c.parentid] AS root
, array[c.parentid] AS cond
FROM public.data AS c
LEFT JOIN public.data AS p
ON p.id = c.parentid
GROUP BY c.parentid
) ;
WITH RECURSIVE list(parent, children, root, cond) AS
( SELECT parent, children, root, cond
FROM parent_children
WHERE root = array[NULL] -- start with the root parents
UNION
SELECT p.parent
, replace(p.children, c.parent, replace(c.parent, '[]', c.children))
, p.root
, p.cond || c.cond
FROM list AS p
INNER JOIN parent_children AS c
ON position(c.parent IN p.children) > 0
AND NOT p.cond #> c.root -- condition to avoid circular path
)
SELECT children :: jsonb
FROM list AS l
ORDER BY array_length(cond, 1) DESC
LIMIT 1 ;

Related

Conditionally update JSON column

I have a table which has ID & JSON columns. ID is auto incrementing column. Here are my sample data.
Row 1
1 | {
"HeaderInfo":
{
"Name": "ABC",
"Period": "2010",
"Code": "123"
},
"HData":
[
{ "ID1": "1", "Value": "$1.00", "Code": "A", "Desc": "asdf" },
{ "ID1": "2", "Value": "$1.00", "Code": "B", "Desc": "pqr" },
{ "ID1": "3", "Value": "$1.00", "Code": "C", "Desc": "xyz" }
]
}
Row 2
2 | {
"HeaderInfo":
{
"Name": "ABC",
"Period": "2010",
"Code": "123"
},
"HData":
[
{ "ID1": "76", "Value": "$1.00", "Code": "X", "Desc": "asdf" },
{ "ID1": "25", "Value": "$1.00", "Code": "Y", "Desc": "pqr" },
{ "ID1": "52", "Value": "$1.00", "Code": "Z", "Desc": "lmno" },
{ "ID1": "52", "Value": "$1.00", "Code": "B", "Desc": "xyz" }
]
}
and it keep goes. Items inside the HData section is infinite. It can be any numbers of items.
On this JSON I need to update the Value = "$2.00" where "Code" is "B". I should be able to do this with 2 scenarios. My parameter inputs are #id=2, #code="B", #value="$2.00". #id sometimes will be null. So,
If #id is null then the update statement should go through all records and update the Value="$2.00" for all items inside the HData section which has Code="B".
If #id = 2 then the update statement should update only the second row which Id is 2 for the items which Code="b"
Appreciate your help in advance.
Thanks
See DB Fiddle for an example.
declare #id bigint = 2
, #code nvarchar(8) = 'B'
, #value nvarchar(8) = '$2.00'
update a
set json = JSON_MODIFY(json, '$.HData[' + HData.[key] + '].Value', #value)
from so75416277 a
CROSS APPLY OPENJSON (json, '$.HData') HData
CROSS APPLY OPENJSON (HData.Value, '$')
WITH (
ID1 bigint
, Value nvarchar(8)
, Code nvarchar(8)
, [Desc] nvarchar(8)
) as HDataItem
WHERE id = #id
AND HDataItem.Code = #Code
The update / set statement says we want to replace the value of json with a new generated value / functions exactly the same as it would in any other context; e.g. update a set json = 'something' from so75416277 a where a.column = 'some condition'
The JSON_MODIFY does the manipulation of our json.
The first input is the original json field's value
The second is the path to the value to be updated.
The third is the new value
'$.HData[' + HData.[key] + '].Value' says we go from our JSON's root ($), find the HData field, filter the array of values for the one we're after (i.e. key here is the array item's index), then use the Value field of this item.
key is a special term; where we don't have a WITH block accompanying our OPENJSON statement we get back 3 items: key, value and type; key being the identifier, value being the content, and type saying what sort of content that is.
CROSS APPLY allows us to perform logic on a value from a single DB rowto return potentially multiple rows; e.g. like a join but against its own contents.
OPENJSON (json, '$.HData') HData says to extract the HData field from our json column, and return this with the table alias HData; as we've not included a WITH, this HData column has 3 fields; key, value, and type, as mentioned above (this is the same key we used in our JSONMODIFY).
The next OPENJSON works on HData.Value; i.e. the contents of the array item under HData. Here we take the object from this array (i.e. that's the root from the current context; hence $), and use WITH to parse it into a specific structure; i.e. ID1, Value, Code, and Desc (brackets around Desc as it's a keyword). We give this the alias HDataItem.
Finally we filter for the bit of the data we're interested in; i.e. on id to get the row we want to update, then on HDataItem.Code so we only update those array items with code 'B'.
Try the below SP.
CREATE PROC usp_update_75416277
(
#id Int = null,
#code Varchar(15),
#value Varchar(15)
)
AS
BEGIN
SET NOCOUNT ON;
DECLARE #SQLStr Varchar(MAX)=''
;WITH CTE
AS
( SELECT ROW_NUMBER()OVER(PARTITION BY YourTable.Json ORDER BY (SELECT NULL))RowNo,*
FROM YourTable
CROSS APPLY OPENJSON(YourTable.Json,'$.HData')
WITH (
ID1 Int '$.ID1',
Value Varchar(20) '$.Value',
Code Varchar(20) '$.Code',
[Desc] Varchar(20) '$.Desc'
) HData
WHERE (#id IS NULL OR ID =#id)
)
SELECT #SQLStr=#SQLStr+' UPDATE YourTable
SET [JSON]=JSON_MODIFY(YourTable.Json,
''$.HData['+CONVERT(VARCHAR(15),RowNo-1)+'].Value'',
'''+CONVERT(VARCHAR(MAX),#value)+''') '+
'WHERE ID ='+CONVERT(Varchar(15),CTE.ID) +' '
FROM CTE
WHERE Code=#code
AND (#id IS NULL OR ID =#id)
EXEC( #SQLStr)
END

Add a new key/value pair into a nested array inside a PostgreSQL JSON column

Using PostgreSQL 13.4 I have a table with a JSON column in a structure like the following sample:
{
"username": "jsmith",
"location": "United States",
"posts": [
{
"id":"1",
"title":"Welcome",
"newKey":true <----------- insert new key/value pair here
},
{
"id":"4",
"title":"What started it all",
"newKey":true <----------- insert new key/value pair here
}
]
}
For changing keys on the first level, I used a simple query like this
UPDATE
sample_table_json
SET
json = json::jsonb || '{"active": true}';
But this doesn't work for nested objects and objects in an array like in the sample.
How would I insert a key/value pair into a JSON column with nested objects in an array?
You have to use the jsonb_set function while specifying the right path see the manual.
For a single json update :
UPDATE sample_table_json
SET json = jsonb_set( json::jsonb
, '{post,0,active}'
, 'true'
, true
)
For a (very) limited set of json updates :
UPDATE sample_table_json
SET json = jsonb_set(jsonb_set( json::jsonb
, '{post,0,active}'
, 'true'
, true
)
, '{post,1,active}'
, 'true'
, true
)
For a larger set of json updates of the same json data, you can create the "aggregate version" of the jsonb_set function :
CREATE OR REPLACE FUNCTION jsonb_set(x jsonb, y jsonb, p text[], e jsonb, b boolean)
RETURNS jsonb LANGUAGE sql AS $$
SELECT jsonb_set(COALESCE(x,y), p, e, b) ; $$ ;
CREATE OR REPLACE AGGREGATE jsonb_set_agg(x jsonb, p text[], e jsonb, b boolean)
( STYPE = jsonb, SFUNC = jsonb_set) ;
and then use the new aggregate function jsonb_set_agg while iterating on a query result where the path and val fields could be calculated :
SELECT jsonb_set_agg('{"username": "jsmith","location": "United States","posts": [{"id":"1","title":"Welcome"},{"id":"4","title":"What started it all"}]}' :: jsonb
, l.path :: text[]
, to_jsonb(l.val)
, true)
FROM (VALUES ('{posts,0,active}', 'true'), ('{posts,1,active}', 'true')) AS l(path, val) -- this list could be the result of a subquery
This query could finally be used in order to update some data :
WITH list AS
(
SELECT id
, jsonb_set_agg(json :: jsonb
, l.path :: text[]
, to_jsonb(l.val)
, true) AS res
FROM sample_table_json
CROSS JOIN (VALUES ('{posts,0,active}', 'true'), ('{posts,1,active}', 'true')) AS l(path, val)
GROUP BY id
)
UPDATE sample_table_json AS t
SET json = l.res
FROM list AS l
WHERE t.id = l.id
see the test result in dbfiddle
It became a bit complicated. Loop through the array, add the new key/value pair to each array element and re-aggregate the array, then rebuild the whole object.
with t(j) as
(
values ('{
"username": "jsmith",
"location": "United States",
"posts": [
{
"id":"1", "title":"Welcome", "newKey":true
},
{
"id":"4", "title":"What started it all", "newKey":true
}]
}'::jsonb)
)
select j ||
jsonb_build_object
(
'posts',
(select jsonb_agg(je||'{"active":true}') from jsonb_array_elements(j->'posts') je)
)
from t;

Full text search in concrete node in json

I has table "Product" with two columns:
Id - Bigint primary key
data - Jsonb
Here example of json:
{
"availability": [
{
"qty": 10,
"price": 42511,
"store": {
"name": "my_best_store",
"hours": null,
"title": {
"en": null
},
"coords": null,
"address": null,
I insert json to column "data".
Here sql get find "my_best_store"
select *
from product
where to_tsvector(product.data) ## to_tsquery('my_best_store')
Nice. It's work fine.
But I need to find "my_best_store" only in section "availability".
I try this but result is empty:
select *
from product
where to_tsvector(product.data) ## to_tsquery('availability & my_best_store')
Assuming you want to search in the name attribute, you can do the following:
select p.*
from product p
where exists (select *
from jsonb_array_elements(p.data -> 'availability') as t(item)
where to_tsvector(t.item -> 'store' ->> 'name') ## to_tsquery('my_best_store'))
With Postgres 12, you can simplify that to:
select p.*
from product p
where to_tsvector(jsonb_path_query_array(data, '$.availability[*].store.name')) ## to_tsquery('my_best_store')

Postgres - Convert adjacency list to nested JSON object

I have a table with this data in Postgres and I am having a hard time to convert this in to a JSON object.
node_id parent_node name
------- ----------- ----
1 node1
2 1 node2
3 1 node3
4 2 node4
5 2 node5
6 2 node6
7 3 node7
8 3 node8
How do I convert it like this?
{
name: 'node1'
childs: [
{
name: 'node2',
childs: [
{
name: 'node4',
childs: []
},
{
name: 'node5',
childs: []
},
{
name: 'node6',
childs: []
}
]
},
...
]
}
Any suggestion will help. Thanks
Using WITH RECURSIVE (https://www.postgresql.org/docs/current/static/queries-with.html) and JSON Functions (https://www.postgresql.org/docs/current/static/functions-json.html) I build this solution:
db<>fiddle
The core functionality:
WITH RECURSIVE tree(node_id, ancestor, child, path, json) AS (
SELECT
t1.node_id,
NULL::int,
t2.node_id,
'{children}'::text[] ||
(row_number() OVER (PARTITION BY t1.node_id ORDER BY t2.node_id) - 1)::text,-- C
jsonb_build_object('name', t2.name, 'children', array_to_json(ARRAY[]::int[])) -- B
FROM test t1
LEFT JOIN test t2 ON t1.node_id = t2.parent_node -- A
WHERE t1.parent_node IS NULL
UNION
SELECT
t1.node_id,
t1.parent_node,
t2.node_id,
tree.path || '{children}' || (row_number() OVER (PARTITION BY t1.node_id ORDER BY t2.node_id) - 1)::text,
jsonb_build_object('name', t2.name, 'children', array_to_json(ARRAY[]::int[]))
FROM test t1
LEFT JOIN test t2 ON t1.node_id = t2.parent_node
INNER JOIN tree ON (t1.node_id = tree.child)
WHERE t1.parent_node = tree.node_id -- D
)
SELECT -- E
child as node_id, path, json
FROM tree
WHERE child IS NOT NULL ORDER BY path
Every WITH RECURSIVE contains a start SELECT and a recursion part (the second SELECT) combined by a UNION.
A: Joining the table agains itself for finding the children of a node_id.
B: Building the json object for the child which can be inserted into its parent
C: Building the path where the child object has to be inserted (from root). The window function row_number() (https://www.postgresql.org/docs/current/static/tutorial-window.html) generates the index of the child within the children array of the parent.
D: The recursion part works as the initial part with one difference: It's not searching for the root element but for the element which has the parent node of the last recursion.
E: Executing the recursion and filtering all elements without any children gives this result:
node_id path json
2 children,0 {"name": "node2", "children": []}
4 children,0,children,0 {"name": "node4", "children": []}
5 children,0,children,1 {"name": "node5", "children": []}
6 children,0,children,2 {"name": "node6", "children": []}
3 children,1 {"name": "node3", "children": []}
7 children,1,children,0 {"name": "node7", "children": []}
8 children,1,children,1 {"name": "node8", "children": []}
Though I found no way to add all children elements in the recursion (the origin json is no global variable; so it always knows the changes of the direct ancestors, not their siblings), I had to iterate the rows in a seconds step.
That's why I build the function. In there I can do the iteration for a global variable. With the function jsonb_insert I am inserting all calculated elements into a root json object - using the calculated path.
CREATE OR REPLACE FUNCTION json_tree() RETURNS jsonb AS $$
DECLARE
_json_output jsonb;
_temprow record;
BEGIN
SELECT
jsonb_build_object('name', name, 'children', array_to_json(ARRAY[]::int[]))
INTO _json_output
FROM test
WHERE parent_node IS NULL;
FOR _temprow IN
/* Query above */
LOOP
SELECT jsonb_insert(_json_output, _temprow.path, _temprow.json) INTO _json_output;
END LOOP;
RETURN _json_output;
END;
$$ LANGUAGE plpgsql;
Last step is calling the function and make the JSON more readable (jsonb_pretty())
{
"name": "node1",
"children": [{
"name": "node2",
"children": [{
"name": "node4",
"children": []
},
{
"name": "node5",
"children": []
},
{
"name": "node6",
"children": []
}]
},
{
"name": "node3",
"children": [{
"name": "node7",
"children": []
},
{
"name": "node8",
"children": []
}]
}]
}
I am sure it is possible to optimize the query but for a sketch it works.

Couchbase N1QL - Nest within a Nest

Nesting within a Nest.
I've adapted my need into the following restaurant example:
Desired Output:
{
"restaurant": {
"id": "restaurant1",
"name": "Foodie",
"mains": [ // < main nested in restaurant
{
"id": "main1",
"title": "Steak and Chips",
"ingredients": [ // < ingredient nested in main (...which is nested in restaurant)
{
"id": "ingredient1",
"title": "steak"
},
{
"id": "ingredient2",
"title": "chips"
}
]
},
{
"id": "main2",
"title": "Fish and Chips",
"ingredients": [
{
"id": "ingredient3",
"title": "fish"
},
{
"id": "ingredient2",
"title": "chips"
}
]
}
]
"drinks": [ you get the idea ] // < drink nested in restaurant
}
}
Example Docs:
// RESTAURANTS
{
"id": "restaurant1",
"type": "restaurant",
"name": "Foodie",
"drinkIds": [ "drink1", "drink2" ],
"mainIds: [ "main1", "main2" ]
},
// MAINS
{
"id": "main1",
"type": "main",
"restaurantIds": [ "restaurant1" ],
"title": "Steak and Chips"
},
{
"id": "main2",
"type": "main",
"restaurantIds": [ "restaurant1" ],
"title": "Fish and Chips"
},
// INGREDIENTS
{
"id": "ingredient1",
"type": "ingredient",
"title": "steak",
"mainIds": [ "main1" ]
},
{
"id": "ingredient2",
"type": "ingredient",
"title": "chips",
"mainIds": [ "main1", "main2" ]
},
{
"id": "ingredient3",
"type": "ingredient",
"title": "fish",
"mainIds": [ "main2" ]
},
// DRINKS
{ you get the idea.... }
The closest I can get without error is:
SELECT restaurant, mains, drinks
FROM default restauant USE KEYS "restaurant1"
NEST default mains ON KEYS restaurant.mainIds
NEST default drinks ON KEYS restaurant.drinkIds;
But:
1. Obviously the nested nest is missing
2. The returned order is incorrect - the drinks nest comes first instead of last
(3. Since I'm also using Sync Gateway - it returns all the "_sync" fields with every doc - can't figure out how to omit this on each doc.)
UPDATE 1: ADAPTED SOLUTION
NB: I should have specified above that a main cannot hold ingredientIds.
Based on geraldss' v helpful input below, I added a doc which tracks keys per restaurant, eg:
{
"id": "restaurant1-JoeBloggs",
"dinerId": "JoeBloggs",
"ingredientIds": [ "ingredient1", "ingredient2" "ingredient3" ],
"mainOrdered": [ "main1" ], // < other potential uses...
"drinkOrdered": [ "drink2" ]
}
I added this to geraldss' first solution below as a JOIN to make it available to the query, eg:
SELECT *
FROM
(
SELECT
r.*,
(
SELECT
drink.*
FROM default AS drink
USE KEYS r.drinkIds
) AS drinks,
(
SELECT
main.*,
(
SELECT
ingredient.*
FROM default AS ingredient
USE KEYS keyIndex.ingredientIds // < keyIndex
WHERE ingredient.mainId=main.id
) AS ingredients
FROM default AS main
USE KEYS r.mainIds
) AS mains
FROM default AS r
USE KEYS "restaurant1"
JOIN default AS keyIndex ON KEYS "restaurant1-JoeBloggs" // < keyIndex JOINed
) AS restaurant
;
geraldss' second solution below also looks good - unfortunately it won't work for my case as this query requires that mains are found via ingredients; for my needs a main can exist without any ingredients. EDIT: > he came up with another solution. See 2.
UPDATE 2: FINAL SOLUTION
So, again, with geraldss' help I have a solution which does not require an additional doc to track keys:
SELECT *
FROM
(
SELECT
restaurant.id, restaurant.name,
(
SELECT
drink.id, drink.title
FROM default AS drink
USE KEYS restaurant.drinkIds
)
AS drinks,
(
SELECT
main.id, main.title,
ARRAY_AGG({"title":ingredient.title, "id":ingredient.id}) AS ingredients
FROM default AS ingredient
JOIN default AS main
ON KEYS ingredient.mainIds
WHERE main.restaurantId="restaurant1"
AND meta().id NOT LIKE '_sync:%' // < necessary only if using Sync Gateway
GROUP BY main
UNION ALL
SELECT
mainWithNoIngredients.id, mainWithNoIngredients.title
FROM default AS mainWithNoIngredients
UNNEST mainWithNoIngredients AS foo // < since this is being flattened the AS name is irrelevant
WHERE mainWithNoIngredients.restaurantId="restaurant1"
AND mainWithNoIngredients.type="main"
AND meta().id NOT LIKE '_sync:%' // < necessary only if using Sync Gateway
AND META(mainWithNoIngredients).id NOT IN
(
SELECT RAW mainId
FROM default AS ingredient
)
)
AS mains
FROM default AS restaurant
USE KEYS "restaurant1"
)
AS restaurant
;
NB - the AND meta().id NOT LIKE '_sync:%' lines are only necessary if using Sync Gateway.
With just 1 key I can pull all the related docs - even if they are unknown to the immediate 'parent'.
Thank you geraldss.
If the mains contain ingredientIds:
SELECT *
FROM
(
SELECT
r.*,
(
SELECT
drink.*
FROM default AS drink
USE KEYS r.drinkIds
) AS drinks,
(
SELECT
main.*,
(
SELECT
ingredient.*
FROM default AS ingredient
USE KEYS main.ingredientIds
) AS ingredients
FROM default AS main
USE KEYS r.mainIds
) AS mains
FROM default AS r
USE KEYS "restaurant1"
) AS restaurant
;
EDIT: Updated to include mains not referenced by any ingredients.
If the mains do not contain ingredientIds:
SELECT *
FROM
(
SELECT
r.*,
(
SELECT
drink.*
FROM default AS drink
USE KEYS r.drinkIds
) AS drinks,
(
SELECT
main.*,
ARRAY_AGG(ingredient) AS ingredients
FROM default AS ingredient
JOIN default AS main
ON KEYS ingredient.mainIds
WHERE "restaurant1" IN main.restaurantIds
GROUP BY main
UNION ALL
SELECT
main.*
FROM default AS main
WHERE "restaurant1" IN main.restaurantIds
AND META(main).id NOT IN (
SELECT RAW mainId
FROM default AS ingredient
UNNEST mainIds AS mainId
)
) AS mains
FROM default AS r
USE KEYS "restaurant1"
) AS restaurant
;