Adjacency List to JSON graph with Postgres - json

I have the following schema for the tags table:
-- Adjacency-list storage for tags: every row may point at another row of the
-- same table through parent_id.
CREATE TABLE tags (
    id        integer      NOT NULL,
    name      varchar(255) NOT NULL,
    parent_id integer               -- nullable: the queries in this file treat NULL as "root tag"
);
I need to build a query to return the following structure (here represented as yaml for readability):
- name: Ciencia
parent_id:
id: 7
children:
- name: Química
parent_id: 7
id: 9
children: []
- name: Biología
parent_id: 7
id: 8
children:
- name: Botánica
parent_id: 8
id: 19
children: []
- name: Etología
parent_id: 8
id: 18
children: []
After some trial and error, and after looking at similar questions on SO, I've come up with this query:
-- Bottom-up attempt: start at the leaves and wrap them into their parents,
-- one tree level per recursion step.
WITH RECURSIVE tagtree AS (
-- Anchor: tags that no other tag names as its parent, each with an empty
-- JSON array as children.
SELECT tags.name, tags.parent_id, tags.id, json '[]' children
FROM tags
WHERE NOT EXISTS (SELECT 1 FROM tags tt WHERE tt.parent_id = tags.id)
UNION ALL
-- Recursive step: join the rows produced by the previous step to their parent
-- tag and aggregate them, per parent, into that parent's children array.
-- NOTE(review): each step only sees the rows of the previous step, so a parent
-- whose children are reached at different depths is emitted once per step with
-- a partial children array - this looks like the cause of the duplicated root
-- described below; confirm.
SELECT (tags).name, (tags).parent_id, (tags).id, array_to_json(array_agg(tagtree)) children FROM (
SELECT tags, tagtree
FROM tagtree
JOIN tags ON tagtree.parent_id = tags.id
) v
GROUP BY v.tags
)
-- Keep only the rows built for root tags and wrap them in one JSON array.
SELECT array_to_json(array_agg(tagtree)) json
FROM tagtree
WHERE parent_id IS NULL
But it returns the following results when converted to yaml:
- name: Ciencia
parent_id:
id: 7
children:
- name: Química
parent_id: 7
id: 9
children: []
- name: Ciencia
parent_id:
id: 7
children:
- name: Biología
parent_id: 7
id: 8
children:
- name: Botánica
parent_id: 8
id: 19
children: []
- name: Etología
parent_id: 8
id: 18
children: []
The root node is duplicated.
I could merge the results into the expected structure in my app code, but I feel I am close and that it could all be done from PG.
Here's an example with SQL Fiddle:
http://sqlfiddle.com/#!15/1846e/1/0
Expected output:
https://gist.github.com/maca/e7002eb10f36fcdbc51b
Actual output:
https://gist.github.com/maca/78e84fb7c05ff23f07f4

Here's a solution using PLV8 for your schema.
First, build a materialized path using a PL/pgSQL function and a recursive CTE.
-- Returns the direct children of tag_id as a JSON array of
-- {"id", "name", "children": []} objects, or NULL when no row qualifies.
CREATE OR REPLACE FUNCTION get_children(tag_id integer)
RETURNS json AS $$
DECLARE
    result json;
BEGIN
    WITH RECURSIVE tree AS (
        -- Roots start with an empty ancestor path.
        SELECT id, name, ARRAY[]::integer[] AS ancestors
        FROM tags
        WHERE parent_id IS NULL
        UNION ALL
        -- Each child extends its parent's path with the parent's id.
        SELECT child.id, child.name, parent.ancestors || child.parent_id
        FROM tree AS parent
        INNER JOIN tags AS child ON child.parent_id = parent.id
    ),
    direct_children AS (
        -- The last path element is the immediate parent of the row.
        SELECT id, name, ARRAY[]::integer[] AS children
        FROM tree
        WHERE get_children.tag_id = ancestors[array_upper(ancestors, 1)]
    )
    SELECT array_to_json(array_agg(row_to_json(dc)))
    INTO result
    FROM direct_children AS dc;

    RETURN result;
END;
$$ LANGUAGE plpgsql;
Then, build the tree from the output of the above function.
-- PLV8 function: walks the entries of `data` and collects them into a `root`
-- array of {id, name, children} objects. An entry whose id is already present
-- anywhere inside `root` has its `children` replaced instead of being added
-- again. Declared STRICT, so a NULL argument yields NULL without running the body.
CREATE OR REPLACE FUNCTION get_tree(data json) RETURNS json AS $$
// Accumulator for the objects collected so far.
var root = [];
// build_tree and getObject are function declarations, so they are hoisted and
// can be called here before their definitions.
for(var i in data) {
build_tree(data[i]['id'], data[i]['name'], data[i]['children']);
}
// Adds a node to `root`, or overwrites the children of an already known node.
function build_tree(id, name, children) {
var exists = getObject(root, id);
if(exists) {
exists['children'] = children;
}
else {
root.push({'id': id, 'name': name, 'children': children});
}
}
// Depth-first search for the object whose `id` property strictly equals `id`.
// Returns that object, or null when none is found.
function getObject(theObject, id) {
var result = null;
if(theObject instanceof Array) {
// Arrays: search every element, stop at the first hit.
for(var i = 0; i < theObject.length; i++) {
result = getObject(theObject[i], id);
if (result) {
break;
}
}
}
else
{
// Objects: check the `id` property, then descend into nested objects/arrays.
for(var prop in theObject) {
if(prop == 'id') {
if(theObject[prop] === id) {
return theObject;
}
}
if(theObject[prop] instanceof Object || theObject[prop] instanceof Array) {
result = getObject(theObject[prop], id);
if (result) {
break;
}
}
}
}
return result;
}
// NOTE(review): the result is serialized by hand; whether PLV8 serializes a
// json return value again depends on the PLV8 version - confirm the output is
// not double-encoded.
return JSON.stringify(root);
$$ LANGUAGE plv8 IMMUTABLE STRICT;
This will yield the required JSON mentioned in your question. Hope that helps.
I've written a detailed post/breakdown of how this solution works here.

Try PL/Python and networkx.
Admittedly, using the following doesn't yield JSON in exactly the requested format, but the information seems to be all there and, if PL/Python is acceptable, this might be adapted into a complete answer.
-- Builds a directed graph (one node per tag, one parent -> child edge per
-- non-root tag) with networkx and returns its adjacency-format JSON.
--
-- names, ids and parent_ids are parallel arrays: element i of each array
-- describes the same tag. A NULL parent_id marks a root and produces no edge.
-- NOTE(review): plpythonu is the Python 2 variant of PL/Python; the body is
-- also valid Python 3, so LANGUAGE plpython3u should work unchanged - confirm
-- on the target server.
CREATE OR REPLACE FUNCTION get_adjacency_data(
    names text[],
    ids integer[],
    parent_ids integer[])
RETURNS jsonb AS
$BODY$
import json
import networkx as nx
from networkx.readwrite import json_graph

# NULL arrays (e.g. aggregated from an empty table) arrive as None.
node_names = names or []
node_ids = ids or []
node_parents = parent_ids or []

G = nx.DiGraph()
# Attach each name to its own id while creating the node. Passing
# (node, attribute-dict) pairs avoids set_node_attributes, whose argument
# order differs between networkx releases.
G.add_nodes_from(
    (node_id, {'name': name})
    for node_id, name in zip(node_ids, node_names))
# Edges point from parent to child; roots (no parent) contribute none.
G.add_edges_from(
    (parent_id, node_id)
    for node_id, parent_id in zip(node_ids, node_parents)
    if parent_id is not None)
return json.dumps(json_graph.adjacency_data(G))
$BODY$ LANGUAGE plpythonu;

-- Aggregate the whole table into three parallel arrays. The shared ORDER BY
-- keeps element i of every array on the same row, and the arguments are passed
-- in the order the function declares them (names, ids, parent_ids).
WITH raw_data AS (
    SELECT array_agg(name ORDER BY id)      AS names,
           array_agg(id ORDER BY id)        AS ids,
           array_agg(parent_id ORDER BY id) AS parent_ids
    FROM tags)
SELECT get_adjacency_data(names, ids, parent_ids)
FROM raw_data;

I was looking for the same solution, and maybe this example will be useful to someone.
tested on Postgres 10 with table with same structure
table with columns: id, name and pid as parent_id
-- Recursive SQL function: returns one jsonb object {"id", "name", "children"}
-- per company row whose pid equals p_parent (or per row with a NULL pid when
-- p_parent is NULL). "children" holds the objects returned by the recursive
-- call for that row, or JSON null when the call returns nothing.
create or replace function get_c_tree(p_parent int8) returns setof jsonb as $$
    select jsonb_build_object(
        'id', c.id,
        'name', c.name,
        -- aggregates only real child objects; with none, the aggregate is NULL
        -- and the key is written as JSON null
        'children', jsonb_agg(sub.node) filter (where sub.node is not null)
    )
    from company as c
    left join lateral get_c_tree(c.id) as sub(node) on true
    where (p_parent is null and c.pid is null)
       or c.pid = p_parent
    group by c.id, c.name;
$$ language sql;

-- Whole forest: start from the rows without a parent.
select jsonb_agg(t.node) from get_c_tree(null::int8) as t(node);

Related

Count the number of arrays in json with a MySQL select statement

How can I count the number of arrays in json with a MySQL select statement?
For example, in the following case, I want 2 to be returned.
sample
+-----------+-----------+----------------------------------+
| id | json |
+-----------+-----------+----------------------------------+
| 1 | { items: [{name: a, age: 20}, {name: b, age: 30}] } |
...
I was able to get the contents with json_extract.
But I want to count the number of elements.
-- Fetch the raw `items` array of the row with id 1.
SELECT
    json_extract(json, '$.items')
FROM sample
WHERE id = 1;
-- Number of elements in the `items` array of the row with id 1.
-- MySQL has no json_array_length() (that name belongs to PostgreSQL/SQLite);
-- JSON_LENGTH() is the MySQL function that returns the length of a JSON array.
SELECT
    JSON_LENGTH(JSON_EXTRACT(json, '$.items')) AS size
FROM sample
WHERE id = 1;
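json_array_length() is used to count the elements of a JSON array. Note that this function exists in PostgreSQL and SQLite; in MySQL the equivalent function is JSON_LENGTH().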
You can use JSON_LENGTH function, which is compatible with MySQL 5.7:
-- JSON_LENGTH takes the path as its second argument, so the array and its
-- element count can be read side by side.
SELECT
    JSON_EXTRACT(json, '$.items'),
    JSON_LENGTH(json, '$.items')
FROM sample
WHERE id = 1;
Check the demo here.
Here is a trick to count, you can use a combination of LENGTH() and REPLACE() functions.
db<>fiddle
-- String-based count: removing every occurrence of 'name' shortens the text
-- by 4 bytes per occurrence, so (length difference) / 4 is the number of
-- occurrences. This counts the substring wherever it appears in the text,
-- not JSON array elements as such.
SELECT
    id,
    json,
    ROUND((LENGTH(json) - LENGTH(REPLACE(json, 'name', ''))) / LENGTH('name'), 0) AS array_count
FROM (
    SELECT
        1 AS id,
        '{ items: [{name: a, age: 20}, {name: b, age: 30}] }' AS json
) AS sample_row;

Add a new key/value pair into a nested array inside a PostgreSQL JSON column

Using PostgreSQL 13.4 I have a table with a JSON column in a structure like the following sample:
{
"username": "jsmith",
"location": "United States",
"posts": [
{
"id":"1",
"title":"Welcome",
"newKey":true <----------- insert new key/value pair here
},
{
"id":"4",
"title":"What started it all",
"newKey":true <----------- insert new key/value pair here
}
]
}
For changing keys on the first level, I used a simple query like this
-- Merges a top-level "active" flag into the document of every row
-- (the statement has no WHERE clause, so the whole table is updated).
UPDATE sample_table_json
SET json = json::jsonb || '{"active": true}';
But this doesn't work for nested objects and objects in an array like in the sample.
How would I insert a key/value pair into a JSON column with nested objects in an array?
You have to use the jsonb_set function and specify the right path; see the manual.
For a single json update :
-- Adds "active": true to the first element of the "posts" array.
-- The path must name the key exactly as it appears in the document ("posts");
-- jsonb_set leaves the document unchanged when an intermediate path element
-- does not exist. Applies to every row (no WHERE clause).
UPDATE sample_table_json
SET json = jsonb_set(
        json::jsonb,
        '{posts,0,active}',
        'true',
        true  -- create the "active" key when it is missing
    );
For a (very) limited set of json updates :
-- Adds "active": true to the first two elements of the "posts" array by
-- nesting two jsonb_set calls (inner call: element 0, outer call: element 1).
-- The path must use the document's actual key, "posts"; with a non-existent
-- key jsonb_set returns the document unchanged. Applies to every row.
UPDATE sample_table_json
SET json = jsonb_set(
        jsonb_set(
            json::jsonb,
            '{posts,0,active}',
            'true',
            true
        ),
        '{posts,1,active}',
        'true',
        true
    );
For a larger set of json updates of the same json data, you can create the "aggregate version" of the jsonb_set function :
-- State-transition function for jsonb_set_agg.
--   x : running state (NULL on the first input row)
--   y : the document passed to the aggregate on the current row
--   p, e, b : path, new value and create-if-missing flag handed to jsonb_set
-- The first row seeds the state with y; every later row keeps patching x.
CREATE OR REPLACE FUNCTION jsonb_set(x jsonb, y jsonb, p text[], e jsonb, b boolean)
RETURNS jsonb
LANGUAGE sql
AS $$
    SELECT jsonb_set(COALESCE(x, y), p, e, b);
$$;

-- Aggregate that applies one jsonb_set per input row to the same document.
CREATE OR REPLACE AGGREGATE jsonb_set_agg(x jsonb, p text[], e jsonb, b boolean) (
    SFUNC = jsonb_set,
    STYPE = jsonb
);
and then use the new aggregate function jsonb_set_agg while iterating on a query result where the path and val fields could be calculated :
-- Applies every (path, val) pair of the list to the same literal document.
-- val is a text column, so it is cast with ::jsonb to be parsed as JSON;
-- to_jsonb() on text would store the JSON string "true" instead of the
-- boolean true.
SELECT jsonb_set_agg(
           '{"username": "jsmith","location": "United States","posts": [{"id":"1","title":"Welcome"},{"id":"4","title":"What started it all"}]}'::jsonb,
           l.path::text[],
           l.val::jsonb,
           true
       )
FROM (
    VALUES ('{posts,0,active}', 'true'),
           ('{posts,1,active}', 'true')
) AS l(path, val); -- this list could be the result of a subquery
This query could finally be used in order to update some data :
-- Patches every row of sample_table_json with all (path, val) pairs:
-- the CTE computes one patched document per id, the UPDATE writes it back.
WITH list AS (
    SELECT
        t.id,
        jsonb_set_agg(
            t.json::jsonb,
            l.path::text[],
            -- val is text: cast it so it is parsed as JSON (boolean true);
            -- to_jsonb() on text would store the string "true"
            l.val::jsonb,
            true
        ) AS res
    FROM sample_table_json AS t
    CROSS JOIN (
        VALUES ('{posts,0,active}', 'true'),
               ('{posts,1,active}', 'true')
    ) AS l(path, val)
    GROUP BY t.id
)
UPDATE sample_table_json AS t
SET json = l.res
FROM list AS l
WHERE t.id = l.id;
see the test result in dbfiddle
It became a bit complicated. Loop through the array, add the new key/value pair to each array element and re-aggregate the array, then rebuild the whole object.
-- Rebuilds the "posts" array with "active": true merged into every element,
-- then overwrites the top-level "posts" key of the document with it.
with doc(j) as (
    values ('{
"username": "jsmith",
"location": "United States",
"posts": [
{
"id":"1", "title":"Welcome", "newKey":true
},
{
"id":"4", "title":"What started it all", "newKey":true
}]
}'::jsonb)
)
select doc.j || jsonb_build_object('posts', patched.posts)
from doc
cross join lateral (
    -- one pass over the array: patch each element, aggregate back into an array
    select jsonb_agg(post || '{"active":true}') as posts
    from jsonb_array_elements(doc.j -> 'posts') as post
) as patched;

SQL query to get a joinned table

I have two tables that I need to join and need to get the data that I can use to plot.
Sample data for two tables are:
**table1**
mon_pjt month planned_hours
pjt1 01-10-2019 24
pjt2 01-01-2020 67
pjt3 01-02-2019 12
**table2**
date project hrs_consumed
07-12-2019 pjt1 7
09-09-2019 pjt2 3
12-10-2019 pjt1 4
01-02-2019 pjt3 5
11-10-2019 pjt1 4
Sample Output, where the actual hours are summation of column hrs_consumed in table2. Following is the sample output:
project label planned_hours actual_hours
pjt1 Oct-19 24 8
pjt1 Dec-19 0 7
pjt2 Sep-19 0 3
pjt2 Jan-20 67 0
pjt3 Feb-19 12 5
I have tried the following query but it gives error:
-- Attempt to combine planned and consumed hours through a UNION of two joins.
-- NOTE(review): the outer SELECT mixes SUM() with non-aggregated columns
-- without a GROUP BY, and refers to the derived table as "a" although the
-- derived table below is given no alias - likely sources of the reported error.
Select Sum(a.hrs_consumed), a.date, a.planned_hours
-- First branch: rows where the month and the date are equal.
-- NOTE(review): per the sample layouts, table1 holds month/planned_hours and
-- table2 holds date/hrs_consumed, so the t1./t2. prefixes look swapped here.
From (SELECT t1.date, t2.month, t1.project, t1.hrs_consumed, t2.planned_hours
from table1 t1 JOIN
table2 t2
on t2.month = t1.date
UNION
-- Second branch: rows where the date and the month differ.
-- NOTE(review): this branch reads from "table", not "table1" - confirm.
SELECT t1.date, t2.month, t1.mon_pjt, t2.hrs_consumed, t1.planned_hours
from table t1 JOIN
table2 t2
on t1.date != t2.month
)
I have also tried another way: extracting the two tables separately and trying to join and sort them in JavaScript, but that was also in vain.
In Javascript, you could mimic an SQL like request.
This code takes a pipe and
selects wanted key and formats date into a comparable format,
groups by date,
gets the sum of hrs_consumed for each group,
makes a full join (with an updated data set for comparable keys/columns),
selects wanted keys,
applies a sorting.
const
    // Left-to-right composition: pipe(f, g)(x) is g(f(x)).
    pipe = (...functions) => input => {
        let value = input;
        for (const step of functions) {
            value = step(value);
        }
        return value;
    },

    // Splits an array into groups (arrays) of items sharing the same key.
    // `key` is a property name or a function; groups keep first-seen order.
    groupBy = key => array => {
        const groups = [];
        array.forEach(item => {
            const keyOf = typeof key === 'function' ? key : entry => entry[key];
            const bucket = groups.find(([first]) => keyOf(item) === keyOf(first));
            if (bucket) {
                bucket.push(item);
            } else {
                groups.push([item]);
            }
        });
        return groups;
    },

    // Collapses an array of objects into one object whose `key` property is
    // the sum over all items; the other properties come from the first item.
    sum = key => array => array.reduce(function (total, item) {
        return { ...total, [key]: total[key] + item[key] };
    }),

    // Projection: maps every row through `fn`.
    select = fn => rows => rows.map(fn),

    // Merges the rows of `a` and `b` that share a key into one object each;
    // rows without a partner are kept as they are. keys[0] is used for `a`,
    // keys[1] (or keys[0] when absent) for `b`; a key may be a property name
    // or a function.
    fullJoin = (b, ...keys) => a => {
        const merged = {};
        const absorb = (rows, key) => {
            rows.forEach(row => {
                const k = typeof key === 'function' ? key(row) : row[key];
                merged[k] = { ...(merged[k] || {}), ...row };
            });
        };
        absorb(a, keys[0]);
        absorb(b, keys[1] || keys[0]);
        return Object.values(merged);
    },

    // Sorts the array in place by one key or a list of keys, ascending;
    // the first key on which two rows differ decides their order.
    order = keys => array => array.sort((a, b) => {
        let result;
        for (const k of [].concat(keys)) {
            result = a[k] > b[k] || -(a[k] < b[k]);
            if (result) {
                break;
            }
        }
        return result;
    });
// Sample data: table1 holds planned hours per project and month,
// table2 holds consumed hours per project and day.
var table1 = [{ mon_pjt: 'pjt1', month: '2019-10', planned_hours: 24 }, { mon_pjt: 'pjt2', month: '2020-01', planned_hours: 67 }, { mon_pjt: 'pjt3', month: '2019-02', planned_hours: 12 }],
    table2 = [{ date: '2019-12-07', project: 'pjt1', hrs_consumed: 7 }, { date: '2019-09-09', project: 'pjt2', hrs_consumed: 3 }, { date: '2019-10-12', project: 'pjt1', hrs_consumed: 4 }, { date: '2019-02-01', project: 'pjt3', hrs_consumed: 5 }, { date: '2019-10-11', project: 'pjt1', hrs_consumed: 4 }],
    // Rows belong together only when BOTH project and month match. Keying on
    // the month alone would merge different projects active in the same month.
    projectMonth = o => o.project + '|' + o.date,
    result = pipe(
        // cut the day off so the date is comparable with table1's month
        select(o => ({ ...o, date: o.date.slice(0, 7) })),
        groupBy(projectMonth),
        select(sum('hrs_consumed')),
        fullJoin(
            // rename table1's columns so both sides expose project/date
            select
                (({ mon_pjt: project, month: date, ...o }) => ({ project, date, ...o }))
                (table1),
            projectMonth
        ),
        // a side missing from the join contributes 0 hours
        select(({ project, date: label, planned_hours = 0, hrs_consumed = 0 }) => ({ project, label, planned_hours, hrs_consumed })),
        order(['project', 'label'])
    )(table2);

console.log(result);
.as-console-wrapper { max-height: 100% !important; top: 0; }
-- Planned vs. actual hours per project and month.
-- Both tables are reduced to (project, first day of month, planned, consumed)
-- rows, stacked with UNION ALL, then summed per project and month. A month
-- that exists in only one table therefore shows 0 for the other measure.
-- Fixes over the previous version: consumed hours are read from table2
-- (column hrs_consumed), actual_hours is the sum of consumed hours rather than
-- planned minus consumed, grouping is per project AND month, and the label
-- uses the 'Oct-19' style of the expected output.
-- NOTE(review): assumes table1.month and table2.date are DATE/DATETIME
-- columns (or ISO-formatted strings) - confirm against the real schema.
SELECT
    t.project,
    DATE_FORMAT(t.month_start, '%b-%y') AS label,
    SUM(t.planned_hours)                AS planned_hours,
    SUM(t.hours_consumed)               AS actual_hours
FROM (
    SELECT
        t1.mon_pjt                        AS project,
        DATE_FORMAT(t1.month, '%Y-%m-01') AS month_start,
        t1.planned_hours                  AS planned_hours,
        0                                 AS hours_consumed
    FROM table1 AS t1
    UNION ALL
    SELECT
        t2.project,
        DATE_FORMAT(t2.date, '%Y-%m-01'),
        0,
        t2.hrs_consumed
    FROM table2 AS t2
) AS t
GROUP BY t.project, t.month_start
ORDER BY t.project, t.month_start;

Update every value in an array in postgres json

In my postgres database I have json that looks similar to this:
{
"myArray": [
{
"myValue": 1
},
{
"myValue": 2
},
{
"myValue": 3
}
]
}
Now I want to rename myValue to otherValue. I can't be sure about the length of the array! Preferably I would like to use something like set_jsonb with a wildcard as the array index, but that does not seem to be supported. So what is the nicest solution?
You have to decompose a whole jsonb object, modify individual elements and build the object back.
The custom function will be helpful:
-- Renames the key old_key to new_key in every element of the jsonb array arr.
-- Elements that do not contain old_key are passed through unchanged.
-- Returns NULL for a NULL arr and an empty array for an empty arr
-- (jsonb_agg alone yields NULL when there are no elements to aggregate).
create or replace function jsonb_change_keys_in_array(arr jsonb, old_key text, new_key text)
returns jsonb language sql as $$
    select case
        when arr is null then null
        else coalesce(
            jsonb_agg(case
                -- value->old_key is SQL NULL only when the key is absent
                when value->old_key is null then value
                else value - old_key || jsonb_build_object(new_key, value->old_key)
            end),
            '[]'::jsonb
        )
    end
    from jsonb_array_elements(arr)
$$;
Use:
-- Demo: rename "myValue" to "otherValue" inside "myArray" for an inline row.
-- The result object is built with the single key "myArray".
with my_table (id, data) as (
    values (
        1,
'{
"myArray": [
{
"myValue": 1
},
{
"myValue": 2
},
{
"myValue": 3
}
]
}'::jsonb
    )
)
select
    t.id,
    jsonb_build_object(
        'myArray',
        jsonb_change_keys_in_array(t.data -> 'myArray', 'myValue', 'otherValue')
    )
from my_table as t;
id | jsonb_build_object
----+------------------------------------------------------------------------
1 | {"myArray": [{"otherValue": 1}, {"otherValue": 2}, {"otherValue": 3}]}
(1 row)
Using JSON functions is definitely the most elegant approach, but you can get by with character replacement. Cast the json(b) value to text, perform the replace, then cast it back to json(b). In this example I included the quotes and the colon so that the text replace targets the JSON keys without conflicting with values.
-- Demo table with two documents that use the key "myValue".
CREATE TABLE mytable (
    id   INT,
    data JSONB
);

INSERT INTO mytable (id, data) VALUES
    (1, '{"myArray": [{"myValue": 1},{"myValue": 2},{"myValue": 3}]}'),
    (2, '{"myArray": [{"myValue": 4},{"myValue": 5},{"myValue": 6}]}');

-- Before the rename.
SELECT id, data FROM mytable;

-- Rename the key textually: serialize to text, replace the quoted key
-- followed by its colon, parse back to jsonb. Touches every row.
UPDATE mytable
SET data = REPLACE(data::TEXT, '"myValue":', '"otherValue":')::JSONB;

-- After the rename.
SELECT id, data FROM mytable;
http://sqlfiddle.com/#!17/1c28a/9/4

How to get the number of elements in a JSON array stored as CLOB with Oracle 12c?

I'm storing a java class A as A_DOC in a clob column in my database.
The structure of A is like:
{
id : 123
var1: abc
subvalues : [{
id: 1
value : a
},
{
id: 1
value :b
}
...
}
]}
I know I can do things like
-- Evaluates the path '$.subvalues.value' against the A_DOC column of every
-- row of table_name.
select json_query(a.A_DOC, '$.subvalues.value') from table_name a;
and so on, but I'm looking for a way to count the number of elements in the subvalues array through an SQL query. Is this possible?
the function exists in Oracle 18 only
-- size() item method applied to each top-level element of a literal array;
-- WITH ARRAY WRAPPER collects the individual results into one JSON array.
-- NOTE(review): presumably yields the size of every element - confirm on the
-- target Oracle release.
SELECT json_query('[19, 15, [16,2,3]]','$[*].size()' WITH ARRAY WRAPPER) FROM dual;
-- size() item method applied to the root of the same literal array.
SELECT json_value('[19, 15, [16,2,3]]','$.size()') FROM dual;
You can use JSON_TABLE:
-- Counts the entries of the "subvalues" array with JSON_TABLE: the nested
-- path produces one row per array element, and COUNT(sub_id) counts those
-- rows per (id, var1).
SELECT
    jt.id,
    jt.var1,
    COUNT(jt.sub_id) AS subvalues
FROM JSON_TABLE(
    to_clob('{ id: 123, var1: "abc", subvalues : [{ id: 1, value: "a", }, { id: 2, value: "b" } ]}'),
    '$'
    COLUMNS (
        id NUMBER PATH '$.id',
        var1 VARCHAR PATH '$.var1',
        NESTED PATH '$.subvalues[*]'
            COLUMNS (
                sub_id NUMBER PATH '$.id'
            )
    )
) jt
GROUP BY jt.id, jt.var1