Convert flattened key/value table into hierarchical JSON in PostgreSQL - json

I have a PostgreSQL table with unique key/value pairs, which were originally in a JSON format, but have been normalized and melted:
key | value
-----------------------------
name | Bob
address.city | Vancouver
address.country | Canada
I need to turn this into a hierarchical JSON:
{
"name": "Bob",
"address": {
"city": "Vancouver",
"country": "Canada"
}
}
Is there a way to do this easily within SQL?

jsonb_set() almost does everything for you, but unfortunately it can only create missing leafs (i.e. missing last keys on a path), but not whole missing branches. To overcome this, here is a modified version of it, which can set values on any missing levels:
create function jsonb_set_rec(jsonb, jsonb, text[])
returns jsonb
language sql
as $$
select case
when array_length($3, 1) > 1 and ($1 #> $3[:array_upper($3, 1) - 1]) is null
then jsonb_set_rec($1, jsonb_build_object($3[array_upper($3, 1)], $2), $3[:array_upper($3, 1) - 1])
else jsonb_set($1, $3, $2, true)
end
$$;
Now you only need to apply this function one-by-one to your rows, starting with an empty json object: {}. You can do this with either recursive CTEs:
with recursive props as (
(select distinct on (grp)
pk, grp, jsonb_set_rec('{}', to_jsonb(value), string_to_array(key, '.')) json_object
from eav_tbl
order by grp, pk)
union all
(select distinct on (grp)
eav_tbl.pk, grp, jsonb_set_rec(json_object, to_jsonb(value), string_to_array(key, '.'))
from props
join eav_tbl using (grp)
where eav_tbl.pk > props.pk
order by grp, eav_tbl.pk)
)
select distinct on (grp)
grp, json_object
from props
order by grp, pk desc;
Or, with a custom aggregate defined as:
create aggregate jsonb_set_agg(jsonb, text[]) (
sfunc = jsonb_set_rec,
stype = jsonb,
initcond = '{}'
);
your query could became as simple as:
select grp, jsonb_set_agg(to_jsonb(value), string_to_array(key, '.'))
from eav_tbl
group by grp;
https://rextester.com/TULNU73750

There are no ready to use tools for this. The function generates a hierarchical json object based on a path:
create or replace function jsonb_build_object_from_path(path text, value text)
returns jsonb language plpgsql as $$
declare
obj jsonb;
keys text[] := string_to_array(path, '.');
level int := cardinality(keys);
begin
obj := jsonb_build_object(keys[level], value);
while level > 1 loop
level := level- 1;
obj := jsonb_build_object(keys[level], obj);
end loop;
return obj;
end $$;
You also need the aggregate function jsonb_merge_agg(jsonb) described in this answer. The query:
with my_table (path, value) as (
values
('name', 'Bob'),
('address.city', 'Vancouver'),
('address.country', 'Canada'),
('first.second.third', 'value')
)
select jsonb_merge_agg(jsonb_build_object_from_path(path, value))
from my_table;
gives this object:
{
"name": "Bob",
"first":
{
"second":
{
"third": "value"
}
},
"address":
{
"city": "Vancouver",
"country": "Canada"
}
}
The function do not recognize json arrays.

I can't really think of something simpler, although I think there should be an easier way.
I assume there is some additional column that can be used to bring the keys that belong to one "person" together, I used p_id for that in my example.
select p_id,
jsonb_object_agg(k, case level when 1 then v -> k else v end)
from (
select p_id,
elements[1] k,
jsonb_object_agg(case cardinality(elements) when 1 then ky else elements[2] end, value) v,
max(cardinality(elements)) as level
from (
select p_id,
"key" as ky,
string_to_array("key", '.') as elements, value
from kv
) t1
group by p_id, k
) t2
group by p_id;
The innermost query just converts the dot notation to an array for easier access later.
The next level then builds JSON objects depending on the "key". For the "single level" keys, it just uses key/value, for the others it uses the second element + the value and then aggregates those that belong together.
The second query level returns the following:
p_id | k | v | level
-----+---------+--------------------------------------------+------
1 | address | {"city": "Vancouver", "country": "Canada"} | 2
1 | name | {"name": "Bob"} | 1
2 | address | {"city": "Munich", "country": "Germany"} | 2
2 | name | {"name": "John"} | 1
The aggregation done in the second step, leaves one level too much for the "single element" keys, and that's what we need level for.
If that distinction wasn't made, the final aggregation would return {"name": {"name": "Bob"}, "address": {"city": "Vancouver", "country": "Canada"}} instead of the wanted: {"name": "Bob", "address": {"city": "Vancouver", "country": "Canada"}}.
The expression case level when 1 then v -> k else v end essentially turns {"name": "Bob"} back to "Bob".
So, with the following sample data:
create table kv (p_id integer, "key" text, value text);
insert into kv
values
(1, 'name','Bob'),
(1, 'address.city','Vancouver'),
(1, 'address.country','Canada'),
(2, 'name','John'),
(2, 'address.city','Munich'),
(2, 'address.country','Germany');
then query returns:
p_id | jsonb_object_agg
-----+-----------------------------------------------------------------------
1 | {"name": "Bob", "address": {"city": "Vancouver", "country": "Canada"}}
2 | {"name": "John", "address": {"city": "Munich", "country": "Germany"}}
Online example: https://rextester.com/SJOTCD7977

create table kv (key text, value text);
insert into kv
values
('name','Bob'),
('address.city','Vancouver'),
('address.country','Canada'),
('name','John'),
('address.city','Munich'),
('address.country','Germany');
create view v_kv as select row_number() over() as nRec, key, value from kv;
create view v_datos as
select k1.nrec, k1.value as name, k2.value as address_city, k3.value as address_country
from v_kv k1 inner join v_kv k2 on (k1.nrec + 1 = k2.nrec)
inner join v_kv k3 on ((k1.nrec + 2= k3.nrec) and (k2.nrec + 1 = k3.nrec))
where mod(k1.nrec, 3) = 1;
select json_agg(json_build_object('name',name, 'address', json_build_object('city',address_city, 'country', address_country)))
from v_datos;

Related

How to split column values by comma and return it as an array

As you can see below I have Name column. I want to split it by / and return the value in array.
MyTable
Id
Name
1
John/Warner/Jacob
2
Kol
If I write a query as
Select Id, Name from MyTable
it will return
{
"id": 1,
"name": "John/Warner/Jacob",
},
{
"id": 2,
"name": "Kol",
},
Which query should I write to get below result ?
{
"id": 1,
"name": ["John", "Warner", "Jacob"],
},
{
"id": 2,
"name": ["Kol"] ,
},
Don't think you can return an array in the query itself, but you could do this...
SELECT id,
SUBSTRING_INDEX(name, '/', 1)
AS name_part_1,
SUBSTRING_INDEX(name, '/', -1)
AS name_part_2
FROM tableName;
Only way to build it as an array would be when processing the result accordingly in whatever language you are using.
You can define a function split, which is based on the fact that substring_index(substring_index(name,'/',x),'/',-1) will return the x-th part of a name when separated by '/'.
CREATE FUNCTION `test`.`SPLIT`(s varchar(200), c char, i integer) RETURNS varchar(200) CHARSET utf8mb4
DETERMINISTIC
BEGIN
DECLARE retval varchar(200);
WITH RECURSIVE split as (
select 1 as x,substring_index(substring_index(s,c,1),c,-1) as y, s
union all
select x+1,substring_index(substring_index(s,c,x+1),c,-1),s from split where x<= (LENGTH(s) - LENGTH(REPLACE(s,c,'')))
)
SELECT y INTO retval FROM split WHERE x=i ;
return retval;
END
and then do:
with mytable as (
select 1 as Id, 'John/Warner/Jacob' as Name
union all
select 2, 'Kol')
select
id, split(Name,'/',x) as name
from mytable
cross join (select 1 as x union all select 2 union all select 3) x
order by id, name;
output:
Id
name
1
Jacob
1
John
1
Warner
2
[NULL]
2
[NULL]
2
Kol
It is, of course, possible to refine this, and leave out the NULL values ...
I will not convert this output to JSON for you ...

MySQL Parse and Split JSON value

I have a column which contains a JSON value of different lengths
["The Cherries:2.50","Draw:3.25","Swansea Jacks:2.87"]
I want to split them and store into a JSON like so:
[
{
name: "The Cherries",
odds: 2.50
},
{
name: "Draw",
odds: 3.25
},
{
name: "Swansea",
odds: 2.87
},
]
What I did right now is looping and splitting them in the UI which to me is quite heavy for the client. I want to parse and split them all in a single query.
If you are running MySQL 8.0, you can use json_table() to split the original arrayto rows, and then build new objects and aggregate them with json_arrayagg().
We need a primary key column (or set of columns) so we can properly aggreate the generated rows, I assumed id:
select
t.id,
json_arrayagg(json_object(
'name', substring(j.val, 1, locate(':', j.val) - 1),
'odds', substring(j.val, locate(':', j.val) + 1)
)) new_js
from mytable t
cross join json_table(t.js, '$[*]' columns (val varchar(500) path '$')) as j
group by t.id
Demo on DB Fiddle
Sample data:
id | js
-: | :-------------------------------------------------------
1 | ["The Cherries:2.50", "Draw:3.25", "Swansea Jacks:2.87"]
Query results:
id | new_js
-: | :----------------------------------------------------------------------------------------------------------------------
1 | [{"name": "The Cherries", "odds": "2.50"}, {"name": "Draw", "odds": "3.25"}, {"name": "Swansea Jacks", "odds": "2.87"}]
You can use json_table to create rows from the json object.
Just replace table_name with your table name and json with the column that contains json
SELECT json_arrayagg(json_object('name',SUBSTRING_INDEX(person, ':', 1) ,'odds',SUBSTRING_INDEX(person, ':', -1) ))
FROM table_name,
JSON_TABLE(json, '$[*]' COLUMNS (person VARCHAR(40) PATH '$') people;
Here is a Db fiddle you can refer
https://dbfiddle.uk/?rdbms=mysql_8.0&fiddle=801de9f067e89a48d45ef9a5bd2d094a

How to select JSON object from JSON array field of mysql by some condition

I have a table with JSON field which contains an array of JSON objects. I need to select objects by some condition.
Create and fill a table:
CREATE TABLE test (
id INT AUTO_INCREMENT PRIMARY KEY,
json_list JSON
);
INSERT INTO test(json_list) VALUES
("{""list"": [{""type"": ""color"", ""value"": ""red""}, {""type"": ""shape"", ""value"": ""oval""}, {""type"": ""color"", ""value"": ""green""}]}"),
("{""list"": [{""type"": ""shape"", ""value"": ""rect""}, {""type"": ""color"", ""value"": ""olive""}]}"),
("{""list"": [{""type"": ""color"", ""value"": ""red""}]}")
;
Now I need to select all objects with type = color from all rows.
I want to see this output:
id extracted_value
1 {"type": "color", "value": "red"}
1 {"type": "color", "value": "green"}
2 {"type": "color", "value": "olive"}
3 {"type": "color", "value": "red"}
It would be good to get this too:
id color
1 red
1 green
2 olive
3 red
I can't change the DB or JSON.
I'm using MySQL 5.7
My current solution
My solution is to cross join the table with some index set and then extract all elements of JSON array.
I don't like it as if possible object count in one array is large it is required to have all indexes till the maximum one. It makes the query slow as it won't stop calculation of JSON value when the end of array is reached.
SELECT
test.id,
JSON_EXTRACT(test.json_list, CONCAT('$.list[', ind.ind, ']')),
ind.ind
FROM
test
CROSS JOIN
(SELECT 0 AS ind UNION ALL SELECT 1 AS ind UNION ALL SELECT 2 AS ind) ind
WHERE
JSON_LENGTH(json_list, "$.list") > ind.ind
AND JSON_EXTRACT(json_list, CONCAT('$.list[', ind.ind, '].type')) = "color";
It is easy to get only values by changing JSON_EXTRACT path. But is it there a better way?
Edits
Added a check for json_list.list length. This filtered out 67% of derived table rows in this case.
SELECT JSON_EXTRACT(json_list, '$.list[*]')
FROM `test`
where JSON_CONTAINS(json_list, '{"type":"color"}', '$.list')
So current best solution is mine:
SELECT
test.id,
JSON_EXTRACT(test.json_list, CONCAT('$.list[', ind.ind, ']')),
ind.ind
FROM
test
CROSS JOIN
(SELECT 0 AS ind UNION ALL SELECT 1 AS ind UNION ALL SELECT 2 AS ind) ind
WHERE
JSON_LENGTH(json_list, "$.list") > ind.ind
AND JSON_EXTRACT(json_list, CONCAT('$.list[', ind.ind, '].type')) = "color";

How to update jsonb string with PostgreSQL?

I'm using PostgreSQL 9.4.5. I'd like to update a jsonb column.
My table is structured this way:
CREATE TABLE my_table (
gid serial PRIMARY KEY,
"data" jsonb
);
JSON strings are like this:
{"files": [], "ident": {"id": 1, "country": null, "type ": "20"}}
The following SQL doesn't do the job (syntax error - SQL state = 42601):
UPDATE my_table SET "data" -> 'ident' -> 'country' = 'Belgium';
Is there a way to achieve that?
Ok there are two functions:
create or replace function set_jsonb_value(p_j jsonb, p_key text, p_value jsonb) returns jsonb as $$
select jsonb_object_agg(t.key, t.value) from (
select
key,
case
when jsonb_typeof(value) = 'object' then set_jsonb_value(value, p_key, p_value)
when key = p_key then p_value
else value
end as value from jsonb_each(p_j)) as t;
$$ language sql immutable;
First one just changes the value of the existing key regardless of the key path:
postgres=# select set_jsonb_value(
'{"files": [], "country": null, "ident": {"id": 1, "country": null, "type ": "20"}}',
'country',
'"foo"');
set_jsonb_value
--------------------------------------------------------------------------------------
{"files": [], "ident": {"id": 1, "type ": "20", "country": "foo"}, "country": "foo"}
(1 row)
create or replace function set_jsonb_value(p_j jsonb, p_path text[], p_value jsonb) returns jsonb as $$
select jsonb_object_agg(t.key, t.value) from (
select
key,
case
when jsonb_typeof(value) = 'object' then set_jsonb_value(value, p_path[2:1000], p_value)
when key = p_path[1] then p_value
else value
end as value from jsonb_each(p_j)
union all
select
p_path[1],
case
when array_length(p_path,1) = 1 then p_value
else set_jsonb_value('{}', p_path[2:1000], p_value) end
where not p_j ? p_path[1]) as t;
$$ language sql immutable;
Second one changes the value of the existing key using the path specified or creates it if the path does not exists:
postgres=# select set_jsonb_value(
'{"files": [], "country": null, "ident": {"id": 1, "type ": "20"}}',
'{ident,country}'::text[],
'"foo"');
set_jsonb_value
-------------------------------------------------------------------------------------
{"files": [], "ident": {"id": 1, "type ": "20", "country": "foo"}, "country": null}
(1 row)
postgres=# select set_jsonb_value(
'{"files": [], "country": null, "ident": {"id": 1, "type ": "20"}}',
'{ident,foo,bar,country}'::text[],
'"foo"');
set_jsonb_value
-------------------------------------------------------------------------------------------------------
{"files": [], "ident": {"id": 1, "foo": {"bar": {"country": "foo"}}, "type ": "20"}, "country": null}
(1 row)
Hope it will help to someone who uses the PostgreSQL < 9.5
Disclaimer: Tested on PostgreSQL 9.5
In PG 9.4 you are out of luck with "easy" solutions like jsonb_set() (9.5). Your only option is to unpack the JSON object, make the changes and re-build the object. That sounds very cumbersome and it is indeed: JSON is horrible to manipulate, no matter how advanced or elaborate the built-in functions.
CREATE TYPE data_ident AS (id integer, country text, "type" integer);
UPDATE my_table
SET "data" = json_build_object('files', "data"->'files', 'ident', ident.j)::jsonb
FROM (
SELECT gid, json_build_object('id', j.id, 'country', 'Belgium', 'type', j."type") AS j
FROM my_table
JOIN LATERAL jsonb_populate_record(null::data_ident, "data"->'ident') j ON true) ident
WHERE my_table.gid = ident.gid;
In the SELECT clause "data"->'ident' is unpacked into a record (for which you need to CREATE TYPE a structure). Then it is built right back into a JSON object with the new country name. In the UPDATE that "ident" object is re-joined with the "files" object and the whole thing cast to a jsonb.
A pure thing of beauty -- just so long as speed is not your thing...
My previous solution relied on 9.5 functionality.
I would recommend instead either going with abelisto's solutions below or using pl/perlu, plpythonu, or plv8js to write json mutators in a language that has better support for them.

Automatically creating a table and inserting data from json file

I have nearly 50 json files. I want to create Postgres database based on these files. Each file contains data of one table. The files are not very large (at maximum several thousand records). Example data from customers.json (in fact there are more fields, I have simplified it):
[
{
"Id": 55948,
"FullName": "Full name #1",
"Address": "Address #1",
"Turnover": 120400.5,
"DateOfRegistration": "2014-02-13",
"LastModifiedAt": "2015-11-03 12:04:44" },
{
"Id": 55949,
"FullName": "Full name %2",
"Address": "Address #2",
"Turnover": 120000.0,
"DateOfRegistration": "2012-12-01",
"LastModifiedAt": "2015-11-04 17:14:21" }
]
I try to write a function which creates a table and inserts all the data to it. My attempt is based on dynamic query using EXECUTE:
CREATE OR REPLACE FUNCTION import_json(table_name text, data json)
RETURNS VOID AS $$
DECLARE
query text;
colname text;
BEGIN
query := 'CREATE TABLE ' || table_name || ' (';
FOR colname IN SELECT json_object_keys(data->0)
LOOP query := query || lower(colname) || ' text,';
END LOOP;
query := rtrim(query, ',') || ');';
EXECUTE(query);
END $$ LANGUAGE plpgsql;
My function creates a table with expected column names, but all columns are of type text. The problem is that I do not know how to define proper types of columns.
The json files are formatted well and contain integer, numeric, date, timestamp and text values. I would like to get the table:
CREATE TABLE customers (
id integer,
fullname text,
address text,
turnover numeric,
date_of_registration date,
last_modified_at timestamp);
The main question: how can I recognize types of columns in generated table?
Additionally, is there an easy way to transform Pascal to underscore notation ("DateOfRegistration" -> "date_of_registration")?
You can determine the type of a column by examining a value.
The function below formats column's definition from a pair (key, value).
It uses regex pattern matching.
It also transforms column's name to the notation with underscores (using regexp_replace() function).
Of course, the function wont work properly if the value represents NULL, so you have to check that the first json record has all not-null values.
create or replace function format_column(ckey text, cval text)
returns text language sql immutable as $$
select format('%s %s',
lower(regexp_replace(ckey, '(.)([A-Z])', '\1_\2', 'g')),
case
when cval ~ '^[\+-]{0,1}\d+$' then 'integer'
when cval ~ '^[\+-]{0,1}\d*\.\d+$' then 'numeric'
when cval ~ '^"\d\d\d\d-\d\d-\d\d"$' then 'date'
when cval ~ '^"\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d"$' then 'timestamp'
else 'text'
end
)
$$;
select format_column(key, value)
from (
values
('Id', '55948'),
('FullName', '"Full name #1"'),
('Turnover', '120400.5'),
('DateOfRegistration', '"2014-02-13"')
) val(key, value);
format_column
---------------------------
id integer
full_name text
turnover numeric
date_of_registration date
(4 rows)
In the main function you do not need variables or loops.
Use format() function to format strings with parameters and string_agg() to create textual lists.
Since you need both keys and values, use json_each() instead of json_object_keys(). In the second query you can use row_number() to ensure that the aggregated list of values is divided for consecutive records.
create or replace function import_table(table_name text, jdata json)
returns void language plpgsql as $$
begin
execute format('create table %s (%s)', table_name, string_agg(col, ', '))
from (
select format_column(key::text, value::text) col
from json_each(jdata->0)
) sub;
execute format('insert into %s values %s', table_name, string_agg(val, ','))
from (
with lines as (
select row_number() over () rn, line
from (
select json_array_elements(jdata) line
) sub
)
select rn, format('(%s)', string_agg(value, ',')) val
from (
select rn, format('%L', trim(value::text, '"')) as value
from lines, json_each(line)
) sub
group by 1
) sub;
end $$;
Test:
select import_table('customers',
'[{ "Id": 55948,
"FullName": "Full name #1",
"Address": "Address #1",
"Turnover": 120400.5,
"DateOfRegistration": "2014-02-13",
"LastModifiedAt": "2015-11-03 12:04:44" },
{ "Id": 55949,
"FullName": "Full name %2",
"Address": "Address #2",
"Turnover": 120000.0,
"DateOfRegistration": "2012-12-01",
"LastModifiedAt": "2015-11-04 17:14:21" }]');
\d customers
Table "public.customers"
Column | Type | Modifiers
----------------------+-----------------------------+-----------
id | integer |
full_name | text |
address | text |
turnover | numeric |
date_of_registration | date |
last_modified_at | timestamp without time zone |
select * from customers;
id | full_name | address | turnover | date_of_registration | last_modified_at
-------+--------------+------------+----------+----------------------+---------------------
55948 | Full name #1 | Address #1 | 120400.5 | 2014-02-13 | 2015-11-03 12:04:44
55949 | Full name %2 | Address #2 | 120000.0 | 2012-12-01 | 2015-11-04 17:14:21
(2 rows)