Oracle select JSON column as key / value table [duplicate] - json

In Oracle 12c, having a column with JSON data in this format:
{
"user_name": "Dave",
"phone_number": "13326415",
"married": false,
"age": 18
}
How can I select it in this format:
key val
-------------- ----------
"user_name" "Dave"
"phone_number" "13326415"
"married" "false"
"age" "18"

As stated in the comment, there is no way to get the keys of a JSON object using just SQL. With PL/SQL you can create a pipelined function to get the information you need. Below is a very simple pipelined function that gets the keys of a JSON object and returns the type of each key, the key name, and the value.
First, you will need to create the types that will be used by the function
CREATE OR REPLACE TYPE key_value_table_rec FORCE AS OBJECT
(
    TYPE    VARCHAR2 (100),
    key     VARCHAR2 (200),
    VALUE   VARCHAR2 (200)
);
/

CREATE OR REPLACE TYPE key_value_table_t AS TABLE OF key_value_table_rec;
/
Next, create the pipelined function that will return the information in the format of the types defined above.
CREATE OR REPLACE FUNCTION get_key_value_table (p_json CLOB)
    RETURN key_value_table_t
    PIPELINED
AS
    l_json          json_object_t;
    l_json_keys     json_key_list;
    l_json_element  json_element_t;
BEGIN
    l_json := json_object_t (p_json);
    l_json_keys := l_json.get_keys;

    FOR i IN 1 .. l_json_keys.COUNT
    LOOP
        l_json_element := l_json.get (l_json_keys (i));
        PIPE ROW (key_value_table_rec (
                      CASE
                          WHEN l_json_element.is_null THEN 'null'
                          WHEN l_json_element.is_boolean THEN 'boolean'
                          WHEN l_json_element.is_number THEN 'number'
                          WHEN l_json_element.is_timestamp THEN 'timestamp'
                          WHEN l_json_element.is_date THEN 'date'
                          WHEN l_json_element.is_string THEN 'string'
                          WHEN l_json_element.is_object THEN 'object'
                          WHEN l_json_element.is_array THEN 'array'
                          ELSE 'unknown'
                      END,
                      l_json_keys (i),
                      l_json.get_string (l_json_keys (i))));
    END LOOP;

    RETURN;
EXCEPTION
    WHEN OTHERS
    THEN
        CASE SQLCODE
            WHEN -40834
            THEN
                --JSON format is not valid
                NULL;
            ELSE
                RAISE;
        END CASE;
END;
/
Finally, you can call the pipelined function from a SELECT statement
SELECT * FROM TABLE (get_key_value_table (p_json => '{
"user_name": "Dave",
"phone_number": "13326415",
"married": false,
"age": 18
}'));
TYPE KEY VALUE
__________ _______________ ___________
string user_name Dave
string phone_number 13326415
boolean married false
number age 18
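If you only want the two-column key/value layout from the question, you can simply project those two columns (the column names come from the key_value_table_rec type above); a quick sketch:
-- sketch: only the key and value columns, as asked in the question
SELECT key, value
  FROM TABLE (get_key_value_table (p_json => '{"user_name":"Dave","age":18}'));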
If your JSON values are stored in a column in a table, you can view the keys/values using CROSS JOIN
WITH
sample_table (id, json_col)
AS
(SELECT 1, '{"key1":"val1","key_obj":{"nested_key":"nested_val"},"key_bool":false}'
FROM DUAL
UNION ALL
SELECT 2, '{"key3":3.14,"key_arr":[1,2,3]}' FROM DUAL)
SELECT t.id, j.*
FROM sample_table t CROSS JOIN TABLE (get_key_value_table (p_json => t.json_col)) j;
ID TYPE KEY VALUE
_____ __________ ___________ ________
1 string key1 val1
1 object key_obj
1 boolean key_bool false
2 number key3 3.14
2 array key_arr

Related

How to split column values by comma and return it as an array

As you can see below, I have a Name column. I want to split it by / and return the values in an array.
MyTable
Id | Name
---+-------------------
 1 | John/Warner/Jacob
 2 | Kol
If I write a query as
Select Id, Name from MyTable
it will return
{
"id": 1,
"name": "John/Warner/Jacob",
},
{
"id": 2,
"name": "Kol",
},
Which query should I write to get the result below?
{
"id": 1,
"name": ["John", "Warner", "Jacob"],
},
{
"id": 2,
"name": ["Kol"] ,
},
Don't think you can return an array in the query itself, but you could do this...
SELECT id,
       SUBSTRING_INDEX(name, '/', 1)  AS name_part_1,
       SUBSTRING_INDEX(name, '/', -1) AS name_part_2
FROM tableName;
The only way to build it as an array would be to process the result accordingly in whatever language you are using.
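If a JSON-formatted string is good enough for your result, one workaround (a sketch, not part of the answer above; it assumes the names never contain double quotes) is to build the array text with plain string functions:
-- produces e.g. ["John","Warner","Jacob"]; on MySQL 5.7+ you could CAST(... AS JSON)
SELECT id,
       CONCAT('["', REPLACE(name, '/', '","'), '"]') AS name_array
FROM MyTable;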
You can define a function split, which is based on the fact that substring_index(substring_index(name,'/',x),'/',-1) will return the x-th part of a name when separated by '/'.
CREATE FUNCTION `test`.`SPLIT`(s varchar(200), c char, i integer) RETURNS varchar(200) CHARSET utf8mb4
DETERMINISTIC
BEGIN
    DECLARE retval varchar(200);
    WITH RECURSIVE split AS (
        select 1 as x, substring_index(substring_index(s, c, 1), c, -1) as y, s
        union all
        select x + 1, substring_index(substring_index(s, c, x + 1), c, -1), s
        from split
        where x <= (LENGTH(s) - LENGTH(REPLACE(s, c, '')))
    )
    SELECT y INTO retval FROM split WHERE x = i;
    return retval;
END
and then do:
with mytable as (
select 1 as Id, 'John/Warner/Jacob' as Name
union all
select 2, 'Kol')
select
id, split(Name,'/',x) as name
from mytable
cross join (select 1 as x union all select 2 union all select 3) x
order by id, name;
output:
Id | name
---+--------
 1 | Jacob
 1 | John
 1 | Warner
 2 | [NULL]
 2 | [NULL]
 2 | Kol
It is, of course, possible to refine this, and leave out the NULL values ...
I will not convert this output to JSON for you ...
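For example, one possible refinement (a sketch using the same sample data and split function as above) that keeps only the parts that actually exist:
with mytable as (
  select 1 as Id, 'John/Warner/Jacob' as Name
  union all
  select 2, 'Kol')
select id, split(Name, '/', x) as name
from mytable
cross join (select 1 as x union all select 2 union all select 3) x
-- keep row x only if the name has at least x parts (separator count + 1)
where x <= (LENGTH(Name) - LENGTH(REPLACE(Name, '/', ''))) + 1
order by id, name;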

Convert flattened key/value table into hierarchical JSON in PostgreSQL

I have a PostgreSQL table with unique key/value pairs, which were originally in a JSON format, but have been normalized and melted:
key | value
-----------------------------
name | Bob
address.city | Vancouver
address.country | Canada
I need to turn this into a hierarchical JSON:
{
"name": "Bob",
"address": {
"city": "Vancouver",
"country": "Canada"
}
}
Is there a way to do this easily within SQL?
jsonb_set() almost does everything for you, but unfortunately it can only create missing leaves (i.e. missing last keys on a path), not whole missing branches. To overcome this, here is a modified version of it, which can set values on any missing levels:
create function jsonb_set_rec(jsonb, jsonb, text[])
returns jsonb
language sql
as $$
select case
when array_length($3, 1) > 1 and ($1 #> $3[:array_upper($3, 1) - 1]) is null
then jsonb_set_rec($1, jsonb_build_object($3[array_upper($3, 1)], $2), $3[:array_upper($3, 1) - 1])
else jsonb_set($1, $3, $2, true)
end
$$;
Now you only need to apply this function to your rows one by one, starting with an empty JSON object: {}. You can do this either with a recursive CTE:
with recursive props as (
(select distinct on (grp)
pk, grp, jsonb_set_rec('{}', to_jsonb(value), string_to_array(key, '.')) json_object
from eav_tbl
order by grp, pk)
union all
(select distinct on (grp)
eav_tbl.pk, grp, jsonb_set_rec(json_object, to_jsonb(value), string_to_array(key, '.'))
from props
join eav_tbl using (grp)
where eav_tbl.pk > props.pk
order by grp, eav_tbl.pk)
)
select distinct on (grp)
grp, json_object
from props
order by grp, pk desc;
Or, with a custom aggregate defined as:
create aggregate jsonb_set_agg(jsonb, text[]) (
sfunc = jsonb_set_rec,
stype = jsonb,
initcond = '{}'
);
your query could become as simple as:
select grp, jsonb_set_agg(to_jsonb(value), string_to_array(key, '.'))
from eav_tbl
group by grp;
https://rextester.com/TULNU73750
There are no ready-to-use tools for this. The function below generates a hierarchical JSON object based on a path:
create or replace function jsonb_build_object_from_path(path text, value text)
returns jsonb language plpgsql as $$
declare
obj jsonb;
keys text[] := string_to_array(path, '.');
level int := cardinality(keys);
begin
obj := jsonb_build_object(keys[level], value);
while level > 1 loop
level := level - 1;
obj := jsonb_build_object(keys[level], obj);
end loop;
return obj;
end $$;
You also need the aggregate function jsonb_merge_agg(jsonb) described in this answer (a possible definition is sketched after the output below). The query:
with my_table (path, value) as (
values
('name', 'Bob'),
('address.city', 'Vancouver'),
('address.country', 'Canada'),
('first.second.third', 'value')
)
select jsonb_merge_agg(jsonb_build_object_from_path(path, value))
from my_table;
gives this object:
{
"name": "Bob",
"first":
{
"second":
{
"third": "value"
}
},
"address":
{
"city": "Vancouver",
"country": "Canada"
}
}
The function does not recognize JSON arrays.
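The jsonb_merge_agg(jsonb) aggregate referenced above is not shown in this answer. A minimal sketch of what it might look like, assuming a recursive deep merge of objects (the definition in the linked answer may differ), is:
-- deep-merges two jsonb objects; nested objects are merged recursively,
-- conflicting scalar values are taken from the second argument
create or replace function jsonb_merge_deep(a jsonb, b jsonb)
returns jsonb language plpgsql immutable as $$
begin
  return coalesce((
    select jsonb_object_agg(
             coalesce(ka, kb),
             case
               when va is null then vb
               when vb is null then va
               when jsonb_typeof(va) = 'object' and jsonb_typeof(vb) = 'object'
                 then jsonb_merge_deep(va, vb)
               else vb
             end)
    from jsonb_each(a) e1(ka, va)
    full join jsonb_each(b) e2(kb, vb) on ka = kb), '{}'::jsonb);
end $$;

create aggregate jsonb_merge_agg(jsonb) (
  sfunc = jsonb_merge_deep,
  stype = jsonb,
  initcond = '{}'
);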
I can't really think of something simpler, although I think there should be an easier way.
I assume there is some additional column that can be used to bring the keys that belong to one "person" together; I used p_id for that in my example.
select p_id,
jsonb_object_agg(k, case level when 1 then v -> k else v end)
from (
select p_id,
elements[1] k,
jsonb_object_agg(case cardinality(elements) when 1 then ky else elements[2] end, value) v,
max(cardinality(elements)) as level
from (
select p_id,
"key" as ky,
string_to_array("key", '.') as elements, value
from kv
) t1
group by p_id, k
) t2
group by p_id;
The innermost query just converts the dot notation to an array for easier access later.
The next level then builds JSON objects depending on the "key". For the "single level" keys it just uses key/value; for the others it uses the second element plus the value, and then aggregates those that belong together.
The second query level returns the following:
p_id | k | v | level
-----+---------+--------------------------------------------+------
1 | address | {"city": "Vancouver", "country": "Canada"} | 2
1 | name | {"name": "Bob"} | 1
2 | address | {"city": "Munich", "country": "Germany"} | 2
2 | name | {"name": "John"} | 1
The aggregation done in the second step leaves one level too many for the "single element" keys, and that's what we need level for.
If that distinction wasn't made, the final aggregation would return {"name": {"name": "Bob"}, "address": {"city": "Vancouver", "country": "Canada"}} instead of the wanted: {"name": "Bob", "address": {"city": "Vancouver", "country": "Canada"}}.
The expression case level when 1 then v -> k else v end essentially turns {"name": "Bob"} back to "Bob".
So, with the following sample data:
create table kv (p_id integer, "key" text, value text);
insert into kv
values
(1, 'name','Bob'),
(1, 'address.city','Vancouver'),
(1, 'address.country','Canada'),
(2, 'name','John'),
(2, 'address.city','Munich'),
(2, 'address.country','Germany');
then the query returns:
p_id | jsonb_object_agg
-----+-----------------------------------------------------------------------
1 | {"name": "Bob", "address": {"city": "Vancouver", "country": "Canada"}}
2 | {"name": "John", "address": {"city": "Munich", "country": "Germany"}}
Online example: https://rextester.com/SJOTCD7977
create table kv (key text, value text);
insert into kv
values
('name','Bob'),
('address.city','Vancouver'),
('address.country','Canada'),
('name','John'),
('address.city','Munich'),
('address.country','Germany');
create view v_kv as select row_number() over() as nRec, key, value from kv;
create view v_datos as
select k1.nrec, k1.value as name, k2.value as address_city, k3.value as address_country
from v_kv k1 inner join v_kv k2 on (k1.nrec + 1 = k2.nrec)
inner join v_kv k3 on ((k1.nrec + 2 = k3.nrec) and (k2.nrec + 1 = k3.nrec))
where mod(k1.nrec, 3) = 1;
select json_agg(json_build_object('name',name, 'address', json_build_object('city',address_city, 'country', address_country)))
from v_datos;

Converting JSON to table in SQL Server 2016

I'm working on a Web project where the client application communicates with the DB via JSONs.
The initial implementation took place with SQL Server 2012 (NO JSON support and hence we implemented a Stored Function that handled the parsing) and now we are moving to 2016 (YES JSON support).
So far, we are reducing processing time by a significant factor (in some cases, over 200 times faster!).
There are some interactions that contain arrays that need to be converted into tables. To achieve that, the OPENJSON function does ALMOST what we need.
In some of these (array-based) cases, records within the arrays have one or more fields that are also OBJECTS (in this particular case, also arrays), for instance:
[{
"Formal_Round_Method": "Floor",
"Public_Round_Method": "Closest",
"Formal_Precision": "3",
"Public_Precision": "3",
"Formal_Significant_Digits": "3",
"Public_Significant_Digits": "3",
"General_Comment": [{
"Timestamp": "2018-07-16 09:19",
"From": "1",
"Type": "Routine_Report",
"Body": "[To + Media + What]: Comment 1",
"$$hashKey": "object:1848"
}, {
"Timestamp": "2018-07-16 09:19",
"From": "1",
"Type": "User_Comment",
"Body": "[]: Comment 2",
"$$hashKey": "object:1857"
}, {
"Timestamp": "2018-07-16 09:19",
"From": "1",
"Type": "Routine_Report",
"Body": "[To + Media + What]: Comment 3",
"$$hashKey": "object:1862"
}]
}, {
"Formal_Round_Method": "Floor",
"Public_Round_Method": "Closest",
"Formal_Precision": "3",
"Public_Precision": "3",
"Formal_Significant_Digits": "3",
"Public_Significant_Digits": "3",
"General_Comment": []
}]
Here, General_Comment is also an array.
When running the command:
SELECT *
FROM OPENJSON(@_l_Table_Data)
WITH ( Formal_Round_Method NVARCHAR(16) '$.Formal_Round_Method' ,
Public_Round_Method NVARCHAR(16) '$.Public_Round_Method' ,
Formal_Precision INT '$.Formal_Precision' ,
Public_Precision INT '$.Public_Precision' ,
Formal_Significant_Digits INT '$.Formal_Significant_Digits' ,
Public_Significant_Digits INT '$.Public_Significant_Digits' ,
General_Comment NVARCHAR(4000) '$.General_Comment'
) ;
[@_l_Table_Data is a variable holding the JSON string]
we are getting the column General_Comment = NULL even though there is data in there (at least in the first element of the array).
I guess that I should be using a different syntax for those columns that may contain OBJECTS and not SIMPLE VALUES, but I have no idea what that syntax should be.
I found a Microsoft page that actually solves the problem.
Here is how the query should look:
SELECT *
FROM OPENJSON(@_l_Table_Data)
WITH ( Formal_Round_Method NVARCHAR(16) '$.Formal_Round_Method' ,
Public_Round_Method NVARCHAR(16) '$.Public_Round_Method' ,
Formal_Precision INT '$.Formal_Precision' ,
Public_Precision INT '$.Public_Precision' ,
Formal_Significant_Digits INT '$.Formal_Significant_Digits' ,
Public_Significant_Digits INT '$.Public_Significant_Digits' ,
General_Comment NVARCHAR(MAX) '$.General_Comment' AS JSON
) ;
So, you need to add AS JSON at the end of the column definition and (God knows why) the type MUST be NVARCHAR(MAX).
Very simple indeed!!!
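To actually expand the nested General_Comment array into rows, you can feed that column to a second OPENJSON via OUTER APPLY. A sketch (column list shortened; the names come from the sample JSON above):
SELECT t.Formal_Round_Method,
       t.Public_Round_Method,
       c.[Timestamp],
       c.[Type],
       c.Body
FROM OPENJSON(@_l_Table_Data)
     WITH ( Formal_Round_Method NVARCHAR(16)  '$.Formal_Round_Method',
            Public_Round_Method NVARCHAR(16)  '$.Public_Round_Method',
            General_Comment     NVARCHAR(MAX) '$.General_Comment' AS JSON
          ) AS t
-- OUTER APPLY keeps the rows whose General_Comment array is empty
OUTER APPLY OPENJSON(t.General_Comment)
     WITH ( [Timestamp] NVARCHAR(20)   '$.Timestamp',
            [From]      NVARCHAR(10)   '$.From',
            [Type]      NVARCHAR(50)   '$.Type',
            Body        NVARCHAR(4000) '$.Body'
          ) AS c;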
Create Function ParseJson:
CREATE OR ALTER FUNCTION [dbo].[ParseJson] (@JSON NVARCHAR(MAX))
RETURNS @Unwrapped TABLE
(
    [id] INT IDENTITY,     -- just used to get a unique reference to each json item
    [level] INT,           -- the hierarchy level
    [key] NVARCHAR(100),   -- the key or name of the item
    [Value] NVARCHAR(MAX), -- the value, if it is a null, int, binary, numeric or string
    type INT,              -- 0 to 5: the JSON type (null, numeric, string, binary, array or object)
    SQLDatatype sysname,   -- whatever the datatype can be parsed to
    parent INT,            -- the ID of the parent
    [path] NVARCHAR(4000)  -- the path as used by OpenJSON
)
AS
BEGIN
    INSERT INTO @Unwrapped ([level], [key], Value, type, SQLDatatype, parent, [path])
    VALUES
    (0,     -- the level
     NULL,  -- the key
     @JSON, -- the value
     CASE WHEN LEFT(LTRIM(@JSON), 1) = '[' THEN 4 ELSE 5 END, -- the type
     'json', -- SQLDatatype
     0,      -- no parent
     '$'     -- base path
    );

    DECLARE @ii INT = 0,        -- the level
            @Rowcount INT = -1; -- the number of rows from the previous iteration

    WHILE @Rowcount <> 0 -- while we are still finding levels
    BEGIN
        INSERT INTO @Unwrapped ([level], [key], Value, type, SQLDatatype, parent, [path])
        SELECT [level] + 1 AS [level], new.[Key] AS [key],
               new.[Value] AS [value], new.[Type] AS [type],
               /* In order to determine the datatype of a json value, the best approach is to determine
                  the datatype that can be parsed. In JSON, an array of objects can contain attributes
                  that aren't consistent either in their name or value. */
               CASE
                   WHEN new.Type = 0 THEN 'bit null'
                   WHEN new.[type] IN (1, 2) THEN COALESCE(
                        CASE WHEN TRY_CONVERT(INT, new.[value]) IS NOT NULL THEN 'int' END,
                        CASE WHEN TRY_CONVERT(NUMERIC(14,4), new.[value]) IS NOT NULL THEN 'numeric' END,
                        CASE WHEN TRY_CONVERT(FLOAT, new.[value]) IS NOT NULL THEN 'float' END,
                        CASE WHEN TRY_CONVERT(MONEY, new.[value]) IS NOT NULL THEN 'money' END,
                        CASE WHEN TRY_CONVERT(DATETIME, new.[value], 126) IS NOT NULL THEN 'Datetime2' END,
                        CASE WHEN TRY_CONVERT(DATETIME, new.[value], 127) IS NOT NULL THEN 'Datetime2' END,
                        'nvarchar')
                   WHEN new.Type = 3 THEN 'bit'
                   WHEN new.Type = 5 THEN 'object'
                   ELSE 'array'
               END AS SQLDatatype,
               old.[id],
               old.[path] + CASE WHEN old.type = 5 THEN '.' + new.[Key]
                                 ELSE '[' + new.[Key] COLLATE DATABASE_DEFAULT + ']' END AS path
        FROM @Unwrapped old
        CROSS APPLY OPENJSON(old.[Value]) new
        WHERE old.[level] = @ii AND old.type IN (4, 5);

        SELECT @Rowcount = @@ROWCOUNT;
        SELECT @ii = @ii + 1;
    END;
    RETURN;
END
For Usage:
SELECT * FROM dbo.ParseJson(@JsonString);
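For example, a quick sketch with a literal JSON value (the variable name is just for illustration):
DECLARE @JsonString NVARCHAR(MAX) =
    N'{"user_name":"Dave","age":18,"tags":["a","b"]}';

-- every key at every nesting level comes back as one row
SELECT [level], [key], [Value], SQLDatatype, parent, [path]
FROM dbo.ParseJson(@JsonString);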

Update every value in an array in postgres json

In my postgres database I have json that looks similar to this:
{
"myArray": [
{
"myValue": 1
},
{
"myValue": 2
},
{
"myValue": 3
}
]
}
Now I want to rename myValue to otherValue. I can't be sure about the length of the array! Preferably I would like to use something like set_jsonb with a wildcard as the array index, but that does not seem to be supported. So what is the nicest solution?
You have to decompose a whole jsonb object, modify individual elements and build the object back.
The custom function will be helpful:
create or replace function jsonb_change_keys_in_array(arr jsonb, old_key text, new_key text)
returns jsonb language sql as $$
select jsonb_agg(case
when value->old_key is null then value
else value - old_key || jsonb_build_object(new_key, value->old_key)
end)
from jsonb_array_elements(arr)
$$;
Use:
with my_table (id, data) as (
values(1,
'{
"myArray": [
{
"myValue": 1
},
{
"myValue": 2
},
{
"myValue": 3
}
]
}'::jsonb)
)
select
id,
jsonb_build_object(
'myArray',
jsonb_change_keys_in_array(data->'myArray', 'myValue', 'otherValue')
)
from my_table;
id | jsonb_build_object
----+------------------------------------------------------------------------
1 | {"myArray": [{"otherValue": 1}, {"otherValue": 2}, {"otherValue": 3}]}
(1 row)
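To persist the rename instead of just selecting it, the same expression can be used in an UPDATE; a sketch against a real table shaped like the CTE above (table and column names are assumptions):
-- assumes my_table(id int, data jsonb) with the structure shown above
update my_table
set data = jsonb_build_object(
             'myArray',
             jsonb_change_keys_in_array(data->'myArray', 'myValue', 'otherValue'))
where data ? 'myArray';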
Using JSON functions is definitely the most elegant approach, but you can get by with character replacement. Cast the json(b) as text, perform the replace, then change it back to json(b). In this example I included the quotes and colon to help the text replace target the JSON keys without conflicting with values.
CREATE TABLE mytable ( id INT, data JSONB );
INSERT INTO mytable VALUES (1, '{"myArray": [{"myValue": 1},{"myValue": 2},{"myValue": 3}]}');
INSERT INTO mytable VALUES (2, '{"myArray": [{"myValue": 4},{"myValue": 5},{"myValue": 6}]}');
SELECT * FROM mytable;
UPDATE mytable
SET data = REPLACE(data :: TEXT, '"myValue":', '"otherValue":') :: JSONB;
SELECT * FROM mytable;
http://sqlfiddle.com/#!17/1c28a/9/4

Automatically creating a table and inserting data from json file

I have nearly 50 JSON files. I want to create a Postgres database based on these files. Each file contains the data of one table. The files are not very large (at most several thousand records). Example data from customers.json (in fact there are more fields, I have simplified it):
[
{
"Id": 55948,
"FullName": "Full name #1",
"Address": "Address #1",
"Turnover": 120400.5,
"DateOfRegistration": "2014-02-13",
"LastModifiedAt": "2015-11-03 12:04:44" },
{
"Id": 55949,
"FullName": "Full name %2",
"Address": "Address #2",
"Turnover": 120000.0,
"DateOfRegistration": "2012-12-01",
"LastModifiedAt": "2015-11-04 17:14:21" }
]
I am trying to write a function which creates a table and inserts all the data into it. My attempt is based on a dynamic query using EXECUTE:
CREATE OR REPLACE FUNCTION import_json(table_name text, data json)
RETURNS VOID AS $$
DECLARE
    query text;
    colname text;
BEGIN
    query := 'CREATE TABLE ' || table_name || ' (';
    FOR colname IN SELECT json_object_keys(data->0)
    LOOP
        query := query || lower(colname) || ' text,';
    END LOOP;
    query := rtrim(query, ',') || ');';
    EXECUTE(query);
END $$ LANGUAGE plpgsql;
My function creates a table with expected column names, but all columns are of type text. The problem is that I do not know how to define proper types of columns.
The json files are formatted well and contain integer, numeric, date, timestamp and text values. I would like to get the table:
CREATE TABLE customers (
id integer,
fullname text,
address text,
turnover numeric,
date_of_registration date,
last_modified_at timestamp);
The main question: how can I recognize types of columns in generated table?
Additionally, is there an easy way to transform Pascal to underscore notation ("DateOfRegistration" -> "date_of_registration")?
You can determine the type of a column by examining a value.
The function below formats a column's definition from a (key, value) pair.
It uses regex pattern matching.
It also transforms the column's name to the underscore notation (using the regexp_replace() function).
Of course, the function won't work properly if the value represents NULL, so you have to check that the first JSON record has all non-null values.
create or replace function format_column(ckey text, cval text)
returns text language sql immutable as $$
select format('%s %s',
lower(regexp_replace(ckey, '(.)([A-Z])', '\1_\2', 'g')),
case
when cval ~ '^[\+-]{0,1}\d+$' then 'integer'
when cval ~ '^[\+-]{0,1}\d*\.\d+$' then 'numeric'
when cval ~ '^"\d\d\d\d-\d\d-\d\d"$' then 'date'
when cval ~ '^"\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d"$' then 'timestamp'
else 'text'
end
)
$$;
select format_column(key, value)
from (
values
('Id', '55948'),
('FullName', '"Full name #1"'),
('Turnover', '120400.5'),
('DateOfRegistration', '"2014-02-13"')
) val(key, value);
format_column
---------------------------
id integer
full_name text
turnover numeric
date_of_registration date
(4 rows)
In the main function you do not need variables or loops.
Use the format() function to format strings with parameters and string_agg() to create textual lists.
Since you need both keys and values, use json_each() instead of json_object_keys(). In the second query you can use row_number() to ensure that the aggregated lists of values are built per record.
create or replace function import_table(table_name text, jdata json)
returns void language plpgsql as $$
begin
execute format('create table %s (%s)', table_name, string_agg(col, ', '))
from (
select format_column(key::text, value::text) col
from json_each(jdata->0)
) sub;
execute format('insert into %s values %s', table_name, string_agg(val, ','))
from (
with lines as (
select row_number() over () rn, line
from (
select json_array_elements(jdata) line
) sub
)
select rn, format('(%s)', string_agg(value, ',')) val
from (
select rn, format('%L', trim(value::text, '"')) as value
from lines, json_each(line)
) sub
group by 1
) sub;
end $$;
Test:
select import_table('customers',
'[{ "Id": 55948,
"FullName": "Full name #1",
"Address": "Address #1",
"Turnover": 120400.5,
"DateOfRegistration": "2014-02-13",
"LastModifiedAt": "2015-11-03 12:04:44" },
{ "Id": 55949,
"FullName": "Full name %2",
"Address": "Address #2",
"Turnover": 120000.0,
"DateOfRegistration": "2012-12-01",
"LastModifiedAt": "2015-11-04 17:14:21" }]');
\d customers
Table "public.customers"
Column | Type | Modifiers
----------------------+-----------------------------+-----------
id | integer |
full_name | text |
address | text |
turnover | numeric |
date_of_registration | date |
last_modified_at | timestamp without time zone |
select * from customers;
id | full_name | address | turnover | date_of_registration | last_modified_at
-------+--------------+------------+----------+----------------------+---------------------
55948 | Full name #1 | Address #1 | 120400.5 | 2014-02-13 | 2015-11-03 12:04:44
55949 | Full name %2 | Address #2 | 120000.0 | 2012-12-01 | 2015-11-04 17:14:21
(2 rows)