I want to merge JSON string in a table and sum its value after group
for Eg:-
2023, {"hen":4, "owl":3}
2023, {"crow":4, "owl":2}
2022, {"owl":6, "crow":2}
2022, {"hen":5}
2021, {"hen":2, "crow":1}
Result could be like below
2023, {"hen":4, "owl":5, "crow":4}
2022, {"hen":5, "owl":6, "crow":2}
2021, {"hen":2, "crow":1}
below might be an option when you don't know json object keys beforehand.
-- Merges the flat JSON objects of each year into one object per year, summing
-- the integer values that share a key. Keys do not need to be known upfront.
--
-- "key": integer pairs are located with regular expressions instead of
-- splitting on the literal ', ' and ':' separators, so the query tolerates any
-- whitespace around ',' and ':', keys that contain ',' or ':', and empty
-- objects ('{}' simply contributes no pairs).
-- Limitations: only top-level style "key": <integer> pairs are recognised;
-- keys containing escaped double quotes are not supported.
WITH sample_table AS (
  SELECT 2023 AS year, '{"hen":4, "owl":3}' AS json UNION ALL
  SELECT 2023, '{"crow":4, "owl":2}' UNION ALL
  SELECT 2022, '{"owl":6, "crow":2}' UNION ALL
  SELECT 2022, '{"hen":5}' UNION ALL
  SELECT 2021, '{"hen":2, "crow":1}'
),
-- One row per (year, key) holding the total over all of that year's objects.
-- k keeps its surrounding double quotes so it can be emitted as-is.
key_totals AS (
  SELECT
    year,
    REGEXP_EXTRACT(kv, r'^"[^"]*"') AS k,
    SUM(SAFE_CAST(REGEXP_EXTRACT(kv, r'-?\d+$') AS INT64)) AS v
  FROM sample_table
  CROSS JOIN UNNEST(REGEXP_EXTRACT_ALL(json, r'"[^"]*"\s*:\s*-?\d+')) AS kv
  GROUP BY year, k
)
-- Re-assemble one JSON object per year; keys are emitted alphabetically so the
-- output is deterministic.
SELECT
  year,
  '{' || STRING_AGG(k || ':' || CAST(v AS STRING), ', ' ORDER BY k) || '}' AS json
FROM key_totals
GROUP BY year;
Query results
Consider also below approach
-- JavaScript UDF: property names of the JSON object encoded in `input`.
CREATE TEMP FUNCTION get_keys(input STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
return Object.keys(JSON.parse(input));
""";

-- JavaScript UDF: property values of the JSON object encoded in `input`,
-- in the same order as get_keys returns the names.
CREATE TEMP FUNCTION get_values(input STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
return Object.values(JSON.parse(input));
""";

-- Inner query: pair every key with its value by array position and total the
-- values per (year, key). Outer query: stitch the totals of each year back
-- into a single JSON object string.
SELECT
  year,
  '{' || STRING_AGG(FORMAT('"%s":%i', key, total), ', ') || '}' AS json
FROM (
  SELECT
    year,
    key,
    SUM(CAST(val AS INT64)) AS total
  FROM your_table
  CROSS JOIN UNNEST(get_keys(json)) AS key WITH OFFSET
  INNER JOIN UNNEST(get_values(json)) AS val WITH OFFSET
    USING (offset)
  GROUP BY year, key
)
GROUP BY year
if applied to sample data in your question - output is
Related
I have the following document
{
"agentId": "agent2",
"date": "2022-08-30",
"metaData": {
"documentCreationDate": "2022-08-30T15:49:21Z",
"documentVersion": "1.0",
"expiry": 1662479361
},
"emailInteractions": [
"0c99ea2a-c235-4c5a-a0bd-aeffba559bca",
"12846a9d-7cc1-4755-b527-cd8aee9d2de4"
],
"voiceInteractions": [
"1c99ea2a-c235-4c5a-a0bd-aeffba559bca",
"22846a9d-7cc1-4755-b527-cd8aee9d2de4"
]
}
And I'm trying to retrieve a list of ids found in emailInteraction and/or voiceInteraction, I'm currently doing the following
/* Union of the email and voice interaction ids of agent2; a missing or null
   array is treated as empty. */
SELECT ARRAY_UNION(
           IFMISSINGORNULL(act.emailInteractions, []),
           IFMISSINGORNULL(act.voiceInteractions, [])
       ) AS ids
FROM `agent-activities` AS act
WHERE act.agentId = "agent2"
My issue is that it returns this :
[
[
"12846a9d-7cc1-4755-b527-cd8aee9d2de4",
"22846a9d-7cc1-4755-b527-cd8aee9d2de4",
"0c99ea2a-c235-4c5a-a0bd-aeffba559bca",
"1c99ea2a-c235-4c5a-a0bd-aeffba559bca"
]
]
and it bothers me because I need a plain array of strings and not an array of array of strings, because I need to use this list of ids in a where clause in another query with something along the lines of :
/* Filter fragment: compares lst.interactionId against the result of a
   SELECT RAW subquery whose rows are the ARRAY_UNION of emailInteractions and
   voiceInteractions for the agent bound to $agentId.
   NOTE(review): each subquery row is itself an array, which appears to be the
   array-of-arrays problem described in the question - confirm. */
WHERE lst.interactionId IN (SELECT raw ARRAY_UNION(IFMISSINGORNULL(emailInteractions, []), IFMISSINGORNULL(voiceInteractions, [])) AS ids
FROM `agent-activities`
WHERE agentId = $agentId)
FYI my main query is this :
/* Lists the interventions of voice and email interactions, restricted to the
   interaction ids recorded for the agent bound to $agentId, ordered by start
   date and paginated with $limit / $offset. */
SELECT lst.*
FROM (
/* Voice branch: one row per intervention nested under
   vi.distributions[].interventions[]. */
SELECT iv.id AS interventionId,
vi.direction,
vi.channel,
vi.startDate AS startDate,
vi.id AS interactionId,
vi.customerProfileId
FROM `voice-interactions` AS vi
UNNEST vi.distributions AS dv
UNNEST dv.interventions AS iv
UNION
/* Email branch: same projection over `email-interactions`. */
SELECT ie.id AS interventionId,
ei.direction,
ei.channel,
ei.startDate AS startDate,
ei.id AS interactionId,
ei.customerProfileId
FROM `email-interactions` AS ei
UNNEST ei.distributions AS de
UNNEST de.interventions AS ie) AS lst
/* The subquery returns one ARRAY_UNION result per matching document.
   NOTE(review): that makes the IN operand an array of arrays rather than a
   flat list of ids - this is the issue the question asks about. */
WHERE lst.interactionId IN (
SELECT raw ARRAY_UNION(IFMISSINGORNULL(emailInteractions, []), IFMISSINGORNULL(voiceInteractions, [])) AS ids
FROM `agent-activities`
WHERE agentId = $agentId)
ORDER BY startDate ASC
LIMIT $limit
OFFSET $offset
I could use some help to either flatten my array of array into an array somehow or find a better solution than this where clause with the 'property in subquery'
/* interactionIds: flat, de-duplicated array of every email and voice
   interaction id recorded for the agent bound to $agentId. UNNEST emits one
   row per id and SELECT DISTINCT RAW returns the bare id values, so the CTE is
   a plain array of strings that can be used directly as the IN operand.
   A missing or null id array is treated as empty.
   Fix: the WHERE line previously ended with a stray ")" that closed the CTE
   one line early and left the statement with unbalanced parentheses. */
WITH interactionIds AS (
    SELECT DISTINCT RAW u
    FROM `agent-activities` AS a
    UNNEST ARRAY_CONCAT(IFMISSINGORNULL(a.emailInteractions, []), IFMISSINGORNULL(a.voiceInteractions, [])) AS u
    WHERE a.agentId = $agentId
)
SELECT lst.*
FROM (
    /* Voice branch: one row per intervention of every voice interaction. */
    SELECT iv.id AS interventionId,
           vi.direction,
           vi.channel,
           vi.startDate AS startDate,
           vi.id AS interactionId,
           vi.customerProfileId
    FROM `voice-interactions` AS vi
    UNNEST vi.distributions AS dv
    UNNEST dv.interventions AS iv
    UNION
    /* Email branch: same projection over the email interactions. */
    SELECT ie.id AS interventionId,
           ei.direction,
           ei.channel,
           ei.startDate AS startDate,
           ei.id AS interactionId,
           ei.customerProfileId
    FROM `email-interactions` AS ei
    UNNEST ei.distributions AS de
    UNNEST de.interventions AS ie) AS lst
WHERE lst.interactionId IN interactionIds
ORDER BY startDate ASC
LIMIT $limit
OFFSET $offset;
Also you can use ARRAY_DISTINCT(ARRAY_FLATTEN((subquery),1))
NOTE: In function argument you need () around subquery treat it as expression
As you can see below I have Name column. I want to split it by / and return the value in array.
MyTable
Id
Name
1
John/Warner/Jacob
2
Kol
If I write a query as
SELECT Id,
       Name
FROM MyTable
it will return
{
"id": 1,
"name": "John/Warner/Jacob",
},
{
"id": 2,
"name": "Kol",
},
Which query should I write to get below result ?
{
"id": 1,
"name": ["John", "Warner", "Jacob"],
},
{
"id": 2,
"name": ["Kol"] ,
},
Don't think you can return an array in the query itself, but you could do this...
-- Exposes two pieces of each '/'-separated name as separate columns.
SELECT
    t.id,
    SUBSTRING_INDEX(t.name, '/', 1)  AS name_part_1,  -- text before the first '/'
    SUBSTRING_INDEX(t.name, '/', -1) AS name_part_2   -- text after the last '/'
FROM tableName AS t;
Only way to build it as an array would be when processing the result accordingly in whatever language you are using.
You can define a function split, which is based on the fact that substring_index(substring_index(name,'/',x),'/',-1) will return the x-th part of a name when separated by '/'.
CREATE FUNCTION `test`.`SPLIT`(s varchar(200), c char, i integer) RETURNS varchar(200) CHARSET utf8mb4
DETERMINISTIC
BEGIN
    -- Returns the i-th (1-based) part of s when s is split on the delimiter c,
    -- or NULL when i is NULL, i < 1, or s has fewer than i parts.
    --
    -- substring_index(s, c, i) keeps everything up to the i-th delimiter and
    -- the outer substring_index(..., c, -1) keeps what follows the last
    -- delimiter of that prefix, i.e. exactly the i-th part. The lookup is done
    -- directly instead of materialising every part with a recursive CTE.
    DECLARE part_count integer;

    -- Parts = delimiter occurrences + 1. CHAR_LENGTH counts characters, so a
    -- multi-byte delimiter is not over-counted the way byte-based LENGTH would.
    SET part_count = CHAR_LENGTH(s) - CHAR_LENGTH(REPLACE(s, c, '')) + 1;

    -- Without this guard substring_index would hand back the last part for any
    -- index past the end. A NULL s or c makes part_count NULL; the guard is
    -- then skipped and substring_index itself returns NULL.
    IF i IS NULL OR i < 1 OR i > part_count THEN
        RETURN NULL;
    END IF;

    RETURN SUBSTRING_INDEX(SUBSTRING_INDEX(s, c, i), c, -1);
END
and then do:
-- Demo: explode each sample name into one row per '/'-separated part by
-- calling split() for every candidate position.
WITH mytable AS (
    SELECT 1 AS Id, 'John/Warner/Jacob' AS Name
    UNION ALL
    SELECT 2, 'Kol'
),
-- Candidate part positions 1..3.
part_positions AS (
    SELECT 1 AS x
    UNION ALL
    SELECT 2
    UNION ALL
    SELECT 3
)
SELECT
    id,
    split(Name, '/', x) AS name
FROM mytable
CROSS JOIN part_positions
ORDER BY id, name;
output:
Id
name
1
Jacob
1
John
1
Warner
2
[NULL]
2
[NULL]
2
Kol
It is, of course, possible to refine this, and leave out the NULL values ...
I will not convert this output to JSON for you ...
my dataframe looks like this:
id value
a 0:3,1:0,2:0,3:4
a 0:0,1:0,2:2,3:0
a 0:0,1:5,2:4,3:0
I want to write a query to get average values of keys in column value?
So for example for 0:3,1:0,2:0,3:4 it must be (0+0+0+3+3+3+3)/7 = 1.71.
For 0:0,1:0,2:2,3:0 it must be (2+2)/2=2.
For 0:0,1:5,2:4,3:0 it must be (1+1+1+1+1+2+2+2+2)/9 = 1.44.
So desired result is:
id value
a 1.71
a 2.00
a 1.44
How to do that? Are there sql functions to get this result?
See this DBFIDDLE
code:
CREATE PROCEDURE `avg_dict`(s varchar(100))
BEGIN
    -- Computes the weighted average of a 'key:count,key:count,...' string,
    -- i.e. sum(key * count) / sum(count), and returns it as a one-row result
    -- set. Example: '0:3,1:0,2:0,3:4' -> (0*3+1*0+2*0+3*4)/(+3+0+0+4).
    --
    -- The input is turned into an arithmetic expression and executed as
    -- dynamic SQL, so it is validated first: only non-negative integers
    -- separated by ':' and ',' are accepted, which prevents arbitrary SQL from
    -- being injected through s.
    IF s IS NULL OR NOT (s REGEXP '^[0-9]+:[0-9]+(,[0-9]+:[0-9]+)*$') THEN
        SIGNAL SQLSTATE '45000'
            SET MESSAGE_TEXT = 'avg_dict expects key:count pairs of non-negative integers separated by commas';
    END IF;

    -- Numerator: ':' -> '*' and ',' -> '+'.
    -- Denominator: every '[,]key:' prefix becomes '+', leaving only the counts;
    -- '[0-9]+' lets keys have more than one digit.
    -- PREPARE only accepts a string literal or a user variable, hence @result
    -- (a leading '#' would start a comment in MySQL).
    SET @result = CONCAT(
        'SELECT (', REPLACE(REPLACE(s, ':', '*'), ',', '+'),
        ')/(', REGEXP_REPLACE(s, ',?[0-9]+:', '+'), ')'
    );
    PREPARE stmt FROM @result;
    EXECUTE stmt;
    DEALLOCATE PREPARE stmt;
END
results:
stmt
output
CALL avg_dict("0:3,1:0,2:0,3:4");
1.1743
CALL avg_dict("0:0,1:0,2:2,3:0");
2.0000
CALL avg_dict("0:0,1:5,2:4,3:0");
1.4444
With some combination of split's, transforms and repeat you can achieve your goal:
-- Average of the keys in a 'key:count,key:count,...' string, where every key
-- is counted `count` times.
-- Inner query: split the string into [key, count] pairs, expand each pair into
-- `count` copies of `key`, and flatten everything into one array.
-- Outer query: sum the array and divide by its length.
WITH dataset(id, value) AS (
    values ('a', '0:3,1:0,2:0,3:4'),
        ('a', '0:0,1:0,2:2,3:0'),
        ('a', '0:0,1:5,2:4,3:0')
)
SELECT id,
    -- The accumulator is seeded with an explicit DOUBLE so the lambda's result
    -- type matches the state type even on engines that read 0.0 as DECIMAL.
    -- NULLIF turns an empty array (all counts are 0) into NULL instead of
    -- dividing by zero.
    reduce(arr, CAST(0 AS DOUBLE), (s, x)->s + x, s->s) / NULLIF(cardinality(arr), 0)
FROM(
        SELECT *,
            flatten(
                transform(
                    transform(
                        split(value, ','),
                        s->split(s, ':')
                    ),
                    -- kv[1] is the key, kv[2] is how many times it occurs
                    kv->repeat(
                        cast(kv [ 1 ] as INTEGER),
                        cast(kv [ 2 ] as INTEGER)
                    )
                )
            ) as arr
        FROM dataset
    )
Output:
id
_col1
a
1.7142857142857142
a
2.0
a
1.4444444444444444
Note:
Outer select can be substituted with array_average but I used he select cause Athena's version of Presto does not support it.
UPD
Another version which can be more performant:
-- Weighted average without expanding the counts: every 'key:count' item
-- becomes a (num, count) row and a single reduce pass accumulates
-- sum(num * count) and sum(count). A total count of 0 yields NULL.
SELECT id,
    reduce(
        pairs,
        CAST(ROW(0.0, 0) AS ROW(sum DOUBLE, count INTEGER)),
        (acc, p) -> CAST(
            ROW(p.num * p.count + acc.sum, acc.count + p.count) AS ROW(sum DOUBLE, count INTEGER)
        ),
        acc -> IF(acc.count = 0, NULL, acc.sum / acc.count)
    )
FROM (
    SELECT *,
        transform(
            split(value, ','),
            item -> CAST(
                ROW(
                    CAST(split(item, ':')[1] AS DOUBLE),
                    CAST(split(item, ':')[2] AS INTEGER)
                ) AS ROW(num DOUBLE, count INTEGER)
            )
        ) AS pairs
    FROM dataset
)
I have a json field in a table as below, i am unable to query the "day" from it :
{"FID":54,"header_json":"{\"date\":{\"day\":2,\"month\":6,\"year\":2020},\"amt\":10,\"count\":1}"}
SQL tried:
-- Attempt from the question: addresses "day" through the path
-- $.header_json.date[*] of order_json.
SELECT jt.*
FROM order_json o,
     JSON_TABLE(
         o.order_json, '$.header_json.date[*]'
         COLUMNS ("day" VARCHAR2(2) PATH '$.day')
     ) AS jt;
That's pretty easy: as you can see header_json is just a string, not usual nested json. So you need to get this quoted string and parse as a json again:
-- Two-step parse: the inline view pulls header_json out of order_json as text,
-- and the outer JSON_TABLE then parses that text to read date.day.
SELECT headers.header_json,
       j."day"
FROM (
         SELECT /*+ no_merge */
                jh.header_json
         FROM order_json o,
              JSON_TABLE(
                  o.order_json, '$.header_json[*]'
                  COLUMNS (header_json VARCHAR2(200) PATH '$')
              ) AS jh
     ) headers,
     JSON_TABLE(
         headers.header_json, '$.date[*]'
         COLUMNS ("day" VARCHAR2(2) PATH '$.day')
     ) AS j
;
Full example with sample data:
-- Self-contained example: one sample order_json row.
WITH order_json(order_json) AS (
    SELECT
        '{"FID":54,"header_json":"{\"date\":{\"day\":2,\"month\":6,\"year\":2020},\"amt\":10,\"count\":1}"}'
    FROM dual
)
-- Step 1 (inline view): extract header_json as text.
-- Step 2 (outer JSON_TABLE): parse that text and read date.day.
SELECT headers.header_json,
       j."day"
FROM (
         SELECT /*+ no_merge */
                jh.header_json
         FROM order_json o,
              JSON_TABLE(
                  o.order_json, '$.header_json[*]'
                  COLUMNS (header_json VARCHAR2(200) PATH '$')
              ) AS jh
     ) headers,
     JSON_TABLE(
         headers.header_json, '$.date[*]'
         COLUMNS ("day" VARCHAR2(2) PATH '$.day')
     ) AS j
;
i have JSON string in one column in oracle 10g database like
[{"id":"1","contactBy":"Rajesh Kumar"},{"id":"2","contactBy":"Rakesh Kumar"}]
I have to get the value for ContactBy in that column for one of the reports.
is there any built in function to parse the JSON string in Oracle 10g or any user defined funciton to parse the String
As said by Jens in comments, JSON support is only available from 12c, but you can use regular expressions as a workaround to get what you want:
-- One row per "contactBy" value. CONNECT BY keeps generating LEVELs while a
-- LEVEL-th match exists; REGEXP_REPLACE then strips the match down to the text
-- between the quotes (letters, digits, underscores and spaces only).
SELECT REGEXP_REPLACE(
           REGEXP_SUBSTR(
               '[{"id": "1", "contactBy":"Rajesh Kumar"},{"id": "2","contactBy": "Emmanuel Test"}]',
               '"contactBy":\s*("(\w| )*")', 1, LEVEL
           ),
           '"contactBy":\s*"((\w| )*)"', '\1', 1, 1
       ) contact
FROM dual
CONNECT BY REGEXP_SUBSTR(
               '[{"id": "1","contactBy":"Rajesh Kumar"},{"id": "2","contactBy": "Emmanuel Test"}]',
               '"contactBy":\s*("(\w| )*")', 1, LEVEL
           ) IS NOT NULL
;
EDIT : request modified to take both special characters and display answers in a single row:
-- Same extraction, but the non-greedy .*? accepts any characters inside the
-- quotes and LISTAGG folds the values into one comma-separated row, in match
-- order.
SELECT LISTAGG(contact, ', ') WITHIN GROUP (ORDER BY lev)
FROM (
         SELECT REGEXP_REPLACE(
                    REGEXP_SUBSTR(
                        '[{"id": "1", "contactBy":"Rajesh Kumar"},{"id": "2","contactBy": "Emmanuel Test+-"}]',
                        '"contactBy":\s*(".*?")', 1, LEVEL
                    ),
                    '"contactBy":\s*"(.*?)"', '\1', 1, 1
                ) contact,
                LEVEL lev
         FROM dual
         CONNECT BY REGEXP_SUBSTR(
                        '[{"id": "1","contactBy":"Rajesh Kumar"},{"id": "2","contactBy": "Emmanuel Test+-"}]',
                        '"contactBy":\s*(".*?")', 1, LEVEL
                    ) IS NOT NULL
     )
;
# Emmanuel your code is really helped a lot, thank you very much. but your query is taking too much of time, so i changed to a function , which will return the required values.
CREATE OR REPLACE FUNCTION SFGETCRCONTACTBY(INCRID NUMBER) RETURN VARCHAR2 AS
  -- Returns the "contactBy" values stored in T_LOAN_REQUEST_MARKET.CR_CONTACT_BY
  -- for the row whose CR_ID equals INCRID, joined with ','.
  -- Values are emitted from the last match to the first, because the loop
  -- counts down from the number of matches. Returns NULL when there is no
  -- match. The SELECT INTO raises NO_DATA_FOUND / TOO_MANY_ROWS when CR_ID does
  -- not identify exactly one row.
  --
  -- Pattern: "contactBy": followed by optional whitespace (\s*) and a quoted
  -- value captured non-greedily. The previous \S* (non-whitespace) could not
  -- match a value preceded by a space and could swallow several entries when
  -- there was none.
  C_PATTERN   CONSTANT VARCHAR2(40) := '"contactBy":\s*"(.*?)"';
  CONTACTJSON T_LOAN_REQUEST_MARKET.CR_CONTACT_BY%TYPE;
  TEMPINT     NUMBER := 0;
  OUTPUT      VARCHAR2(10000);
  TEMPVAR     VARCHAR2(1000);
BEGIN
  -- Read the column once instead of re-querying the table on every iteration.
  SELECT CR_CONTACT_BY
    INTO CONTACTJSON
    FROM T_LOAN_REQUEST_MARKET
   WHERE CR_ID = INCRID;

  TEMPINT := REGEXP_COUNT(CONTACTJSON, C_PATTERN);

  WHILE TEMPINT > 0
  LOOP
    -- The final argument (subexpression 1) returns just the text between the
    -- quotes of the TEMPINT-th match.
    TEMPVAR := REGEXP_SUBSTR(CONTACTJSON, C_PATTERN, 1, TEMPINT, NULL, 1);
    IF OUTPUT IS NULL THEN
      OUTPUT := TEMPVAR;
    ELSE
      OUTPUT := OUTPUT || ',' || TEMPVAR;
    END IF;
    TEMPINT := TEMPINT - 1;
  END LOOP;

  RETURN OUTPUT;
END;
/