Extract value from JSON ARRAY in BigQuery

I am trying to extract values from a JSON array, as below:
with `project.dataset.table` as (
  select '{"fruit":[{"apples":"5","oranges":"10","pear":"20"},{"apples":"5","oranges":"4"},{"apples":"5","oranges":"9","pear":"40"}]}' as json union all
  select '{"fruit":[{"lettuce":"7","kale": "8"}]}'
)
select json,
  if(regexp_contains(json, '"apples":"5"'),
     (select array_agg(json_extract_scalar(arr, '$.oranges') ignore nulls)
      from unnest(json_extract_array(json, '$.fruit')) as arr),
     null) as oranges,
  if(regexp_contains(json, '"apples":"5"'),
     (select array_agg(json_extract_scalar(arr, '$.pear') ignore nulls)
      from unnest(json_extract_array(json, '$.fruit')) as arr),
     null) as pear,
from `project.dataset.table`
It gives output that is not what I want.
I am expecting output like this:
| json | oranges | pear |
|------|---------|------|
| {"fruit":[{"apples":"5","oranges":"10","pear":"20"},{"apples":"5","oranges":"4"},{"apples":"5","oranges":"9","pear":"40"}]} | 10, 4, 9 | 20, null, 40 |
| {"fruit":[{"lettuce":"7","kale": "8"}]} | null | null |

Consider the approach below, with more explicit alignment of the respective entries:
select json,
  array(
    select
      if(
        json_extract_scalar(val, '$.apples') = '5',
        struct(
          json_extract_scalar(val, '$.oranges') as oranges,
          json_extract_scalar(val, '$.pear') as pear
        ),
        struct(null, null)
      )
    from t.arr val
  ) as fruits
from `project.dataset.table`,
unnest([struct(json_extract_array(json, '$.fruit') as arr)]) t
It is less verbose, and it outputs fruits as a repeated record rather than as two separate arrays.
Applied to the sample data in your question, the output is:
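If you later need one row per fruit entry rather than arrays, the repeated record flattens naturally with one more unnest. This is just a sketch of mine wrapping the query above, not part of the original answer:
select json, f.oranges, f.pear
from (
  select json,
    array(
      select if(
        json_extract_scalar(val, '$.apples') = '5',
        struct(json_extract_scalar(val, '$.oranges') as oranges,
               json_extract_scalar(val, '$.pear') as pear),
        struct(null, null)
      )
      from t.arr val
    ) as fruits
  from `project.dataset.table`,
  unnest([struct(json_extract_array(json, '$.fruit') as arr)]) t
), unnest(fruits) as f -- a comma (cross) join is safe here: fruits is never empty for this data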
If you really need to keep the output in separate columns, use the query below:
select json,
  array(
    select
      if(
        json_extract_scalar(val, '$.apples') = '5',
        ifnull(json_extract_scalar(val, '$.oranges'), '0'),
        '0'
      )
    from t.arr val
  ) as oranges,
  array(
    select
      if(
        json_extract_scalar(val, '$.apples') = '5',
        ifnull(json_extract_scalar(val, '$.pear'), '0'),
        '0'
      )
    from t.arr val
  ) as pear
from `project.dataset.table`,
unnest([struct(json_extract_array(json, '$.fruit') as arr)]) t
with this output:

When working with ARRAY_AGG, an error is raised if an array in the final query result contains a NULL element.
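A minimal repro of that error (any NULL element in an array that reaches the final result triggers it):
-- fails with: Array cannot have a null element
select array_agg(x) as a
from unnest([1, null, 2]) as x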
As a workaround, you can use the string 'null' like this:
with `project.dataset.table` as (
  select '{"fruit":[{"apples":"5","oranges":"10","pear":"20"},{"apples":"5","oranges":"4"},{"apples":"5","oranges":"9","pear":"40"}]}' as json union all
  select '{"fruit":[{"lettuce":"7","kale": "8"}]}'
)
select
  json,
  if(regexp_contains(json, '"apples":"5"'),
     (select array_agg(ifnull(json_extract_scalar(arr, '$.oranges'), 'null'))
      from unnest(json_extract_array(json, '$.fruit')) as arr),
     ['null']
  ) as oranges,
  if(regexp_contains(json, '"apples":"5"'),
     (select array_agg(ifnull(json_extract_scalar(arr, '$.pear'), 'null'))
      from unnest(json_extract_array(json, '$.fruit')) as arr),
     ['null']
  ) as pear,
from `project.dataset.table`


How do I prevent repetitive columns in FOR JSON PATH SQL queries?

So, I'm trying to get data from MSSQL to update some fields in an HTML form, which includes one checkbox and a set of options for a select input.
I thought I was being smart by writing my query as shown below. It gets both fields at once instead of using two independent queries... I mean, it's okay, but I have a lot of repeated items.
Is there a way to flatten this out?
// how do I flatten this
{
"Calculated": [
{
"Calculated": false
}
],
"Schedule": [
{
"Schedule": "THX-1138"
},
{
"Schedule": "LUH-3417"
},
{
"Schedule": "SEN-5241"
}
]
}
// into something more like this?
{
"Calculated": false,
"Schedule": [
"THX-1138",
"LUH-3417",
"SEN-5241"
]
}
Here is the query:
declare
    @EffectDate smalldatetime = '07-01-2012'
    ,@Grade varchar(3) = '001'
    ,@Schedule varchar(9) = 'THX-1138'
    ,@Step smallint = '15'
    ,@jsonResponse nvarchar(max)
;
select @jsonResponse = (
    select
        [Calculated] =
        (
            select
                b.Calculated
            from
                tblScalesHourly a
                inner join
                tblSchedules b
                    on a.EffectDate = b.EffectDate
                    and a.Schedule = b.Schedule
            where
                a.EffectDate = @EffectDate
                and a.Schedule = @Schedule
                and a.Grade = @Grade
                and a.Step = @Step
            for json path
        )
        ,[Schedule] =
        (
            select
                Schedule
            from
                tblSchedules
            where
                EffectDate = @EffectDate
                and Calculated = 0
            order by
                Schedule asc
            for json path
        )
    for json path, without_array_wrapper
)
It's probably a late answer, but I was able to reproduce this issue with the following test data:
declare @jsonResponse nvarchar(max)
select @jsonResponse = (
    select
        [Calculated] =
        (
            select CONVERT(bit, 0) AS Calculated
            for json path
        )
        ,
        [Schedule] =
        (
            select Schedule
            from (values ('THX-1138'), ('LUH-3417'), ('SEN-5241')) tblSchedules (Schedule)
            order by Schedule asc
            for json path
        )
    for json path, without_array_wrapper
)
You can get the expected results with the following approach. Note that you can't generate a JSON array of scalar values using FOR JSON, so you need string aggregation (FOR XML PATH('') on SQL Server 2016 and earlier, or STRING_AGG() on SQL Server 2017+):
select @jsonResponse = (
    select
        [Calculated] = (
            select CONVERT(bit, 0) AS Calculated
        )
        ,
        [Schedule] = JSON_QUERY(CONCAT(
            '["',
            STUFF(
                (
                    select CONCAT('","', Schedule)
                    from (values ('THX-1138'), ('LUH-3417'), ('SEN-5241')) tblSchedules (Schedule)
                    order by Schedule asc
                    for xml path('')
                ), 1, 3, ''
            ),
            '"]'
        ))
    for json path, without_array_wrapper
)
Output:
{"Calculated":false,"Schedule":["LUH-3417","SEN-5241","THX-1138"]}
And finally, using the statement from the question (not tested):
declare
    @EffectDate smalldatetime = '07-01-2012'
    ,@Grade varchar(3) = '001'
    ,@Schedule varchar(9) = 'THX-1138'
    ,@Step smallint = '15'
    ,@jsonResponse nvarchar(max)
;
select @jsonResponse = (
    select
        [Calculated] = (
            select
                b.Calculated
            from
                tblScalesHourly a
                inner join
                tblSchedules b
                    on a.EffectDate = b.EffectDate
                    and a.Schedule = b.Schedule
            where
                a.EffectDate = @EffectDate
                and a.Schedule = @Schedule
                and a.Grade = @Grade
                and a.Step = @Step
        ),
        [Schedule] = JSON_QUERY(CONCAT(
            '["',
            STUFF(
                (
                    select CONCAT('","', Schedule)
                    from
                        tblSchedules
                    where
                        EffectDate = @EffectDate
                        and Calculated = 0
                    for xml path('')
                ), 1, 3, ''
            ),
            '"]'
        ))
    for json path, without_array_wrapper
)

Split comma separated values at a particular comma position using SQL or SSRS report

I have a field in SSRS that contains concatenated values like:
1234,1456,3456,7890,3457,3245,4345
I need to break/split it after the 8th comma, or at a particular position, onto the next row, like:
1234,1456,3456,
7890,3457,3245,
4345
The values here are dynamic, but we always have to split/break at every 8th (or another particular) comma.
In your example text, all the values have four characters. If that is the case, a simple recursive CTE does what you want:
with cte as (
select convert(varchar(max), NULL) as val, convert(varchar(max), field) as rest, 0 as lev
from t
union all
select left(rest, 15) as val, stuff(rest, 1, 15, '') as rest, lev+1
from cte
where rest <> ''
)
select val
from cte
where lev > 0;
Here is a db<>fiddle.
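One practical note to add (my addition, not from the answer): a recursive CTE is capped at 100 recursion levels by default, so for long strings the final select may need a MAXRECURSION hint:
select val
from cte
where lev > 0
option (maxrecursion 0); -- 0 lifts the default cap of 100 levels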
Once you've grabbed a copy of DelimitedSplit8K_LEAD (as STRING_SPLIT has no concept of ordinal positions) you can split the string and then "re-aggregate" it.
Using SQL Server 2017+:
DECLARE @YourString varchar(8000) = '1234,1456,3456,7890,3457,3245,4345';
WITH Split AS(
SELECT DS.Item,
DS.ItemNumber,
(DS.ItemNumber - 1) / 3 AS Grp
FROM dbo.DelimitedSplit8K_LEAD(@YourString,',') DS)
SELECT STRING_AGG(S.Item,',') WITHIN GROUP (ORDER BY S.ItemNumber ASC) AS NewString
FROM Split S
GROUP BY S.Grp;
SQL Server 2016 and earlier:
DECLARE @YourString varchar(8000) = '1234,1456,3456,7890,3457,3245,4345';
WITH Split AS(
SELECT DS.Item,
DS.ItemNumber,
(DS.ItemNumber - 1) / 3 AS Grp
FROM dbo.DelimitedSplit8K_LEAD(@YourString,',') DS)
SELECT STUFF((SELECT ',' + sq.Item
FROM Split sq
WHERE sq.Grp = S.Grp
ORDER BY sq.ItemNumber
FOR XML PATH(''),TYPE).value('.','varchar(8000)'),1,1,'') AS NewString
FROM Split S
GROUP BY S.Grp;
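For completeness, SQL Server 2022 later added an optional enable_ordinal argument to STRING_SPLIT, which removes the need for a custom splitter entirely; a sketch of the same grouping trick:
DECLARE @YourString varchar(8000) = '1234,1456,3456,7890,3457,3245,4345';

SELECT STRING_AGG(value, ',') WITHIN GROUP (ORDER BY ordinal) AS NewString
FROM STRING_SPLIT(@YourString, ',', 1) -- third argument enables the ordinal column (SQL Server 2022+)
GROUP BY (ordinal - 1) / 3;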
If you use SQL Server 2016+, you may use an approach based on JSON. Just transform the input text into a valid JSON array and parse this array with OPENJSON():
Example with text:
Statement:
DECLARE @json nvarchar(max) = N'1234,1456,3456,7890,3457,3245,4345'
SELECT CONCAT(
MAX(CASE WHEN CONVERT(int, [key]) % 3 = 0 THEN [value] END),
MAX(CASE WHEN CONVERT(int, [key]) % 3 = 1 THEN [value] END),
MAX(CASE WHEN CONVERT(int, [key]) % 3 = 2 THEN [value] END)
) AS OutputText
FROM OPENJSON(CONCAT(N'["', REPLACE(@json, N',', N',","'), N'"]'))
GROUP BY (CONVERT(int, [key]) / 3)
Result:
---------------
OutputText
---------------
1234,1456,3456,
7890,3457,3245,
4345
Example with table:
Table:
CREATE TABLE Data (TextData nvarchar(max))
INSERT INTO Data (TextData)
VALUES (N'1234,1456,3456,7890,3457,3245,4345')
Statement:
SELECT d.TextData, c.OutputData
FROM Data d
CROSS APPLY (
SELECT CONCAT(
MAX(CASE WHEN CONVERT(int, [key]) % 3 = 0 THEN [value] END),
MAX(CASE WHEN CONVERT(int, [key]) % 3 = 1 THEN [value] END),
MAX(CASE WHEN CONVERT(int, [key]) % 3 = 2 THEN [value] END)
) AS OutputData
FROM OPENJSON(CONCAT(N'["', REPLACE(d.TextData, N',', N',","'), N'"]'))
GROUP BY (CONVERT(int, [key]) / 3)
) c
Result:
---------------------------------------------------
TextData OutputData
---------------------------------------------------
1234,1456,3456,7890,3457,3245,4345 1234,1456,3456,
1234,1456,3456,7890,3457,3245,4345 7890,3457,3245,
1234,1456,3456,7890,3457,3245,4345 4345

Hierarchical JSON output from table

I've got this table structure
| User | Type | Data |
|------|---------|------|
| 1 | "T1" | "A" |
| 1 | "T1" | "B" |
| 1 | "T2" | "C" |
| 2 | "T1" | "D" |
I want to get a hierarchical JSON string returned from my query
{
"1": {
"T1": [
"A",
"B"
],
"T2": [
"C"
]
},
"2": {
"T1": [
"D"
]
}
}
So: one entry for each User, with a sub-entry for each Type, and then a sub-entry for each Data.
All I'm finding is the FOR JSON PATH, ROOT ('x') or AUTO statements, but nothing that would make this hierarchical. Is this even possible out of the box? I couldn't find anything, so I've experimented with (recursive) CTEs but didn't get very far. I'd much appreciate it if someone could just point me in the right direction.
I'm not sure that you can create JSON with variable key names using FOR JSON AUTO and FOR JSON PATH. I suggest the following solutions:
- using FOR XML PATH to generate JSON with string manipulations
- using STRING_AGG() to generate JSON with string manipulations, for SQL Server 2017+
- using STRING_AGG() and JSON_MODIFY(), for SQL Server 2017+
Table:
CREATE TABLE #InputData (
[User] int,
[Type] varchar(2),
[Data] varchar(1)
)
INSERT INTO #InputData
([User], [Type], [Data])
VALUES
(1, 'T1', 'A'),
(1, 'T1', 'B'),
(1, 'T2', 'C'),
(2, 'T1', 'D')
Statement using FOR XML PATH:
;WITH SecondLevelCTE AS (
SELECT
d.[User],
d.[Type],
Json1 = CONCAT(
'[',
STUFF(
(
SELECT CONCAT(',"', [Data], '"')
FROM #InputData
WHERE [User] = d.[User] AND [Type] = d.[Type]
FOR XML PATH('')
), 1, 1, ''),
']')
FROM #InputData d
GROUP BY d.[User], d.[Type]
), FirstLevelCTE AS (
SELECT
d.[User],
Json2 = CONCAT(
'{',
STUFF(
(
SELECT CONCAT(',"', [Type], '":', [Json1])
FROM SecondLevelCTE
WHERE [User] = d.[User]
FOR XML PATH('')
), 1, 1, ''),
'}'
)
FROM SecondLevelCTE d
GROUP BY d.[User]
)
SELECT CONCAT(
'{',
STUFF(
(
SELECT CONCAT(',"', [User], '":', Json2)
FROM FirstLevelCTE
FOR XML PATH('')
), 1, 1, '') ,
'}'
)
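One caveat worth adding to the FOR XML PATH variant (my note, not part of the original answer): without the TYPE directive, FOR XML PATH('') entitizes characters such as & and < in the data. If [Data] may contain them, the inner aggregation can be written as a drop-in replacement for the Json1 expression like this:
SELECT CONCAT(
    '[',
    STUFF(
        (
            SELECT CONCAT(',"', [Data], '"')
            FROM #InputData
            WHERE [User] = d.[User] AND [Type] = d.[Type]
            FOR XML PATH(''), TYPE -- TYPE + .value() round-trips the text without XML escaping
        ).value('.', 'nvarchar(max)'), 1, 1, ''),
    ']')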
Statement using STRING_AGG():
;WITH SecondLevelCTE AS (
SELECT
d.[User],
d.[Type],
Json1 = (
SELECT CONCAT('["', STRING_AGG([Data], '","'), '"]')
FROM #InputData
WHERE [User] = d.[User] AND [Type] = d.[Type]
)
FROM #InputData d
GROUP BY d.[User], d.[Type]
), FirstLevelCTE AS (
SELECT
d.[User],
Json2 = (
SELECT STRING_AGG(CONCAT('"', [Type], '":', [Json1]), ',')
FROM SecondLevelCTE
WHERE [User] = d.[User]
)
FROM SecondLevelCTE d
GROUP BY d.[User]
)
SELECT CONCAT('{', STRING_AGG(CONCAT('"', [User], '":{', Json2, '}'), ','), '}')
FROM FirstLevelCTE
Statement using STRING_AGG() and JSON_MODIFY():
DECLARE @json nvarchar(max) = N'{}'
SELECT
    @json = JSON_MODIFY(
        CASE
            WHEN JSON_QUERY(@json, CONCAT('$."', [User], '"')) IS NULL
            THEN JSON_MODIFY(@json, CONCAT('$."', [User], '"'), JSON_QUERY('{}'))
            ELSE @json
        END,
        CONCAT('$."', [User], '".', [Type]),
        JSON_QUERY(Json)
    )
FROM (
    SELECT
        d.[User],
        d.[Type],
        Json = (
            SELECT CONCAT('["', STRING_AGG([Data], '","'), '"]')
            FROM #InputData
            WHERE [User] = d.[User] AND [Type] = d.[Type]
        )
    FROM #InputData d
    GROUP BY d.[User], d.[Type]
) t
Output:
{"1":{"T1":["A","B"],"T2":["C"]},"2":{"T1":["D"]}}
This isn't exactly what you want (I'm not great with FOR JSON) but it does get you close to the shape you need until something better comes along...
(https://jsonformatter.org/json-parser/974b6b)
use tempdb
GO
drop table if exists users
create table users (
[user] integer
, [type] char(2)
, [data] char(1)
)
insert into users
values (1, 'T1', 'A')
, (1, 'T1', 'B')
, (1, 'T2', 'C')
, (2, 'T1', 'D')
select DISTINCT ONE.[user], two.[type], three.[data]
from users AS ONE
inner join users two
on one.[user] = two.[user]
inner join users three
on one.[user] = three.[user]
and two.[type] = three.[type]
for JSON AUTO

Compare Months list with date field of table

I am using SQL Server 2008.
I have a table in my database like this:
And I want output like this:
As shown in my table, the DateField column has the smalldatetime datatype, alongside the fruits and vegi fields. I want output that shows the data month-wise; the month comparison should be performed based on the DateField of my table.
You can use something like:
select [Month] = month(DateField)
, [MonthName] = left(datename(mm, DateField), 3)
, TotalAmountApple = sum(case when fruits = 'Apple' then 1 else 0 end)
, TotalAmountOnion = sum(case when vegi = 'Onion' then 1 else 0 end)
from produce
group by month(DateField)
, left(datename(mm, DateField), 3)
order by [Month]
Full test details (no SQL Fiddle as it's experiencing issues):
create table produce
(
id int
, fruits varchar(10)
, vegi varchar(10)
, DateField smalldatetime
)
insert into produce
select 1, 'Apple', 'Chilly', '01-jan-2013'
insert into produce
select 1, 'Mango', 'Onion', '15-jan-2013'
insert into produce
select 1, 'Mango', 'Chilly', '20-jan-2013'
insert into produce
select 1, 'Apple', 'Chilly', '01-Feb-2013'
insert into produce
select 1, 'Mango', 'Onion', '15-Feb-2013'
insert into produce
select 1, 'Apple', 'Onion', '20-Feb-2013'
select [Month] = month(DateField)
, [MonthName] = left(datename(mm, DateField), 3)
, TotalAmountApple = sum(case when fruits = 'Apple' then 1 else 0 end)
, TotalAmountOnion = sum(case when vegi = 'Onion' then 1 else 0 end)
from produce
group by month(DateField)
, left(datename(mm, DateField), 3)
order by [Month]
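For the sample rows above, this should return:
-----------------------------------------------------
Month  MonthName  TotalAmountApple  TotalAmountOnion
-----------------------------------------------------
1      Jan        1                 1
2      Feb        2                 2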

Recursive Fill Calculations with CTE or anything efficient

Please help me with ideas (preferably CTE-based) to solve this as efficiently as possible.
In the table shown, the red cells in the "Value" column are the known values, and the highlighted green ones are the values to be calculated with the formulas shown next to them.
I am trying to see if this is possible with CTEs at all.
It's like this: the last known value and its respective interval, the next known value and its respective interval, and the interval for which the value is calculated are all used to find the value, which in turn will be used in the very same way for the next unknown value.
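In other words, this is linear interpolation: an unknown value at interval x between the surrounding known points (x0, y0) and (x1, y1) comes out as
value = y0 + (y1 - y0) / (x1 - x0) * (x - x0)
which is exactly what both answers below compute.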
Here is a solution.
Hope it helps. :)
;with testdata(store,shipntrvl,value)
as
(
select 'abc', 1, 0.56
union all
select 'abc', 5, null
union all
select 'abc', 10, 0.63
union all
select 'abc', 15, null
union all
select 'abc', 20, null
union all
select 'abc', 25, null
union all
select 'abc', 30, 0.96
union all
select 'xyz', 1, 0.36
union all
select 'xyz', 5, 0.38
union all
select 'xyz', 10, null
union all
select 'xyz', 15, 0.46
union all
select 'xyz', 20, null
union all
select 'xyz', 25, null
union all
select 'xyz', 30, 0.91
)
,calc
as
(
select *
,ROW_NUMBER() OVER(partition by store order by shipntrvl) as row_no
from testdata
)
,extra
as
(
select *
,(select top 1 row_no
from calc c2
where c2.row_no < c1.row_no
and c1.value is null
and c2.value is not null
and c1.store = c2.store
order by c2.row_no desc) as prev_nr
,(select top 1 row_no
from calc c2
where c2.row_no > c1.row_no
and c1.value is null
and c2.value is not null
and c1.store = c2.store
order by c2.row_no asc) as next_nr
from calc c1
)
select c.store
,c.shipntrvl
,c.value
,isnull(c.value,
(cnext.value-cprev.value)/
(cnext.shipntrvl-cprev.shipntrvl)*
(c.shipntrvl-cprev.shipntrvl)+cprev.value
) as calculated_value
from calc c
join extra
on extra.row_no = c.row_no
and extra.store = c.store
join calc cnext
on cnext.row_no = case when c.value is null
then extra.next_nr
else c.row_no
end
and c.store = cnext.store
join calc cprev
on cprev.row_no = case when c.value is null
then extra.prev_nr
else c.row_no
end
and c.store = cprev.store
Here is what I came up with (storevalue is the starting table in your example):
with knownvalues as (
select store, shipNtrvl,value
from storevalue where Value is not null
), valueranges as
(
select
k.store,
k.ShipNtrvl as lowrange,
MIN(s.ShipNtrvl) as highrange,
(select value from storevalue where store = k.store and ShipNtrvl = MIN(s.shipNtrvl))-
(select value from storevalue where store = k.store and ShipNtrvl = k.ShipNtrvl) as term1,
MIN(s.ShipNtrvl) - k.ShipNtrvl as term2,min(k.Value) as lowval
from knownvalues k
join storevalue s on s.Value is not null and s.store= k.store and s.ShipNtrvl > k.ShipNtrvl
group by k.store, k.shipntrvl
)
select s.store,s.ShipNtrvl,v.term1/v.term2*(s.ShipNtrvl-v.lowrange)+ v.lowval as value
from storevalue s join valueranges v on v.store = s.store and s.ShipNtrvl between v.lowrange and v.highrange
where s.Value is null
union
select * from storevalue where value is not null
Just change the select to an update to write the values into the table.
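On SQL Server 2022+, the same interpolation can also be written without self-joins, using IGNORE NULLS with FIRST_VALUE/LAST_VALUE and a named WINDOW. This is my sketch against the same storevalue table, not part of the answers above:
select store, shipntrvl,
       coalesce(value,
                prev_val + (next_val - prev_val)
                         * (shipntrvl - prev_ivl) / (next_ivl - prev_ivl)) as value
from (
    select *,
           -- last known value and its interval at or before this row
           last_value(value) ignore nulls over w_prev as prev_val,
           last_value(case when value is not null then shipntrvl end) ignore nulls over w_prev as prev_ivl,
           -- next known value and its interval at or after this row
           first_value(value) ignore nulls over w_next as next_val,
           first_value(case when value is not null then shipntrvl end) ignore nulls over w_next as next_ivl
    from storevalue
    window w_prev as (partition by store order by shipntrvl rows between unbounded preceding and current row),
           w_next as (partition by store order by shipntrvl rows between current row and unbounded following)
) t;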