Query a JSON document in SQL Server

I'm trying to read data from a JSON document stored in Azure Data Lake Storage from an Azure SQL database using the query below. I have tried a couple of approaches and nothing seems to bring the data back. For instance, using '$.data[0].AccID' in OPENJSON returns only the first record of each nested array, but I'm not sure how to bring back all the data.
json:
[
  {
    "data": [
      {
        "AccID": 1234,
        "CustID": 456,
        "Total": 1234.1234,
        "OrderDate": "2022-12-01"
      },
      {
        "AccID": 5678,
        "CustID": 890,
        "Total": 5678.5678,
        "OrderDate": "2022-12-01"
      }
    ],
    "count": 2
  },
  {
    "data": [
      {
        "AccID": 1234,
        "CustID": 456,
        "Total": 100.0,
        "OrderDate": "2021-12-01"
      },
      {
        "AccID": 5678,
        "CustID": 890,
        "Total": 200.0,
        "OrderDate": "2021-12-01"
      },
      {
        "AccID": 8900,
        "CustID": 235,
        "Total": 300.0,
        "OrderDate": "2021-12-01"
      }
    ],
    "count": 3
  }
]
Query:
SELECT *
FROM OPENROWSET (
    BULK 'blobpath/file.json',
    DATA_SOURCE = 'adls',
    SINGLE_CLOB
) AS X
CROSS APPLY OPENJSON (X.BulkColumn, '$.value')
WITH (
    AccID int,
    CustID int,
    Total float,
    OrderDate date
)

I created a storage account and uploaded the JSON file into a container.
The input .json is the same document shown in the question.
I generated a SAS token, then created a master key and an external data source; a sketch of that setup (including the database scoped credential that holds the SAS token) follows the table definition below. I created a table in SQL with the following columns:
CREATE TABLE data1 (
    ACCID varchar(100),
    CustID varchar(100),
    Total float,
    OrderDate date,
    [count] int
);
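A rough sketch of that setup (the password, SAS token, and storage URL are placeholders; the credential name adls_cred is illustrative, while adls matches the data source name used in the question):

-- Master key, credential and external data source for OPENROWSET ... DATA_SOURCE
CREATE MASTER KEY ENCRYPTION BY PASSWORD = '<strong password>';

CREATE DATABASE SCOPED CREDENTIAL adls_cred
WITH IDENTITY = 'SHARED ACCESS SIGNATURE',
     SECRET = '<sas token without the leading ?>';

CREATE EXTERNAL DATA SOURCE adls
WITH (
    TYPE = BLOB_STORAGE,
    LOCATION = 'https://<storageaccount>.blob.core.windows.net/<container>',
    CREDENTIAL = adls_cred
);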
I inserted data into that table using the code below:
INSERT INTO data1
SELECT AccID, CustID, Total, OrderDate, [count]
FROM OPENROWSET(
    BULK 'jsonfile path',
    DATA_SOURCE = 'your data source',
    SINGLE_CLOB
) AS DataFile
CROSS APPLY OPENJSON(BulkColumn)
WITH (
    AccID varchar(100) '$.data[0].AccID',
    CustID varchar(100) '$.data[0].CustID',
    Total float '$.data[0].Total',
    OrderDate date '$.data[0].OrderDate',
    [count] int '$.count'
);

INSERT INTO data1
SELECT AccID, CustID, Total, OrderDate, [count]
FROM OPENROWSET(
    BULK 'jsonfile path',
    DATA_SOURCE = 'your data source',
    SINGLE_CLOB
) AS DataFile
CROSS APPLY OPENJSON(BulkColumn)
WITH (
    AccID varchar(100) '$.data[1].AccID',
    CustID varchar(100) '$.data[1].CustID',
    Total float '$.data[1].Total',
    OrderDate date '$.data[1].OrderDate',
    [count] int '$.count'
);

INSERT INTO data1
SELECT AccID, CustID, Total, OrderDate, [count]
FROM OPENROWSET(
    BULK 'jsonfile path',
    DATA_SOURCE = 'your data source',
    SINGLE_CLOB
) AS DataFile
CROSS APPLY OPENJSON(BulkColumn)
WITH (
    AccID varchar(100) '$.data[2].AccID',
    CustID varchar(100) '$.data[2].CustID',
    Total float '$.data[2].Total',
    OrderDate date '$.data[2].OrderDate',
    [count] int '$.count'
);
Data is inserted successfully.
Retrieving the table contents showed the inserted rows plus rows with NULL values, because the indexed paths return NULL for objects whose data array has fewer elements. I deleted those rows:
delete from data1 where ACCID is null
Then I retrieved the table ordered by count, which matches the order of the JSON:
select * from data1 order by [count]
In this way I retrieved all the data from the JSON document.
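As a more general alternative, the nested data arrays can also be expanded with a second OPENJSON call, which avoids hard-coding the array indexes and works for any number of elements per array. A minimal sketch, assuming the same file path and external data source as above:

SELECT d.AccID, d.CustID, d.Total, d.OrderDate, o.[count]
FROM OPENROWSET(
    BULK 'jsonfile path',
    DATA_SOURCE = 'your data source',
    SINGLE_CLOB
) AS DataFile
CROSS APPLY OPENJSON(BulkColumn)
WITH (
    data nvarchar(max) '$.data' AS JSON,
    [count] int '$.count'
) AS o
CROSS APPLY OPENJSON(o.data)
WITH (
    AccID int,
    CustID int,
    Total float,
    OrderDate date
) AS d;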

Related

SQL OPENJSON is not returning the values from sub-arrays

I have a JSON file that is properly formatted according to the Microsoft ISJSON function. However, my query refuses to return a value from the nested array.
Here is an excerpt from the JSON file.
I want to return the following fields: id, symbol, name, and price.
I can get the first three, but the price always shows up null in the SQL query results.
JSON FILE SNIPPET:
{
"status": {
"timestamp": "2021-01-06T07:14:42.132Z",
"error_code": 0,
"error_message": null,
"elapsed": 14,
"credit_count": 1,
"notice": null,
"total_count": 4180
},
"data": [
{
"id": 1,
"name": "Bitcoin",
"symbol": "BTC",
"slug": "bitcoin",
"num_market_pairs": 9772,
"date_added": "2013-04-28T00:00:00.000Z",
"tags": [
"mineable",
"pow",
"sha-256",
"store-of-value",
"state-channels"
],
"max_supply": 21000000,
"circulating_supply": 18592156,
"total_supply": 18592156,
"platform": null,
"cmc_rank": 1,
"last_updated": "2021-01-06T07:13:02.000Z",
"quote": {
"USD": {
"price": 36248.609255662224,
"volume_24h": 225452557837159.16,
"percent_change_1h": 2.74047145,
"percent_change_24h": 19.54362963,
"percent_change_7d": 29.31750604,
"market_cap": 673939798064.3159,
"last_updated": "2021-01-06T07:13:02.000Z"
}
}
}
Here is the SQL Query that I'm using:
DECLARE @JSON VARCHAR(MAX)

SELECT @JSON = BulkColumn
FROM OPENROWSET
(BULK 'C:\TSP\output.json', SINGLE_CLOB) AS j

SELECT IIF(ISJSON(@JSON) = 1, 'YES', 'NO') AS JSON_OK

SELECT *
FROM OPENJSON (@JSON, '$.data')
WITH (
    id int
    ,symbol varchar(20)
    ,[name] varchar(50)
    ,price float '$.data.quote.USD[0]'
)
I've tried everything I can think of to get the price to appear, but I'm missing something, as it's not cooperating. I also set the database compatibility level to 130, as I read that could be the problem... Still no luck.
Any help would be much appreciated.
$.data.quote.USD is not an array; it's an object (a set of properties). The column paths are also evaluated relative to each element of $.data, so they should not include data again. Try the following instead:
select *
from openjson(@JSON, '$.data') with
(
    id int
    ,symbol varchar(20)
    ,[name] varchar(50)
    ,price float '$.quote.USD.price'
)
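For a quick sanity check without reading the file from disk, the corrected path can be exercised against a small inline literal; a minimal sketch using sample values taken from the snippet above:

DECLARE @JSON NVARCHAR(MAX) = N'{
  "data": [
    {
      "id": 1, "name": "Bitcoin", "symbol": "BTC",
      "quote": { "USD": { "price": 36248.609255662224 } }
    }
  ]
}';

-- Should return one row with price = 36248.609255662224
SELECT *
FROM OPENJSON(@JSON, '$.data')
WITH (
    id int
    ,symbol varchar(20)
    ,[name] varchar(50)
    ,price float '$.quote.USD.price'
);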

Convert flat SQL rows into nested JSON array using FOR JSON

So, I have a simple view that looks like this:
Name | Type | Product | QuantitySold
------------------------------------------------------
Walmart | Big Store | Gummy Bears | 10
Walmart | Big Store | Toothbrush | 6
Target | Small Store | Toothbrush | 2
Without using nested queries, can this easily be converted to the JSON below using SQL's FOR JSON clause?
[
{
"Type": "Big Store",
"Stores": [
{
"Name": "Walmart",
"Products": [
{
"Name": "Gummy Bears",
"QuantitySold": 10
},
{
"Name": "Toothbrush",
"QuantitySold": 6
}
]
}
]
},
{
"Type": "Smaller Store",
"Stores": [
{
"Name": "Target",
"Products": [
{
"Name": "Toothbrush",
"QuantitySold": 2
}
]
}
]
}
]
Essentially: group by Type, then Store, then the line items. My attempt so far is below; I'm not sure how to properly group the rows.
SELECT Type, (
SELECT Store,
(SELECT Product,QuantitySold from MyTable m3 where m3.id=m2.id for json path) as Products
FROM MyTable m2 where m1.ID = m2.ID for json path) as Stores
) as Types FROM MyTable m1
You can try something like this:
DECLARE #Data TABLE (
Name VARCHAR(20), Type VARCHAR(20), Product VARCHAR(20), QuantitySold INT
);
INSERT INTO #Data ( Name, Type, Product, QuantitySold ) VALUES
( 'Walmart', 'Big Store', 'Gummy Bears', 10 ),
( 'Walmart', 'Big Store', 'Toothbrush', 6 ),
( 'Target', 'Small Store', 'Toothbrush', 2 );
SELECT DISTINCT
t.[Type],
Stores
FROM #Data AS t
OUTER APPLY (
SELECT (
SELECT DISTINCT [Name], Products FROM #Data x
OUTER APPLY (
SELECT (
SELECT Product AS [Name], QuantitySold FROM #Data n WHERE n.[Name] = x.[Name]
FOR JSON PATH
) AS Products
) AS p
WHERE x.[Type] = t.[Type]
FOR JSON PATH
) AS Stores
) AS Stores
ORDER BY [Type]
FOR JSON PATH;
Returns
[{
"Type": "Big Store",
"Stores": [{
"Name": "Walmart",
"Products": [{
"Name": "Gummy Bears",
"QuantitySold": 10
}, {
"Name": "Toothbrush",
"QuantitySold": 6
}]
}]
}, {
"Type": "Small Store",
"Stores": [{
"Name": "Target",
"Products": [{
"Name": "Toothbrush",
"QuantitySold": 2
}]
}]
}]
If you had a normalized data structure, you could use another approach.
--Let's assume that Types are stored like this
DECLARE #Types TABLE (
id int,
Type nvarchar(20)
);
INSERT INTO #Types VALUES (1, N'Big Store'), (2, N'Small Store');
--Stores in separate table
DECLARE #Stores TABLE (
id int,
Name nvarchar(10),
TypeId int
);
INSERT INTO #Stores VALUES (1, N'Walmart', 1), (2, N'Target', 2),
(3, N'Tesco', 2); -- I added one more just for fun
--Products table
DECLARE #Products TABLE (
id int,
Name nvarchar(20)
);
INSERT INTO #Products VALUES (1, N'Gummy Bears'), (2, N'Toothbrush'),
(3, N'Milk'), (4, N'Ball') -- Added some here
-- And here comes the sales
DECLARE #Sales TABLE (
StoreId int,
ProductId int,
QuantitySold int
);
INSERT INTO #Sales VALUES (1, 1, 10), (1, 2, 6), (2, 2, 2),
(3, 4, 15), (3, 3, 7); -- I added few more
Now we can join the tables and get the result you need:
SELECT Type = Type.Type,
Name = [Stores].Name,
Name = Products.Product,
QuantitySold = Products.QuantitySold
FROM (
SELECT s.StoreId,
p.Name Product,
s.QuantitySold
FROM #Sales s
INNER JOIN #Products p
ON p.id = s.ProductId
) Products
INNER JOIN #Stores Stores
ON Stores.Id = Products.StoreId
INNER JOIN #Types [Type]
ON Stores.TypeId = [Type].id
ORDER BY Type.Type, [Stores].Name
FOR JSON AUTO;
Output:
[
{
"Type": "Big Store",
"Stores": [
{
"Name": "Walmart",
"Products": [
{
"Name": "Gummy Bears",
"QuantitySold": 10
},
{
"Name": "Toothbrush",
"QuantitySold": 6
}
]
}
]
},
{
"Type": "Small Store",
"Stores": [
{
"Name": "Target",
"Products": [
{
"Name": "Toothbrush",
"QuantitySold": 2
}
]
},
{
"Name": "Tesco",
"Products": [
{
"Name": "Ball",
"QuantitySold": 15
},
{
"Name": "Milk",
"QuantitySold": 7
}
]
}
]
}
]

Convert table result to a form of JSON

I am trying to form JSON from a table result set:
create table testmalc(
    appid int identity(1,1),
    propertyid1 int,
    propertyid1val varchar(10),
    propertyid2 int,
    propertyid2val varchar(10)
)
insert into testmalc values(456,'t1',789,'t2')
insert into testmalc values(900,'t3',902,'t4')
I need the desired JSON result below:
{
"data": {
"record": [{
"id": appid,
"customFields": [{
"customfieldid": propertyid1 ,
"customfieldvalue": propertyid1val
},
{
"customfieldid": propertyid2 ,
"customfieldvalue": propertyid2val
}
]
},
{
"id": appid,
"customFields": [{
"customfieldid": propertyid1 ,
"customfieldvalue": propertyid1val
},
{
"customfieldid": propertyid2 ,
"customfieldvalue": propertyid2val
}
]
}
]
}
}
I tried using STUFF but was not getting the desired result. Now I'm trying UNPIVOT.
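For reference, the unpivot step on its own can also be done with a CROSS APPLY over a VALUES constructor; a minimal sketch that only flattens each row into one row per custom field (it does not build the JSON):

SELECT t.appid,
       v.customfieldid,
       v.customfieldvalue
FROM testmalc t
CROSS APPLY (VALUES
    (t.propertyid1, t.propertyid1val),
    (t.propertyid2, t.propertyid2val)
) AS v(customfieldid, customfieldvalue);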
If you cannot upgrade to SQL Server 2016 for JSON support, you should try to solve this in whatever application / programming language you know.
Just for fun, I provide an approach, which works, but is more a hack than a solution:
Your test data:
DECLARE @testmalc table (
    appid int identity(1,1),
    propertyid1 int,
    propertyid1val varchar(10),
    propertyid2 int,
    propertyid2val varchar(10)
);
insert into @testmalc values(456,'t1',789,'t2')
                           ,(900,'t3',902,'t4');

--create an XML, which is the most similar structure, and read it as an NVARCHAR string
DECLARE @intermediateXML NVARCHAR(MAX)=
(
    SELECT t.appid AS id
          ,(
            SELECT t2.propertyid1 AS [prop1/@customfieldid]
                  ,t2.propertyid1val AS [prop1/@customfieldvalue]
                  ,t2.propertyid2 AS [prop2/@customfieldid]
                  ,t2.propertyid2val AS [prop2/@customfieldvalue]
            FROM @testmalc t2
            WHERE t2.appid=t.appid
            FOR XML PATH('customFields'),TYPE
           ) AS [*]
    FROM @testmalc t
    GROUP BY t.appid
    FOR XML PATH('row')
);

--Now a bunch of replacements
SET @intermediateXML=REPLACE(REPLACE(REPLACE(REPLACE(@intermediateXML,'=',':'),'/>','}'),'<prop1 ','{'),'<prop2 ','{');
SET @intermediateXML=REPLACE(REPLACE(REPLACE(REPLACE(@intermediateXML,'<customFields>','"customFields":['),'</customFields>',']'),'customfieldid','"customfieldid"'),'customfieldvalue',',"customfieldvalue"');
SET @intermediateXML=REPLACE(REPLACE(@intermediateXML,'<id>','"id":'),'</id>',',');
SET @intermediateXML=REPLACE(REPLACE(REPLACE(@intermediateXML,'<row>','{'),'</row>','}'),'}{','},{');

DECLARE @json NVARCHAR(MAX)=N'{"data":{"record":[' + @intermediateXML + ']}}';
PRINT @json;
The result (formatted)
{
"data": {
"record": [
{
"id": 1,
"customFields": [
{
"customfieldid": "456",
"customfieldvalue": "t1"
},
{
"customfieldid": "789",
"customfieldvalue": "t2"
}
]
},
{
"id": 2,
"customFields": [
{
"customfieldid": "900",
"customfieldvalue": "t3"
},
{
"customfieldid": "902",
"customfieldvalue": "t4"
}
]
}
]
}
}
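If upgrading is an option, the same shape can be produced natively on SQL Server 2016 or later; a rough sketch with FOR JSON PATH, assuming the testmalc table from the question:

-- Sketch for SQL Server 2016+: builds {"data":{"record":[...]}} directly
SELECT [data.record] = (
    SELECT t.appid AS id,
           (
               SELECT v.customfieldid, v.customfieldvalue
               FROM (VALUES
                        (t.propertyid1, t.propertyid1val),
                        (t.propertyid2, t.propertyid2val)
                    ) AS v(customfieldid, customfieldvalue)
               FOR JSON PATH
           ) AS customFields
    FROM testmalc t
    FOR JSON PATH
)
FOR JSON PATH, WITHOUT_ARRAY_WRAPPER;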

reading nested json data from Flex table in vertica

I have this json file.
[
{
"Modified": "2016-09-0",
"Id": 16,
"Name": "ABC",
"Filters": [],
"ScoreComponents":[
{
"Id": 86,
"Name": "Politeness",
"Bins": [],
"Ranges": [
{
"ComponentId": 86,
"LastUser": "CDE\\John.Doe"
},
{
"ComponentId": 86,
"LastUser": "CDE\\John.Doe"
}
],
"Filters": []
},
{
"Id": 87,
"Name": "Empathy",
"Bins": [],
"Ranges": [
{
"ComponentId": 87,
"LastUser": "CDE\\John.Doe"
}
],
"Filters": [
{
"ComponentID": -30356,
"BucketID": 81
}
]
},
{
"Id": 88,
"Name": "Ownership",
"Bins": [],
"Ranges": [
{
"ComponentId": 88,
"User": "CDE\\John.Doe"
}
],
"Filters": []
}]
}
]
I have loaded this file into a Vertica flex table:
CREATE FLEX TABLE flex_test();
copy events_stg.flex_test from LOCAL 'C:/test2.json' PARSER fjsonparser (flatten_maps= true, flatten_arrays = false)
I want to read all data from ScoreComponents including nested arrays.
I tried this query:
select "Id" as scoreid, mapitems("ScoreComponents") over (partition by "Id") from flex_test
The output contains small squares that I don't understand. I am a student, and Vertica and flex tables are new to me.
I have also tried flatten_arrays = true, but that gives me an empty result set.
You're getting squares because the values field contains a binary VMap.
This should do it:
create flex table so_flex();
create table so_score_components(
id int,
name varchar(100)
);
create table so_ranges(
parent_id int,
component_id int,
last_user varchar(100)
);
create table so_filters(
parent_id int,
component_id int,
bucket_id int
);
copy so_flex from local 'E:\Demos\so.json'
parser fjsonparser(start_point='ScoreComponents',
flatten_maps = false, flatten_arrays = false);
insert into so_score_components(id, name)
select id::int, name::varchar from so_flex;
insert into so_ranges(parent_id, component_id, last_user)
select id::int, values['ComponentId']::int, values['LastUser']::varchar
from (
select id, mapitems(ranges) over (partition by id)
from so_flex
) t;
insert into so_filters(parent_id, component_id, bucket_id)
select id::int, values['ComponentID']::int, values['BucketID']::int
from (
select id, mapitems(filters) over (partition by id)
from so_flex
) t;
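Once the normalized tables are populated, the nested data can be read back with ordinary joins; for example (a sketch, assuming the tables created above):

-- Each score component with its ranges and filters
select sc.id,
       sc.name,
       r.last_user,
       f.bucket_id
from so_score_components sc
left join so_ranges r on r.parent_id = sc.id
left join so_filters f on f.parent_id = sc.id
order by sc.id;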

Get aggregate sum of json array in Postgres NOSQL json data

How can I get the aggregate SUM(amount) from the "refunds" array in a Postgres JSON select?
Following is my data schema and structure:
Table Name: transactions
Column name: data
{
"id": "tran_6ac25129951962e99f28fa488993",
"amount": 1200,
"origin_amount": 3900,
"status": "partial_refunded",
"description": "Subscription#sub_a67d59efb2bcbf73485a ",
"livemode": false,
"refunds": [
{
"id": "refund_ee4192ffb6d2caa490a1",
"amount": 1200,
"status": "refunded",
"created_at": 1426412340,
"updated_at": 1426412340,
},
{
"id": "refund_0e4a34e4ee7281d369df",
"amount": 1500,
"status": "refunded",
"created_at": 1426412353,
"updated_at": 1426412353,
}
]
}
Output should be: 1200 + 1500 = 2700

| total |
|-------|
| 2700  |
Please provide a general solution, not one based on static data.
This should work on 9.3+
WITH x AS( SELECT
'{
"id": "tran_6ac25129951962e99f28fa488993",
"amount": 1200,
"origin_amount": 3900,
"status": "partial_refunded",
"description": "Subscription#sub_a67d59efb2bcbf73485a ",
"livemode": false,
"refunds": [
{
"id": "refund_ee4192ffb6d2caa490a1",
"amount": 1200,
"status": "refunded",
"created_at": 1426412340,
"updated_at": 1426412340
},
{
"id": "refund_0e4a34e4ee7281d369df",
"amount": 1500,
"status": "refunded",
"created_at": 1426412353,
"updated_at": 1426412353
}
]
}'::json as y),
refunds AS(
SELECT json_array_elements(y->'refunds') as j FROM x)
SELECT sum((j->>'amount')::int) FROM refunds;
WITH AllRefunds AS ( SELECT jsonb_array_elements(data->'refunds') AS refund FROM transactions)
SELECT SUM( CAST ( refund ->> 'amount' AS INTEGER )) FROM AllRefunds;
If you need to know how the query is built:
1.
WITH AllRefunds AS ( SELECT jsonb_array_elements(data->'refunds') FROM transactions)
SELECT * FROM AllRefunds;
This selects every element of the refunds array found in the transactions table (each element extracted as a JSON object via ->) and exposes the result as a CTE named AllRefunds. That result consists of a single column with a default name.
2.
WITH AllRefunds AS ( SELECT jsonb_array_elements(data->'refunds') AS refund FROM transactions)
SELECT * FROM AllRefunds;
Here the added (second) AS renames that column inside AllRefunds to refund.
3.
WITH AllRefunds AS ( SELECT jsonb_array_elements(data->'refunds') AS refund FROM transactions)
SELECT SUM( CAST ( refund ->> 'amount' AS INTEGER )) FROM AllRefunds;
Our array entries are JSON objects, so we extract the amount field as text with ->>, cast it to an integer, and SUM all entries.
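If the sum is needed per transaction rather than across the whole table, the same expansion can be grouped by the parent row; a minimal sketch, assuming the data column is jsonb and the JSON id field identifies each transaction:

SELECT t.data->>'id' AS transaction_id,
       SUM((refund->>'amount')::int) AS total_refunded
FROM transactions t
CROSS JOIN LATERAL jsonb_array_elements(t.data->'refunds') AS refund
GROUP BY t.data->>'id';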