Multi-nested JSON in Hive

I have the following input with nested JSON, and I want to ingest this data into Hive as multiple rows.
"taxes": [{
"line_id": 1,
"commodity_code": "997159",
"fee": {
"amt": {
"curr_code": "USD",
"value": "71.4"
},
"type": "receiver"
},
"ship_addr": {
"admin_area_1": "MAHARASHTRA",
"country_code": "IN"
},
"total_tax": {
"curr_code": "USD",
"value": "12.8520000000"
},
"tax-details": [{
"exempt_option": false,
"auth_name": "India Maharashtra Central GST",
"doc_amt": {
"currency_code": "USD",
"value": "6.43"
},
"unrnd_doc_amt": {
"currency_code": "USD",
"value": "6.4260000000"
},
"rate": "0.09",
"rate_code": "SR",
"non_basis_doc_amt": "0.00",
"exempt_doc_amt": "0.00",
"jdx_memo": "INSS2: Tax payable in Seller location.",
"seller_reg_no": "27AAGCP4442G1ZF",
"admin_zone_level": "Country",
"auth_type": "CGST",
"erp_code": "MHCGST",
"inv_desc": "Standard Rate - CGST",
"basis_doc_amt": "71.40"
}, {
"exempt_option": false,
"auth_name": "India Maharashtra State GST",
"doc_amt": {
"currency_code": "USD",
"value": "6.43"
},
"unrnd_doc_amt": {
"currency_code": "USD",
"value": "6.4260000000"
},
"rate": "0.09",
"rate_code": "SR",
"non_basis_doc_amt": "0.00",
"exempt_doc_amt": "0.00",
"jdx_memo": "INSS2: Tax payable in Seller location.",
"seller_reg_no": "27AAGCP4442G1ZF",
"admin_zone_level": "Province",
"auth_type": "SGST",
"erp_code": "MHSGST",
"inv_desc": "Standard Rate - SGST",
"basis_doc_amt": "71.40"
}],
"transaction_type": "DS"
}]
I am using the following query:
select
get_json_object(t.json,concat('$.taxes[',e.i,'].line_id')) as line_id
, get_json_object(t.json,concat('$.taxes[',e.i,'].commodity_code')) as commodity_code
, get_json_object(t.json,concat('$.taxes[',e.i,'].fee.amt.curr_code')) as curr_code
, get_json_object(t.json,concat('$.taxes[',e.i,'].fee.amt.value')) as value
, get_json_object(t.json,concat('$.taxes[',e.i,'].fee.type')) as type
, get_json_object(t.json,concat('$.taxes[',e.i,'].ship_addr.admin_area_1')) as admin_area
, get_json_object(t.json,concat('$.taxes[',e.i,'].ship_addr.country_code')) as country_code
, get_json_object(t.json,concat('$.taxes[',e.i,'].total_tax.curr_code')) as total_tax_curr_code
, get_json_object(t.json,concat('$.taxes[',e.i,'].total_tax.value')) as total_tax_value
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].exempt_option')) as exempt_option
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].auth_name')) as auth_name
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].doc_amt.currency_code')) as doc_amt_currency_code
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].doc_amt.value')) as doc_amt_value
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].unrnd_doc_amt.currency_code')) as unrnd_doc_amt_currency_code
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].unrnd_doc_amt.value')) as unrnd_doc_amt_value
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].rate')) as rate
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].rate_code')) as rate_code
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].non_basis_doc_amt')) as non_basis_doc_amt
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].exempt_doc_amt')) as exempt_doc_amount
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].jdx_memo')) as jdx_memo
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].seller_reg_no')) as seller_reg_no
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].admin_zone_level')) as admin_zone_level
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].auth_type')) as auth_type
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].erp_code')) as erp_code
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].inv_desc')) as inv_desc
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].basis_doc_amt')) as basis_doc_amt
FROM json_2002 t
LATERAL VIEW POSEXPLODE (split(get_json_object(json,'$.taxes[*].line_id'),'","')) e as i,x
LATERAL VIEW POSEXPLODE (split(get_json_object(json,'$.taxes.tax_details[*].exempt_option'),'","')) f as g,h

I was able to fix the issue by using regexp_replace:
LATERAL VIEW POSEXPLODE (split(regexp_replace(get_json_object(json,'$.taxes.tax_details[*].exempt_option'), '\[|\]', ''), ',' ) ) f as g,h
This solved my problem.
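For reference, here is a minimal sketch of how that corrected LATERAL VIEW slots into the FROM clause next to the original one (hedged: the column list is trimmed to two fields, and the table name, paths, and tax_details spelling are copied verbatim from the query above, even though the sample JSON uses the key "tax-details"):
SELECT
get_json_object(t.json,concat('$.taxes[',e.i,'].line_id')) as line_id
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].auth_type')) as auth_type
FROM json_2002 t
LATERAL VIEW POSEXPLODE (split(get_json_object(json,'$.taxes[*].line_id'),'","')) e as i,x
LATERAL VIEW POSEXPLODE (split(regexp_replace(get_json_object(json,'$.taxes.tax_details[*].exempt_option'), '\[|\]', ''), ',')) f as g,h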

Related

How to get array/object number value in PostgreSQL?

Is it possible to get array/object number values?
I have a table called tableA:
create table "tableA" (
"_id" serial,
"userId" integer,
"dependentData" jsonb);
INSERT INTO "tableA"
("_id", "userId", "dependentData")
VALUES('55555', '1191', '[{"_id": 133, "type": "radio", "title": "questionTest7", "question": "questionTest7", "response": {"value": ["option_11"]}, "dependentQuestionResponse": [{"_id": 278, "type": "text", "title": "questionTest8", "question": "questionTest8", "response": {"value": ["street no 140"]}, "dependentQuestionResponse": []}]}, {"_id": 154, "type": "dropdown", "title": "questionTest8", "question": "questionTest8", "response": {"value": ["option_14"]}, "dependentQuestionResponse": []}]');
The array/object number is to be fetched. The required output is shown below:
_id      userId   array/object
55555    1191     [0,0,1]
You can try something like this
select id, user_id,
(
with
base as (
select o1, oo1.a oo1 from (
select jsonb_array_elements(t.a) o1
from (select depend_data as a) t
) o
left join lateral (select a from jsonb_array_elements(o.o1-> 'dependentQuestionResponse') a) oo1 on true
)
select json_agg(nn) from (
select dense_rank() over(order by b.o1) - 1 nn, b.o1 from base b
union all
select dense_rank() over(order by b.o1) - 1 nn, b.oo1 from base b where oo1 is not null
order by nn
) tz
) as array_object
from
(select 55555 as id,
1191 as user_id,
'[{"_id": 133, "type": "radio", "title": "questionTest7", "question": "questionTest7", "response": {"value": ["option_11"]}, "dependentQuestionResponse": [
{"_id": 278, "type": "text", "title": "questionTest8", "question": "questionTest8", "response": {"value": ["street no 140"]},"dependentQuestionResponse": []}]},
{"_id": 154, "type": "dropdown", "title": "questionTest8", "question": "questionTest8", "response": {"value": ["option_14"]}, "dependentQuestionResponse": []}]'::jsonb as depend_data) t

Query a JSONB object array

I made a DB Fiddle of roughly what the table looks like: https://www.db-fiddle.com/f/4jyoMCicNSZpjMt4jFYoz5/3382
The data in the table looks like this:
[
{
"id": 1,
"form_id": 1,
"questionnaire_response": [
{
"id": "1",
"title": "Are you alive?",
"value": "Yes",
"form_id": 0,
"shortTitle": "",
"description": ""
},
{
"id": "2",
"title": "Did you sleep good?",
"value": "No",
"form_id": 0,
"shortTitle": "",
"description": ""
},
{
"id": "3",
"title": "Whats favorite color(s)?",
"value": [
"Red",
"Blue"
],
"form_id": 0,
"shortTitle": "",
"description": ""
}
]
},
{
"id": 2,
"form_id": 1,
"questionnaire_response": [
{
"id": "1",
"title": "Are you alive?",
"value": "Yes",
"form_id": 0,
"shortTitle": "",
"description": ""
},
{
"id": "2",
"title": "Did you sleep good?",
"value": "Yes",
"form_id": 0,
"shortTitle": "",
"description": ""
},
{
"id": "3",
"title": "Whats favorite color(s)?",
"value": "Black",
"form_id": 0,
"shortTitle": "",
"description": ""
}
]
},
{
"id": 3,
"form_id": 1,
"questionnaire_response": [
{
"id": "1",
"title": "Are you alive?",
"value": "Yes",
"form_id": 0,
"shortTitle": "",
"description": ""
},
{
"id": "2",
"title": "Did you sleep good?",
"value": "No",
"form_id": 0,
"shortTitle": "",
"description": ""
},
{
"id": "3",
"title": "Whats favorite color(s)?",
"value": [
"Black",
"Red"
],
"form_id": 0,
"shortTitle": "",
"description": ""
}
]
}
]
I have a query select * from form_responses,jsonb_to_recordset(form_responses.questionnaire_response) as items(value text, id text) where (items.id = '3' AND items.value like '%Black%');
But I am unable to filter on more than one object, as in: select * from form_responses,jsonb_to_recordset(form_responses.questionnaire_response) as items(value text, id text) where (items.id = '3' AND items.value like '%Black%') AND (items.id = '2' AND items.value like '%Yes%');
The value field in the object could be an array or a single value, which is unpredictable. I feel like I'm close, but I'm also not sure if I'm using the correct query in the first place.
Any help would be appreciated!
EDIT
select * from form_responses where (
questionnaire_response @> '[{"id": "2", "value":"No"},{"id": "3", "value":["Red"]}]')
This seems to work, but I'm not sure if it's the best way to do it.
Your current query returns one result row per item. None of these rows has both id = 3 and id = 2. If your goal is to select the entire form response, you need to use a subquery (or rather, two of them):
SELECT *
FROM form_responses
WHERE EXISTS(
SELECT *
FROM jsonb_to_recordset(form_responses.questionnaire_response) as items(value text, id text)
WHERE items.id = '3'
AND items.value like '%Black%'
)
AND EXISTS(
SELECT *
FROM jsonb_to_recordset(form_responses.questionnaire_response) as items(value text, id text)
WHERE items.id = '2'
AND items.value like '%Yes%'
);
or alternatively
SELECT *
FROM form_responses
WHERE (
SELECT value
FROM jsonb_to_recordset(form_responses.questionnaire_response) as items(value text, id text)
WHERE items.id = '3'
) like '%Black%'
AND (
SELECT value
FROM jsonb_to_recordset(form_responses.questionnaire_response) as items(value text, id text)
WHERE items.id = '2'
) like '%Yes%';
A nicer alternative would be using json path queries:
SELECT *
FROM form_responses
WHERE questionnaire_response @@ '$[*]?(@.id == "1").value == "Yes"'
AND questionnaire_response @@ '$[*]?(@.id == "3").value[*] == "Black"'
-- in one:
SELECT *
FROM form_responses
WHERE questionnaire_response @@ '$[*]?(@.id == "1").value == "Yes" && $[*]?(@.id == "3").value[*] == "Black"'
The [*] even has the correct semantics for that sometimes-string-sometimes-array value. And if you know the indices of the items with those ids, you can even simplify to
SELECT *
FROM form_responses
WHERE questionnaire_response @@ '$[0].value == "Yes" && $[2].value[*] == "Black"'
(dbfiddle demo)

Couchbase N1QL GROUP BY in sub-document

Given the below data model:
{
"events": [
{
"customerId": "a",
"type": "credit" ,
"value": 10
},
{
"customerId": "a",
"type": "credit" ,
"value": 10
},
{
"customerId": "b",
"type": "credit" ,
"value": 5
},
{
"customerId": "b",
"type": "credit" ,
"value": 5
}
]
}
How can I query the sum of credits by customerId? I.e.:
[
{
"customerId": "a",
"total": "20"
},
{
"customerId": "b",
"total": "10"
}
]
Use a SUBQUERY expression for per-document aggregation:
SELECT d.*,
(SELECT e.customerId, SUM(e.`value`) AS total
FROM d.events AS e
WHERE ......
GROUP BY e.customerId) AS events
FROM default AS d
WHERE ...........;
For aggregation across the whole query:
SELECT e.customerId, SUM(e.`value`) AS total
FROM default AS d
UNNEST d.events AS e
WHERE ......
GROUP BY e.customerId;
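With the sample document above, the whole-query variant should return one row per customerId, something like this (row order is not guaranteed without an ORDER BY):
{ "customerId": "a", "total": 20 }
{ "customerId": "b", "total": 10 }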

Select JSON formatted like a report into a table using T-SQL

I have JSON stored in a SQL Server database table in the below format. I have been able to fudge a way to get the values I need, but I feel like there must be a better way to do it using T-SQL. The JSON is output from a report, where the column names in "columns" correspond, by position, to the values in each row's "data" array.
So column "Fiscal Month" corresponds to data value "11", "Fiscal Year" to "2019", etc.
{
"report": "Property ETL",
"id": 2648,
"columns": [
{
"name": "Fiscal Month",
"dataType": "int"
},
{
"name": "Fiscal Year",
"dataType": "int"
},
{
"name": "Portfolio",
"dataType": "varchar(50)"
},
{
"name": "Rent",
"dataType": "int"
}
],
"rows": [
{
"rowName": "1",
"type": "Detail",
"data": [
11,
2019,
"West Group",
10
]
},
{
"rowName": "2",
"type": "Detail",
"data": [
11,
2019,
"East Group",
10
]
},
{
"rowName": "3",
"type": "Detail",
"data": [
11,
2019,
"East Group",
10
]
},
{
"rowName": "Totals: ",
"type": "Total",
"data": [
null,
null,
null,
30
]
}
]
}
To get at the data in the 'data' array, I currently have a two-step process in T-SQL: I create a temp table and insert the row key/values from '$.rows' there. Then I can select the individual columns for each row:
CREATE TABLE #TempData
(
Id INT,
JsonData VARCHAR(MAX)
)
DECLARE @json VARCHAR(MAX);
DECLARE @LineageKey INT;
SET @json = (SELECT JsonString FROM Stage.Report);
SET @LineageKey = (SELECT LineageKey FROM Stage.Report);
INSERT INTO #TempData(Id, JsonData)
(SELECT [key], value FROM OPENJSON(@json, '$.rows'))
MERGE [dbo].[DestinationTable] TARGET
USING
(
SELECT
JSON_VALUE(JsonData, '$.data[0]') AS FiscalMonth,
JSON_VALUE(JsonData, '$.data[1]') AS FiscalYear,
JSON_VALUE(JsonData, '$.data[2]') AS Portfolio,
JSON_VALUE(JsonData, '$.data[3]') AS Rent
FROM #TempData
WHERE JSON_VALUE(JsonData, '$.data[0]') is not null
) AS SOURCE
...
etc., etc.
This works, but I want to know if there is a way to directly select the data values without the intermediate step of putting them into the temp table. The documentation and examples I've read all seem to require that the data have a name associated with it in order to access it. When I try to access the data directly at a position by index, I just get NULL.
I hope I understand your question correctly. If you know the column names, you need one OPENJSON() call with an explicit schema, but if you want to read the JSON structure from $.columns, you need a dynamic statement.
JSON:
DECLARE @json nvarchar(max) = N'{
"report": "Property ETL",
"id": 2648,
"columns": [
{
"name": "Fiscal Month",
"dataType": "int"
},
{
"name": "Fiscal Year",
"dataType": "int"
},
{
"name": "Portfolio",
"dataType": "varchar(50)"
},
{
"name": "Rent",
"dataType": "int"
}
],
"rows": [
{
"rowName": "1",
"type": "Detail",
"data": [
11,
2019,
"West Group",
10
]
},
{
"rowName": "2",
"type": "Detail",
"data": [
11,
2019,
"East Group",
10
]
},
{
"rowName": "3",
"type": "Detail",
"data": [
11,
2019,
"East Group",
10
]
},
{
"rowName": "Totals: ",
"type": "Total",
"data": [
null,
null,
null,
30
]
}
]
}'
Statement for fixed structure:
SELECT *
FROM OPENJSON(@json, '$.rows') WITH (
[Fiscal Month] int '$.data[0]',
[Fiscal Year] int '$.data[1]',
[Portfolio] varchar(50) '$.data[2]',
[Rent] int '$.data[3]'
)
Dynamic statement:
DECLARE @stm nvarchar(max) = N''
-- Build one "<column name> <data type> '$.data[<position>]'" entry per element of $.columns
SELECT @stm = CONCAT(
@stm,
N',',
QUOTENAME(j2.name),
N' ',
j2.dataType,
N' ''$.data[',
j1.[key],
N']'''
)
FROM OPENJSON(@json, '$.columns') j1
CROSS APPLY OPENJSON(j1.value) WITH (
name varchar(50) '$.name',
dataType varchar(50) '$.dataType'
) j2
-- Wrap the generated column list in the final OPENJSON statement (STUFF strips the leading comma)
SELECT @stm = CONCAT(
N'SELECT * FROM OPENJSON(@json, ''$.rows'') WITH (',
STUFF(@stm, 1, 1, N''),
N')'
)
PRINT @stm
EXEC sp_executesql @stm, N'@json nvarchar(max)', @json
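For the sample JSON above, the PRINT @stm line should emit a statement equivalent to the fixed-structure query, with the WITH clause built from $.columns:
SELECT * FROM OPENJSON(@json, '$.rows') WITH ([Fiscal Month] int '$.data[0]',[Fiscal Year] int '$.data[1]',[Portfolio] varchar(50) '$.data[2]',[Rent] int '$.data[3]')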
Result:
Fiscal Month  Fiscal Year  Portfolio   Rent
------------  -----------  ----------  ----
11            2019         West Group  10
11            2019         East Group  10
11            2019         East Group  10
NULL          NULL         NULL        30
Yes, it is possible without a temporary table:
DECLARE @json NVARCHAR(MAX) =
N'
{
"report": "Property ETL",
"id": 2648,
"columns": [
{
"name": "Fiscal Month",
"dataType": "int"
},
{
"name": "Fiscal Year",
"dataType": "int"
},
{
"name": "Portfolio",
"dataType": "varchar(50)"
},
{
"name": "Rent",
"dataType": "int"
}
],
"rows": [
{
"rowName": "1",
"type": "Detail",
"data": [
11,
2019,
"West Group",
10
]
},
{
"rowName": "2",
"type": "Detail",
"data": [
11,
2019,
"East Group",
10
]
},
{
"rowName": "3",
"type": "Detail",
"data": [
11,
2019,
"East Group",
10
]
},
{
"rowName": "Totals: ",
"type": "Total",
"data": [
null,
null,
null,
30
]
}
]
}';
And query:
SELECT s.value,
rowName = JSON_VALUE(s.value, '$.rowName'),
[type] = JSON_VALUE(s.value, '$.type'),
s2.[key],
s2.value
FROM OPENJSON(JSON_QUERY(@json, '$.rows')) s
CROSS APPLY OPENJSON(JSON_QUERY(s.value, '$.data')) s2;
db<>fiddle demo
Or as a single row per detail:
SELECT s.value,
rowName = JSON_VALUE(s.value, '$.rowName'),
[type] = JSON_VALUE(s.value, '$.type'),
JSON_VALUE(s.value, '$.data[0]') AS FiscalMonth,
JSON_VALUE(s.value, '$.data[1]') AS FiscalYear,
JSON_VALUE(s.value, '$.data[2]') AS Portfolio,
JSON_VALUE(s.value, '$.data[3]') AS Rent
FROM OPENJSON(JSON_QUERY(@json, '$.rows')) s;
db<>fiddle demo 2

Postgres: search and get multiple JSON array elements as rows

I want to get all subscriptions with interval "1 WEEK" from the following 'data' column:
[
{
"id": "tran_6ac25129951962e99f28fa488993",
"amount": 1200,
"client": {
"id": "client_622bdf4cce2351f28243",
"subscription": [
{
"id": "sub_a67d59efb2bcbf73485a",
"amount": 3900,
"currency": "USD",
"interval": "1 WEEK"
},
{
"id": "sub_a67d59efb2bcbf73485a",
"amount": 3900,
"currency": "USD",
"interval": "1 WEEK"
}
]
},
"currency": "USD"
},
{
"id": "tran_xxxxxxx",
"amount": 1200,
"client": {
"id": "client_xxxxxx8243",
"subscription": [
{
"id": "sub_xxefb2bcbf73485a",
"amount": 3900,
"currency": "USD",
"interval": "1 Year"
},
{
"id": "sub_yyyyyb2bcbf73485a",
"amount": 3900,
"currency": "USD",
"interval": "1 WEEK"
}
]
},
"currency": "USD"
}
]
My table structure:
CREATE TABLE transactions
(
data json,
id bigserial NOT NULL,
created_date time without time zone,
CONSTRAINT transactions_pkey PRIMARY KEY (id)
)
As output I want to get all "1 WEEK" subscriptions as rows. The above data should give 3 rows.
I am using Postgres 9.3+
It's a nested query and I have tried writing it in as readable a form as I can. I hope you can understand it:
select subscriptions from
(
select
cast
(
json_array_elements
(
json_array_elements(data)->'client'->'subscription'
)
as text
)
as subscriptions,
json_array_elements
(
json_array_elements(data)->'client'->'subscription'
)
->>'interval'
as intervals
from
transactions
)
as
xyz
where
intervals = '1 WEEK';
For information regarding these functions, you can refer to -
http://www.postgresql.org/docs/9.3/static/functions-json.html
Edit:
As per the performance requirements, I guess this will work better than the previous one:
select * from (
select cast (
json_array_elements (
json_array_elements(data)->'client'->'subscription'
) as text
) as subscription from transactions
) as temp
where subscription LIKE '%"interval":"1 WEEK"%';