couchbase N1QL group-by in sub-document - couchbase

given the below data model:
{
"events": [
{
"customerId": "a",
"type": "credit" ,
"value": 10
},
{
"customerId": "a",
"type": "credit" ,
"value": 10
},
{
"customerId": "b",
"type": "credit" ,
"value": 5
},
{
"customerId": "b",
"type": "credit" ,
"value": 5
}
]
}
how can i query the sum of credits by customerId ? i.e:
[
{
"customerId": "a",
"total": 20
},
{
"customerId": "b",
"total": 10
}
]

Use a subquery expression for per-document aggregation:
/* Per-document aggregation: for every document, a correlated subquery over
   that document's own events array sums `value` per customerId; the
   subquery result is projected under the name events. */
SELECT
    doc.*,
    (
        SELECT
            ev.customerId,
            SUM(ev.`value`) AS total
        FROM doc.events AS ev
        WHERE ......
        GROUP BY ev.customerId
    ) AS events
FROM default AS doc
WHERE ...........;
To aggregate across all documents matched by the query:
/* Aggregation across documents: UNNEST turns every element of each
   document's events array into its own row, and the rows are then
   grouped by customerId. */
SELECT
    ev.customerId,
    SUM(ev.`value`) AS total
FROM default AS doc
UNNEST doc.events AS ev
WHERE ......
GROUP BY ev.customerId;

Related

Select from mysql array of objects with integer number and value null or source to file without duplicates

Let's say I have a table with rows like number, image. But image is not mandatory, It can be null and when I'm selecting I want to prioritize the row with image over the one with null so i could get clean array with only one row per number.
-- Current attempt: DISTINCT de-duplicates (number, image) pairs, so a number
-- that has both a NULL and a non-NULL image still produces two rows.
SELECT DISTINCT
    number,
    image
FROM table
ORDER BY number ASC
What's now with SELECT DISTINCT:
[
{
"number": 1,
"image": null
},
{
"number": 1,
"image": "https://example.com/image1.png"
},
{
"number": 2,
"image": null
},
{
"number": 2,
"image": "https://example.com/image2.png"
},
{
"number": 3,
"image": "https://example.com/image3.png"
},
{
"number": 3,
"image": null
},
{
"number": 4,
"image": null
}
]
What I want to get:
[
{
"number": 1,
"image": "https://example.com/image1.png"
},
{
"number": 2,
"image": "https://example.com/image2.png"
},
{
"number": 3,
"image": "https://example.com/image3.png"
},
{
"number": 4,
"image": null
}
]
Try this:
-- One row per number. MAX() ignores NULLs, so a number gets an image
-- whenever at least one of its rows has one, and NULL only when every row
-- for that number lacks an image. If a number has several different
-- images, the greatest one in string order is returned.
-- `table` stands in for the real table name; it is quoted with backticks
-- because TABLE is a reserved word in MySQL.
SELECT
    number,
    MAX(image) AS image
FROM `table`
GROUP BY number
ORDER BY number ASC;

Query a JSONB object array

I did a DB Fiddle of what the table is kinda looking like https://www.db-fiddle.com/f/4jyoMCicNSZpjMt4jFYoz5/3382
Data in the table looks like this
[
{
"id": 1,
"form_id": 1,
"questionnaire_response": [
{
"id": "1",
"title": "Are you alive?",
"value": "Yes",
"form_id": 0,
"shortTitle": "",
"description": ""
},
{
"id": "2",
"title": "Did you sleep good?",
"value": "No",
"form_id": 0,
"shortTitle": "",
"description": ""
},
{
"id": "3",
"title": "Whats favorite color(s)?",
"value": [
"Red",
"Blue"
],
"form_id": 0,
"shortTitle": "",
"description": ""
}
]
},
{
"id": 2,
"form_id": 1,
"questionnaire_response": [
{
"id": "1",
"title": "Are you alive?",
"value": "Yes",
"form_id": 0,
"shortTitle": "",
"description": ""
},
{
"id": "2",
"title": "Did you sleep good?",
"value": "Yes",
"form_id": 0,
"shortTitle": "",
"description": ""
},
{
"id": "3",
"title": "Whats favorite color(s)?",
"value": "Black",
"form_id": 0,
"shortTitle": "",
"description": ""
}
]
},
{
"id": 3,
"form_id": 1,
"questionnaire_response": [
{
"id": "1",
"title": "Are you alive?",
"value": "Yes",
"form_id": 0,
"shortTitle": "",
"description": ""
},
{
"id": "2",
"title": "Did you sleep good?",
"value": "No",
"form_id": 0,
"shortTitle": "",
"description": ""
},
{
"id": "3",
"title": "Whats favorite color(s)?",
"value": [
"Black",
"Red"
],
"form_id": 0,
"shortTitle": "",
"description": ""
}
]
}
]
I have a query select * from form_responses,jsonb_to_recordset(form_responses.questionnaire_response) as items(value text, id text) where (items.id = '3' AND items.value like '%Black%');
But unable to do more than one object like select * from form_responses,jsonb_to_recordset(form_responses.questionnaire_response) as items(value text, id text) where (items.id = '3' AND items.value like '%Black%') AND (items.id = '2' AND items.value like '%Yes%');
The value field in the object could be an array or a single value also.. unpredictable.. I feel like I'm close but also not sure if im using the correct query in the first place.
Any help would be appreciated!
EDIT
-- @> is the jsonb containment operator: a row matches when its
-- questionnaire_response array contains an item with id "2" and value "No"
-- and an item with id "3" whose value array contains "Red".
select *
from form_responses
where questionnaire_response @> '[{"id": "2", "value":"No"},{"id": "3", "value":["Red"]}]';
Seems to work but not sure if this is the best way to do it
Your current query returns one result row per item. None of these rows has both id = 3 and id = 2. If your goal is to select the entire form response, you need to use a subquery (or rather, two of them):
-- A form response qualifies only when BOTH item conditions hold. Each
-- condition is checked by its own EXISTS over the expanded item array,
-- because a single expanded row can never carry two different ids.
SELECT *
FROM form_responses AS fr
WHERE EXISTS (
        SELECT 1
        FROM jsonb_to_recordset(fr.questionnaire_response) AS item(value text, id text)
        WHERE item.id = '3'
          AND item.value LIKE '%Black%'
    )
  AND EXISTS (
        SELECT 1
        FROM jsonb_to_recordset(fr.questionnaire_response) AS item(value text, id text)
        WHERE item.id = '2'
          AND item.value LIKE '%Yes%'
    );
or alternatively
-- Same filter written with scalar subqueries: look up the value of the item
-- with a given id and compare it directly. A scalar subquery may return at
-- most one row, so this form relies on item ids being unique per response.
SELECT *
FROM form_responses AS fr
WHERE (
        SELECT item.value
        FROM jsonb_to_recordset(fr.questionnaire_response) AS item(value text, id text)
        WHERE item.id = '3'
    ) LIKE '%Black%'
  AND (
        SELECT item.value
        FROM jsonb_to_recordset(fr.questionnaire_response) AS item(value text, id text)
        WHERE item.id = '2'
    ) LIKE '%Yes%';
A nicer alternative would be using json path queries:
-- @@ evaluates a jsonpath predicate against the jsonb value. Inside a
-- filter expression ?( ... ), @ denotes the array item being tested.
SELECT *
FROM form_responses
WHERE questionnaire_response @@ '$[*]?(@.id == "1").value == "Yes"'
AND questionnaire_response @@ '$[*]?(@.id == "3").value[*] == "Black"';
-- in one:
-- Both conditions combined into a single jsonpath predicate with &&.
-- Inside ?( ... ), @ denotes the array item being tested.
SELECT *
FROM form_responses
WHERE questionnaire_response @@ '$[*]?(@.id == "1").value == "Yes" && $[*]?(@.id == "3").value[*] == "Black"';
The [*] even has the correct semantics for that sometimes-string-sometimes-array value. And if you know the indices of the items with those ids, you can even simplify to
-- Positional variant: addresses the items by array index instead of
-- filtering on their id, so it is only valid when the positions are fixed.
SELECT *
FROM form_responses
WHERE questionnaire_response @@ '$[0].value == "Yes" && $[2].value[*] == "Black"';
(dbfiddle demo)

Select Json formatted like a report to a table using T-SQL

I have JSON stored in a SQL Server database table in the below format. I have been able to fudge a way to get the values I need but feel like there must be a better way to do it using T-SQL. The JSON is output from a report in the below format where the column names in "columns" correspond to the "rows"-"data" array values.
So column "Fiscal Month" corresponds to data value "11", "Fiscal Year" to "2019", etc.
{
"report": "Property ETL",
"id": 2648,
"columns": [
{
"name": "Fiscal Month",
"dataType": "int"
},
{
"name": "Fiscal Year",
"dataType": "int"
},
{
"name": "Portfolio",
"dataType": "varchar(50)"
},
{
"name": "Rent",
"dataType": "int"
}
],
"rows": [
{
"rowName": "1",
"type": "Detail",
"data": [
11,
2019,
"West Group",
10
]
},
{
"rowName": "2",
"type": "Detail",
"data": [
11,
2019,
"East Group",
10
]
},
{
"rowName": "3",
"type": "Detail",
"data": [
11,
2019,
"East Group",
10
]
},
{
"rowName": "Totals: ",
"type": "Total",
"data": [
null,
null,
null,
30
]
}
]
}
In order to get at the data in the 'data' array I currently have a 2 step process in T-SQL where I create a temp table, and insert the row key/values from '$.rows' there. Then I can select the individual columns for each row:
-- Staging table: one row per element of the report's "rows" array.
CREATE TABLE #TempData
(
Id INT,
JsonData VARCHAR(MAX)
);
-- T-SQL local variables are prefixed with @ (a leading # names a temp table).
DECLARE @json VARCHAR(MAX);
DECLARE @LineageKey INT;
-- Scalar-subquery assignment: assumes Stage.Report holds a single row.
SET @json = (SELECT JsonString FROM Stage.Report);
SET @LineageKey = (SELECT LineageKey FROM Stage.Report);
-- Without a WITH clause OPENJSON returns [key] (here the array index) and
-- value (the JSON text of the element).
INSERT INTO #TempData(Id, JsonData)
SELECT [key], value FROM OPENJSON(@json, '$.rows');
-- MERGE into the destination table. The source rows expose the positional
-- elements of each staged row's "data" array as named columns. The ON and
-- WHEN clauses are elided in the original.
MERGE [dbo].[DestinationTable] TARGET
USING
(
SELECT
JSON_VALUE(JsonData, '$.data[0]') AS FiscalMonth,
JSON_VALUE(JsonData, '$.data[1]') AS FiscalYear,
JSON_VALUE(JsonData, '$.data[2]') AS Portfolio,
JSON_VALUE(JsonData, '$.data[3]') AS Rent
FROM #TempData
-- Skip rows whose first data element is null (the "Totals" row in the sample).
WHERE JSON_VALUE(JsonData, '$.data[0]') is not null
) AS SOURCE
...
etc., etc.
This works, but I want to know if there is a way to directly select the data values without the intermediate step of putting it into the temp table. The documentation and examples I've read seem to all require that the data have a name associated with it in order to access it. When I try and access the data directly at a position by index I just get Null.
I hope I understand your question correctly. If you know the column names, you need one OPENJSON() call with an explicit schema, but if you want to read the JSON structure from $.columns, you need a dynamic statement.
JSON:
-- Sample report: "columns" describes the positional entries of each
-- row's "data" array.
DECLARE @json nvarchar(max) = N'{
"report": "Property ETL",
"id": 2648,
"columns": [
{
"name": "Fiscal Month",
"dataType": "int"
},
{
"name": "Fiscal Year",
"dataType": "int"
},
{
"name": "Portfolio",
"dataType": "varchar(50)"
},
{
"name": "Rent",
"dataType": "int"
}
],
"rows": [
{
"rowName": "1",
"type": "Detail",
"data": [
11,
2019,
"West Group",
10
]
},
{
"rowName": "2",
"type": "Detail",
"data": [
11,
2019,
"East Group",
10
]
},
{
"rowName": "3",
"type": "Detail",
"data": [
11,
2019,
"East Group",
10
]
},
{
"rowName": "Totals: ",
"type": "Total",
"data": [
null,
null,
null,
30
]
}
]
}';
Statement for fixed structure:
-- Fixed structure: each position of the "data" array is mapped to a typed,
-- named column through an explicit OPENJSON schema.
SELECT *
FROM OPENJSON(@json, '$.rows') WITH (
[Fiscal Month] int '$.data[0]',
[Fiscal Year] int '$.data[1]',
[Portfolio] varchar(50) '$.data[2]',
[Rent] int '$.data[3]'
);
Dynamic statement:
-- Build the WITH (...) column list from $.columns. Each column contributes
--   ,[name] dataType '$.data[<index>]'
-- where <index> is the column's position ([key]) in the "columns" array.
-- NOTE(review): dataType is concatenated into the statement verbatim (only
-- the name goes through QUOTENAME), so the JSON must come from a trusted
-- source.
DECLARE @stm nvarchar(max) = N'';
SELECT @stm = CONCAT(
@stm,
N',',
QUOTENAME(j2.name),
N' ',
j2.dataType,
N' ''$.data[',
j1.[key],
N']'''
)
FROM OPENJSON(@json, '$.columns') j1
CROSS APPLY OPENJSON(j1.value) WITH (
name varchar(50) '$.name',
dataType varchar(50) '$.dataType'
) j2;
-- STUFF drops the leading comma before the list is wrapped in the SELECT.
SELECT @stm = CONCAT(
N'SELECT * FROM OPENJSON(@json, ''$.rows'') WITH (',
STUFF(@stm, 1, 1, N''),
N')'
);
PRINT @stm;
-- The JSON document is passed as a parameter, not spliced into the text.
EXEC sp_executesql @stm, N'@json nvarchar(max)', @json;
Result:
--------------------------------------------
Fiscal Month Fiscal Year Portfolio Rent
--------------------------------------------
11 2019 West Group 10
11 2019 East Group 10
11 2019 East Group 10
30
Yes, it is possible without temporary table:
-- Sample report document. The literal must be valid JSON for
-- OPENJSON / JSON_QUERY / JSON_VALUE to parse it.
DECLARE @json NVARCHAR(MAX) =
N'
{
"report": "Property ETL",
"id": 2648,
"columns": [
{
"name": "Fiscal Month",
"dataType": "int"
},
{
"name": "Fiscal Year",
"dataType": "int"
},
{
"name": "Portfolio",
"dataType": "varchar(50)"
},
{
"name": "Rent",
"dataType": "int"
}
],
"rows": [
{
"rowName": "1",
"type": "Detail",
"data": [
11,
2019,
"West Group",
10
]
},
{
"rowName": "2",
"type": "Detail",
"data": [
11,
2019,
"East Group",
10
]
},
{
"rowName": "3",
"type": "Detail",
"data": [
11,
2019,
"East Group",
10
]
},
{
"rowName": "Totals: ",
"type": "Total",
"data": [
null,
null,
null,
30
]
}
]
}';
And query:
-- One output row per element of every row's "data" array:
-- s iterates over $.rows, s2 over that row's $.data (s2.[key] is the
-- position inside the array, s2.value the element).
SELECT s.value,
rowName = JSON_VALUE(s.value, '$.rowName'),
[type] = JSON_VALUE(s.value, '$.type'),
s2.[key],
s2.value
FROM OPENJSON(JSON_QUERY(@json, '$.rows')) s
CROSS APPLY OPENJSON(JSON_QUERY(s.value, '$.data')) s2;
db<>fiddle demo
Or as a single row per detail:
-- One output row per element of $.rows, with the positional "data"
-- entries surfaced as named columns.
SELECT s.value,
rowName = JSON_VALUE(s.value, '$.rowName'),
[type] = JSON_VALUE(s.value, '$.type'),
JSON_VALUE(s.value, '$.data[0]') AS FiscalMonth,
JSON_VALUE(s.value, '$.data[1]') AS FiscalYear,
JSON_VALUE(s.value, '$.data[2]') AS Portfolio,
JSON_VALUE(s.value, '$.data[3]') AS Rent
FROM OPENJSON(JSON_QUERY(@json, '$.rows')) s;
db<>fiddle demo 2

Multi Nested JSON in hive

I have the following input with nested JSON, I want to ingest this data into hive in multiple rows
"taxes": [{
"line_id": 1,
"commodity_code": "997159",
"fee": {
"amt": {
"curr_code": "USD",
"value": "71.4"
},
"type": "receiver"
},
"ship_addr": {
"admin_area_1": "MAHARASHTRA",
"country_code": "IN"
},
"total_tax": {
"curr_code": "USD",
"value": "12.8520000000"
},
"tax-details": [{
"exempt_option": false,
"auth_name": "India Maharashtra Central GST",
"doc_amt": {
"currency_code": "USD",
"value": "6.43"
},
"unrnd_doc_amt": {
"currency_code": "USD",
"value": "6.4260000000"
},
"rate": "0.09",
"rate_code": "SR",
"non_basis_doc_amt": "0.00",
"exempt_doc_amt": "0.00",
"jdx_memo": "INSS2: Tax payable in Seller location.",
"seller_reg_no": "27AAGCP4442G1ZF",
"admin_zone_level": "Country",
"auth_type": "CGST",
"erp_code": "MHCGST",
"inv_desc": "Standard Rate - CGST",
"basis_doc_amt": "71.40"
}, {
"exempt_option": false,
"auth_name": "India Maharashtra State GST",
"doc_amt": {
"currency_code": "USD",
"value": "6.43"
},
"unrnd_doc_amt": {
"currency_code": "USD",
"value": "6.4260000000"
},
"rate": "0.09",
"rate_code": "SR",
"non_basis_doc_amt": "0.00",
"exempt_doc_amt": "0.00",
"jdx_memo": "INSS2: Tax payable in Seller location.",
"seller_reg_no": "27AAGCP4442G1ZF",
"admin_zone_level": "Province",
"auth_type": "SGST",
"erp_code": "MHSGST",
"inv_desc": "Standard Rate - SGST",
"basis_doc_amt": "71.40"
}],
"transaction_type": "DS"
}]
I am using the following query:
-- Flattens the nested document: e.i indexes the taxes[] entries and f.g
-- indexes the tax detail entries, one output row per combination.
-- NOTE(review): the sample JSON names the inner array "tax-details" and
-- nests it inside each taxes[] element, while the paths below read
-- $.taxes.tax_details[...] without a taxes index - confirm the paths
-- against the real data.
select
get_json_object(t.json,concat('$.taxes[',e.i,'].line_id')) as line_id
, get_json_object(t.json,concat('$.taxes[',e.i,'].commodity_code')) as commodity_code
, get_json_object(t.json,concat('$.taxes[',e.i,'].fee.amt.curr_code')) as curr_code
, get_json_object(t.json,concat('$.taxes[',e.i,'].fee.amt.value')) as value
, get_json_object(t.json,concat('$.taxes[',e.i,'].fee.type')) as type
, get_json_object(t.json,concat('$.taxes[',e.i,'].ship_addr.admin_area_1')) as admin_area
, get_json_object(t.json,concat('$.taxes[',e.i,'].ship_addr.country_code')) as country_code
, get_json_object(t.json,concat('$.taxes[',e.i,'].total_tax.curr_code')) as total_tax_curr_code
, get_json_object(t.json,concat('$.taxes[',e.i,'].total_tax.value')) as total_tax_value
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].exempt_option')) as exempt_option
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].auth_name')) as auth_name
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].doc_amt.currency_code')) as doc_amt_currency_code
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].doc_amt.value')) as doc_amt_value
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].unrnd_doc_amt.currency_code')) as unrnd_doc_amt_currency_code
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].unrnd_doc_amt.value')) as unrnd_doc_amt_value
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].rate')) as rate
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].rate_code')) as rate_code
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].non_basis_doc_amt')) as non_basis_doc_amt
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].exempt_doc_amt')) as exempt_doc_amount
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].jdx_memo')) as jdx_memo
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].seller_reg_no')) as seller_reg_no
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].admin_zone_level')) as admin_zone_level
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].auth_type')) as auth_type
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].erp_code')) as erp_code
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].inv_desc')) as inv_desc
, get_json_object(t.json,concat('$.taxes.tax_details[',f.g,'].basis_doc_amt')) as basis_doc_amt
FROM json_2002 t
-- POSEXPLODE yields (position, element); only the positions i and g are
-- used, to index back into the JSON arrays.
LATERAL VIEW POSEXPLODE (split(get_json_object(json,'$.taxes[*].line_id'),'","')) e as i,x
LATERAL VIEW POSEXPLODE (split(get_json_object(json,'$.taxes.tax_details[*].exempt_option'),'","')) f as g,h
I was able to fix the issue by using regexp_replace:
-- Strip the [ and ] characters from the extracted JSON array text, then split
-- on commas so that POSEXPLODE emits one (position, element) row per entry.
LATERAL VIEW POSEXPLODE (split(regexp_replace(get_json_object(json,'$.taxes.tax_details[*].exempt_option'), '\[|\]', ''), ',' ) ) f as g,h
solved my problem

How to index nested array

How can I index (for N1QL queries in Couchbase) the document below to speed up searching by the SerialNumber field in the nested array (doc => Groups => Items => item.SerialNumber)?
Sample:
{
"Id": "0012ed6e-41af-4e45-b53f-bac3b2eb0b82",
"Machine": "Machine2",
"Groups": [
{
"Id": "0fed9b14-fa38-e511-893a-001125665867",
"Name": "Name",
"Items": [
{
"Id": "64e69b14-fa38-e511-893a-001125665867",
"SerialNumber": "1504H365",
"Position": 73
},
{
"Id": "7be69b14-fa38-e511-893a-001125665867",
"SerialNumber": "1504H364",
"Position": 72
}
]
},
{
"Id": "0fed9b14-fa38-e511-893a-001125665867",
"Name": "Name",
"Items": [
{
"Id": "64e69b14-fa38-e511-893a-001125665867",
"SerialNumber": "1504H365",
"Position": 73
},
{
"Id": "7be69b14-fa38-e511-893a-001125665867",
"SerialNumber": "1504H364",
"Position": 72
}
]
}
]
}
my query:
/* Array index on every SerialNumber reached through Groups[] -> Items[].
   The two statements are alternatives (ALL vs. DISTINCT array keys); both
   use the name idx_serial on the same bucket, so only one of them can be
   created at a time. */
CREATE INDEX idx_serial
ON `aplikomp-bucket` (
    ALL ARRAY (
        ALL ARRAY i.SerialNumber FOR i IN g.Items END
    ) FOR g IN Groups END
);

CREATE INDEX idx_serial
ON `aplikomp-bucket` (
    DISTINCT ARRAY (
        DISTINCT ARRAY i.SerialNumber FOR i IN g.Items END
    ) FOR g IN Groups END
);
/* The ANY ... SATISFIES variables (g, i) and the nesting mirror the index
   definition on Groups[] -> Items[] -> SerialNumber.
   NOTE(review): the sample SerialNumber values are strings, while the
   predicate compares against the number 123 - confirm the intended
   comparison value. */
SELECT META().id
FROM `aplikomp-bucket` AS a
WHERE ANY g IN a.Groups SATISFIES
        (ANY i IN g.Items SATISFIES i.SerialNumber > 123 END)
      END;