I'm using inner join to join 3 tables, Owner, Store and Machine.
I'm trying to view output JSON from multiple tables like this:
-- Flat join of every owner with its stores and each store's machines
-- (one output row per machine).
SELECT ow.*, st.*, ma.*
FROM owner AS ow
INNER JOIN store AS st ON st.OwnerId = ow.OwnerId
INNER JOIN machine AS ma ON ma.StoreId = st.StoreId;
I want JSON formatted like this:
{
"OwnerId": "1d2dd",
"Name": "name test",
"Store":[{
"StoreId": "s3ss5",
"Name": "Store1",
"Code": "bla",
"Machine":[{
"MachineId": "axpeo",
"Name": "Machine1",
"Type": "type1"
}]
},
{
"StoreId": "ddf22",
"Name": "Store2",
"Code": "ble",
"Machine":[{
"MachineId": "weds",
"Name": "Machine2",
"Type": "type2"
},
{
"MachineId": "axdso",
"Name": "Machine3",
"Type": "type3"
}]
}]
}
but the JSON that is returned is not formatted like this.
I'm using PostgreSQL.
The easiest (and probably only sensible) way to do this is to build JSON sub-documents from individual records at table level and only then hierarchically joining them:
-- Build the JSON bottom-up: aggregate machines per store, then stores per
-- owner, then attach the resulting store array to each owner.
WITH machines_by_store AS (
    -- One row per store holding a JSON array of its machines.
    SELECT ma.storeid,
           json_agg(json_build_object('MachineId', ma.machineid,
                                      'Name', ma.name,
                                      'Type', ma.type)) AS machines
    FROM machine AS ma
    GROUP BY ma.storeid
),
stores_by_owner AS (
    -- One row per owner holding a JSON array of its stores, each store
    -- carrying its own machine array.
    SELECT st.ownerid,
           json_agg(json_build_object('StoreId', st.storeid,
                                      'Name', st.name,
                                      'Code', st.code,
                                      'Machine', mbs.machines)) AS stores
    FROM store AS st
    INNER JOIN machines_by_store AS mbs ON mbs.storeid = st.storeid
    GROUP BY st.ownerid
)
SELECT json_build_object('OwnerId', ow.ownerid,
                         'Name', ow.name,
                         'Store', sbo.stores)
FROM owner AS ow
INNER JOIN stores_by_owner AS sbo ON sbo.ownerid = ow.ownerid;
The output is not exactly what I want, but it is better. This is the output:
[{
"OwnerId": "1d2dd",
"Name": "name test",
"Store":{
"StoreId": "s3ss5",
"Name": "Store1",
"Code": "bla",
"Machine":{
"MachineId": "axpeo",
"Name": "Machine1",
"Type": "type1"
}
}
},
{
"OwnerId": "1d2dd",
"Name": "name test",
"Store":{
"StoreId": "ddf22",
"Name": "Store2",
"Code": "ble",
"Machine":{
"MachineId": "weds",
"Name": "Machine2",
"Type": "type2"
}
}
},
{
"OwnerId": "1d2dd",
"Name": "name test",
"Store":{
"StoreId": "ddf22",
"Name": "Store2",
"Code": "ble",
"Machine":{
"MachineId": "axdso",
"Name": "Machine3",
"Type": "type3"
}
}
}]
It does not yet group the machines that belong to the same store into an array.
For one-to-many relationships formatted to JSON try something like this:
-- For one owner, collect its distinct stores and distinct machines as two
-- flat JSON arrays (the machines are not nested inside their stores).
-- DISTINCT is needed because the join repeats each store row once per machine.
SELECT "owners"."id",
       json_agg(DISTINCT "stores".*) AS "stores",
       json_agg(DISTINCT "machines".*) AS "machines"
FROM "owners"
INNER JOIN "stores"
    ON "stores"."ownerId" = "owners"."id"
INNER JOIN "machines"
    ON "machines"."storeId" = "stores"."id"
WHERE "owners"."id" = 1
GROUP BY "owners"."id";
Related
I need some help querying this JSON file I've ingested into a temp table in Snowflake. So, I've created a JSON_DATA variant column and plan to query and do a COPY INTO another table, but my query isn't working yet... I feel I'm close (possibly?)
JSON layout:
{
"nextPage": "01",
"page": "0",
"status": "ok",
"transactions": [
{
"id": "65985",
"recordTp": "vendorbill",
"values": {
"account": [
{
"text": "14500 Deferred Expenses",
"value": "249"
}
],
"account.number": "1450",
"account.type": [
{
"text": "Deferred Expense",
"value": "DeferExpense"
}
],
"amount": "51733",
"classnohierarchy": [
{
"text": "901 Corporate",
"value": "139"
}
],
"currency": [
{
"text": "Canadian Dollar",
"value": "3"
}
],
"customer.altname": "V Sties expenses (Tor)",
"customer.custate": "12/31/2019",
"customer.custentient": "ada Inc.",
"customer.custendate": "1/1/2019",
"customer.entyid": "PR781",
"departmentnohierarchy": [
{
"text": "8rity",
"value": "37"
}
],
"fxamount": "689",
"location": [
{
"text": "Othad Projects",
"value": "48"
}
],
"postingperiod": [
{
"text": "Jan 2020",
"value": "1"
}
],
"subsidiary.custrecord_region": [
{
"text": "CANADA",
"value": "3"
}
],
"subsidiarynohierarchy": [
{
"text": "ada Inc.",
"value": "25"
}
]
}
},
I've been able to query the values that are not deeply nested, but I need help getting the nested ones. For example, to get both the 'text' and 'value' from 'classnohierarchy', I tried:
transactions.value:"values".classnohierarchy.text::string as class_txt,
transactions.value:"values".classnohierarchy.value::string as class_val,
but it's returning NULL values.
Below is my entire query:
-- Flatten JSON_DATA:transactions into one row per transaction and project
-- its fields.
-- NOTE(review): in the JSON layout, account, account.type, classnohierarchy,
-- currency, departmentnohierarchy, location, postingperiod,
-- subsidiary.custrecord_region and subsidiarynohierarchy are arrays of
-- {text, value} objects. The paths below read .text / .value without an
-- element index (for example [0]); these are the columns reported as NULL.
-- NOTE(review): the JSON layout spells the key "recordTp", while this query
-- reads recordType; confirm the real key name.
SELECT
    JSON_DATA:status::string AS connection_status,
    transactions.value:id::string AS id,
    transactions.value:recordType::string AS record_type,
    transactions.value:"values"::variant AS trans_val,
    transactions.value:"values".account AS acc,
    transactions.value:"values".account.text AS text,
    transactions.value:"values".account.value AS val,
    transactions.value:"values"."account.number"::string AS acc_num,
    transactions.value:"values"."account.type".text::string AS acc_type_txt,
    transactions.value:"values"."account.type".value::string AS acc_type_val,
    transactions.value:"values".amount::string AS amount,
    -- The two columns this question is about:
    transactions.value:"values".classnohierarchy.text::string AS class_txt,
    transactions.value:"values".classnohierarchy.value::string AS class_val,
    transactions.value:"values".currency.text::string AS currency_text,
    transactions.value:"values".currency.value::string AS currency_val,
    transactions.value:"values"."customer.altname"::string AS customer_project_name,
    transactions.value:"values"."customer.custate"::string AS customer_end_date,
    transactions.value:"values"."customer.custentient"::string AS customer_end_client,
    transactions.value:"values"."customer.custendate"::string AS customer_start_date,
    transactions.value:"values"."customer.entyid"::string AS customer_project_id,
    transactions.value:"values".departmentnohierarchy.text::string AS department_name,
    transactions.value:"values".departmentnohierarchy.value::string AS department_value,
    transactions.value:"values".fxamount::string AS fx_amount,
    transactions.value:"values".location.text::string AS product_name,
    transactions.value:"values".postingperiod.text::string AS postingperiod,
    -- Distinct alias: two output columns named postingperiod would collide.
    transactions.value:"values".postingperiod.value::string AS postingperiod_val,
    transactions.value:"values"."subsidiary.custrecord_region".text::string AS region_name,
    transactions.value:"values"."subsidiary.custrecord_region".value::string AS region_value,
    transactions.value:"values".subsidiarynohierarchy.text::string AS entity_name,
    transactions.value:"values".subsidiarynohierarchy.value::string AS entity_value
FROM MY_TABLE,
    LATERAL FLATTEN(input => JSON_DATA:transactions) AS transactions
and here's a picture of whats showing in Snowflake:
SNOWFLAKE_SCREENSHOT
departmentnohierarchy is an array (as are classnohierarchy and the other bracketed fields), so you need to specify the element index, as shown below.
-- classnohierarchy and departmentnohierarchy are arrays of {text, value}
-- objects, so pick an element by index before reading one of its keys.
SELECT
    transactions.value:id::string AS id,
    transactions.value:"values".classnohierarchy[0].text::string AS class_txt,
    transactions.value:"values".classnohierarchy[0].value::string AS class_val,
    transactions.value:"values".departmentnohierarchy[0].text::string AS department_name,
    transactions.value:"values".departmentnohierarchy[0].value::string AS department_value
FROM jsont1,
    LATERAL FLATTEN(input => JSON_DATA:transactions) AS transactions
I am new to Python and JSON data structures and was looking for some assistance
I have been able to create some Python code that calls a Web API and converts the returning JSON data (report_rows) into a dataframe successfully using json_normalize()
I am having some issues converting and sorting the JSON column names into the dataframe column names and was wondering if I could get some help on the following...
Get Column Names from JSON data - In the dataframe I would like to convert the column names: c1, c2, c3, etc to RECORD_NO, REF_RECORD_NO, SOV_LINEITEM_NO. The column names are in the JSON data [data][report_header][cXX][name] where cXX is the column number
Sort Column Names - I would like to order the dataframe columns so instead of c1, c10, c11, c12, c2, c3, etc it is c1, c2, c3 ... c10, c11,c12
If someone is able to provide some help, it would be greatly appreciated
Thanks in advance
Python Code
# Parse the response body as JSON, then flatten each entry's 'report_row'
# list into one DataFrame row per report row.
json_data = json.load(res)
data = pd.json_normalize(json_data['data'], record_path='report_row')
print(data)
which outputs the following
c1 c10 c11 ... c7 c8 c9
0 CON-0000001 71 VEN-0000001 ... Build IT System Contract 123 Pending
1 CON-0000002 72 VEN-0000002 ... Build IT System Contract XYZ Approved
JSON Data
"data": [
{
"report_header": {
"c11": {
"name": "VENDOR_RECORD",
"type": "java.lang.String"
},
"c10": {
"name": "VENDOR_ID",
"type": "java.lang.Integer"
},
"c12": {
"name": "VENDOR_NAME",
"type": "java.lang.String"
},
"c1": {
"name": "RECORD_NO",
"type": "java.lang.String"
},
"c2": {
"name": "REF_RECORD_NO",
"type": "java.lang.String"
},
"c3": {
"name": "SOV_LINEITEM_NO",
"type": "java.lang.String"
},
"c4": {
"name": "REF_ITEM",
"type": "java.lang.String"
},
"c5": {
"name": "PROJECTNUMBER",
"type": "java.lang.String"
},
"c6": {
"name": "PROJECTNAME",
"type": "java.lang.String"
},
"c7": {
"name": "TITLE",
"type": "java.lang.String"
},
"c8": {
"name": "CONTRACT_NO",
"type": "java.lang.String"
},
"c9": {
"name": "STATUS",
"type": "java.lang.String"
}
},
"report_row": [
{
"c1": "CON-0000001",
"c10": "71 ",
"c11": "VEN-0000001",
"c12": "Microsoft",
"c2": "",
"c3": "1",
"c4": "",
"c5": "P-0037",
"c6": "Project ABC",
"c7": "Build IT System",
"c8": "Contract 123",
"c9": "Pending"
},
{
"c1": "CON-0000002",
"c10": "72 ",
"c11": "VEN-0000002",
"c12": "Google",
"c2": "",
"c3": "1.1",
"c4": "",
"c5": "P-0037",
"c6": "Project ABC",
"c7": "Build IT System",
"c8": "Contract XYZ",
"c9": "Approved"
}
]
}
],
"message": [
"OK"
],
"status": 200
}
I was able to resolve the issue by adding the following code:
# Column metadata for the report: maps each column key ('c1', 'c2', ...) to a
# dict holding that column's 'name' and 'type'.
report_header = json_data['data'][0]['report_header']

# Order the keys by their numeric suffix so the result is c1, c2, ... c10, c11
# rather than the lexicographic c1, c10, c11, ... c2.
# Assumes every key is 'c' followed by digits, as in the sample payload.
reorder_columns = sorted(report_header, key=lambda key: int(key[1:]))

# Look up the display name of each column, in that same order.
new_column_names = [report_header[key]['name'] for key in reorder_columns]

data = pd.json_normalize(json_data['data'], record_path=['report_row'])
# Put the cXX columns into numeric order, then swap in the header names.
data = data.reindex(columns=reorder_columns)
data.columns = new_column_names
Below is the sample document for organization
{
"org": {
"id": "org_2_1084",
"organizationId": 1084,
"organizationName": "ABC",
"organizationRoles": [
{
"addressAssociations": [
{
"activeDate": "2019-08-03T18:52:00.857Z",
"addressAssocTypeId": -2,
"addressId": 100,
"ownershipStatus": 1,
"srvAddressStatus": 1
},
{
"activeDate": "2019-08-03T18:52:00.857Z",
"addressAssocTypeId": -2,
"addressId": 105,
"ownershipStatus": 1,
"srvAddressStatus": 1
}
],
"name": "NLUZ",
"organizationRoleId": 893,
"roleSpecId": -104,
"statusId": 1,
"statusLastChangedDate": "2019-08-04T13:14:44.616Z"
},
{
"addressAssociations": [
{
"activeDate": "2019-08-03T18:52:00.857Z",
"addressAssocTypeId": -2,
"addressId": 582,
"ownershipStatus": 1,
"srvAddressStatus": 1
},
{
"activeDate": "2019-08-03T18:52:00.857Z",
"addressAssocTypeId": -2,
"addressId": 603,
"ownershipStatus": 1,
"srvAddressStatus": 1
}
],
"name": "TXR",
"organizationRoleId": 894,
"partyRoleAssocs": [
{
"partyRoleAssocId": "512"
}
],
"roleSpecId": -103,
"statusId": 1,
"statusLastChangedDate": "2019-08-04T13:14:44.616Z"
},
}
and below is the sample document for address
{
"address": {
"address1": "string",
"address2": "string",
"addressId": "1531",
"changeWho": "string",
"city": "string",
"fxGeocode": "string",
"houseNumber": "string",
"id": "1531",
"isActive": true,
"postalCode": "string",
"state": "string",
"streetName": "string",
"tenantId": "2",
"type": "address",
"zip": "string"
}
}
An organization has multiple organizationRoles, and each organizationRole has multiple addressAssociations. Each addressAssociation contains an addressId, and the address corresponding to this addressId
is stored in an address document.
Now i have to get organizationRole name, organizationRole id, city, zip from the two documents.
What should be the best way to approach this situation for the best performance in couchbase?
I am thinking about using join but not able to come up with an exact query for this scenario.
I have tried the query below, but it's not working.
/* Attempted join of organization documents to address documents through
   organizationRoles[].addressAssociations[].addressId.
   NOTE(review): the sample documents spell the field addressId, while the ON
   clause below uses addressID. The sample organization stores the id as a
   number and the sample address stores it as a string.
   NOTE(review): the keyspace name is written in single quotes here; confirm
   it is parsed as an identifier rather than as a string literal. */
select *
from 'contact' As A UNNEST 'contact'.organizationRoles as Roles
UNNEST Roles.addressAssociations address
Join 'contact' As B
on address.addressID=B.addressID
where A.type="organization" and B.type="address";
You are in the right direction.
In addressAssociations the addressId is a number, while in the address document addressId is a string. A string and a number are not equal and there is no implicit type casting, so you must either fix the data or cast explicitly using TOSTRING(), TONUMBER(), etc.
Also, N1QL field names are case-sensitive: your query uses addressID, but the document uses addressId.
/* One row per (organization role, associated address).
   addressAssociations[].addressId is a number in the organization document,
   whereas addressId in the address document is a string, so the number is
   cast with TOSTRING() before comparing. The cast is applied on the UNNEST
   side so the address side of the join stays a plain field reference. */
SELECT r.name AS organizationRoleName,
       r.organizationRoleId,
       a.city,
       a.zip
FROM contact AS c
UNNEST c.organizationRoles AS r
UNNEST r.addressAssociations AS aa
JOIN contact AS a
  ON a.addressId = TOSTRING(aa.addressId)
WHERE c.type = "organization"
  AND a.type = "address";

/* Index for the address side of the join: join key first, followed by the
   city and zip fields that the query projects. */
CREATE INDEX ix1 ON contact(addressId, city, zip) WHERE type = "address";
Check out https://blog.couchbase.com/ansi-join-support-n1ql/
I have nested JSON files on S3 and am trying to query them with Athena.
However, I am having trouble querying the nested JSON values.
My JSON file looks like this:
{
"id": "17842007980192959",
"acount_id": "17841401243773780",
"stats": [
{
"name": "engagement",
"period": "lifetime",
"values": [
{
"value": 374
}
],
"title": "Engagement",
"description": "Total number of likes and comments on the media object",
"id": "17842007980192959/insights/engagement/lifetime"
},
{
"name": "impressions",
"period": "lifetime",
"values": [
{
"value": 11125
}
],
"title": "Impressions",
"description": "Total number of times the media object has been seen",
"id": "17842007980192959/insights/impressions/lifetime"
},
{
"name": "reach",
"period": "lifetime",
"values": [
{
"value": 8223
}
],
"title": "Reach",
"description": "Total number of unique accounts that have seen the media object",
"id": "17842007980192959/insights/reach/lifetime"
},
{
"name": "saved",
"period": "lifetime",
"values": [
{
"value": 0
}
],
"title": "Saved",
"description": "Total number of unique accounts that have saved the media object",
"id": "17842007980192959/insights/saved/lifetime"
}
],
"import_date": "2017-12-04"
}
What I'm trying to do is to query the "stats" field value where name=impressions.
So ideally something like:
SELECT id, account_id, stats.values.value WHERE stats.name='engagement'
AWS example: https://docs.aws.amazon.com/athena/latest/ug/searching-for-values.html
Any help would be appreciated.
You can query the JSON with the following table definition:
-- External table over the JSON files. The nested stats array is declared as
-- array<struct<...>> so it can be unnested in queries.
-- "value" is numeric in the sample document, so it is typed bigint, and the
-- description / id / import_date fields present in the sample are declared too.
CREATE EXTERNAL TABLE test(
  id string,
  acount_id string,
  stats array<
    struct<
      name:string,
      period:string,
      values:array<
        struct<value:bigint>>,
      title:string,
      description:string,
      id:string
    >
  >,
  import_date string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
LOCATION 's3://bucket/';
Now, the value column is available through the following unnesting:
-- Expand stats into one row per stat, then each stat's values array into one
-- row per value, keeping only the 'engagement' stat.
SELECT
    id,
    acount_id,
    stat.name,
    x.value
FROM test AS t
CROSS JOIN UNNEST(t.stats) AS st (stat)
CROSS JOIN UNNEST(stat."values") AS valx (x)
WHERE stat.name = 'engagement';
I have the following array of data inside the data bucket SITES in my Couchbase server
"siteMaster": [
{
"sitename": "HTS_SITE_001",
"sitelink": "http://facebook.com",
"address" : "19/2, Bellandur, Bangalore India",
"filename": "site1.json",
"persons": 1,
"status": "70%",
"contact": "max.smith#honeywell.com",
}, {
"sitename": "HTS_SITE_002",
"sitelink": "http://facebook.com",
"address": "5th Avenue, New York",
"filename": "site2.json",
"persons": 1,
"status": "70%",
"contact": "john.smith#facebook.com",
}, {
"sitename": "HTS_SITE_003",
"sitelink": "http://facebook.com",
"address": "Palo Alto, California",
"filename": "site3.json",
"persons": 1,
"status": "80%",
"contact": "steve.jobs#apple.com",
}, {
"sitename": "HTS_SITE_004",
"sitelink": "http://facebook.com",
"address": "Bellandur, Bangalore",
"filename": "site4.json",
"persons": 1,
"status": "80%",
"contact": "max.mustermann#deutsche.com",
}
]
The N1QL query for
select * from SITES where status = "70%" should return two rows, but unfortunately it does not return any rows.
Where am I going wrong with the query ?
Please use the following query:
/* Return every SITES document whose siteMaster array contains at least one
   entry with status "70%". */
SELECT *
FROM SITES
WHERE ANY sm IN siteMaster
      SATISFIES sm.status = "70%"
      END;
You can also create the following array index to speed up the query:
/* Array index keyed on the status of each siteMaster entry. */
CREATE INDEX idx
ON SITES (DISTINCT ARRAY sm.status FOR sm IN siteMaster END);