how to do proper indexing while joining in couchbase? - couchbase

I have a Couchbase document of type "organization" in the contact bucket, in the format shown below.
"contact": {
"extendedData": [],
"id": "organization_2_1094",
"organizationId": 1094,
"organizationName": "SMART COMSSS",
"organizationRoles": [
{
"addressAssociations": [
{
"activeDate": "2019-08-08T03:51:51.417Z",
"addressAssocTypeId": -2,
"addressId": 749,
"ownershipStatus": 1,
"srvAddressStatus": 1
}
],
"extendedData": [
{
"characteristicId": "1",
"characteristicValue": "Plant_Id",
"extendedDataId": "400"
},
{
"characteristicId": "2",
"characteristicValue": "Plant_Type",
"extendedDataId": "401"
}
],
"name": "Store1",
"organizationRoleId": 928,
"roleSpecId": -103,
"statusId": 1,
"statusLastChangedDate": "2019-08-22T12:27:51.077Z"
},
{
"name": "changed",
"organizationRoleId": 929,
"roleSpecId": -104,
"statusId": 1,
"statusLastChangedDate": "2019-08-22T12:27:51.077Z"
},
{
"addressAssociations": [
{
"activeDate": "2019-08-08T23:06:49.748Z",
"addressAssocTypeId": -2,
"addressId": 752,
"ownershipStatus": 1,
"srvAddressStatus": 1
}
],
"extendedData": [
{
"characteristicId": "1",
"characteristicValue": "Plant_Id",
"extendedDataId": "402"
},
{
"characteristicId": "2",
"characteristicValue": "Plant_Type",
"extendedDataId": "403"
}
],
"name": "store11",
"organizationRoleId": 930,
"partyRoleAssocs": [
{
"partyRoleAssocId": "531"
}
],
"relevantEntityId": "S_103",
"roleSpecId": -103,
"statusId": 1,
"statusLastChangedDate": "2019-08-22T12:27:51.077Z"
},
{
"name": "new store group",
"organizationRoleId": 931,
"partyRoleAssocs": [
{
"partyRoleAssocId": "532"
}
],
"roleSpecId": -103,
"statusId": 1,
"statusLastChangedDate": "2019-08-22T12:27:51.077Z"
},
{
"name": "child store",
"organizationRoleId": 932,
"partyRoleAssocs": [
{
"partyRoleAssocId": "533"
}
],
"roleSpecId": -103,
"statusId": 1,
"statusLastChangedDate": "2019-08-22T12:27:51.077Z"
},
{
"name": "MCOTEST",
"organizationRoleId": 933,
"roleSpecId": -104,
"statusId": 1,
"statusLastChangedDate": "2019-08-22T12:27:51.077Z"
},
{
"name": "new store child",
"organizationRoleId": 934,
"partyRoleAssocs": [
{
"partyRoleAssocId": "534"
}
],
"roleSpecId": -103,
"statusId": 1,
"statusLastChangedDate": "2019-08-22T12:27:51.077Z"
},
{
"name": "new store child",
"organizationRoleId": 935,
"partyRoleAssocs": [
{
"partyRoleAssocId": "535"
},
{
"partyRoleAssocId": "565",
"toRoleId": 936
}
],
"roleSpecId": -103,
"statusId": 1,
"statusLastChangedDate": "2019-08-22T12:27:51.077Z"
},
{
"organizationRoleId": 936,
"partyRoleAssocs": [
{
"toRoleId": 935
}
],
"statusId": 1,
"statusLastChangedDate": "2019-08-22T12:27:51.077Z"
}
],
"statusId": 1,
"statusLastChangedDate": "2019-08-22T09:27:51.072Z",
"tenantId": "2",
"type": "organization"
}
There is also a Couchbase document of type "address" in the same contact bucket:
{
"contact": {
"address1": "stret",
"addressId": "1000",
"city": "miryalguda",
"countryCode": 4,
"id": "address_2_1000",
"state": "ap",
"tenantId": "2",
"type": "address",
"zip": "500070"
}
}
To meet my requirement, I have written a query that gets some fields from the organization document and some from the address document by joining the documents on addressId. Below is the query:
/* For each addressAssociations entry of each organization role, return the
   role fields together with the matching address document's fields.
   The join predicate applies TO_NUMBER() to the address-side addressId. */
SELECT
    org_role.name,
    org_role.organizationRoleId,
    org_role.externalIds AS externalIds,
    org_role.roleSpecId,
    a.city AS city,
    a.zip AS zip,
    {
        a.addressId, a.address1, a.address2, a.address3, a.country, a.city,
        a.zip, a.state, a.postalCode, a.houseNumber, a.streetName,
        a.fxGeocode, a.isActive
    } AS address
FROM `optima_contact` AS org_doc
UNNEST org_doc.organizationRoles AS org_role
UNNEST org_role.addressAssociations AS assoc
JOIN `optima_contact` AS a
    ON assoc.addressId = TO_NUMBER(a.addressId)
WHERE org_doc.type = "organization"
  AND a.type = "address"
I have also created an index for the right-hand side of the join:
CREATE INDEX `ix2` ON `optima_contact`(`addressId`,`address1`,`address2`,`address3`,`country`,`city`,`zip`,`state`,`postalCode`,`houseNumber`,`streetName`,`fxGeocode`,`isActive`) WHERE (`type` = "address")
Is there any way to further improve the execution time of this query?
There are only 10 documents of type "organization" but more than 70000 of type "address", and the query times out during execution.

https://blog.couchbase.com/ansi-join-support-n1ql/
Options :
Change query to ON TO_STRING(aa.addressId) = a.addressId
Or change index to CREATE INDEX `ix3` ON `optima_contact`(TO_NUMBER(`addressId`), `addressId`,`address1`,`address2`,`address3`,`country`,`city`,`zip`,`state`,`postalCode`,`houseNumber`,`streetName`,`fxGeocode`,`isActive`) WHERE (`type` = "address")
Use Hash JOIN in EE.
Change the JOIN order
Try one of the following
The first option should solve your problem. When the ON clause exactly matches the right-hand-side index key, the join value is pushed to the index; otherwise the query has to fetch all the index keys and apply the predicate on the query side.
CB 6.5.0
/* CTE: one row per (organization role, address association), carrying the
   numeric addressId taken from the association. */
WITH org_role_addrs AS (
    SELECT
        org_role.name,
        org_role.organizationRoleId,
        org_role.externalIds AS externalIds,
        org_role.roleSpecId,
        assoc.addressId
    FROM optima_contact AS org_doc
    UNNEST org_doc.organizationRoles AS org_role
    UNNEST org_role.addressAssociations AS assoc
    WHERE org_doc.type = "organization"
)
/* Main query: start from the address documents and keep the CTE rows whose
   addressId equals the address's addressId converted to a number. */
SELECT
    {
        a.addressId, a.address1, a.address2, a.address3, a.country, a.city,
        a.zip, a.state, a.postalCode, a.houseNumber, a.streetName,
        a.fxGeocode, a.isActive
    } AS address,
    role_addr.name,
    role_addr.organizationRoleId,
    role_addr.externalIds,
    role_addr.roleSpecId,
    a.city AS city,
    a.zip AS zip
FROM optima_contact AS a
UNNEST org_role_addrs AS role_addr
WHERE a.type = "address"
  AND TO_NUMBER(a.addressId) = role_addr.addressId;
PRE 6.5
/* Same result as the WITH form, written with an inline subquery: the
   subquery yields one row per (organization role, address association) and
   is unnested beside each address document. */
SELECT
    {
        a.addressId, a.address1, a.address2, a.address3, a.country, a.city,
        a.zip, a.state, a.postalCode, a.houseNumber, a.streetName,
        a.fxGeocode, a.isActive
    } AS address,
    role_addr.name,
    role_addr.organizationRoleId,
    role_addr.externalIds,
    role_addr.roleSpecId,
    a.city AS city,
    a.zip AS zip
FROM optima_contact AS a
UNNEST (
    SELECT
        org_role.name,
        org_role.organizationRoleId,
        org_role.externalIds AS externalIds,
        org_role.roleSpecId,
        assoc.addressId
    FROM optima_contact AS org_doc
    UNNEST org_doc.organizationRoles AS org_role
    UNNEST org_role.addressAssociations AS assoc
    WHERE org_doc.type = "organization"
) AS role_addr
WHERE a.type = "address"
  AND TO_NUMBER(a.addressId) = role_addr.addressId;
OR
/* Variant that keeps one subquery row per organization role: the role's
   association addressIds are collected into the array `addresses`, and an
   address document matches when its numeric addressId is in that array. */
SELECT
    a AS address,
    role_row.name,
    role_row.organizationRoleId,
    role_row.externalIds,
    role_row.roleSpecId
FROM optima_contact AS a
UNNEST (
    SELECT
        org_role.name,
        org_role.organizationRoleId,
        org_role.externalIds AS externalIds,
        org_role.roleSpecId,
        org_role.addressAssociations[*].addressId AS addresses
    FROM optima_contact AS org_doc
    UNNEST org_doc.organizationRoles AS org_role
    WHERE org_doc.type = "organization"
) AS role_row
WHERE a.type = "address"
  AND TO_NUMBER(a.addressId) IN role_row.addresses;

Related

Query not returning in pg-promise the same result as pgadmin

I'm trying to do this request:
-- One row per report date for user 2; `details` aggregates that date's
-- reports together with their category and client names.
-- NOTE(review): json_object_agg is given the constant key 'title' for every
-- row, so all reports of one date are stored under the same key. A JSON
-- parser that keeps a single value per key would surface only one of them;
-- this looks like the cause of the discrepancy described below. Confirm.
SELECT
    r.date_concerned,
    json_object_agg(
        'title',
        json_build_object(
            'title', r.title,
            'duration', r.duration,
            'fk_category_id', r.fk_category_id,
            'fk_client_id', r.fk_client_id,
            'category_name', cat.name,
            'client_name', cl.name
        )
    ) AS details
FROM report AS r
INNER JOIN category AS cat
    ON r.fk_category_id = cat.id
INNER JOIN client AS cl
    ON r.fk_client_id = cl.id
WHERE r.fk_user_id = 2
GROUP BY r.date_concerned
ORDER BY r.date_concerned
When I'm doing it in pgadmin I receive this json:
[
{
"date_concerned": "2021-08-01T22:00:00.000Z",
"details": {
"title": "élément 2 du 2 août",
"duration": "02:15:00",
"fk_category_id": 1,
"fk_client_id": 2,
"category_name": "Trotinettes",
"client_name": "James Bond"
},
{
"title": "élément 1 du 2 aout",
"duration": null,
"fk_category_id": 1,
"fk_client_id": 2,
"category_name": "Trotinettes",
"client_name": "James Bond"
},
{
"date_concerned": "2021-08-11T22:00:00.000Z",
"details": {
"title": "premier mot, deuxième mot, troisième mot, quatrième mot",
"duration": "03:15:00",
"fk_category_id": 1,
"fk_client_id": 2,
"category_name": "Trotinettes",
"client_name": "James Bond"
}
}
]
But when I do it in pg-promise I receive it this way :
[
{
"date_concerned": "2021-08-01T22:00:00.000Z",
"details": {
"title": {
"title": "élément 2 du 2 août",
"duration": "02:15:00",
"fk_category_id": 1,
"fk_client_id": 2,
"category_name": "Trotinettes",
"client_name": "James Bond"
}
}
},
{
"date_concerned": "2021-08-11T22:00:00.000Z",
"details": {
"title": {
"title": "premier mot, deuxième mot, troisième mot, quatrième mot",
"duration": "03:15:00",
"fk_category_id": 1,
"fk_client_id": 2,
"category_name": "Trotinettes",
"client_name": "James Bond"
}
}
}
]
As you can see the second entry of the same date is not returned.
I call pg-promise this way :
sendQuery("SELECT report.date_concerned, json_object_agg('title', json_build_object('title', report.title, 'duration', report.duration, 'fk_category_id', report.fk_category_id, 'fk_client_id', report.fk_client_id, 'category_name', category.name, 'client_name', client.name)) AS details FROM report, category, client WHERE report.fk_user_id=$1 AND report.fk_category_id = category.id AND report.fk_client_id = client.id GROUP BY report.date_concerned ORDER BY report.date_concerned", urlValue, callback);
sendQuery is this method :
/**
 * Runs a query through `db.any` and reports the outcome to a callback.
 *
 * @param {string} req - SQL text passed to `db.any`.
 * @param {*} value - Query value(s) passed to `db.any`.
 * @param {Function} next - Called with a `{status, data, message}` object:
 *   the resolved data on success, or `data: null` after a rejection.
 */
function sendQuery(req, value, next) {
    // Success path: forward whatever `db.any` resolved with.
    function onResolved(rows) {
        next({
            status: 'success',
            data: rows,
            message: "it works",
        });
    }

    // Failure path: log the error, then report a generic failure.
    function onRejected(error) {
        console.log('Error : ');
        console.log(error);
        next({
            status: 'error',
            data: null,
            message: 'an error occured'
        });
    }

    db.any(req, value)
        .then(onResolved)
        .catch(onRejected);
}
The solution I'm looking for can be advice on either pg-promise or my PostgreSQL query.

Snowflake - Querying Nested JSON

I need some help querying this JSON file I've ingested into a temp table in Snowflake. So, I've created a JSON_DATA variant column and plan to query and do a COPY INTO another table, but my query isn't working yet... I feel I'm close (possibly?)
JSON layout:
{
"nextPage": "01",
"page": "0",
"status": "ok",
"transactions": [
{
"id": "65985",
"recordTp": "vendorbill",
"values": {
"account": [
{
"text": "14500 Deferred Expenses",
"value": "249"
}
],
"account.number": "1450",
"account.type": [
{
"text": "Deferred Expense",
"value": "DeferExpense"
}
],
"amount": "51733",
"classnohierarchy": [
{
"text": "901 Corporate",
"value": "139"
}
],
"currency": [
{
"text": "Canadian Dollar",
"value": "3"
}
],
"customer.altname": "V Sties expenses (Tor)",
"customer.custate": "12/31/2019",
"customer.custentient": "ada Inc.",
"customer.custendate": "1/1/2019",
"customer.entyid": "PR781",
"departmentnohierarchy": [
{
"text": "8rity",
"value": "37"
}
],
"fxamount": "689",
"location": [
{
"text": "Othad Projects",
"value": "48"
}
],
"postingperiod": [
{
"text": "Jan 2020",
"value": "1"
}
],
"subsidiary.custrecord_region": [
{
"text": "CANADA",
"value": "3"
}
],
"subsidiarynohierarchy": [
{
"text": "ada Inc.",
"value": "25"
}
]
}
},
I've been able to query the values that are not (deeply) nested but I need help getting, for example, the values from 'classnohierarchy', to get both the 'text' and 'value' I tried:
transactions.value:"values".classnohierarchy.text::string as class_txt,
transactions.value:"values".classnohierarchy.value::string as class_val,
but it's returning NULL values.
Below is my entire query:
-- Flattens JSON_DATA:transactions and projects one column per field of each
-- transaction's "values" object.
-- NOTE(review): in the sample JSON layout, account, "account.type",
-- classnohierarchy, currency, departmentnohierarchy, location, postingperiod,
-- "subsidiary.custrecord_region" and subsidiarynohierarchy each hold an array
-- of {text, value} objects; the .text / .value paths below address them
-- without an element index.
SELECT
JSON_DATA:status::string as connection_status,
transactions.value:id::string as id,
-- NOTE(review): the sample JSON layout names this key "recordTp", not "recordType".
transactions.value:recordType::string as record_type,
transactions.value:"values"::variant as trans_val,
transactions.value:"values".account as acc,
transactions.value:"values".account.text as text,
transactions.value:"values".account.value as val,
transactions.value:"values"."account.number"::string as acc_num,
transactions.value:"values"."account.type".text::string as acc_type_txt,
transactions.value:"values"."account.type".value::string as acc_type_val,
transactions.value:"values".amount::string as amount,
-- NOTE(review): the ** markers around the next two lines appear to be the
-- author's emphasis on the columns being asked about, not part of the SQL.
**transactions.value:"values".classnohierarchy.text::string as class_txt,
transactions.value:"values".classnohierarchy.value::string as class_val,**
transactions.value:"values".currency.text::string as currency_text,
transactions.value:"values".currency.value::string as currency_val,
transactions.value:"values"."customer.altname"::string as customer_project_name,
transactions.value:"values"."customer.custate"::string as customer_end_date,
transactions.value:"values"."customer.custentient"::string as customer_end_client,
transactions.value:"values"."customer.custendate"::string as customer_start_date,
transactions.value:"values"."customer.entyid"::string as customer_project_id,
transactions.value:"values".departmentnohierarchy.text::string as department_name,
transactions.value:"values".departmentnohierarchy.value::string as department_value,
transactions.value:"values".fxamount::string as fx_amount,
transactions.value:"values".location.text::string as product_name,
-- NOTE(review): the next two columns share the alias postingperiod.
transactions.value:"values".postingperiod.text::string as postingperiod,
transactions.value:"values".postingperiod.value::string as postingperiod,
transactions.value:"values"."subsidiary.custrecord_region".text::string as region_name,
transactions.value:"values"."subsidiary.custrecord_region".value::string as region_value,
transactions.value:"values".subsidiarynohierarchy.text::string as entity_name,
transactions.value:"values".subsidiarynohierarchy.value::string as entity_value,
FROM MY_TABLE,
LATERAL FLATTEN (JSON_DATA:transactions) as transactions
and here's a picture of whats showing in Snowflake:
SNOWFLAKE_SCREENSHOT
departmentnohierarchy is an array, so you need to specify the element index, as shown below.
-- departmentnohierarchy is a JSON array of {text, value} objects, so an
-- element must be selected by index before its fields can be read.
-- "text" is exposed as department_name and "value" as department_value,
-- matching the aliases used in the question's query.
SELECT
    *,
    transactions.value:"values".departmentnohierarchy[0].text::string AS department_name,
    transactions.value:"values".departmentnohierarchy[0].value::string AS department_value
FROM jsont1,
LATERAL FLATTEN (JSON_DATA:transactions) AS transactions

How to join an element which is in an array which is also an part of another array for best performance in couchbase?

Below is the sample document for organization
{
"org": {
"id": "org_2_1084",
"organizationId": 1084,
"organizationName": "ABC",
"organizationRoles": [
{
"addressAssociations": [
{
"activeDate": "2019-08-03T18:52:00.857Z",
"addressAssocTypeId": -2,
"addressId": 100,
"ownershipStatus": 1,
"srvAddressStatus": 1
},
{
"activeDate": "2019-08-03T18:52:00.857Z",
"addressAssocTypeId": -2,
"addressId": 105,
"ownershipStatus": 1,
"srvAddressStatus": 1
}
],
"name": "NLUZ",
"organizationRoleId": 893,
"roleSpecId": -104,
"statusId": 1,
"statusLastChangedDate": "2019-08-04T13:14:44.616Z"
},
{
"addressAssociations": [
{
"activeDate": "2019-08-03T18:52:00.857Z",
"addressAssocTypeId": -2,
"addressId": 582,
"ownershipStatus": 1,
"srvAddressStatus": 1
},
{
"activeDate": "2019-08-03T18:52:00.857Z",
"addressAssocTypeId": -2,
"addressId": 603,
"ownershipStatus": 1,
"srvAddressStatus": 1
}
],
"name": "TXR",
"organizationRoleId": 894,
"partyRoleAssocs": [
{
"partyRoleAssocId": "512"
}
],
"roleSpecId": -103,
"statusId": 1,
"statusLastChangedDate": "2019-08-04T13:14:44.616Z"
},
}
and below is the sample document for address
{
"address": {
"address1": "string",
"address2": "string",
"addressId": "1531",
"changeWho": "string",
"city": "string",
"fxGeocode": "string",
"houseNumber": "string",
"id": "1531",
"isActive": true,
"postalCode": "string",
"state": "string",
"streetName": "string",
"tenantId": "2",
"type": "address",
"zip": "string"
}
}
An organization has multiple organizationRoles, and each organizationRole has multiple addressAssociations. Each addressAssociation contains an addressId, and the address corresponding to that addressId
is stored in an address document.
Now i have to get organizationRole name, organizationRole id, city, zip from the two documents.
What should be the best way to approach this situation for the best performance in couchbase?
I am thinking about using join but not able to come up with an exact query for this scenario.
I have tried the below query but its not working.
select *
from 'contact' As A UNNEST 'contact'.organizationRoles as Roles
UNNEST Roles.addressAssociations address
Join 'contact' As B
on address.addressID=B.addressID
where A.type="organization" and B.type="address";
You are in the right direction.
In addressAssociations the addressId is a number, while in the address document addressId is a string. A string and a number are never equal, and there is no implicit type casting, so you must either fix the data or cast explicitly using TOSTRING(), TONUMBER(), etc.
Also, N1QL field names are case-sensitive: your query uses addressID, whereas the document has addressId.
/* Role name and id plus the city and zip of every address a role is
   associated with.
   addressAssociations[].addressId is a number, whereas the address
   document's addressId is a string, and N1QL does not cast implicitly, so
   the association side is converted with TO_STRING(). Leaving a.addressId
   bare keeps the ON clause aligned with an index whose leading key is
   addressId. */
SELECT r.name AS organizationRoleName, r.organizationRoleId, a.city, a.zip
FROM contact AS c
UNNEST c.organizationRoles AS r
UNNEST r.addressAssociations AS aa
JOIN contact AS a
    ON TO_STRING(aa.addressId) = a.addressId
WHERE c.type = "organization" AND a.type = "address";
CREATE INDEX ix1 ON contact(addressId, city, zip) WHERE type = "address";
Check out https://blog.couchbase.com/ansi-join-support-n1ql/

N1QL query to filter JSON array in Couchbase server

I have the following array of data inside the data bucket SITES in my Couchbase server
"siteMaster": [
{
"sitename": "HTS_SITE_001",
"sitelink": "http://facebook.com",
"address" : "19/2, Bellandur, Bangalore India",
"filename": "site1.json",
"persons": 1,
"status": "70%",
"contact": "max.smith#honeywell.com",
}, {
"sitename": "HTS_SITE_002",
"sitelink": "http://facebook.com",
"address": "5th Avenue, New York",
"filename": "site2.json",
"persons": 1,
"status": "70%",
"contact": "john.smith#facebook.com",
}, {
"sitename": "HTS_SITE_003",
"sitelink": "http://facebook.com",
"address": "Palo Alto, California",
"filename": "site3.json",
"persons": 1,
"status": "80%",
"contact": "steve.jobs#apple.com",
}, {
"sitename": "HTS_SITE_004",
"sitelink": "http://facebook.com",
"address": "Bellandur, Bangalore",
"filename": "site4.json",
"persons": 1,
"status": "80%",
"contact": "max.mustermann#deutsche.com",
}
]
The N1QL query for
select * from SITES where status = "70%" should return me two rows, but unfortunately it is not returning any rows.
Where am I going wrong with the query ?
Please use the following query:
SELECT *
FROM SITES
WHERE ANY sm IN siteMaster SATISFIES sm.status = "70%" END;
You can also create the following array index to speed up the query:
CREATE INDEX idx ON SITES( DISTINCT ARRAY sm.status FOR sm IN siteMaster END );

sails.js query json objects inside table column

So this must be a strange question, I want sails.js ORM to search something like following
If this is the result for query for the following
Venue.findOne({id: 125274827508536}).exec()
returns >
{
"id": "125274827508536",
"attire": "Casual",
"can_post": false,
"category": "Restaurant/cafe",
"category_list": [
{
"id": "200742186618963",
"name": "Vegetarian & Vegan Restaurant"
},
{
"id": "192108214153222",
"name": "Breakfast & Brunch Restaurant"
},
{
"id": "188296324525457",
"name": "Sandwich Shop"
}
],
"checkins": 562,
"cover": {
"cover_id": 356427064393310,
"offset_x": 0,
"offset_y": 13,
"source": "https://fbcdn-sphotos-c-a.akamaihd.net/hphotos-ak-xaf1/t31.0-8/s720x720/460144_356427064393310_1179113344_o.jpg",
"id": "356427064393310"
},
"culinary_team": "Ramy Abu-Yousef : Owner, Chef\nDallas Jones: Chef",
"description": "Unique Sandwiches\nDelicious Salads\nHomemade Soups (4 daily)\nFresh Fruit Smoothies\nMilkshakes\nMOUSTACHE WALL OF FAME",
"general_manager": "Ramy Abu-Yousef & Syndey Friedemann",
"has_added_app": false,
"hours": {
"mon_1_open": "08:00",
"mon_1_close": "22:00",
"tue_1_open": "08:00",
"tue_1_close": "22:00",
"wed_1_open": "08:00",
"wed_1_close": "22:00",
"thu_1_open": "08:00",
"thu_1_close": "22:00",
"fri_1_open": "08:00",
"fri_1_close": "22:00",
"sat_1_open": "08:00",
"sat_1_close": "22:00",
"sun_1_open": "08:00",
"sun_1_close": "22:00"
},
"is_community_page": false,
"is_published": true,
"likes": 540,
"link": "https://www.facebook.com/JohnnyBarrs",
"location": {
"city": "Queenstown",
"country": "New Zealand",
"latitude": -45.032691433795,
"longitude": 168.66154298959,
"street": "15 Church Street",
"zip": "9300"
},
"name": "Johnny Barr's",
"parking": {
"lot": 1,
"street": 1,
"valet": 0
},
"payment_options": {
"amex": 1,
"cash_only": 0,
"discover": 0,
"mastercard": 1,
"visa": 1
},
"phone": "+64 (0)3 409 0169",
"price_range": "$$ (10-30)",
"restaurant_services": {
"delivery": 1,
"catering": 0,
"groups": 1,
"kids": 1,
"outdoor": 0,
"reserve": 0,
"takeout": 1,
"waiter": 0,
"walkins": 1
},
"restaurant_specialties": {
"breakfast": 1,
"coffee": 1,
"dinner": 1,
"drinks": 1,
"lunch": 1
},
"talking_about_count": 2,
"username": "JohnnyBarrs",
"website": "www.johnnybarrs.com",
"were_here_count": 562
}
Now what I need sails to do is the following
Venue.findOne({'restaurant_services': {'delivery': 1}).exec()
To return the same object as I have shown above,
Any thoughts please ?
If your adapter is mongo then you can do this out of the box
Venue.findOne({'restaurant_services.delivery': 1}).exec()
So consider whether you can move the data into another data source in order to perform this query.
If not and your adapter is SQL based then it is more difficult and would require more information on your use case to decide on the most efficient option.
For instance if you could limit your query to a few indexed fields in a SQL database, you could then use lodash to find your records within your records.
Venue.find({/*limiting criteria to bring down the number of results*/}).exec(function(err,results){/* JSON.parse(results) then use lodash to find the final result*/})
Another option could be
Venue.find({restaurant_services: {contains: 'delivery: 1'}}).exec()
that is an out of the box idea, but one that might work.
Again depends deeply on your setup, how its indexed and out of how many venues that one would need to be found.