Couchbase N1QL query really slow

I'm working on a project where we use Couchbase 4.1 and we are trying to use N1QL to query for documents. The problem is that it seems very slow even though I have created indexes. The query takes around 2 seconds with ~11,000 documents.
The query:
SELECT name, displayName, imageId, childCategories FROM `bd-couchbase` WHERE assortment = 'CategoryAssortmentOne' AND categoryPath = 'category-displayname/subcategory-displayName' AND displayName IS NOT MISSING
My document looks like this:
{
"parentName": "8442",
"categoryPath": "category-displayname/subcategory-displayName",
"lastUpdated": "2016-05-31T11:02:03.5129252+02:00",
"childCategories": [
{
"name": "0041",
"displayName": "Category 1",
"imageId": "0041"
},
{
"name": "0042",
"displayName": "Category 2",
"imageId": "0042"
},
{
"name": "0043",
"displayName": "Category 3",
"imageId": "0043"
},
{
"name": "0044",
"displayName": "Category 4",
"imageId": "0044"
},
{
"name": "0045",
"displayName": "Category 5",
"imageId": "0045"
},
{
"name": "0046",
"displayName": "Category 6",
"imageId": "0046"
}
],
"assortment": "CategoryAssortmentOne",
"name": "0040",
"displayName": "MyCategory",
"imageId": "0040"
}
I have the following index:
CREATE INDEX `category_idx` ON `bd-couchbase`((meta().`id`),`name`,`displayName`,`imageId`,`categoryPath`,`childCategories`,`assortment`) USING GSI;
When I run EXPLAIN I can see that it uses the #primary index and then does a fetch (which I guess is the slow part of the query). But since I have created my index, isn't it supposed to use that?
The result of my EXPLAIN:
{
"requestID": "da1946f3-5cc8-4d1e-a05b-06789aa6be92",
"signature": "json",
"results": [
{
"#operator": "Sequence",
"~children": [
{
"#operator": "PrimaryScan",
"index": "#primary",
"keyspace": "my-couchbase",
"namespace": "default",
"using": "gsi"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Fetch",
"keyspace": "my-couchbase",
"namespace": "default"
},
{
"#operator": "Filter",
"condition": "((((`my-couchbase`.`assortment`) =
\"CategoryAssortmentOne\") and ((`my-couchbase`.`categoryPath`) = \"category-displayname/subcategory-displayName\")) and ((`my-couchbase`.`displayName`) is not missing))"
},
{
"#operator": "InitialProject",
"result_terms": [
{
"expr": "(`my-couchbase`.`name`)"
},
{
"expr": "(`my-couchbase`.`displayName`)"
},
{
"expr": "(`my-couchbase`.`imageId`)"
},
{
"expr": "(`my-couchbase`.`childCategorie
s`)"
}
]
},
{
"#operator": "FinalProject"
}
]
}
}
]
}
],
"status": "success",
"metrics": {
"elapsedTime": "13.6696ms",
"executionTime": "13.6696ms",
"resultCount": 1,
"resultSize": 2089
}
}
Any suggestions?
Thanks in advance.

The query service didn't seem to be able to match your index with the query. Any particular reason you included meta().id in the index?
Try redefining the index so it leads with the fields used in your WHERE clause (assortment, categoryPath and displayName), followed by the projected fields, and see if it gets mentioned in the EXPLAIN after that:
CREATE INDEX category_idx ON `bd-couchbase`(assortment, categoryPath, displayName, imageId, childCategories, name);
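To confirm that the planner actually picks the new index, re-run EXPLAIN and look for an IndexScan on category_idx instead of the PrimaryScan plus Fetch you have now. A minimal sketch of that check in Python, assuming the query service's REST endpoint on its default port 8093 and the requests library (the host is a placeholder):
import requests

QUERY_URL = "http://localhost:8093/query/service"  # placeholder host

statement = (
    "EXPLAIN SELECT name, displayName, imageId, childCategories "
    "FROM `bd-couchbase` "
    "WHERE assortment = 'CategoryAssortmentOne' "
    "AND categoryPath = 'category-displayname/subcategory-displayName' "
    "AND displayName IS NOT MISSING"
)

plan = requests.post(QUERY_URL, data={"statement": statement}).json()["results"][0]
# After redefining the index you should see an IndexScan on category_idx here
# rather than a PrimaryScan on #primary followed by a Fetch of every document.
print(plan)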

Related

Inserting a Complex Nested JSON Column in MySQL

Here is my use case:
I am trying to get the deployment details in JSON format using:
kubectl get deployment -o json depl_name
and insert the result back into a column meta_data in MySQL. The column data type is json, but the insert statement fails with this error:
ERROR 3140 (22032): Invalid JSON text: "Missing a comma or '}' after an object member." at position 1035 in value for column
Here is my entire JSON:
{
"uuid": {
"view": "demoBoard",
"demo": [
{
"serviceName": "wordpress-backend",
"configurations": {
"ec2_iam": {
"user": [],
"roles": null,
"permissions": null
}
},
"deployment_config": {
"apiVersion": "apps/v1",
"kind": "Deployment",
"metadata": {
"annotations": {
"deployment.kubernetes.io/revision": "6",
"kubectl.kubernetes.io/last-applied-configuration": "{\"apiVersion\":\"apps/v1\",\"kind\":\"Deployment\",\"metadata\":{\"annotations\":{},\"labels\":{\"app\":\"wordpress-backend\",\"wordpress_app_id\":\"w26\"},\"name\":\"wordpress-backend\",\"namespace\":\"wordpress\"},\"spec\":{\"selector\":{\"matchLabels\":{\"app\":\"wordpress-backend\"}},\"template\":{\"metadata\":{\"labels\":{\"app\":\"wordpress-backend\",\"wordpress_app_id\":\"w26\"}},\"spec\":{\"containers\":[{\"envFrom\":[{\"configMapRef\":{\"name\":\"wordpress-backend-config\"}}],\"image\":\"docker-image\",\"imagePullPolicy\":\"IfNotPresent\",\"name\":\"wordpress-backend\",\"ports\":[{\"containerPort\":8000}],\"resources\":{},\"volumeMounts\":[{\"mountPath\":\"/tmp/me/cloud\",\"name\":\"my-key\"}]}],\"imagePullSecrets\":[{\"name\":\"my-json\"}],\"volumes\":[{\"name\":\"my-cloud-key\",\"secret\":{\"defaultMode\":123,\"secretName\":\"my-key\"}}]}}}}\n"
},
"creationTimestamp": "2022-09-12T13:56:34Z",
"generation": 7,
"labels": {
"app": "wordpress-backend",
"wordpress_app_id": "w26"
},
"name": "wordpress-backend",
"namespace": "wordpress",
"resourceVersion": "v2",
"uid": "0da99b29"
},
"spec": {
"progressDeadlineSeconds": 600,
"replicas": 1,
"revisionHistoryLimit": 10,
"selector": {
"matchLabels": {
"app": "wordpress-backend"
}
},
"strategy": {
"rollingUpdate": {
"maxSurge": "25%",
"maxUnavailable": "25%"
},
"type": "RollingUpdate"
},
"template": {
"metadata": {
"creationTimestamp": null,
"labels": {
"app": "wordpress-backend",
"wordpress_app_id": "267"
}
},
"spec": {
"containers": [
{
"envFrom": [
{
"configMapRef": {
"name": "wordpress-backend-config"
}
}
],
"image": "docker.io/my-image",
"imagePullPolicy": "IfNotPresent",
"name": "wordpress-backend",
"ports": [
{
"containerPort": 8000,
"protocol": "TCP"
}
],
"resources": {},
"terminationMessagePath": "/dev/termination-log",
"terminationMessagePolicy": "File",
"volumeMounts": [
{
"mountPath": "/my/path/cloud",
"name": "my-key"
}
]
}
],
"dnsPolicy": "ClusterFirst",
"imagePullSecrets": [
{
"name": "my-key"
}
],
"restartPolicy": "Always",
"schedulerName": "default-scheduler",
"securityContext": {},
"terminationGracePeriodSeconds": 30,
"volumes": [
{
"name": "my-key",
"secret": {
"defaultMode": 123,
"secretName": "sampleKeyName"
}
}
]
}
}
},
"status": {
"availableReplicas": 1,
"conditions": [
{
"lastTransitionTime": "2022-09-29T15:11:14Z",
"lastUpdateTime": "2022-09-29T15:11:14Z",
"message": "Deployment has minimum availability.",
"reason": "MinimumReplicasAvailable",
"status": "True",
"type": "Available"
},
{
"lastTransitionTime": "2022-09-12T14:20:35Z",
"lastUpdateTime": "2022-09-30T14:13:08Z",
"message": "ReplicaSet \"wordpress-backend-abc123\" has successfully progressed.",
"reason": "NewReplicaSetAvailable",
"status": "True",
"type": "Progressing"
}
],
"observedGeneration": 7,
"readyReplicas": 1,
"replicas": 1,
"updatedReplicas": 1
}
}
}
]
}
}
I guess the escape sequences in the line below are causing the failure:
"message": "ReplicaSet \"wordpress-backend-abc123\" has successfully progressed.",
I tried removing that, but no luck.

Json Path Read from a Kafka Message

I have a Kafka message like the one below, and I'm trying to read data from it using JSONPath. However, I'm having trouble reading some of the attributes. Here is a sample message.
Sample 1:
{
"header": {
"bu": "google",
"id": "12345",
"bum": "google",
"originTimestamp": "2021-10-09T15:17:09.842+00:00",
"batchSize": "0",
"jobType": "Batch"
},
"payload": {
"derivationdetails": {
"Id": "6783jhvvh897u31y283y",
"itemid": "1234567",
"batchid": 107,
"attributes": {
"itemid": "1234567",
"lineNbr": "1498",
"cat": "5929",
"Id": "6783jhvvh897u31y283y",
"indicator": "false",
"subcat": "3514"
},
"Exception": {
"values": [
{
"type": "PICK",
"value": "blocked",
"Reason": [
"RULE"
],
"rules": [
"439"
]
}
],
"rulesBagInfo": [
{
"Idtype": "XXXX",
"uniqueid": "7889423rbhevfhjaufdyeuiryeukjbdafvjd",
"rulesMatch": [
"439"
]
}
]
}
}
}
}
Sample 2: the same message, but note the difference in "payload":
{
"header": {
"bu": "google",
"id": "12345",
"bum": "google",
"originTimestamp": "2021-10-09T15:17:09.842+00:00",
"batchSize": "0",
"jobType": "Batch"
},
"payload": {
"Id": "6783jhvvh897u31y283y",
"itemid": "1234567",
"batchid": 107,
"attributes": {
"itemid": "1234567",
"lineNbr": "1498",
"cat": "5929",
"Id": "6783jhvvh897u31y283y",
"indicator": "false",
"subcat": "3514"
},
"Exception": {
"values": [
{
"type": "PICK",
"value": "blocked",
"Reason": [
"RULE"
],
"rules": [
"439"
]
}
],
"rulesBagInfo": [
{
"Idtype": "XXXX",
"uniqueid": "7889423rbhevfhjaufdyeuiryeukjbdafvjd",
"rulesMatch": [
"439"
]
}
]
}
}
}
As you can see, sometimes the message has "derivationdetails" and sometimes it doesn't. But irrespective of its existence, I need to read the values of Id, itemid and batchid. I tried using
$.payload[*].id
$.payload[*].itemid
$.payload[*].batchid
But I see that batchid returns null even though it has a value in the message, and the attributes under "attributes" also return null when I use the above. For fields under "attributes" I am using this (example):
$.payload.attributes.itemId
And I am completely blank on how to read the part below.
"Exception": {
"values": [
{
"type": "PICK",
"value": "blocked",
"Reason": [
"RULE"
],
"rules": [
"439"
]
}
],
"rulesBagInfo": [
{
"Idtype": "XXXX",
"uniqueid": "7889423rbhevfhjaufdyeuiryeukjbdafvjd",
"rulesMatch": [
"439"
]
I'm new to this and need some suggestions on how to read the attributes properly. Any help would be much appreciated. Thanks.
Use .. (recursive descent, or deep scan; JSONPath borrows this syntax from E4X) to get the values. Note that it will return a list if there are multiple entries with the same key nested deeper.
The JSONPath expressions below will each return a list with one item for both sample 1 and sample 2:
$.payload..attributes.Id
$.payload..attributes.itemid
$.payload..batchid
$.payload..Exception
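As a quick way to try those expressions outside Kafka, here is a small sketch assuming the jsonpath-ng Python library (the library actually used in the pipeline isn't stated, so treat the import as an assumption):
import json
from jsonpath_ng import parse  # pip install jsonpath-ng

# Trimmed-down version of sample 2; in practice this is the Kafka message value.
message = json.loads("""
{"payload": {"Id": "6783jhvvh897u31y283y", "itemid": "1234567", "batchid": 107,
             "attributes": {"itemid": "1234567", "subcat": "3514"},
             "Exception": {"values": [{"type": "PICK", "value": "blocked"}]}}}
""")

def read_all(expr, doc):
    # Every value matched by the JSONPath expression, as a plain Python list.
    return [m.value for m in parse(expr).find(doc)]

print(read_all("$.payload..batchid", message))                   # [107]
print(read_all("$.payload..attributes.itemid", message))         # ['1234567']
print(read_all("$.payload..Exception.values[*].type", message))  # ['PICK']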

Azure Data Factory Copy Activity

I have been working on this for a couple of days and cannot get past this error. I have 2 activities in this pipeline. The first activity copies data from an ODBC connection to an Azure database, which succeeds. The 2nd activity transfers the data from one Azure table to another Azure table and keeps failing.
The error message is:
Copy activity met invalid parameters: 'UnknownParameterName', Detailed message: An item with the same key has already been added..
I do not see any invalid parameters or unknown parameter names. I have rewritten this multiple times, both from their add-activity code template and by hand, but I do not receive any errors when deploying; the error only appears when it runs. Below is the JSON pipeline code.
Only the 2nd activity is receiving an error.
Thanks.
Source Data set
{
"name": "AnalyticsDB-SHIPUPS_06shp-01src_AZ-915PM",
"properties": {
"structure": [
{
"name": "UPSD_BOL",
"type": "String"
},
{
"name": "UPSD_ORDN",
"type": "String"
}
],
"published": false,
"type": "AzureSqlTable",
"linkedServiceName": "Source-SQLAzure",
"typeProperties": {},
"availability": {
"frequency": "Day",
"interval": 1,
"offset": "04:15:00"
},
"external": true,
"policy": {}
}
}
Destination Data set
{
"name": "AnalyticsDB-SHIPUPS_06shp-02dst_AZ-915PM",
"properties": {
"structure": [
{
"name": "SHIP_SYS_TRACK_NUM",
"type": "String"
},
{
"name": "SHIP_TRACK_NUM",
"type": "String"
}
],
"published": false,
"type": "AzureSqlTable",
"linkedServiceName": "Destination-Azure-AnalyticsDB",
"typeProperties": {
"tableName": "[olcm].[SHIP_Tracking]"
},
"availability": {
"frequency": "Day",
"interval": 1,
"offset": "04:15:00"
},
"external": false,
"policy": {}
}
}
Pipeline
{
"name": "SHIPUPS_FC_COPY-915PM",
"properties": {
"description": "copy shipments ",
"activities": [
{
"type": "Copy",
"typeProperties": {
"source": {
"type": "RelationalSource",
"query": "$$Text.Format('SELECT COMPANY, UPSD_ORDN, UPSD_BOL FROM \"orupsd - UPS interface Dtl\" WHERE COMPANY = \\'01\\'', WindowStart, WindowEnd)"
},
"sink": {
"type": "SqlSink",
"sqlWriterCleanupScript": "$$Text.Format('delete imp_fc.SHIP_UPS_IntDtl_Tracking', WindowStart, WindowEnd)",
"writeBatchSize": 0,
"writeBatchTimeout": "00:00:00"
},
"translator": {
"type": "TabularTranslator",
"columnMappings": "COMPANY:COMPANY, UPSD_ORDN:UPSD_ORDN, UPSD_BOL:UPSD_BOL"
}
},
"inputs": [
{
"name": "AnalyticsDB-SHIPUPS_03shp-01src_FC-915PM"
}
],
"outputs": [
{
"name": "AnalyticsDB-SHIPUPS_03shp-02dst_AZ-915PM"
}
],
"policy": {
"timeout": "1.00:00:00",
"concurrency": 1,
"executionPriorityOrder": "NewestFirst",
"style": "StartOfInterval",
"retry": 3,
"longRetry": 0,
"longRetryInterval": "00:00:00"
},
"scheduler": {
"frequency": "Day",
"interval": 1,
"offset": "04:15:00"
},
"name": "915PM-SHIPUPS-fc-copy->[imp_fc]_[SHIP_UPS_IntDtl_Tracking]"
},
{
"type": "Copy",
"typeProperties": {
"source": {
"type": "SqlSource",
"sqlReaderQuery": "$$Text.Format('select distinct ups.UPSD_BOL, ups.UPSD_BOL from imp_fc.SHIP_UPS_IntDtl_Tracking ups LEFT JOIN olcm.SHIP_Tracking st ON ups.UPSD_BOL = st.SHIP_SYS_TRACK_NUM WHERE st.SHIP_SYS_TRACK_NUM IS NULL', WindowStart, WindowEnd)"
},
"sink": {
"type": "SqlSink",
"writeBatchSize": 0,
"writeBatchTimeout": "00:00:00"
},
"translator": {
"type": "TabularTranslator",
"columnMappings": "UPSD_BOL:SHIP_SYS_TRACK_NUM, UPSD_BOL:SHIP_TRACK_NUM"
}
},
"inputs": [
{
"name": "AnalyticsDB-SHIPUPS_06shp-01src_AZ-915PM"
}
],
"outputs": [
{
"name": "AnalyticsDB-SHIPUPS_06shp-02dst_AZ-915PM"
}
],
"policy": {
"timeout": "1.00:00:00",
"concurrency": 1,
"executionPriorityOrder": "NewestFirst",
"style": "StartOfInterval",
"retry": 3,
"longRetryInterval": "00:00:00"
},
"scheduler": {
"frequency": "Day",
"interval": 1,
"offset": "04:15:00"
},
"name": "915PM-SHIPUPS-AZ-update->[olcm]_[SHIP_Tracking]"
}
],
"start": "2017-08-22T03:00:00Z",
"end": "2099-12-31T08:00:00Z",
"isPaused": false,
"hubName": "adf-tm-prod-01_hub",
"pipelineMode": "Scheduled"
}
}
Have you seen this link? They get the same error message and suggest using AzureTableSink instead of SqlSink:
"sink": {
"type": "AzureTableSink",
"writeBatchSize": 0,
"writeBatchTimeout": "00:00:00"
}
It would make sense for you too, since your 2nd copy activity is Azure to Azure.
It could be a red herring, but I'm pretty sure "tableName" is a required entry in the typeProperties for an Azure SQL table dataset, and yours is missing it for the input dataset. I appreciate you have a join in the sqlReaderQuery, so it is probably best to put a dummy (but real) table name in there.
By the way, it is not clear why you are using $$Text.Format and WindowStart/WindowEnd on your queries if you are not substituting those values into the query; you could just put the query between double quotes.
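For example, the input dataset could declare a real (even if otherwise unused) table in its typeProperties; a sketch only, reusing the staging table written by the first activity as the dummy name:
"typeProperties": {
    "tableName": "[imp_fc].[SHIP_UPS_IntDtl_Tracking]"
}
The sqlReaderQuery can then stay as a plain double-quoted string with the $$Text.Format(...) wrapper and the WindowStart/WindowEnd arguments dropped.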

Couchbase select query performance

I have a simple SELECT query which takes more than 3 seconds to return the result. The actual result count is 15211.
Query:
select meta().id assetId, modelAndPart.partNumberID,assetLocation.id locationId from ic_v10_mammoet where type = 'asset' and modelAndPart IS NOT null and tenantId='439'
EXPLAIN:
{
"requestID": "cda5ed1b-efaf-4c5b-bb67-81f0a3542324",
"clientContextID": "13583a95-04cc-4722-90dd-9642068f9ea0",
"signature": "json",
"results": [
{
"plan": {
"#operator": "Sequence",
"~children": [
{
"#operator": "IndexScan",
"index": "type_idx",
"index_id": "f1d17cd15ab5feb6",
"keyspace": "ic_v10_mammoet",
"namespace": "default",
"spans": [
{
"Range": {
"High": [
"\"asset\""
],
"Inclusion": 3,
"Low": [
"\"asset\""
]
}
}
],
"using": "gsi"
},
{
"#operator": "Fetch",
"keyspace": "ic_v10_mammoet",
"namespace": "default"
},
{
"#operator": "Parallel",
"~child": {
"#operator": "Sequence",
"~children": [
{
"#operator": "Filter",
"condition": "((((`ic_v10_mammoet`.`type`) = \"asset\") and ((`ic_v10_mammoet`.`modelAndPart`) is not null)) and ((`ic_v10_mammoet`.`tenantId`) = \"439\"))"
},
{
"#operator": "InitialProject",
"result_terms": [
{
"as": "assetId",
"expr": "(meta(`ic_v10_mammoet`).`id`)"
},
{
"expr": "((`ic_v10_mammoet`.`modelAndPart`).`partNumberID`)"
},
{
"as": "locationId",
"expr": "((`ic_v10_mammoet`.`assetLocation`).`id`)"
}
]
},
{
"#operator": "FinalProject"
}
]
}
}
]
},
"text": "select meta().id assetId, modelAndPart.partNumberID,assetLocation.id locationId from ic_v10_mammoet where type = 'asset' and modelAndPart IS NOT null and tenantId='439'"
}
],
"status": "success",
"metrics": {
"elapsedTime": "15.0015ms",
"executionTime": "15.0015ms",
"resultCount": 1,
"resultSize": 3009
}
}
Yes, you forgot to ask a question. You can use this index:
CREATE INDEX idx_assets ON `ic_v10_mammoet`
( tenantId, modelAndPart.partNumberID, assetLocation.id )
WHERE type = 'asset';
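To see whether the new index pays off, you can run the query through the query service and compare the reported metrics before and after creating it. A sketch in Python, assuming the requests library and the default query port 8093 (the host is a placeholder):
import requests

QUERY_URL = "http://localhost:8093/query/service"  # placeholder host

statement = (
    "select meta().id assetId, modelAndPart.partNumberID, assetLocation.id locationId "
    "from ic_v10_mammoet "
    "where type = 'asset' and modelAndPart IS NOT null and tenantId = '439'"
)

resp = requests.post(QUERY_URL, data={"statement": statement}).json()
# executionTime should drop once the planner scans idx_assets, which narrows the
# scan to tenantId = '439' instead of fetching every document of type 'asset'.
print(resp["metrics"]["executionTime"], resp["metrics"]["resultCount"])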

How to query nested structure in elasticsearch

Below are two mocked records from my Elasticsearch index. I have millions of records in ES. I am trying to query ES to get all the records that have a non-empty/non-null "tags" field. If a record doesn't have a tag (like the second record below), then I don't want to pull it from ES.
If "books" were not nested, then from googling around it seems like the query below would have worked:
curl -XGET 'host:port/book_indx/book/_search?' -d '{
"query" : {"filtered" : {"filter" : {"exists" :{"field" : "_source"}}}}
}'
However, I am not finding a solution for querying the nested structure. I tried the below with no luck:
{"query" : {"filtered" : {"filter" : {"exists" :{"field" : "_source.tags"}}}}}
{"query" : {"filtered" : {"filter" : {"exists" :{"field" : "_source":{"tags"}}}}}}
Any suggestions are really appreciated here! Thanks in advance.
{
"_shards": {
"failed": 0,
"successful": 12,
"total": 12
},
"hits": {
"hits": [
{
"_id": "book1",
"_index": "book",
"_source": {
"book_name": "How to Get Organized",
"publication_date": "2014-02-24T16:50:39+0000",
"tags": [
{
"category": "self help",
"topics": [
{
"name": "time management",
"page": 6198
},
{
"name": "calendar",
"page": 10
}
],
"id": "WEONWOIR234LI",
}
],
"last_updated": "2015-11-11T16:28:32.308+0000"
},
"_type": "book"
},
{
"_id": "book2",
"_index": "book",
"_source": {
"book_name": "How to Cook",
"publication_date": "2014-02-24T16:50:39+0000",
"tags": [],
"last_updated": "2015-11-11T16:28:32.308+0000"
},
"_type": "book"
}
],
"total": 1
},
"timed_out": false,
"took": 80
}
Mapping:
"book": {
"_id": {
"path": "message_id"
},
"properties": {
"book_name": {
"index": "not_analyzed",
"type": "string"
},
"publication_date": {
"format": "date_time||date_time_no_millis",
"type": "date"
},
"tags": {
"properties": {
"category": {
"index": "not_analyzed",
"type": "string"
},
"topic": {
"properties": {
"name": {
"index": "not_analyzed",
"type": "string"
},
"page": {
"index": "no",
"type": "integer"
}
}
},
"id": {
"index": "not_analyzed",
"type": "string"
}
},
"type": "nested"
},
"last_updated": {
"format": "date_time||date_time_no_millis",
"type": "date"
}
}
}
Since your tags field has a nested type, you need to use a nested filter in order to query it.
The following filtered query will correctly return only the first document above (i.e. the one with _id book1):
{
"query": {
"filtered": {
"filter": {
"nested": {
"path": "tags",
"filter": {
"exists": {
"field": "tags"
}
}
}
}
}
}
}
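To try it against the index from the question, the same body can be posted to the _search endpoint. A sketch with the Python requests library (host, port, index and type names are placeholders taken from the question):
import requests

SEARCH_URL = "http://localhost:9200/book_indx/book/_search"  # placeholder host/port

query = {
    "query": {
        "filtered": {
            "filter": {
                "nested": {
                    "path": "tags",
                    "filter": {"exists": {"field": "tags"}}
                }
            }
        }
    }
}

resp = requests.post(SEARCH_URL, json=query).json()
# Only documents with at least one entry in "tags" (book1 here) are returned.
print([hit["_id"] for hit in resp["hits"]["hits"]])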