MongoDB syntax error - json

I am having trouble with the syntax (SyntaxError: Unexpected token ILLEGAL) in MongoDB. This command was copied directly from a MongoDB instruction PDF and I cannot find out what is wrong.
Also I don't know if it is relevant but I am using Codeanywhere with a MEAN stack.
db.restaurants.insert(
{
"address" : {
"street" : "2 Avenue",
"zipcode" : "10075",
"building" : "1480",
"coord" : [ ­73.9557413, 40.7720266 ],
},
"borough" : "Manhattan",
"cuisine" : "Italian",
"grades" : [
{
"date" : ISODate("2014­10­01T00:00:00Z"),
"grade" : "A",
"score" : 11
},
{
"date" : ISODate("2014­01­16T00:00:00Z"),
"grade" : "B",
"score" : 17
}
],
"name" : "Vella",
"restaurant_id" : "41704620"
}
)

Try to replace:
"coord" : [ ­73.9557413, 40.7720266 ],
with:
"coord" : [ ­73.9557413, 40.7720266 ]
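The comma at the end of the subdocument is extra (strict JSON does not allow a trailing comma).
Note also that the character in front of 73.9557413, and the separators between the date parts inside ISODate(...), are not ASCII minus signs but invisible soft hyphens (U+00AD) that came along when the command was copied from the PDF. The shell reports such a character as "Unexpected token ILLEGAL", so delete them and retype a plain "-".
By the way, the JSON standard allows only double-quoted strings as property keys; to rule out the number literals as the cause, try also this variant with the coordinates quoted: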
"coord" : [ "­73.9557413", "40.7720266" ]
I checked your entire JSON-document with a JSON validator, here is a valid version:
{
  "address": {
    "street": "2 Avenue",
    "zipcode": "10075",
    "building": "1480",
    "coord": [-73.9557413, 40.7720266]
  },
  "borough": "Manhattan",
  "cuisine": "Italian",
  "grades": [
    {
      "date": "2014-10-01T00:00:00Z",
      "grade": "A",
      "score": 11
    },
    {
      "date": "2014-01-16T00:00:00Z",
      "grade": "B",
      "score": 17
    }
  ],
  "name": "Vella",
  "restaurant_id": "41704620"
}

Related

How to rename field names in a nested array created from input JSON using JOLT transformation without changing anything else

I started with an input JSON as such.
{
"trackingNumber": "1ZEA83550362028861",
"localActivityDate": "20210324",
"localActivityTime": "183500",
"scheduledDeliveryDate": "20210324",
"actualDeliveryDate": "20210324",
"actualdeliveryTime": "183500",
"gmtActivityDate": "20210324",
"gmtActivityTime": "223500",
"activityStatus": {
"type": "G",
"code": "OR",
"description": "Origin Scan"
},
"activityLocation": {
"city": "RANDALLSTOWN,",
"stateProvince": "MD",
"postalCode": "21133",
"country": "US"
}
}
This is the JOLT transformation spec that i have written as of now.
[
{
"operation": "modify-overwrite-beta",
"spec": {
"tsY": "=substring(#(1,localActivityDate),0,4)",
"tsM": "=substring(#(1,localActivityDate),4,6)",
"tsD": "=substring(#(1,localActivityDate),6,8)",
"tsH": "=substring(#(1,localActivityTime),0,2)",
"tsMi": "=substring(#(1,localActivityTime),2,4)",
"tsS": "=substring(#(1,localActivityTime),4,6)",
"timeStamp": "=concat(#(1,tsY),'-',#(1,tsM),'-',#(1,tsD),'T',#(1,tsH),':',#(1,tsMi),':',#(1,tsS),'Z')",
"aTY": "=substring(#(1,scheduledDeliveryDate),0,4)",
"aTM": "=substring(#(1,scheduledDeliveryDate),4,6)",
"aTD": "=substring(#(1,scheduledDeliveryDate),6,8)",
"appointmentTime": "=concat(#(1,aTY),'-',#(1,aTM),'-',#(1,aTD))",
"dTY": "=substring(#(1,actualDeliveryDate),0,4)",
"dTM": "=substring(#(1,actualDeliveryDate),4,6)",
"dTD": "=substring(#(1,actualDeliveryDate),6,8)",
"dTH": "=substring(#(1,actualdeliveryTime),0,2)",
"dTMi": "=substring(#(1,actualdeliveryTime),2,4)",
"dTS": "=substring(#(1,actualdeliveryTime),4,6)",
"deliveryTime": "=concat(#(1,dTY),'-',#(1,dTM),'-',#(1,dTD),'T',#(1,dTH),':',#(1,dTMi),':',#(1,dTS),'Z')"
}
},
{
  "operation": "shift",
  "spec": {
    "*Number": "transformedPayload.&(0,1)Info",
    "timeStamp": "transformedPayload.events[0].&",
    "appointmentTime": "transformedPayload.events[1].&",
    "deliveryTime": "transformedPayload.events[2].&",
    "activityStatus": {
      "type": "transformedPayload.events[0].type",
      "code": "transformedPayload.events[0].statusCode",
      "description": "transformedPayload.events[0].statusDescription"
    },
    "activityLocation": {
      "city": "transformedPayload.address.city",
      "stateProvince": "transformedPayload.address.state",
      "postalCode": "transformedPayload.address.postalCode",
      "country": "transformedPayload.address.country"
    }
  }
},
{
"operation": "modify-default-beta",
"spec": {
"metaData": {
"domain": "LTL",
"eventType": "statusUpdate",
"version": "v1"
},
"transformedPayload": {
"events": {
"[1]": {
"statusCode": "AB",
"statusDescription": "Delivery Scheduled"
},
"[2]": {
"statusCode": "D1",
"statusDescription": "Delivered"
}
}
}
}
}
]
The resultant JSON created by this transformation looks like this.
{
"transformedPayload" : {
"events" : [ {
"type" : "G",
"statusCode" : "OR",
"statusDescription" : "Origin Scan",
"timeStamp" : "2021-03-24T18:35:00Z"
}, {
"appointmentTime" : "2021-03-24",
"statusCode" : "AB",
"statusDescription" : "Delivery Scheduled"
}, {
"deliveryTime" : "2021-03-24T18:35:00Z",
"statusCode" : "D1",
"statusDescription" : "Delivered"
} ],
"address" : {
"city" : "RANDALLSTOWN,",
"state" : "MD",
"postalCode" : "21133",
"country" : "US"
},
"trackingInfo" : "1ZEA83550362028861"
},
"metaData" : {
"domain" : "LTL",
"eventType" : "statusUpdate",
"version" : "v1"
}
}
I just need a small tweak in this where the appointmentTime and the deliveryTime fields in the index [1] and [2] of the events array also need to be named as "timestamp" (as seen in the [0]th index). So that finally the correct output JSON looks something like this.
{
"transformedPayload" : {
"events" : [ {
"type" : "G",
"statusCode" : "OR",
"statusDescription" : "Origin Scan",
"timeStamp" : "2021-03-24T18:35:00Z"
}, {
"timestamp" : "2021-03-24",
"statusCode" : "AB",
"statusDescription" : "Delivery Scheduled"
}, {
"timestamp" : "2021-03-24T18:35:00Z",
"statusCode" : "D1",
"statusDescription" : "Delivered"
} ],
"address" : {
"city" : "RANDALLSTOWN,",
"state" : "MD",
"postalCode" : "21133",
"country" : "US"
},
"trackingInfo" : "1ZEA83550362028861"
},
"metaData" : {
"domain" : "LTL",
"eventType" : "statusUpdate",
"version" : "v1"
}
}
I have tried renaming the field in the shift operation itself but that did not work. I am completely new to JOLT transformation so it seems a bit tricky doing this small change. So any help is appreciated. Thanks
Just convert the related lines within the shift transformation spec from
"appointmentTime": "transformedPayload.events[1].&",
"deliveryTime": "transformedPayload.events[2].&",
to
"appointmentTime": "transformedPayload.events[1].timestamp",
"deliveryTime": "transformedPayload.events[2].timestamp",
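That is, use the literal key name timestamp as the target instead of the & operator used previously, which copies the matched input key name (appointmentTime / deliveryTime) into the output path.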

logstash parse json child element, format and insert into elasticsearch

I have a json file like this:
{
  "fruits": {
    "fruit": [
      {
        "id": 1,
        "label": "test",
        "tag": "fine",
        "start": "4",
        "end": "9"
      },
      {
        "id": 2,
        "label": "test1",
        "tag": "fine1",
        "start": "2",
        "end": "4"
      }
    ]
  }
}
I have 100s of elements inside "fruit" field. I want to:
insert only the elements inside "fruit" field to the elasticsearch each as an individual doc. I want to use their own id as elasticsearch doc id.
calculate numbers in between "start" and "end" fields, then add those numbers as a comma separated string to a new field inside each doc.
The docs I want to insert into elasticsearch will be as follows:
[
  {
    "_index" : "my_index",
    "_type" : "_doc",
    "_id" : "1",
    "_score" : 1.0,
    "_source" : {
      "id" : "1",
      "label": "test",
      "tag": "fine",
      "start": "4",
      "end": "9",
      "diffs": "4,5,6,7,8,9"
    }
  },
  {
    "_index" : "my_index",
    "_type" : "_doc",
    "_id" : "2",
    "_score" : 1.0,
    "_source" : {
      "id" : "2",
      "label": "test1",
      "tag": "fine1",
      "start": "2",
      "end": "4",
      "diffs": "2,3,4"
    }
  }
]
Can anyone help me with the logstash configuration file to achieve the desired output? I am using ELK version 7.x
Thanks
Finally I could solve the requirement following this instruction.
https://discuss.elastic.co/t/logstash-parse-json-child-element-format-and-insert-into-elasticsearch/312230/7

Elasticsearch query with nested sets

I am pretty new to Elasticsearch, so please bear with me and let me know if I need to provide any additional information. I have inherited a project and need to implement new search functionality. The document/mapping structure is already in place but can be changed if it can not facilitate what I am trying to achieve. I am using Elasticsearch version 5.6.16.
A company is able to offer a number of services. Each service offering is grouped together in a set. Each set is composed of 3 categories:
Product(s) (ID 1)
Process(es) (ID 3)
Material(s) (ID 4)
The document structure looks like;
[{
"id": 4485,
"name": "Company A",
// ...
"services": {
"595": {
"1": [
95, 97, 91
],
"3": [
475, 476, 471
],
"4": [
644, 645, 683
]
},
"596": {
"1": [
91, 89, 76
],
"3": [
476, 476, 301
],
"4": [
644, 647, 555
]
},
"597": {
"1": [
92, 93, 89
],
"3": [
473, 472, 576
],
"4": [
641, 645, 454
]
},
}
}]
In the above example; 595, 596 and 597 are IDs relating to the set. 1, 3 and 4 relate to the categories (mentioned above).
The mapping looks like;
[{
"id": {
"type": "long"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"services": {
"properties": {
// ...
"595": {
"properties": {
"1": {"type": "long"},
"3": {"type": "long"},
"4": {"type": "long"}
}
},
"596": {
"properties": {
"1": {"type": "long"},
"3": {"type": "long"},
"4": {"type": "long"}
}
},
// ...
}
},
}]
When searching for a company that provides Products (ID 1), a search for 91 and 95 should return Company A, because both IDs are within the same set (595). But a search for 95 and 76 should not return Company A: while the company does offer both of these products, they are not in the same set (95 is in set 595, 76 is in set 596). The same rules would apply when searching Processes and Materials, or a combination of these.
I am looking for confirmation that the current document/mapping structure will facilitate this type of search.
If so, given 3 arrays of IDs (Products, Processes and Materials), what is the JSON to find all companies that provide these services within the same set?
If not, how should the document/mapping be changed to allow this search?
Thank you for your help.
It is a bad idea to use what is really a value (the set ID or the category ID) as a field name, because that can lead to the creation of a great many inverted indexes (remember that in Elasticsearch an inverted index is created for every field), and I feel it is not reasonable to have something like that.
Instead change your data model to something like below. I have also included sample documents, the possible queries you can apply and how the response can appear.
Note that just for sake of simplicity, I'm focussing only on the services field that you have mentioned in your mapping.
Mapping:
PUT my_services_index
{
"mappings": {
"properties": {
"services":{
"type": "nested", <----- Note this
"properties": {
"service_key":{
"type": "keyword" <----- Note that I have mentioned keyword here. Feel free to use text and keyword if you plan to implement partial + exact search.
},
"product_key": {
"type": "keyword"
},
"product_values": {
"type": "keyword"
},
"process_key":{
"type": "keyword"
},
"process_values":{
"type": "keyword"
},
"material_key":{
"type": "keyword"
},
"material_values":{
"type": "keyword"
}
}
}
}
}
}
Notice that I've made use of nested datatype. I'd suggest you to go through that link to understand why do we need that instead of using plain object type.
Sample Document:
POST my_services_index/_doc/1
{
"services":[
{
"service_key": "595",
"process_key": "1",
"process_values": ["95", "97", "91"],
"product_key": "3",
"product_values": ["475", "476", "471"],
"material_key": "4",
"material_values": ["644", "645", "643"]
},
{
"service_key": "596",
"process_key": "1",
"process_values": ["91", "89", "75"],
"product_key": "3",
"product_values": ["476", "476", "301"],
"material_key": "4",
"material_values": ["644", "647", "555"]
}
]
}
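Notice how you can now manage your data, even if it ends up having multiple combinations of product_key, process_key and material_key. (In this sample the key "1" is labelled as the process and "3" as the product, whereas the question defines 1 as Product and 3 as Process; swap the labels when adapting it to the real data.)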
The way you interpret the above document is that, you have two nested documents inside a document of my_services_index.
Sample Query:
POST my_services_index/_search
{
"_source": "services.service_key",
"query": {
"bool": {
"must": [
{
"nested": { <---- Note this
"path": "services",
"query": {
"bool": {
"must": [
{
"term": {
"services.service_key": "595"
}
},
{
"term": {
"services.process_key": "1"
}
},
{
"term": {
"services.process_values": "95"
}
}
]
}
},
"inner_hits": {} <---- Note this
}
}
]
}
}
}
Note that I've made use of Nested Query.
Response:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.828546,
"hits" : [ <---- Note this. Which would return the original document.
{
"_index" : "my_services_index",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.828546,
"_source" : {
"services" : [
{
"service_key" : "595",
"process_key" : "1",
"process_values" : [
"95",
"97",
"91"
],
"product_key" : "3",
"product_values" : [
"475",
"476",
"471"
],
"material_key" : "4",
"material_values" : [
"644",
"645",
"643"
]
},
{
"service_key" : "596",
"process_key" : "1",
"process_values" : [
"91",
"89",
"75"
],
"product_key" : "3",
"product_values" : [
"476",
"476",
"301"
],
"material_key" : "4",
"material_values" : [
"644",
"647",
"555"
]
}
]
},
"inner_hits" : { <--- Note this, which would tell you which inner document has been a hit.
"services" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.828546,
"hits" : [
{
"_index" : "my_services_index",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "services",
"offset" : 0
},
"_score" : 1.828546,
"_source" : {
"service_key" : "595",
"process_key" : "1",
"process_values" : [
"95",
"97",
"91"
],
"product_key" : "3",
"product_values" : [
"475",
"476",
"471"
],
"material_key" : "4",
"material_values" : [
"644",
"645",
"643"
]
}
}
]
}
}
}
}
]
}
}
Note that I've used the keyword datatype throughout. Please feel free to choose, for each field, whichever datatype your business requirements call for.
The idea I've provided is to help you understand the document model.
Hope this helps!

Mongodb Aggregate JSON array field for the matching field of other collection

I am new to mongodb , I have two collections like this :
1st collection name is A
{
"_id": "1234",
"versions": [{
"owner_id": ObjectId("100000"),
"versions": 1,
"type" : "info",
"items" : ["item1","item3","item7"]
},
{
"owner_id": ObjectId("100001"),
"versions": 2,
"type" : "bug",
"OS": "Ubuntu",
"Dependencies" : "Trim",
"items" : ["item1","item7"]
}
]}
2nd Collection name is B
{ "_id": ObjectId("100000"), "email": "abc#xyz.com" } { "_id": ObjectId("100001"), "email": "bbc#xyz.com"}
Expected output is :
{
"_id": "1234",
"versions":[{
"owner_id": "abc#xyz.com",
"versions": 1,
"type" : "info",
"items" : ["item1","item3","item7"]
},
{
"owner_id": "bbc#xyz.com",
"versions": 2,
"type" : "bug",
"OS": "Ubuntu",
"Dependencies" : "Trim",
"items" : ["item1","item7"]
}
] }
I used mongo $lookup but I am not getting required output
Please help.
Thank You!!!
You need to $unwind versions, $lookup with another collection on foreignField, $project to take the first element from the match array, $group to get back in original document format
collection a
> db.a.find()
{ "_id" : "1234", "versions" : [ { "owner_id" : "100000" }, { "owner_id" : "100001" }, { "owner_id" : "100001" } ] }
collection b
> db.b.find()
{ "_id" : "100000", "email" : "abc#xyz.com" }
{ "_id" : "100001", "email" : "bbc#xyz.com" }
aggregate pipeline
> db.a.aggregate(
[
{$unwind:"$versions"},
{$lookup : {from : "b", "localField":"versions.owner_id", "foreignField":"_id", as :"out"}},
{$project : {"_id":1, "versions.owner_id":{$arrayElemAt:["$out.email",0]}}},
{$group:{_id:"$_id", versions : {$push : "$versions"}}}
]
).pretty()
output
{
"_id" : "1234",
"versions" : [
{
"owner_id" : "abc#xyz.com"
},
{
"owner_id" : "bbc#xyz.com"
},
{
"owner_id" : "bbc#xyz.com"
}
]
}

Regional Opsworks stack can't be found by Cloudformation

I have a Cloudformation template that modifies an Opsworks stack by adding few resources.
The Opsworks stack is deployed in the region eu-west-1 which is the API endpoint region as well and it shows on the side of the name of the stack: Regional.
When I run the Cloudformation template ( I give the stack ID as a parameter) I get this error:
Unable to find stack with ID xxxxxxx
I guess Cloudformation can only see the opsworks resources which are in us-east-1 region?
I tried changing the region of Cloudformation and deploy the template but the stack is still unfound.
How can I let Cloudformation search for the stack in all regions?
Should I clone the opsworks stack and change the endpoint to us-east-1 region?
What would be the best solution?
Template
{
"AWSTemplateFormatVersion": "2010-09-09",
"Description": "Add a layer to an existing stack",
"Mappings": {
  "Region2Principal": {
    "eu-west-1": {
      "EC2Principal": "ec2.amazonaws.com",
      "OpsWorksPrincipal": "opsworks.amazonaws.com"
    }
  }
},
"Parameters": {
"Environment" : {
  "Description": "The Environnement variable ",
  "Type": "String",
  "Default": "dev",
  "AllowedValues" : ["dev", "test", "prod"]
},
"InstanceType": {
"Type": "String",
"Default": "m4.large",
"AllowedValues" : ["t2.micro", "m1.small", "m1.large","m4.large","m4.xlarge","m4.2xlarge","m4.4xlarge","m4.10xlarge","m4.16xlarge","c4.large" , "c4.xlarge" ,"c4.2xlarge" , "c4.4xlarge","c4.8xlarge" , "c3.large" , "c3.xlarge", "c3.2xlarge", "c3.4xlarge" ,"c3.8xlarge"],
"ConstraintDescription": "must be a valid EC2 instance type"
},
"StackID": {
"Type": "String",
"Description": "ID of the existing opsworks stack to edit"
},
"vpcId": {
"Description": "VPC id of corresponding to the Environment",
"Type": "String"
},
"subnetIds" :{
"Description": "list of sunbnets in the chosen VPC",
"Type": "List<AWS::EC2::Subnet::Id>"
},
"ScriptSG":{
"Description": "script security group",
"Type" : "String"
},
"SG": {
"Description": " layer security group",
"Type": "String"
}
},
"Resources":{
"Layer": {
"Type": "AWS::OpsWorks::Layer",
"Properties": {
"AutoAssignElasticIps" : false,
"AutoAssignPublicIps" : true
}
},
"SInstance1": {
"Type": "AWS::OpsWorks::Instance",
"Properties": {
"Hostname": "S1",
"AutoScalingType": "timer",
"TimeBasedAutoScaling" : {
"Friday" : { "0" : "on", "6" : "on", "12" : "on", "18" : "on" },
"Monday" : { "0" : "on", "6" : "on", "12" : "on", "18" : "on" }
},
"RootDeviceType": "ebs",
"StackId": {"Ref": "StackID"},
"LayerIds": [{"Ref": "Layer"}],
"InstanceType": {"Ref" : "InstanceType"}
}
},
"Instance2": {
"Type": "AWS::OpsWorks::Instance",
"Properties": {
"Hostname": "S2",
"AutoScalingType": "timer",
"TimeBasedAutoScaling" : {
"Saturday": { "0" : "on", "6" : "on", "12" : "on", "18" : "on" },
"Sunday" : { "0" : "on", "6" : "on", "12" : "on", "18" : "on" },
"Thursday": { "0" : "on", "6" : "on", "12" : "on", "18" : "on" },
"Tuesday" : { "0" : "on", "6" : "on", "12" : "on", "18" : "on" },
"Wednesday":{ "0" : "on", "6" : "on", "12" : "on", "18" : "on" }
},
"RootDeviceType": "ebs",
"StackId": {"Ref": "StackID"},
"LayerIds": [{"Ref": "Layer"}],
"InstanceType": {"Ref" : "InstanceType"}
}
},
"ELB": {
"Type": "AWS::ElasticLoadBalancing::LoadBalancer",
"Properties": {
"ConnectionDrainingPolicy" : {
"Enabled" : true,
"Timeout" : 300
},
"ConnectionSettings" : {
"IdleTimeout" : 60
},
"CrossZone" : true,
"HealthCheck" : {
"HealthyThreshold" : "3",
"Interval" : "30",
"Target" : "HTTP:80/ping",
"Timeout" : "5",
"UnhealthyThreshold" : "2"
},
"LoadBalancerName": "loadBalancer",
"Listeners" : [{
"InstancePort" : "80",
"InstanceProtocol" : "HTTP",
"LoadBalancerPort" : "80",
"Protocol" : "HTTP"
}],
"Scheme" : "internal",
"SecurityGroups" : [{ "Ref" : "ELBSecurityGroup" }],
"Subnets" : { "Ref" : "subnetIds"}
}
},
"ELBAttach":{
"Type": "AWS::OpsWorks::ElasticLoadBalancerAttachment",
"Properties": {
"ElasticLoadBalancerName" : {"Ref" : "ELB"},
"LayerId" : {"Ref" : "Layer" }
}
}
},
}
It looks like you will need to move them to the same region.
Resources can be managed only in the region in which they are created. Resources that are created in one regional endpoint are not available, nor can they be cloned to, another regional endpoint.
http://docs.aws.amazon.com/general/latest/gr/rande.html#opsworks_region
https://aws.amazon.com/about-aws/whats-new/2016/08/aws-opsworks-adds-nine-regional-endpoints-and-asia-pacific-seoul-region-support/
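The Layer resource is missing the StackId property: your template sets only AutoAssignElasticIps and AutoAssignPublicIps, so the layer is never tied to the existing stack (Name, Shortname and Type are also absent — check which of these are required in the documentation linked below). Also note that the template has a trailing comma after the closing brace of Resources, just before the final }, which strict JSON parsers reject. The full property list for AWS::OpsWorks::Layer is: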
{
"Type": "AWS::OpsWorks::Layer",
"Properties": {
"Attributes" : { String:String },
"AutoAssignElasticIps" : Boolean,
"AutoAssignPublicIps" : Boolean,
"CustomInstanceProfileArn" : String,
"CustomJson" : JSON object,
"CustomRecipes" : Recipes,
"CustomSecurityGroupIds" : [ String, ... ],
"EnableAutoHealing" : Boolean,
"InstallUpdatesOnBoot" : Boolean,
"LifecycleEventConfiguration" : LifeCycleEventConfiguration,
"LoadBasedAutoScaling" : LoadBasedAutoScaling,
"Name" : String,
"Packages" : [ String, ... ],
"Shortname" : String,
"StackId" : String,
"Type" : String,
"VolumeConfigurations" : [ VolumeConfiguration, ... ]
}
}
http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-opsworks-layer.html