Real value not recognized sending JSON data from Kinesis Firehose to elasticsearch - json

I have an issue in Kibana with the field value explained in the following lines. I'll try to explain the situation.
I'm sending DynamoDB streams to Lambda, then to Kinesis Firehose, and finally from Firehose to Elasticsearch. I'm using Kibana to visualize the data, and this is where I have the issue.
Let's say that I'm sending this JSON to DynamoDB:
{
"id": "identificator",
"timestamp": "2017-05-09T06:38:00.337Z",
"value": 33,
"units": "units",
"description": "This is the description",
"machine": {
"brand": "brand",
"application": "application"
}
}
In Lambda I receive the following:
{
"data": {
"M": {
"machine": {
"M": {
"application": {
"S": "application"
},
"brand": {
"S": "band"
}
}
},
"description": {
"S": "This is the description"
},
"id": {
"S": "identificator"
},
"units": {
"S": "units"
},
"value": {
"N": "33"
},
"_msgid": {
"S": "85209b75.f51ee8"
},
"timestamp": {
"S": "2017-05-09T06:38:00.337Z"
}
}
},
"id": {
"S": "85209b75.f51ee8"
}
}
If I forward this last JSON to Kinesis Firehose, when in Kibana I configure the index pattern, it recognizes the "timestamp" automatically (and that's great). The problem here, is that the field "value" is like a string and it is not recognized.
I tried to modify the JSON and then send it again to Firehose, but then Kibana doesn't recognize the "timestamp":
{
"data": {
"machine": {
"application": "application",
"brand": "brand"
},
"description": "This is the description",
"id": "identificator",
"units": "KWh",
"value": 33,
"_msgid": "85209b75.f51ee8",
"timestamp": "2017-05-09T06:38:00.337Z"
},
"id": "85209b75.f51ee8"
}
I would like to know how could I send this data and Kibana recognizes the "timestamp" and "value" fields.
This is an example of the code that I'm using in lambda:
var AWS = require('aws-sdk');
var unmarshalJson = require('dynamodb-marshaler').unmarshalJson;
var firehose = new AWS.Firehose();
exports.lambda_handler = function(event, context) {
var record = JSON.stringify(event.Records[0].dynamodb.NewImage);
console.log("[INFO]:"+JSON.stringify(event.Records[0].dynamodb.NewImage));
var params = {
DeliveryStreamName: 'DeliveryStreamName',
Record:{
Data: record
}
};
firehose.putRecord(params, function(err, data) {
if (err) console.log(err, err.stack); // an error occurred
else console.log(JSON.stringify(data)); // successful response
context.done();
});
};

I solved it by creating the index mapping myself instead of letting Kinesis Firehose create it, and declaring the "timestamp" attribute as { "type" : "date" } and the "value" attribute as { "type" : "float" }.
For instance for this type of JSON:
{
"data": {
"timestamp": "2017-05-09T11:30:41.484Z",
"tag": "tag",
"value": 33,
"units": "units",
"type": "type",
"machine":{
"name": "name",
"type": "type",
"company": "company"
}
},
"id": "85209b75.f51ee8"
}
I created manually the following elasticsearch index and mapping:
PUT /index
{
"settings" : {
"number_of_shards" : 2
},
"mappings" : {
"type" : {
"properties" : {
"data" : {
"properties" : {
"machine":{
"properties": {
"name": { "type" : "text" },
"type": { "type" : "text" },
"company": { "type" : "text" }
}
},
"timestamp": { "type" : "date" },
"tag" : { "type" : "text" },
"value": { "type" : "float" },
"description": { "type" : "text" },
"units": { "type" : "text" },
"type" : { "type" : "text" },
"_msgid": { "type" : "text" }
}
},
"id": { "type" : "text" }
}
}
}
}
So, to solve it, I think the best approach is that in Lambda you check whether the index mapping exists and, if it does not, create it yourself.

Related

How to add a list to Elasticsearch mapping

I have the following JSON format and want to create a mapping for it from Elasticsearch console :
{
"properties": {
"#timestamp" : {
"type" : "date"
},
"list": [
{
"name": "John",
"age": "37",
"title": "Tester"
}
]
}
}
There's no list or array type in ES, you simply declare objects and then you can add a list of those objects to your documents:
PUT your-index
{
"mappings": {
"properties": {
"#timestamp" : {
"type" : "date"
},
"list": {
"type": "object",
"properties": {
"name": {
"type": "text"
},
"age": {
"type": "integer"
},
"title": {
"type": "text"
}
}
}
}
}
}

Dynamic avro schema creation

How to create avro schema for below json code
{
"_id" : "xxxx",
"name" : "xyz",
"data" : {
"abc" : {
"0001" : "gha",
"0002" : "bha"
}
}
}
Here,
"0001" : "gha",
"0002" : "bha"
key: value would be dynamic.
Maybe a schema like this does what you want?
{
"type": "record",
"name": "MySchema",
"namespace": "my.name.space",
"fields": [
{
"name": "_id",
"type": "string"
},
{
"name": "name",
"type": "string"
},
{
"name": "data",
"type": {
"type": "record",
"name": "Data",
"fields": [
{
"name": "abc",
"type": {
"type": "map",
"values": "string"
}
}
]
}
}
]
}
It's not dynamic, but you can add as many key-value pairs to the map as you like. Field names starting with a numeric value aren't allowed in Avro.

Postman Json Schema validation failed

I have this little issue. I am building API tests with postman.
One of my tests want to validate a Json response.
This is the kind of response that I have received:
{
"comuni": [
{
"istat": "015002",
"code": "A010",
"comune": "ABBIATEGRASSO",
"provincia": "MI",
"cap": "20081",
"latitude": 45.393036,
"longitude": 8.919824,
"soppresso": false,
"regione": "Lombardia",
"parte_italia": "nord",
"is_provincia": 0,
"nome_provincia": "Milano"
},
...
...
]};
So I receive an array of objects like this one above.
This is the test that I wrote:
var schema = {
"comuni" :
[
{
"istat" : {
"type" : "Integer"
},
"code" : {
"type" : "string"
},
"comune" : {
"type" : "string"
},
"provincia" : {
"type" : "string"
},
"cap" : {
"type" : "integer"
},
"latitude" : {
"type": "Number"
},
"longitude" : {
"type": "Number"
},
"soppresso": {
"tyoe" : "boolean"
},
"regione" : {
"type" : "string"
},
"parte_italia": {
"type": "string"
},
"is_provincia": {
"type": "integer"
},
"nome_provincia": {
"type": "string"
}
}]
}
pm.test("JSON schema validation", function() {
var paperwork = pm.response.json();
var result = tv4.validate(paperwork, schema, false, true);
if (result !== true) {
console.log('Schema validation failed:', tv4.error);
}
/*console.log(tv4.error.dataPath);*/
pm.expect(result).to.be.true;
console.log(JSON.stringify(result));
});
But the test fails:
Schema validation failed: unknown property (not in schema)
Obviously I am doing something wrong with the schema, but I do not understand what.
Your schema is incorrect. It should be like this.
{
"description": "Any validation failures are shown in the right-hand Messages pane.",
"type": "object",
"properties": {
"foo": {
"type": "number"
},
"bar": {
"type": "string",
"enum": [
"a",
"b",
"c"
]
}
}
}
And data should look like,
{
"foo": 12345,
"bar": "a"
}
Refer below link for more examples, like Array/Objects etc.

how to match an array value by it's key in a key value pair elasticsearch array?

I have an array of key value pairs. Is it possible to exact match value of key & then do a check on it's value's range value?
Example: In below doc oracle_props is an array with name, value pairs. I need to check if it has "oracle_cursors" key and then check if it's value is less than 1000.
GET /eg/message/_percolate
{
"doc": {
"client": {
"name": "Athena",
"version": 1,
"db": {
"#type": "Oracle Database 10g Enterprise Edition Release 10.2.0.4.0 64bit",
"oracle_props": [
{
"#name": "open_cursors",
"#value": 4000
},
{
"#name": "USER_ROLE_PRIVS_COUNT",
"#value": 1
},
{
"#name": "CREATE_PERMISSION",
"#value": "Y"
}
]
}
}
}
}
Below is my percolator.
I also need to check the following so that it gives back 3 as my result
"client.name" must be "Athena"
"client.db.#type" must be "Oracle" then only go ahead and check below points
"client.db.oracle_props.#name" field is not found
check if it has "oracle_cursors" key and then check if it's value is < 1000
1 & 2 are and operations and any of 3 or 4 satisfies it should result 3. I need help with point 4, below is my query. Also please suggest if there is a better way.
PUT /eg/.percolator/3
{
"query": {
"filtered": {
"filter": {
"or": [
{
"missing": {
"field": "client.db.oracle_props.#name"
}
}
]
},
"query": {
"bool": {
"must": [
{
"match": {
"client.name": "Athena"
}
},
{
"match": {
"client.db.#type": "Oracle"
}
}
]
}
}
}
}
}
Update
Can I have something like below
{
"match": {
"client.db.oracle_props[name='open_cursors'].value": 4000
}
}
More tries
I followed the elasticsearch nested query documentation and changed the mapping to the nested type by re-indexing. Can anyone find the problem and explain why I am getting nested: NullPointerException;?
PUT /eg/.percolator/3
{
"nested" : {
"path" : "client.db.oracle_props",
"score_mode" : "avg",
"query" : {
"bool" : {
"must" : [
{
"match" : {"client.db.oracle_props.#name" : "open_cursors"}
},
{
"range" : {"client.db.oracle_props.#value" : {"lt" : 4000}}
}
]
}
}
}
}
mapping change
...
"properties": {
"#type": {
"type": "string"
},
"oracle_props": {
"type" : "nested",
"properties": {
"#name": {
"type": "string"
},
"#value": {
"type": "long"
}
}
}
}
...
Let's get into it:
You seem to map your nested path wrong, oracle_props is a child item of db in your example document, but not in your mapping, where it appears directly as child of your root.
You are mapping oracle_props.#value as long, but assign a text Y to it at the CREATE_PERMISSION nested doc
You query for range lt 4000, which excludes 4000, lte would fit for you
I didn't get your requirement for the missing value, hence I skipped that.
To get you onto the right path, I had to simplify it a bit (since I couldn't follow all the detail in your question, sorry).
I'm not going into percolation either, and renamed everything to twitter/tweet, since this was easier for me to copy from my examples.
1) Create empty index "twitter"
curl -XDELETE 'http://localhost:9200/twitter/'
curl -XPUT 'http://localhost:9200/twitter/'
2) create geo_point mapping for the actual "tweet"
curl -XPUT 'http://localhost:9200/twitter/tweet/_mapping' -d '
{
"tweet": {
"properties": {
"db": {
"type": "object",
"properties": {
"#type": {
"type": "string"
},
"oracle_props": {
"type": "nested",
"properties": {
"#name": {
"type": "string"
},
"#value": {
"type": "string"
}
}
}
}
}
}
}
}'
3) Let's check if the mapping was set
curl -XGET 'http://localhost:9200/twitter/tweet/_mapping?pretty=true'
4) Post some tweets, with nested data
curl -XPUT 'http://localhost:9200/twitter/tweet/1' -d '{
"name": "Athena",
"version": 1,
"db": {
"#type": "Oracle Database 10g Enterprise Edition Release 10.2.0.4.0 64bit",
"oracle_props": [
{
"#name": "open_cursors",
"#value": 4000
},
{
"#name": "USER_ROLE_PRIVS_COUNT",
"#value": 1
},
{
"#name": "CREATE_PERMISSION",
"#value": "Y"
}
]
}
}'
5) Query nested only
curl -XGET localhost:9200/twitter/tweet/_search -d '{
"query": {
"nested" : {
"path" : "db.oracle_props",
"score_mode" : "avg",
"query" : {
"bool" : {
"must" : [
{
"term": {
"db.oracle_props.#name": "open_cursors"
}
},
{
"range": {
"db.oracle_props.#value": {
"lte": 4000
}
}
}
]
}
}
}
}
}';
6) Query "Athena" and "Oracle"
curl -XGET localhost:9200/twitter/tweet/_search -d '{
"query" : {
"bool" : {
"must" : [
{
"match" : {"tweet.name" : "Athena"}
},
{
"match" : {"tweet.db.#type" : "Oracle"}
}
]
}
}
}'
7) Combine the former two queries
curl -XGET localhost:9200/twitter/tweet/_search -d '{
"query" : {
"bool" : {
"must" : [
{
"match" : {"tweet.name" : "Athena"}
},
{
"match" : {"tweet.db.#type" : "Oracle"}
},
{
"nested" : {
"path" : "db.oracle_props",
"score_mode" : "avg",
"query" : {
"bool" : {
"must" : [
{
"term": {
"db.oracle_props.#name": "open_cursors"
}
},
{
"range": {
"db.oracle_props.#value": {
"lte": 4000
}
}
}
]
}
}
}
}
]
}
}
}'
Results as
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 2.462332,
"hits": [
{
"_index": "twitter",
"_type": "tweet",
"_id": "1",
"_score": 2.462332,
"_source": {
"name": "Athena",
"version": 1,
"db": {
"#type": "Oracle Database 10g Enterprise Edition Release 10.2.0.4.0 64bit",
"oracle_props": [
{
"#name": "open_cursors",
"#value": 4000
},
{
"#name": "USER_ROLE_PRIVS_COUNT",
"#value": 1
},
{
"#name": "CREATE_PERMISSION",
"#value": "Y"
}
]
}
}
}
]
}
}
You need to use a Nested Document. See http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/mapping-nested-type.html

AWS: Cloud Formation: Is it possible to use multiple "DependsOn"?

Given i have this example template:
{
"AWSTemplateFormatVersion" : "2010-09-09",
"Mappings" : {
"RegionMap" : {
"us-west-1" : { "AMI" : "ami-655a0a20" },
...
}
},
"Resources" : {
"Ec2Instance" : {
"Type" : "AWS::EC2::Instance",
"Properties" : {
...
},
"DependsOn" : "myDB"
},
"myDB" : {
"Type" : "AWS::RDS::DBInstance",
"Properties" : {
...
}
},
"myDB2" : {
"Type" : "AWS::RDS::DBInstance",
"Properties" : {
...
}
}
}
}
Is it possible to specify multiple DependsOn in any way?
Would be great to have somethink like:
"DependsOn" : ["myDB", "myDB2"]
What's the normal way?
Yes,
The DependsOn attribute can take a single string or list of strings.
http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-attribute-dependson.html
Syntax:
"DependsOn" : [ String, ... ]
This answer comes up first in Google, so I will include how to do multiple dependson attributes in YAML, which I found in this answer.
AnotherProductionResource:
Type: AWS::CloudFormation::Stack
Condition: ISProduction
DependsOn:
- AResource
- MyProductionResource
Properties:
[...]
Yes,
"DependsOn" can take multiple strings. I have listed an example below:
"DependsOn": [ "S3BucketAppElbLogs", "ElbLogAppBucketPolicy" ]
{
"Description": "Create a variable number of EC2 instance resources.",
"Parameters": {
"InstanceCount": {
"Description": "Number of EC2 instances (must be between 1 and 5).",
"Type": "Number",
"Default": 1,
"MinValue": 1,
"MaxValue": 5,
"ConstraintDescription": "Must be a number between 1 and 5."
},
"ImageId": {
"Description": "Image ID to launch EC2 instances.",
"Type": "AWS::EC2::Image::Id",
"Default": "ami-31c9924e"
},
"InstanceType": {
"Description": "Instance type to launch EC2 instances.",
"Type": "String",
"Default": "m3.medium",
"AllowedValues": [
"m3.medium",
"m3.large",
"m3.xlarge",
"m3.2xlarge"
]
}
},
"Conditions": {
"Launch1" : {"Fn::Equals" : [{"Ref" : "InstanceCount"}, "1"]},
"Launch2" : {"Fn::Equals" : [{"Ref" : "InstanceCount"}, "2"]}
},
"Resources": {
"Instance2": {
"Condition": "Launch2",
"Type": "AWS::EC2::Instance",
"Properties": {
"ImageId": {
"Ref": "ImageId"
},
"InstanceType": {
"Ref": "InstanceType"
}
},
"DependsOn": "Instance1"
},
"Instance1": {
"Condition": "Launch1",
"Type": "AWS::EC2::Instance",
"Properties": {
"ImageId": {
"Ref": "ImageId"
},
"InstanceType": {
"Ref": "InstanceType"
}
}
}
}
}