Elasticsearch: include boundary value timestamp in response - JSON

I am trying to include the boundary values in the response, but it always rounds to the nearest bucket boundary according to the interval.
For example, if I ask for data from 10:20 to 10:42 at a 5-minute interval, it will return data for
10:20 - 10:25 - 10:30 - 10:35 - 10:40, but the last partial interval 10:40-10:42 is never returned. How can I do this? Here is the query and response.
Query
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [{
"range": {
"timestamp": {
"gte": 1486443000000,
"lte": 1486446240000
}
}
}, {
"term": {
"applicationId": "******"
}
}, {
"term": {
"hostId": "*******"
}
}]
}
}
}
},
"filter": {
"limit": {
"value": 0
}
},
"aggs": {
"time": {
"histogram": {
"field": "timestamp",
"interval": 300000,
"min_doc_count": 0,
"extended_bounds": {
"min": 1486443000000 ,
"max": 1486446240000
}
},
"aggs": {
"establishedConnections": {
"sum": {
"field": "establishedConnections"
}
}
}
}
},
"sort": {
"timestamp": {
"order": "desc"
}
}
}
Response
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 21,
"successful": 21,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
},
"aggregations": {
"time": {
"buckets": [
{
"key_as_string": "2017-02-07T04:50:00.000Z",
"key": 1486443000000,
"doc_count": 50,
"establishedConnections": {
"value": 13
}
},
{
"key_as_string": "2017-02-07T04:55:00.000Z",
"key": 1486443300000,
"doc_count": 50,
"establishedConnections": {
"value": 20
}
},
{
"key_as_string": "2017-02-07T05:00:00.000Z",
"key": 1486443600000,
"doc_count": 50,
"establishedConnections": {
"value": 7
}
},
{
"key_as_string": "2017-02-07T05:05:00.000Z",
"key": 1486443900000,
"doc_count": 50,
"establishedConnections": {
"value": 14
}
},
{
"key_as_string": "2017-02-07T05:10:00.000Z",
"key": 1486444200000,
"doc_count": 50,
"establishedConnections": {
"value": 13
}
},
{
"key_as_string": "2017-02-07T05:15:00.000Z",
"key": 1486444500000,
"doc_count": 50,
"establishedConnections": {
"value": 12
}
},
{
"key_as_string": "2017-02-07T05:20:00.000Z",
"key": 1486444800000,
"doc_count": 50,
"establishedConnections": {
"value": 9
}
},
{
"key_as_string": "2017-02-07T05:25:00.000Z",
"key": 1486445100000,
"doc_count": 50,
"establishedConnections": {
"value": 14
}
},
{
"key_as_string": "2017-02-07T05:30:00.000Z",
"key": 1486445400000,
"doc_count": 50,
"establishedConnections": {
"value": 19
}
},
{
"key_as_string": "2017-02-07T05:35:00.000Z",
"key": 1486445700000,
"doc_count": 50,
"establishedConnections": {
"value": 13
}
},
{
"key_as_string": "2017-02-07T05:40:00.000Z",
"key": 1486446000000,
"doc_count": 40,
"establishedConnections": {
"value": 8
}
}
]
}
}
}

The thing is that in the aggregations part of the query, you asked for this:
"aggs": {
"time": {
"histogram": {
"field": "timestamp",
"interval": 300000,
"min_doc_count": 0,
"extended_bounds": {
"min": 1486443000000 ,
"max": 1486446240000
}
},
"aggs": {
"establishedConnections": {
"sum": {
"field": "establishedConnections"
}
}
}
}
}
In the interval value you specified 300000, which in milliseconds is exactly 5 minutes. That is why the last partial interval ending at 10:42 is not returned separately: all documents from that interval are placed under the key 10:40.
To make this more formal:
When the aggregation executes, the timestamp field of every document is evaluated and rounded down to its closest bucket. Here is the rounding function that is used:
bucket_key = Math.floor((value - offset) / interval) * interval + offset
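Plugging the question's own numbers into this formula (offset defaults to 0 when none is set) shows where the 10:42 boundary ends up:
bucket_key = Math.floor((1486446240000 - 0) / 300000) * 300000 + 0
           = 1486446000000
1486446000000 is 2017-02-07T05:40:00.000Z, i.e. the 10:40 bucket, which is exactly the last bucket in the response (note its smaller doc_count of 40).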

Related

Elasticsearch: Aggregation of null fields in a facet bucket

I'm trying to implement facets with a date range aggregation on the current version of Amazon Elasticsearch Service (version 7.10). The key I want the article documents grouped by is publishedAt, which is a date. I want one bucket where publishedAt is in the past, which means published; one where it is in the future, which means scheduled; and one for all articles without a publishedAt, which are drafts. published and scheduled work as they should. For drafts I can't use a filter or date range, as the values are null, so I want to make use of the "Missing Values" feature. This should treat documents with publishedAt = null as if they had the date given in the missing field. Unfortunately it has no effect on the results, even if I change the missing date so that it would match published or scheduled.
My request:
GET https://es.amazonaws.com/articles/_search
{
"size": 10,
"aggs": {
"facet_bucket_all": {
"aggs": {
"channel": {
"terms": {
"field": "channel.keyword",
"size": 5
}
},
"brand": {
"terms": {
"field": "brand.keyword",
"size": 5
}
},
"articleStatus": {
"date_range": {
"field": "publishedAt",
"format": "dd-MM-yyyy",
"missing": "01-07-1886",
"ranges": [
{ "key": "published", "from": "now-99y/M", "to": "now/M" },
{ "key": "scheduled", "from": "now+1s/M", "to": "now+99y/M" },
{ "key": "drafts", "from": "01-01-1886", "to": "31-12-1886" }
]
}
}
},
"filter": {
"bool": {
"must": []
}
}
},
"facet_bucket_publishedAt": {
"aggs": {},
"filter": {
"bool": {
"must": []
}
}
},
"facet_bucket_author": {
"aggs": {
"author": {
"terms": {
"field": "author",
"size": 10
}
}
},
"filter": {
"bool": {
"must": []
}
}
}
},
"query": {
"bool": {
"filter": [
{
"range": {
"publishedAt": {
"lte": "2021-08-09T09:52:19.975Z"
}
}
}
]
}
},
"from": 0,
"sort": [
{
"_score": "desc"
}
]
}
And in the result, the drafts are empty:
"articleStatus": {
"buckets": [
{
"key": "published",
"from": -1.496448E12,
"from_as_string": "01-08-1922",
"to": 1.627776E12,
"to_as_string": "01-08-2021",
"doc_count": 47920
},
{
"key": "scheduled",
"from": 1.627776E12,
"from_as_string": "01-08-2021",
"to": 4.7519136E12,
"to_as_string": "01-08-2120",
"doc_count": 3
},
{
"key": "drafts",
"from": 1.67252256E13,
"from_as_string": "01-01-1886",
"to": 1.67566752E13,
"to_as_string": "31-12-1886",
"doc_count": 0
}
]
}
SearchKit added this part to the query:
"query": {
"bool": {
"filter": [
{
"range": {
"publishedAt": {
"lte": "2021-08-09T09:52:19.975Z"
}
}
}
]
}
}
This had to be removed, because it filters out the null values before the missing setting can do its job.
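A minimal version of the working request, reduced to just the date_range aggregation (a sketch; the facet wrappers and the other aggregations from above are omitted):
{
"size": 0,
"aggs": {
"articleStatus": {
"date_range": {
"field": "publishedAt",
"format": "dd-MM-yyyy",
"missing": "01-07-1886",
"ranges": [
{ "key": "published", "from": "now-99y/M", "to": "now/M" },
{ "key": "scheduled", "from": "now+1s/M", "to": "now+99y/M" },
{ "key": "drafts", "from": "01-01-1886", "to": "31-12-1886" }
]
}
}
}
}
There is deliberately no query block here: any query-level range on publishedAt removes the null documents before missing can stand in for them.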
Now I get the correct result:
"articleStatus": {
"buckets": [
{
"key": "drafts",
"from": -2.650752E12,
"from_as_string": "01-01-1886",
"to": -2.6193024E12,
"to_as_string": "31-12-1886",
"doc_count": 7
},
{
"key": "published",
"from": -1.496448E12,
"from_as_string": "01-08-1922",
"to": 1.627776E12,
"to_as_string": "01-08-2021",
"doc_count": 47920
},
{
"key": "scheduled",
"from": 1.627776E12,
"from_as_string": "01-08-2021",
"to": 4.7519136E12,
"to_as_string": "01-08-2120",
"doc_count": 3
}
]
}

Elasticsearch query to get all the values of multiple attributes linked to a key name

I have the following JSON data:
{
"_index": "logs",
"_type": "_doc",
"_id": "122",
"_version": 7,
"_score": null,
"_source": {
"Data": {
"FacTotal": 62701268992,
"FacFree": 56609468416,
"FacStatus": "Normal",
"Version": "2.0",
"Ip": "192.168.0.106"
},
"Created": "2021-01-04T14:13:48.245760",
"Device": "T1"
"Customer": "demo1"
},
"fields": {
"Data.UpTime": [
"2021-01-04T14:10:05.000Z"
],
"Created": [
"2021-01-04T14:13:48.245Z"
]
},
"sort": [
1609769628245
]
}
Just like the above JSON data, I have multiple documents with different Customers, and each customer has multiple Devices. I have written the below query, which gives me a list of all the Customers and the count of Devices each customer has.
GET logs/_search
{
"size": 0,
"aggs": {
"customers": {
"terms": {
"field": "Customer.keyword"
},
"aggs": {
"type_count": {
"cardinality": {
"field": "Device.keyword"
}
}
}
}
}
}
Here is the response:
{
"took" : 996,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 325,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"customers" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "demo1",
"doc_count" : 141,
"type_count" : {
"value" : 5
}
},
{
"key" : "demo2",
"doc_count" : 140,
"type_count" : {
"value" : 5
}
},
{
"key" : "demo3",
"doc_count" : 36,
"type_count" : {
"value" : 1
}
},
{
"key" : "demo4",
"doc_count" : 8,
"type_count" : {
"value" : 1
}
}
]
}
}
}
How can I modify the above query so that, along with the count, it also gives the names of the devices for a customer? Something like below:
{
"key": "demo1",
"doc_count": 141,
"type_count": {
"value": 3
},
"device_name": [ <- device name
"T1",
"T2",
"T3"
]
}
Thanks
Great start!! You can leverage the terms aggregation:
GET logs/_search
{
"size": 0,
"aggs": {
"customers": {
"terms": {
"field": "Customer.keyword"
},
"aggs": {
"device_name": {
"terms": {
"field": "Device.keyword",
"size": 100
}
},
"type_count": {
"cardinality": {
"field": "Device.keyword"
}
}
}
}
}
}
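With the device_name sub-aggregation in place, each customer bucket should come back in roughly the shape asked for above (a sketch using the question's illustrative numbers, not actual output):
{
"key": "demo1",
"doc_count": 141,
"device_name": {
"buckets": [
{ "key": "T1", "doc_count": 47 },
{ "key": "T2", "doc_count": 50 },
{ "key": "T3", "doc_count": 44 }
]
},
"type_count": {
"value": 3
}
}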
You can use the stats bucket aggregation along with the terms aggregation to achieve your use case.
Adding a working example with index data, search query, and search result.
Index Data:
{
"Device": "T2",
"Customer": "demo1"
}
{
"Device": "T2",
"Customer": "demo1"
}
{
"Device": "T1",
"Customer": "demo2"
}
{
"Device": "T3",
"Customer": "demo1"
}
Search Query:
{
"size": 0,
"aggs": {
"customers": {
"terms": {
"field": "Customer.keyword"
},
"aggs": {
"device_name": {
"terms": {
"field": "Device.keyword"
}
},
"bucketcount": {
"stats_bucket": {
"buckets_path": "device_name._count"
}
}
}
}
}
}
Search Result:
"aggregations": {
"customers": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "demo1", // note this
"doc_count": 2,
"device_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "T2", // note this
"doc_count": 2
}
]
},
"bucketcount": {
"count": 1, // note this
"min": 2.0,
"max": 2.0,
"avg": 2.0,
"sum": 2.0
}
},
{
"key": "demo2",
"doc_count": 2,
"device_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "T1",
"doc_count": 1
},
{
"key": "T3",
"doc_count": 1
}
]
},
"bucketcount": {
"count": 2,
"min": 1.0,
"max": 1.0,
"avg": 1.0,
"sum": 2.0
}
}
]
}
}

Kibana query on NESTED object returns estimated price - why?

In a Kibana visualization I built a bar chart that aggregates orders per label bucket (e.g. USD/CAD). Then I display the sum of the order prices per bucket for each of the orders inside it.
My chart visualization returns a wrong price, which seems to be approximated.
This is my graph's query request from debug:
{
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"3": {
"terms": {
"field": "label.keyword",
"size": 2,
"order": {
"_term": "desc"
}
},
"aggs": {
"2": {
"terms": {
"field": "orders.id",
"size": 40,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "orders.price"
}
}
}
}
}
}
},
"stored_fields": [
"*"
],
"script_fields": {},
"docvalue_fields": [],
"query": {
"bool": {
"must": [
{
"match_all": {}
}
],
"filter": [],
"should": [],
"must_not": []
}
}
}
And this is the response, which confirms that the orders.price value is being approximated:
{
"took": 9,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 124,
"max_score": 0,
"hits": []
},
"aggregations": {
"3": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 122,
"buckets": [
{
"2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"1": {
"value": 0.003690590019687079
},
"key": 213481,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213482,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213483,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213484,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213485,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213486,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213487,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213488,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213489,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213490,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213491,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213492,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213493,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213494,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213495,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213496,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213497,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213498,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213499,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213500,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213501,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213502,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213503,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213504,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213505,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213506,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213507,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213508,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213509,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213510,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213511,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213512,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213513,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213514,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213515,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213516,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213517,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213518,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213519,
"doc_count": 1
},
{
"1": {
"value": 0.003690590019687079
},
"key": 213520,
"doc_count": 1
}
]
},
"key": "IOST/ETH",
"doc_count": 1
},
{
"2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"1": {
"value": 0.0003004000000146334
},
"key": 213321,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213322,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213323,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213324,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213325,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213326,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213327,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213328,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213329,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213330,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213331,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213332,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213333,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213334,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213335,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213336,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213337,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213338,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213339,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213340,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213341,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213342,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213343,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213344,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213345,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213346,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213347,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213348,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213349,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213350,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213351,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213352,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213353,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213354,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213355,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213356,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213357,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213358,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213359,
"doc_count": 1
},
{
"1": {
"value": 0.0003004000000146334
},
"key": 213360,
"doc_count": 1
}
]
},
"key": "IOST/BTC",
"doc_count": 1
}
]
}
},
"status": 200
And the Discover tab confirms that my data is there and that the right price should be retrieved from orders.price.
What is stranger: when I flatten my data and stop using an array of nested orders, the same graph works. Am I doing anything wrong while building my graph? I need to fix this, because my source data comes in that structure: a nested array of orders for each label.
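For reference, each document presumably has a shape roughly like this (a sketch inferred from the query and the mapping below; the values are illustrative):
{
"label": "IOST/ETH",
"orders": [
{ "id": 213481, "label": "IOST/ETH", "price": 0.0036906 },
{ "id": 213482, "label": "IOST/ETH", "price": 0.0036906 }
]
}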
The orders field has to be mapped as a nested type for you to get accurate results.
"mappings": {
"orderbook": {
"properties": {
"exchange": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
},
"label": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"length": {
"type": "long"
},
"orders": {
"type": "nested",
"properties": {
"id": {
"type": "long"
},
"label": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"price": {
"type": "float"
}
}
},
"timestamp": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
In Kibana you can then use Vega, like this:
{
"$schema": "https://vega.github.io/schema/vega-lite/v2.json",
"title": "Order Totals vs Order Ids",
"data": {
"url": {
"index": "orderbookindex",
"body": {
"size": 0,
"aggs": {
"Orders": {
"nested": {"path": "orders"},
"aggs": {
"Order_Ids": {
"terms": {"field": "orders.id"},
"aggs": {
"Order_Totals": {
"sum": {"field": "orders.price"}
}
}
}
}
}
}
}
},
"format": {
"property": "aggregations.Orders.Order_Ids.buckets"
}
},
"mark": "bar",
"encoding": {
"x": {
"bin": false,
"field": "key",
"type": "Nominal",
"axis": {"title": "Order Ids"}
},
"y": {
"field": "Order_Totals.value",
"type": "quantitative",
"axis": {"title": "Order Totals"}
}
}
}
I haven't tried the other layer of grouping, but you would need something like this: https://bl.ocks.org/domoritz/f5abc519dd990bfcbc3f20f634658364
There is also a plugin available for handling nested aggregations in Kibana for you to try
https://github.com/ppadovani/KibanaNestedSupportPlugin
[Solution output: the asker's chart rendered from sramalingam24's Vega code above]

Elasticsearch geohashes: return full details, not just doc_count

I am working on a property aggregation website that will store hundreds of thousands of properties. To map areas I am using Elasticsearch's geohashing to reduce the number of matches returned for a given zoom level.
The code to generate the hashes is as follows:
GET _search
{
"from": 0,
"size": 0,
"query": {
"match_all": {}
},
"filter": {
"and": [{
"range": {
"property.price": {
"lte": 1000000000
}
}
}, {
"geo_bounding_box": {
"property.location": {
"top_left": {
"lat": 42.88679,
"lon": -73.5081419
},
"bottom_right": {
"lat": 41.2390897,
"lon": -69.9279921
}
}
}
}, {
"term": {
"property.rental": false
}
}, {
"term": {
"property.country": "US"
}
}]
},
"sort": [{
"property.price": "asc"
}],
"facets": {
"stat1": {
"statistical": {
"field": "price"
}
}
},
"aggs": {
"geohash": {
"filter": {
"geo_bounding_box": {
"property.location": {
"top_left": {
"lat": 42.88679,
"lon": -73.5081419
},
"bottom_right": {
"lat": 41.2390897,
"lon": -69.9279921
}
}
}
},
"aggregations": {
"locations": {
"geohash_grid": {
"field": "location",
"precision": 8
}
}
}
}
}
}
The resulting JSON comes back with the number of matches per geohash and is working, except that there doesn't seem to be any way to include the property details in the result, or an ID to get back to the property when it is clicked.
The result is as follows:
{
"took": 94,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 13,
"max_score": 0,
"hits": []
},
"facets": {
"stat1": {
"_type": "statistical",
"count": 50,
"total": 90640800,
"min": 1,
"max": 19500000,
"mean": 1812816,
"sum_of_squares": 628677324820002,
"variance": 9287244646544.04,
"std_deviation": 3047498.0962330457
}
},
"aggregations": {
"geohash": {
"doc_count": 16,
"locations": {
"buckets": [
{
"key": "drt05n43",
"doc_count": 2
},
{
"key": "drt0v0q8",
"doc_count": 1
},
{
"key": "drt0sr3e",
"doc_count": 1
},
{
"key": "drt0kgr8",
"doc_count": 1
},
{
"key": "drt07sdk",
"doc_count": 1
},
{
"key": "drt075vd",
"doc_count": 1
},
{
"key": "drt05n19",
"doc_count": 1
},
{
"key": "drt05jgv",
"doc_count": 1
},
{
"key": "drsbrgvh",
"doc_count": 1
},
{
"key": "drmpgznd",
"doc_count": 1
},
{
"key": "drmpft6c",
"doc_count": 1
},
{
"key": "drmpe6bg",
"doc_count": 1
},
{
"key": "drmp7ybz",
"doc_count": 1
},
{
"key": "drmgkj77",
"doc_count": 1
},
{
"key": "drkzzj3d",
"doc_count": 1
}
]
}
}
}
}
Any help in adding additional property data to the result would be HUGELY appreciated.
Thanks :)
P.S. Sorry about the code indentation; SO's code detection with JSON is a little weird.
You could probably use nested aggregations:
http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/nested-aggregation.html
Using elastic.js from http://www.fullscale.co/elasticjs/
var agg = ejs.GeoHashGridAggregation('Geohash-Grid')
.field('Geohash')
.precision(precision)
.aggregation(
ejs.TermsAggregation('HouseType').field('HouseType')
)
.aggregation(
ejs.TermsAggregation('HouseColor').field('HouseColor')
);
But that is just one example. The output will then have nested aggregation counts for documents that match each geohash.
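For reference, the raw query DSL equivalent of that elastic.js snippet would look roughly like this (a sketch; HouseType and HouseColor are just the example's field names, while location and precision 8 come from the question's aggregation):
{
"size": 0,
"aggs": {
"Geohash-Grid": {
"geohash_grid": {
"field": "location",
"precision": 8
},
"aggs": {
"HouseType": { "terms": { "field": "HouseType" } },
"HouseColor": { "terms": { "field": "HouseColor" } }
}
}
}
}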

Returning term count for a single document using the terms facet in Elasticsearch

Say I have the following search query...
POST /topics/_search
{
"fields": [
"topic_attachment",
"topic_replies",
"topic_status"
],
"query" : {
"filtered" : {
"query" : {
"term" : {
"_id" : "5478"
}
}
}
},
"facets": {
"text": {
"terms": {
"field": "text",
"size": 10,
"order": "count"
}
}
}
}
The result of this search is the following.
{
"took": 93,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "topics",
"_type": "full-topic",
"_id": "5478",
"_score": 1,
"fields": {
"topic_replies": 1141,
"topic_status": 0,
"topic_attachment": false
}
}
]
},
"facets": {
"text": {
"_type": "terms",
"missing": 0,
"total": 8058,
"other": 8048,
"terms": [
{
"term": "ω",
"count": 1
},
{
"term": "œyouâ",
"count": 1
},
{
"term": "œyou",
"count": 1
},
{
"term": "œwhisperedâ",
"count": 1
},
{
"term": "œwalt",
"count": 1
},
{
"term": "œunderstandingâ",
"count": 1
},
{
"term": "œtieâ",
"count": 1
},
{
"term": "œthe",
"count": 1
},
{
"term": "œpersonally",
"count": 1
},
{
"term": "œnappiesâ",
"count": 1
}
]
}
}
}
Each term has a count of exactly 1. Why is this? I know the text of this document contains more than one occurrence of some of these terms. Is this because the term count only increments once per document? If so, how do I count a term more than once within a single document?
That's the document count, not the term frequency. Luckily, with the new aggregations module (the replacement for facets introduced in 1.0.Beta2), count has been renamed to doc_count to clarify what it is.
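For comparison, the equivalent terms aggregation request would look roughly like this (a sketch; doc_count is still a document count, so a single matching document yields 1 per term):
POST /topics/_search
{
"size": 0,
"query": {
"term": { "_id": "5478" }
},
"aggs": {
"text": {
"terms": { "field": "text", "size": 10 }
}
}
}
To read how often a term occurs within one document, a bucket aggregation is the wrong tool; the term vectors API exposes per-document term frequencies.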