ElasticSearch: Aggregation with order by top_hits differs in doc count - json

My problem is that having a max score aggregation that sorts by _score, in combination with a terms aggregation, is showing a different doc_count.
Here is the aggregation block that I want, but giving a lower doc_count for certain results. Again, it always gives a lower value; never higher.
"aggs": {
"group_by_field": {
"terms": {
"field": "field4",
"order": {
"max_score": "desc"
}
},
"aggs": {
"max_score": {
"max": {
"script": "_score"
}
}
}
}
}
Here is the aggregation block that I do not want, but is showing the correct doc_count for all results.
"aggs": {
"group_by_target": {
"terms": {
"field": "target"
}
}
}
For those interested, here is my full body query:
{
"index": "myIndex",
"type": "myType",
"body": {
"query": {
"bool": {
"filter": {
0: {
"term": {
"term1": false,
}
}
},
"must": {
"bool": {
"should": {
0: {
"match_phrase": {
"phrase1": "example",
}
}
},
"minimum_should_match": 1,
}
}
}
},
"size": 0,
"_source": {
0: "field1",
1: "field2",
2: "field3",
3: "field4",
},
"aggs": {
"group_by_field": {
"terms": {
"field": "field4",
"size": 25,
"order": {
"max_score": "desc",
}
},
"aggs": {
"max_score": {
"max": {
"script": "_score",
}
},
"my_top_agg": {
"top_hits": {
"size": 3,
"_source": {
0: "field1",
1: "field2",
2: "field3",
3: "field4",
}
}
}
}
},
"my_total": {
"cardinality": {
"field": "field4",
}
}
}
}
}
I have test both queries and each time it is showing a the same incorrect doc_count when having the order aggregation. What is causing the problem?

Related

Elasticsearch: get the max and min value form a specific object of a maps of objects

I has this mapping for the índex on elastic, i was try to get the max value of a day for a specific sensor, but my query get the value of all the sensors.
"sensor": {
"type": "nested",
"properties": {
"type": {
"type": "integer"
},
"name": {
"type": "keyword"
},
"number": {
"type": "integer"
},
"values": {
"type": "nested",
"properties": {
"type": {
"type": "text"
},
"value": {
"type": "float"
},
"unit": {
"type": "text"
},
"Lmin": {
"type": "float"
},
"Lmax": {
"type": "float"
}
}
}
}
An this is the map of objects,
I need only the max and the min value of the las day from the sensor number 13, i try it but ever i get the max of all sensors.
{"query": {
"nested": {
"path": "sensor",
"query": {
"nested": {
"path": "sensor.values",
"query": {
"bool": {
"must": [
{
"match": {
"sensor.values.type": "TEMPERATURE"
}
}
]
}
}
}
}
}
},
"aggs": {
"agg_by_type": {
"nested": {
"path": "sensor.values"
},
"aggs": {
"max_value": {
"max": {
"field": "sensor.values.value"
}
}
}
}
}
}
I'm new in elasticsearch, can someone help whit this please?, thanks.
You need to also add the nested filter in the aggregation part to only aggregate the relevant nested documents, i.e. the ones related to TEMPERATURE, like this:
{
"query": {
"nested": {
"path": "sensor",
"query": {
"nested": {
"path": "sensor.values",
"query": {
"bool": {
"must": [
{
"match": {
"sensor.values.type": "TEMPERATURE"
}
}
]
}
}
}
}
}
},
"aggs": {
"agg_by_type": {
"nested": {
"path": "sensor.values"
},
"aggs": {
"temperature_only": {
"filter": {
"match": {
"sensor.values.type": "TEMPERATURE"
}
},
"aggs": {
"max_value": {
"max": {
"field": "sensor.values.value"
}
}
}
}
}
}
}
}
After a few days of work in another projects, i back to try make this query and finally i can do it, now i can get the data per day, hours, and by type of sensor, thanks for your help.
This is my code if somebody are trying the same.
{
"query": {
"bool": {
"filter": [
{
"bool": {
"must": [
{
"match": {
"mac": "34:ab:95:8f:84:c0"
}
}
],
"filter": [
{
"range": {
"timestamp": {
"gte": "2021-08-10",
"lt": "2021-08-25"
}
}
}
]
}
},
{
"nested": {
"path": "sensor",
"query": {
"bool": {
"must": [
{
"match": {
"sensor.type": 1
}
}
],
"should": [
{
"nested": {
"path": "sensor.values",
"query": {
"bool": {
"must": [
{
"match": {
"sensor.values.type": "HUMIDITY"
}
},
{
"match": {
"sensor.values.type": "TEMPERATURE"
}
}
]
}
}
}
}
]
}
}
}
}
]
}
},
"aggs": {
"values_per_day": {
"date_histogram": {
"field": "timestamp",
"fixed_interval": "1d",
"format" : "yyyy-MM-dd HH:mm:ss"
},
"aggs": {
"agg_type": {
"nested": {
"path": "sensor"
},
"aggs": {
"type_only": {
"filter": {
"match": {
"sensor.type": 1
}
},
"aggs": {
"agg_by_type": {
"nested": {
"path": "sensor.values"
},
"aggs": {
"temperature_only": {
"filter": {
"match": {
"sensor.values.type": "TEMPERATURE"
}
},
"aggs": {
"max_value": {
"max": {
"field": "sensor.values.value"
}
},
"min_value": {
"min": {
"field": "sensor.values.value"
}
}
}
},
"humedity_only": {
"filter": {
"match": {
"sensor.values.type": "HUMIDITY"
}
},
"aggs": {
"max_value": {
"max": {
"field": "sensor.values.value"
}
},
"min_value": {
"min": {
"field": "sensor.values.value"
}
}
}
}
}
}
}
}
}
}
}
}
},
"from": 0,
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}

ElasticSearch - Match all nested variations in a document

I have several products with variations like the one below
{
"title": "100% Cotton Unstitched Suit For Men",
"slug": "100-cotton-unstitched-suit-for-men",
"price": 200,
"sale_price": 0,
"vendor_id": 32,
"featured": 0,
"viewed": 20,
"stock": 4,
"sku": "XXX-B",
"rating": 0,
"active": 1,
"vendor_name": "vendor_name",
"category": [
"men_fashion",
"traditional_clothing",
"unstitched_fabric"
],
"image": "imagename.jpg",
"variations": [
{
"variation_id": "34",
"stock": 5,
"price": 200,
"variation_image": "",
"sku": "XXX-C",
"size": "m",
"color": "red"
},
{
"variation_id": "35",
"stock": 5,
"price": 200,
"variation_image": "",
"sku": "XXX-D",
"size": "l",
"color": "red"
}
]
}
I am looking for a query that would have all of the below parameters
Get all products in a certain category
Get all products that are black
Get sizes l and m
My current Query:
{
"size": 15,
"from": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"match": {
"category": "women_fashion"
}
},
{
"nested": {
"path": "variations",
"query": {
"bool": {
"must": [{
"match": {
"variations.color": "red"
}
},
{
"match": {
"variations.size": "l"
}
},
{
"match": {
"variations.size": "m"
}
}
]
}
}
}
}
]
}
}
}
}
}
It works fine if i only search for 1 size. But once i search for 2 sizes it gives no records. My guess is that it looks for all 3 parameters in every nested variation, which obviously it cant find. How would i modify the query to search for
size: m, color: black
size: l, color: black
I have also tried using a nested filter, but the issue with this is that it works like "SHOULD" query while i am looking for a "MUST" query. ie. it shows all the products have large variations while i just want to show products that are large and red.
My Second Query:
{
"size": 15,
"from": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"match": {
"category": "women_fashion"
}
},
{
"nested": {
"path": "variations",
"query": {
"bool":{
"filter": [
{
"term": {
"variations.color": "red"
}
},
{
"term": {
"variations.size": "l"
}
}
]
}
}
}
}
]
}
}
}
}
}
You can put the size variations into a clause of it's own so at least one of the sizes will match
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"match": {
"category": "women_fashion"
}
},
{
"nested": {
"path": "variations",
"query": {
"bool": {
"must": [
{
"match": {
"variations.color": "red"
}
},
{
"query": {
"bool": {
"should": [
{
"match": {
"variations.size": "l"
}
},
{
"match": {
"variations.size": "m"
}
}
]
}
}
}
]
}
}
}
}
]
}
}
}
}
}

ElasticSearch combine conditional statements 'and' with 'or'

Purpose of the query below is to return n results for each criteria i.e. it must match partnersites 16 and match 'venueTown' or partnersites 16 and match 'venueName'. Currently it returns the results where each field must contain the same string. In my case fields: name, venueName and venueTown must contain manchester, but I want separate results for each pair {(partnersites, venueName), (partnersites, venueTown)}.
{
"size": 0,
"_source": ["groupedName", "groupedDisplayName", "groupedUrl", "eventCode", "venueName", "venueTown", "venueId", "media"],
"query": {
"bool": {
"must": [{
"match": {
"partnersites": {
"query": "16"
}
}
}, {
"match": {
"name": "manchester"
}
}, {
"match": {
"venueName": "manchester"
}
}, {
"match": {
"venueTown": "manchester"
}
}, {
"match": {
"venueTown": "manchester"
}
}]
}
},
"aggs": {
"distinct_names": {
"terms": {
"field": "name.keyword",
"size": 10
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"size": 1,
"_source": ["groupedName", "groupedDisplayName", "groupedUrl", "eventCode", "venueName", "venueTown", "venueId", "media"]
}
}
}
},
"distinct_venues": {
"terms": {
"field": "venueName.keyword",
"size": 10
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"size": 1,
"_source": ["groupedName", "groupedDisplayName", "groupedUrl", "eventCode", "venueName", "venueTown", "venueId", "media"]
}
}
}
},
"distinct_towns": {
"terms": {
"field": "venueTown.keyword",
"size": 10
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"size": 1,
"_source": ["groupedName", "groupedDisplayName", "groupedUrl", "eventCode", "venueName", "venueTown", "venueId", "media"]
}
}
}
}
}
}
Try this:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"partnersites": "16"
}
},
{
"match_phrase_prefix": {
"name": "mancheste"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"partnersites": "16"
}
},
{
"match_phrase_prefix": {
"venueName": "mancheste"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"partnersites": "16"
}
},
{
"match_phrase_prefix": {
"venueTown": "mancheste"
}
}
]
}
}
]
}
}
]
}
},
"aggs": {
"distinct_names": {
"terms": {
"field": "groupedName.keyword",
"size": 30
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"size": 1,
"_source": [
"groupedName",
"groupedDisplayName",
"groupedUrl",
"eventCode",
"venueName",
"venueTown",
"venueId",
"media"
]
}
}
}
}
}
}

Elasticsearch trying to query term and time range

I am trying to find all documents in which the content field contains the word "syria" and have the epoch time be greater than 1465312440000. The following query runs, but does only return the documents that contain word "syria". How do I fix this?(Elasticsearch version 2.2)
{
"query": {
"filtered": {
"query": {
"match": {
"content": "syria"
},
"filter": {
"term": {
"sourceOriginator": "Twitter"
},
"bool": {
"range": {
"epochCollectionDate": {
"gte": 1465312440
}
}
}
}
}
}
}
}
Thank you Guys.
I struggled with this so if someone is looking for how to do this with aggregation as well, i used winlogbeat but it will work with other indexes just change terms and field names.
I tested this with Elastic 7.1.1
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"winlog.event_id": "5156"
}
}
],
"filter": [
{
"term": {
"winlog.provider_name" : "Microsoft-Windows-Security-Auditing"
}
},
{
"range": {
"#timestamp": {
"gt": "now-10d",
"lt": "now"
}
}
}
]
}
},
"aggregations": {
"event_count": {
"value_count": {
"field": "winlog.event_id"
}
},
"group_by_host": {
"terms": {
"field": "host.name",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
},
}
}
Of course it is hard to test without the data, but the filter is wrong. It should be on the same level as the second query. To my opinion the following solution is easier:
{
"query": {
"bool": {
"must": [
{
"match": {
"content": "syria"
}
}
],
"filter": [
{
"term": {
"sourceOriginator": "Twitter"
}
},
{
"range": {
"epochCollectionDate": {
"gte": 1465312440
}
}
}
]
}
}
}
Just to complement #Jettro's solution which will only work on ES 2.0 and later, the following one will work on all versions up to ES 5.
{
"query": {
"filtered": {
"query": {
"match": {
"content": "syria"
}
},
"filter": {
"bool": {
"must": [
{
"term": {
"sourceOriginator": "Twitter"
}
},
{
"range": {
"epochCollectionDate": {
"gte": 1465312440
}
}
}
]
}
}
}
}
}
Note that if you are on ES 2.0 or later, you should really use #Jettro's solution as the filtered query has been deprecated in 2.0.

How to filter on nested aggregations

I'm trying to apply different filters for different nested aggregations. Is there a way to do this?
"filter": {
"nested": {
"path": "hierarchy.productGroup",
"filter": {
"terms": {
"hierarchy.productGroup.name": ["iPhone"]
}
}
}
},
"aggs": {
"category": {
"nested": {
"path": "hierarchy.productGroup"
},
"aggs": {
"category": {
"terms": {
"field": "hierarchy.productGroup.name"
}
}
}
},
"color": {
"filter": {
"nested": {
"path": "hierarchy.productGroup",
"filter": {
"terms": {
"hierarchy.productGroup.name": ["iPhone"]
}
}
}
},
"nested": {
"path": "specs.measurementsProduct.colorName"
},
"aggs": {
"color": {
"terms": {
"field": "specs.measurementsProduct.colorName.name"
}
}
}
}
}
When I run this query I get the following error:
Error: Parse Failure [Found two aggregation type definitions in [color]: [filter] and [nested]]];
I would like to make my color aggregation dependent on the category filter.
You forgot a key starting and ending.
Try with this json:
{
"filter": {
"nested": {
"path": "hierarchy.productGroup",
"filter": {
"terms": {
"hierarchy.productGroup.name": ["iPhone"]
}
}
}
},
"aggs": {
"category": {
"nested": {
"path": "hierarchy.productGroup"
},
"aggs": {
"category": {
"terms": {
"field": "hierarchy.productGroup.name"
}
}
}
},
"color": {
"filter": {
"nested": {
"path": "hierarchy.productGroup",
"filter": {
"terms": {
"hierarchy.productGroup.name": ["iPhone"]
}
}
}
},
"nested": {
"path": "specs.measurementsProduct.colorName"
},
"aggs": {
"color": {
"terms": {
"field": "specs.measurementsProduct.colorName.name"
}
}
}
}
}
}