How to filter on nested aggregations - json

I'm trying to apply different filters for different nested aggregations. Is there a way to do this?
"filter": {
"nested": {
"path": "hierarchy.productGroup",
"filter": {
"terms": {
"hierarchy.productGroup.name": ["iPhone"]
}
}
}
},
"aggs": {
"category": {
"nested": {
"path": "hierarchy.productGroup"
},
"aggs": {
"category": {
"terms": {
"field": "hierarchy.productGroup.name"
}
}
}
},
"color": {
"filter": {
"nested": {
"path": "hierarchy.productGroup",
"filter": {
"terms": {
"hierarchy.productGroup.name": ["iPhone"]
}
}
}
},
"nested": {
"path": "specs.measurementsProduct.colorName"
},
"aggs": {
"color": {
"terms": {
"field": "specs.measurementsProduct.colorName.name"
}
}
}
}
}
When I run this query I get the following error:
Error: Parse Failure [Found two aggregation type definitions in [color]: [filter] and [nested]]];
I would like to make my color aggregation dependent on the category filter.

You forgot a key starting and ending.
Try with this json:
{
"filter": {
"nested": {
"path": "hierarchy.productGroup",
"filter": {
"terms": {
"hierarchy.productGroup.name": ["iPhone"]
}
}
}
},
"aggs": {
"category": {
"nested": {
"path": "hierarchy.productGroup"
},
"aggs": {
"category": {
"terms": {
"field": "hierarchy.productGroup.name"
}
}
}
},
"color": {
"filter": {
"nested": {
"path": "hierarchy.productGroup",
"filter": {
"terms": {
"hierarchy.productGroup.name": ["iPhone"]
}
}
}
},
"nested": {
"path": "specs.measurementsProduct.colorName"
},
"aggs": {
"color": {
"terms": {
"field": "specs.measurementsProduct.colorName.name"
}
}
}
}
}
}

Related

Elasticsearch: get the max and min value form a specific object of a maps of objects

I has this mapping for the índex on elastic, i was try to get the max value of a day for a specific sensor, but my query get the value of all the sensors.
"sensor": {
"type": "nested",
"properties": {
"type": {
"type": "integer"
},
"name": {
"type": "keyword"
},
"number": {
"type": "integer"
},
"values": {
"type": "nested",
"properties": {
"type": {
"type": "text"
},
"value": {
"type": "float"
},
"unit": {
"type": "text"
},
"Lmin": {
"type": "float"
},
"Lmax": {
"type": "float"
}
}
}
}
An this is the map of objects,
I need only the max and the min value of the las day from the sensor number 13, i try it but ever i get the max of all sensors.
{"query": {
"nested": {
"path": "sensor",
"query": {
"nested": {
"path": "sensor.values",
"query": {
"bool": {
"must": [
{
"match": {
"sensor.values.type": "TEMPERATURE"
}
}
]
}
}
}
}
}
},
"aggs": {
"agg_by_type": {
"nested": {
"path": "sensor.values"
},
"aggs": {
"max_value": {
"max": {
"field": "sensor.values.value"
}
}
}
}
}
}
I'm new in elasticsearch, can someone help whit this please?, thanks.
You need to also add the nested filter in the aggregation part to only aggregate the relevant nested documents, i.e. the ones related to TEMPERATURE, like this:
{
"query": {
"nested": {
"path": "sensor",
"query": {
"nested": {
"path": "sensor.values",
"query": {
"bool": {
"must": [
{
"match": {
"sensor.values.type": "TEMPERATURE"
}
}
]
}
}
}
}
}
},
"aggs": {
"agg_by_type": {
"nested": {
"path": "sensor.values"
},
"aggs": {
"temperature_only": {
"filter": {
"match": {
"sensor.values.type": "TEMPERATURE"
}
},
"aggs": {
"max_value": {
"max": {
"field": "sensor.values.value"
}
}
}
}
}
}
}
}
After a few days of work in another projects, i back to try make this query and finally i can do it, now i can get the data per day, hours, and by type of sensor, thanks for your help.
This is my code if somebody are trying the same.
{
"query": {
"bool": {
"filter": [
{
"bool": {
"must": [
{
"match": {
"mac": "34:ab:95:8f:84:c0"
}
}
],
"filter": [
{
"range": {
"timestamp": {
"gte": "2021-08-10",
"lt": "2021-08-25"
}
}
}
]
}
},
{
"nested": {
"path": "sensor",
"query": {
"bool": {
"must": [
{
"match": {
"sensor.type": 1
}
}
],
"should": [
{
"nested": {
"path": "sensor.values",
"query": {
"bool": {
"must": [
{
"match": {
"sensor.values.type": "HUMIDITY"
}
},
{
"match": {
"sensor.values.type": "TEMPERATURE"
}
}
]
}
}
}
}
]
}
}
}
}
]
}
},
"aggs": {
"values_per_day": {
"date_histogram": {
"field": "timestamp",
"fixed_interval": "1d",
"format" : "yyyy-MM-dd HH:mm:ss"
},
"aggs": {
"agg_type": {
"nested": {
"path": "sensor"
},
"aggs": {
"type_only": {
"filter": {
"match": {
"sensor.type": 1
}
},
"aggs": {
"agg_by_type": {
"nested": {
"path": "sensor.values"
},
"aggs": {
"temperature_only": {
"filter": {
"match": {
"sensor.values.type": "TEMPERATURE"
}
},
"aggs": {
"max_value": {
"max": {
"field": "sensor.values.value"
}
},
"min_value": {
"min": {
"field": "sensor.values.value"
}
}
}
},
"humedity_only": {
"filter": {
"match": {
"sensor.values.type": "HUMIDITY"
}
},
"aggs": {
"max_value": {
"max": {
"field": "sensor.values.value"
}
},
"min_value": {
"min": {
"field": "sensor.values.value"
}
}
}
}
}
}
}
}
}
}
}
}
},
"from": 0,
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}

Python generate json: bool malformed query, expected END_OBJECT but found FIELD_NAME

i have a probleme with elastic search when i request it with the following json, i have this error: [bool] malformed query, expected [END_OBJECT] but found [FIELD_NAME]
i've tried to look on many site but none of them give me a response
{
"query":{
"bool":{
"must":[
{
"match":{
"group_issuer_name":"bnp"
}
},
{
"match":{
"asset_country":"France"
}
}
]
},
"aggs":{
"by_ptf_name":{
"terms":{
"field":"ptf_name.keyword"
},
"aggs":{
"by_ptf_id":{
"terms":{
"field":"ptf_id.keyword"
},
"aggs":{
"sum_of_asset_exposure":{
"sum":{
"field":"asset_exposure"
}
},
"min_of_ptf_total_asset":{
"min":{
"field":"ptf_total_asset"
}
}
}
}
}
}
}
}
}
You are missing }. The query part must be closed and then the aggregation part should start.
The structure should be
{
"query": {},
"aggregation": {}
}
Modify your query as -
{
"query": {
"bool": {
"must": [
{
"match": {
"group_issuer_name": "bnp"
}
},
{
"match": {
"asset_country": "France"
}
}
]
}
}, // note this
"aggs": {
"by_ptf_name": {
"terms": {
"field": "ptf_name.keyword"
},
"aggs": {
"by_ptf_id": {
"terms": {
"field": "ptf_id.keyword"
},
"aggs": {
"sum_of_asset_exposure": {
"sum": {
"field": "asset_exposure"
}
},
"min_of_ptf_total_asset": {
"min": {
"field": "ptf_total_asset"
}
}
}
}
}
}
}
}

ElasticSearch: Aggregation with order by top_hits differs in doc count

My problem is that having a max score aggregation that sorts by _score, in combination with a terms aggregation, is showing a different doc_count.
Here is the aggregation block that I want, but giving a lower doc_count for certain results. Again, it always gives a lower value; never higher.
"aggs": {
"group_by_field": {
"terms": {
"field": "field4",
"order": {
"max_score": "desc"
}
},
"aggs": {
"max_score": {
"max": {
"script": "_score"
}
}
}
}
}
Here is the aggregation block that I do not want, but is showing the correct doc_count for all results.
"aggs": {
"group_by_target": {
"terms": {
"field": "target"
}
}
}
For those interested, here is my full body query:
{
"index": "myIndex",
"type": "myType",
"body": {
"query": {
"bool": {
"filter": {
0: {
"term": {
"term1": false,
}
}
},
"must": {
"bool": {
"should": {
0: {
"match_phrase": {
"phrase1": "example",
}
}
},
"minimum_should_match": 1,
}
}
}
},
"size": 0,
"_source": {
0: "field1",
1: "field2",
2: "field3",
3: "field4",
},
"aggs": {
"group_by_field": {
"terms": {
"field": "field4",
"size": 25,
"order": {
"max_score": "desc",
}
},
"aggs": {
"max_score": {
"max": {
"script": "_score",
}
},
"my_top_agg": {
"top_hits": {
"size": 3,
"_source": {
0: "field1",
1: "field2",
2: "field3",
3: "field4",
}
}
}
}
},
"my_total": {
"cardinality": {
"field": "field4",
}
}
}
}
}
I have test both queries and each time it is showing a the same incorrect doc_count when having the order aggregation. What is causing the problem?

Elasticsearch trying to query term and time range

I am trying to find all documents in which the content field contains the word "syria" and have the epoch time be greater than 1465312440000. The following query runs, but does only return the documents that contain word "syria". How do I fix this?(Elasticsearch version 2.2)
{
"query": {
"filtered": {
"query": {
"match": {
"content": "syria"
},
"filter": {
"term": {
"sourceOriginator": "Twitter"
},
"bool": {
"range": {
"epochCollectionDate": {
"gte": 1465312440
}
}
}
}
}
}
}
}
Thank you Guys.
I struggled with this so if someone is looking for how to do this with aggregation as well, i used winlogbeat but it will work with other indexes just change terms and field names.
I tested this with Elastic 7.1.1
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"winlog.event_id": "5156"
}
}
],
"filter": [
{
"term": {
"winlog.provider_name" : "Microsoft-Windows-Security-Auditing"
}
},
{
"range": {
"#timestamp": {
"gt": "now-10d",
"lt": "now"
}
}
}
]
}
},
"aggregations": {
"event_count": {
"value_count": {
"field": "winlog.event_id"
}
},
"group_by_host": {
"terms": {
"field": "host.name",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
},
}
}
Of course it is hard to test without the data, but the filter is wrong. It should be on the same level as the second query. To my opinion the following solution is easier:
{
"query": {
"bool": {
"must": [
{
"match": {
"content": "syria"
}
}
],
"filter": [
{
"term": {
"sourceOriginator": "Twitter"
}
},
{
"range": {
"epochCollectionDate": {
"gte": 1465312440
}
}
}
]
}
}
}
Just to complement #Jettro's solution which will only work on ES 2.0 and later, the following one will work on all versions up to ES 5.
{
"query": {
"filtered": {
"query": {
"match": {
"content": "syria"
}
},
"filter": {
"bool": {
"must": [
{
"term": {
"sourceOriginator": "Twitter"
}
},
{
"range": {
"epochCollectionDate": {
"gte": 1465312440
}
}
}
]
}
}
}
}
}
Note that if you are on ES 2.0 or later, you should really use #Jettro's solution as the filtered query has been deprecated in 2.0.

How to check if a value does not exist in elasticsearch attribute?

I have a json object with system environments. I need to list out that percolator if environment variable does not contain a variable name "JAVA_HOME" and also if JAVA_HOME's value is not present in PATH variable. Is this possible in Elasticsearch?
below is my percolator
PUT /eg/.percolator/2
{
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"match": {
"client.name": "Athena"
}
},
{
"match": {
"client.environment.variable.#name": "JAVA_HOME"
}
}
]
}
}
}
}
}
my doc
GET /eg/message/_percolate
{
"doc": {
"client": {
"name": "Athena",
"environment": {
"variable": [
{
"#name": "JAVA_HOME",
"#value": "/home/vikrant/Linux/Sandra/java"
},
{
"#name": "PATH",
"#value": "/bin:/sbin:/usr/bin:/usr/sbin:/home/vikrant/Linux/FAS611:/home/vikrant/Linux/FAS611/odbc:/home/vikrant/Linux/Sandra/java/bin:/home/vikrant/Linux/Sandra/server/bin:.:/bin:/usr/bin:/sbin:/usr/sbin:/usr/bin/X11:/usr/local/bin:/usr/local/etc:/etc:/usr/etc:.:/databases/ORACLE/bin:/databases/SYBASE/OCS-15_0/bin:/databases/DB2/bin:/usr/odbc/bin"
}
]
}
}
}
}
You need nested mapping first since your docs have nesting. This is the mapping that I first created for your docs to make things work:
{
"nested_exp": {
"properties": {
"client": {
"type": "nested",
"properties": {
"environment": {
"type": "nested",
"properties": {
"variable": {
"type": "nested",
"properties": {
"#name": {
"type": "string"
},
"#value": {
"type": "string"
}
}
}
}
},
"name": {
"type": "string"
}
}
}
}
}
}
Then I changed your provided query to make it work as you expected:
{
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"nested": {
"path": "client",
"query": {
"match": {
"client.name": "Athena"
}
}
}
},
{
"nested": {
"path": "client.environment.variable",
"query": {
"match": {
"client.environment.variable.#name": "JAVA_HOME"
}
}
}
}
]
}
}
}
}
}
Coming to your question, it has two parts.
For the first part, you can use the NOT filter with nested match filter. Something like this:
{
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"nested": {
"path": "client",
"query": {
"match": {
"client.name": "Athena"
}
}
}
},
{
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"not": {
"filter": {
"nested": {
"path": "client.environment.variable",
"query": {
"match": {
"client.environment.variable.#name": "JAVA_HOME"
}
}
}
}
}
}
}
}
]
}
}
}
}
}
I just tried on my local Elasticsearch server and it works like a charm.
The second one seems difficult to implement with Elasticsearch only as what you what to query is something like value of 1 variable should not be present in another variable's value.