Elasticsearch: Aggregation of null fields in a facet bucket - json

I'm trying to implement facets with a date range aggregation on the current version of Amazon Elasticsearch Service (version 7.10). The field I want the article documents to be grouped by is publishedAt, which is a date. I want one bucket where publishedAt is in the past, meaning the article is published, one where it is in the future, meaning it is scheduled, and one for all articles without a publishedAt, which are the drafts. published and scheduled work as they should. For the drafts I can't apply a filter or date range because the field is null, so I want to use the "Missing Values" feature: documents with publishedAt = null should be treated as if they had the date given in the missing parameter. Unfortunately it has no effect on the results, even if I change the missing date so that it falls into the published or scheduled range.
My request:
GET https://es.amazonaws.com/articles/_search
{
  "size": 10,
  "aggs": {
    "facet_bucket_all": {
      "aggs": {
        "channel": {
          "terms": {
            "field": "channel.keyword",
            "size": 5
          }
        },
        "brand": {
          "terms": {
            "field": "brand.keyword",
            "size": 5
          }
        },
        "articleStatus": {
          "date_range": {
            "field": "publishedAt",
            "format": "dd-MM-yyyy",
            "missing": "01-07-1886",
            "ranges": [
              { "key": "published", "from": "now-99y/M", "to": "now/M" },
              { "key": "scheduled", "from": "now+1s/M", "to": "now+99y/M" },
              { "key": "drafts", "from": "01-01-1886", "to": "31-12-1886" }
            ]
          }
        }
      },
      "filter": {
        "bool": {
          "must": []
        }
      }
    },
    "facet_bucket_publishedAt": {
      "aggs": {},
      "filter": {
        "bool": {
          "must": []
        }
      }
    },
    "facet_bucket_author": {
      "aggs": {
        "author": {
          "terms": {
            "field": "author",
            "size": 10
          }
        }
      },
      "filter": {
        "bool": {
          "must": []
        }
      }
    }
  },
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "publishedAt": {
              "lte": "2021-08-09T09:52:19.975Z"
            }
          }
        }
      ]
    }
  },
  "from": 0,
  "sort": [
    {
      "_score": "desc"
    }
  ]
}
And in the result, the drafts are empty:
"articleStatus": {
"buckets": [
{
"key": "published",
"from": -1.496448E12,
"from_as_string": "01-08-1922",
"to": 1.627776E12,
"to_as_string": "01-08-2021",
"doc_count": 47920
},
{
"key": "scheduled",
"from": 1.627776E12,
"from_as_string": "01-08-2021",
"to": 4.7519136E12,
"to_as_string": "01-08-2120",
"doc_count": 3
},
{
"key": "drafts",
"from": 1.67252256E13,
"from_as_string": "01-01-1886",
"to": 1.67566752E13,
"to_as_string": "31-12-1886",
"doc_count": 0
}
]
}

SearchKit added this part to the query:
"query": {
"bool": {
"filter": [
{
"range": {
"publishedAt": {
"lte": "2021-08-09T09:52:19.975Z"
}
}
}
]
}
}
This had to be removed, because it filters out the null values before the missing setting can do its job.
Now I get the correct result:
"articleStatus": {
"buckets": [
{
"key": "drafts",
"from": -2.650752E12,
"from_as_string": "01-01-1886",
"to": -2.6193024E12,
"to_as_string": "31-12-1886",
"doc_count": 7
},
{
"key": "published",
"from": -1.496448E12,
"from_as_string": "01-08-1922",
"to": 1.627776E12,
"to_as_string": "01-08-2021",
"doc_count": 47920
},
{
"key": "scheduled",
"from": 1.627776E12,
"from_as_string": "01-08-2021",
"to": 4.7519136E12,
"to_as_string": "01-08-2120",
"doc_count": 3
}
]
}
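If the publishedAt restriction still needs to apply to the returned hits (but not to the facet counts), one option might be to move it from the query into a post_filter: a post_filter only narrows the hit list and runs after the aggregations have been computed, so the missing value can still take effect. A minimal sketch, with the aggregations shortened to the articleStatus part only and not tested against SearchKit:
GET https://es.amazonaws.com/articles/_search
{
  "size": 10,
  "aggs": {
    "articleStatus": {
      "date_range": {
        "field": "publishedAt",
        "format": "dd-MM-yyyy",
        "missing": "01-07-1886",
        "ranges": [
          { "key": "published", "from": "now-99y/M", "to": "now/M" },
          { "key": "scheduled", "from": "now+1s/M", "to": "now+99y/M" },
          { "key": "drafts", "from": "01-01-1886", "to": "31-12-1886" }
        ]
      }
    }
  },
  "post_filter": {
    "range": {
      "publishedAt": {
        "lte": "2021-08-09T09:52:19.975Z"
      }
    }
  }
}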

Related

How to round up values in Elasticsearch query?

I am trying to set up an automated Kibana alert that takes in data from a defined extraction query. I get all the information I want; however, the response returns the values unrounded (up to 12 decimal places). What do I need to specify, and where in the extraction query, to round this value?
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"match_all": {
"boost": 1
}
},
{
"range": {
"#timestamp": {
"from": "{{period_end}}||-24h",
"to": "{{period_end}}",
"include_lower": true,
"include_upper": true,
"format": "epoch_millis",
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
},
"_source": {
"includes": [],
"excludes": []
},
"stored_fields": "*",
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
},
{
"field": "timestamp",
"format": "date_time"
}
],
"script_fields": {},
"aggregations": {
"2": {
"terms": {
"field": "tag.country.keyword",
"size": 20,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"1": "desc"
},
{
"_key": "asc"
}
]
},
"aggregations": {
"1": {
"avg": {
"field": "my_field"
}
}
}
}
}
}
Here, I'm talking about the "avg" aggregation at the very bottom. As I understand, right below the "field" key, I should specify a "script" key, defining the rounding function that I want to use. Can anybody help me come up with the correct function?
I'm not sure what to specify in the "script" key to make the rounding function work.
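For what it's worth, a script placed inside the avg itself would round each document's value before averaging; to round the computed average, a bucket_script pipeline aggregation next to the avg might be closer to what you want. A hedged sketch of the sub-aggregations under the terms aggregation "2" (the names "1_rounded" and "avg_val" are placeholders, and two decimal places are assumed):
"aggregations": {
  "1": {
    "avg": {
      "field": "my_field"
    }
  },
  "1_rounded": {
    "bucket_script": {
      "buckets_path": {
        "avg_val": "1"
      },
      "script": "Math.round(params.avg_val * 100.0) / 100.0"
    }
  }
}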

filtering on visualisation metrics in Kibana

I'm trying to apply a filter to line visualisations in Kibana 4.5.1. I have an index (xscores) with two different types (sd and sma), here is a sample:
{
"_index": "xscore",
"_type": "xscore",
"_id": "AVgAejjHwGMH9TPDlF04",
"_score": 1,
"_source": {
"id": "AVgAejjHwGMH9TPDlF04",
"value": 0.019607843137254926,
"timestamp": 1477476480000,
"minutes": 1,
"type": "sma"
}
I am trying to show the sum only for sma and the average only for sd by adding a filter in the JSON input box. However, I always get a search_phase_execution_exception. This is the query that Kibana sends to Elasticsearch:
{"query": {
"filtered": {
"query": {
"query_string": {
"analyze_wildcard": true,
"query": "*"
}
},
"filter": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": 1477436400000,
"lte": 1477522799999,
"format": "epoch_millis"
}
}
}
],
"must_not": [
]
}
}
}},"size": 0,"aggs": {
"3": {
"date_histogram": {
"field": "timestamp",
"interval": "30m",
"time_zone": "Europe\/London",
"min_doc_count": 1,
"extended_bounds": {
"min": 1477436400000,
"max": 1477522799999
}
},
"aggs": {
"4": {
"terms": {
"field": "type",
"size": 5,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"avg": {
"field": "value"
}
},
"2": {
"sum": {
"field": "value",
"filter": {
"term": {
"type": "sma"
}
}
}
}
}
}
}
}}}
I think the problem is in the last part, but I can't figure out what exactly is wrong.
Running the same query in ES returns the following error:
"shard": 0,
"index": "xscore",
"node": "mszD3Y_4T-aGNEkVtt4BCg",
"reason": {
"type": "search_parse_exception",
"reason": "Unexpected token START_OBJECT in [2]."
I'm using ES 2.3 and Kibana 4.5 on macOS 10.10.
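The search_parse_exception most likely comes from the filter object nested inside the sum metric: metric aggregations don't accept a filter there. The usual way to restrict a metric to one type is to wrap it in a filter aggregation instead. A minimal sketch of what the sub-aggregations under the date histogram could look like (the names sma_sum and sd_avg are placeholders, and whether Kibana 4.5's JSON input box can express this per metric is a separate question):
"aggs": {
  "sma_sum": {
    "filter": { "term": { "type": "sma" } },
    "aggs": {
      "sum_value": { "sum": { "field": "value" } }
    }
  },
  "sd_avg": {
    "filter": { "term": { "type": "sd" } },
    "aggs": {
      "avg_value": { "avg": { "field": "value" } }
    }
  }
}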

Elasticsearch - How to limit the size of each combined query?

Here is my Mapping
{
"state":"open",
"settings":{
"index":{
"creation_date":"1453816191454",
"number_of_shards":"5",
"number_of_replicas":"1",
"version":{
"created":"1070199"
},
"uuid":"TfMJ4M0wQDedYSQuBz5BjQ"
}
},
"mappings":{
"Product":{
"properties":{
"index":"not_analyzed",
"store":true,
"type":"string"
},
"ProductName":{
"type":"nested",
"properties":{
"Name":{
"store":true,
"type":"string"
}
}
},
"ProductCode":{
"type":"string"
},
"Number":{
"index":"not_analyzed",
"store":true,
"type":"string"
},
"id":{
"index":"no",
"store":true,
"type":"integer"
},
"ShortDescription":{
"store":true,
"type":"string"
},
"Printer":{
"_routing":{
"required":true
},
"_parent":{
"type":"Product"
},
"properties":{
"properties":{
"RelativeUrl":{
"index":"no",
"store":true,
"type":"string"
}
}
},
"PrinterId":{
"index":"no",
"store":true,
"type":"integer"
},
"Name":{
"store":true,
"type":"string"
}
}
},
"aliases":[]
}
}
I would like to query mainly Products: if 20 Products match, return those 20 Products, but if no Products match, return the Printers plus the Products that have matching Printers (children).
When I execute this query for key=tn-200 it returns 20 products, and for key=hl-2230 it returns only printers. It works as expected, because hl-2230 doesn't have any matching products.
{
"query": {
"bool": {
"should": [{
"query_string": {
"default_field": "_all",
"query": "key"
}
}],
"must_not": [],
"must": []
}
},
"from": 0,
"size": 20,
"sort": [],
"aggs": {}
}
When I execute this query for hl-2230, it returns the products whose child printer matches hl-2230. This also works as expected.
{
"query": {
"has_child": {
"type": "Printer",
"query": {
"match": {
"Name": "HL-2230"
}
}
}
},
"from": 0,
"size": 20,
"sort": [],
"aggs": {}
}
Now my question is how to combine those. I tried a combined bool query with a limit filter, but when I search for hl-2230 it only returns products and never any printers, as if the "should" part were inactive and only the "must" part were executed: if I set "value": 1 for the must query I get 5 results (5 shards), and with "value": 2 I get 10 results.
I am also not sure whether the limit query is the way to go at all. Please advise me.
Thanks.
{
"query": {
"bool": {
"should": [{
"filtered" : {
"filter" : {
"limit" : {
"value" : 20
}
},
"query": {
"multi_match": {
"type": "best_fields",
"query": "hl-2230",
"fields": [
"ManufactureNumber^5",
"Number^4",
"Name^3"
]
}
}
}
}],
"must": [{
"filtered" : {
"filter" : {
"limit" : {
"value" : 1
}
},
"query": {
"has_child": {
"type": "Printer",
"query": {
"match": {
"Name": "HL-2230"
}
}
}
}
}
}]
}
},
"from": 0,
"size": 20,
"sort": [],
"aggs": {}
}
Please try this:
{
"query": {
"bool": {
"should": [
{
"multi_match": {
"type": "best_fields",
"query": "hl-2230",
"fields": [
"ManufactureNumber^5",
"Number^4",
"Name^3"
]
}
},
{
"has_child": {
"type": "Printer",
"query": {
"match": {
"Name": "HL-2230"
}
}
}
}
]
}
},
"size": 20,
"sort": [],
"aggs": {}
}
Hope this helps.

AND query in Elasticsearch

I'm trying to filter my query by 2 fields, but I keep getting an error. I'm using the AND query as suggested by the Elasticsearch docs (it's actually a 'bool' query), here:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-post-filter.html
GET /index_v1/user/_search
{
"query": {
"bool": {
"filter": {
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }}
}
}
}
}
This works-
GET /index_v1/user/_search
{
"query": {
"filtered": {
"query": {
"match": {
"id": "101"
}
}
}
}
}
and returns this-
{
"took": 24,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 3.442347,
"hits": [
{
"_index": "index_v1",
"_type": "user",
"_id": "1",
"_score": 3.442347,
"_source": {
"id": "101",
"firstName": "John",
"guid": "1001",
"lastName": "Doe",
"email": "john.doe#company.com",
"entitlements": {
"id": "en2"
}
}
},
{
"_index": "index_v1",
"_type": "user",
"_id": "2",
"_score": 3.140066,
"_source": {
"id": "101",
"firstName": "John",
"guid": "1001",
"lastName": "Doe",
"email": "john.doe#company.com",
"tenants": [
{
"id": "12345",
"roles": [
"PrimaryAdmin"
]
}
],
"entitlements": {
"id": "en2"
}
}
}
]
}
}
Here's the mapping document-
{
"index_v1": {
"mappings": {
"user": {
"properties": {
"email": {
"type": "string"
},
"entitlements": {
"properties": {
"id": {
"type": "string"
}
}
},
"firstName": {
"type": "string"
},
"guid": {
"type": "string"
},
"id": {
"type": "string"
},
"lastName": {
"type": "string"
},
"tenants": {
"properties": {
"id": {
"type": "string"
},
"roles": {
"type": "string"
}
}
}
}
}
}
}
}
Also, how can I add the following condition to the AND?
["tenants"]["id"]="12345"
You have to run a filtered query to use filters. The relevant example you'll want is here.
GET /index_v1/user/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": [
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }},
{ "term": { "tenants.id": "12345" }}
]
}
}
}
}
That should be roughly it, though I'm sure you'll have to tweak it (I'm a little rusty).
In order for the id fields to match exactly, you'll want to set those fields to be analyzed as keywords in the mapping, otherwise ES will try to get smart with it and give you unexpected results.
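As one possible variant of that, with the pre-5.x string mappings shown above, the exact-match fields could be made not_analyzed (or given the keyword analyzer). A minimal sketch, purely illustrative (the field list is abbreviated, and changing an existing mapping would require reindexing):
PUT /index_v1
{
  "mappings": {
    "user": {
      "properties": {
        "id":        { "type": "string", "index": "not_analyzed" },
        "firstName": { "type": "string" },
        "tenants": {
          "properties": {
            "id":    { "type": "string", "index": "not_analyzed" },
            "roles": { "type": "string" }
          }
        }
      }
    }
  }
}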
The query posted by Nick Larson should work fine, but as far as exactly what is wrong with your query: you are using curly brackets where you should be using square brackets (it's actually invalid JSON syntax in its current form). "filter" should be an array, so you have to use square brackets:
GET /index_v1/user/_search
{
"query": {
"bool": {
"filter": [
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }}
]
}
}
}

Elasticsearch combined query and filter not giving correct results

I'm trying to make a search page with extra filter items, but I can't get my query to work the way I want.
Here's the query example:
{
"size": 25,
"from": 0,
"sort": {
"_score": {
"order": "asc"
}
},
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"year": "2015"
}
}
]
}
},
"query": {
"match": {
"title": "Sense"
}
}
}
}
}
I want only results that are from 2015. Searching for the title 'Sense' comes up with nothing, even though there is a row with the title 'Sense8'. If I search for Sense8, it returns the correct data, but not for 'Sense'.
What am I doing wrong?
Thanks
You probably need to use an ngram or edge ngram analyzer in your mapping. I wrote a blog post about using ngrams for autocomplete on the Qbox blog that goes through it in some detail, but here is some code that might give you what you want:
PUT /test_index
{
"settings": {
"analysis": {
"filter": {
"ngram_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 20,
"token_chars": [
"letter",
"digit",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"ngram_filter"
]
},
"whitespace_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding"
]
}
}
}
},
"mappings": {
"doc": {
"properties": {
"year":{
"type": "string"
},
"title":{
"type": "string",
"index_analyzer": "ngram_analyzer",
"search_analyzer": "whitespace_analyzer"
}
}
}
}
}
POST /test_index/_bulk
{"index":{"_index":"test_index","_type":"doc","_id":1}}
{"year": "2015","title":"Sense8"}
{"index":{"_index":"test_index","_type":"doc","_id":2}}
{"year": "2014","title":"Something else"}
POST /test_index/_search
{
"size": 25,
"from": 0,
"sort": {
"_score": {
"order": "asc"
}
},
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"year": "2015"
}
}
]
}
},
"query": {
"match": {
"title": "Sense"
}
}
}
}
}
...
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": null,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 0.30685282,
"_source": {
"year": "2015",
"title": "Sense8"
},
"sort": [
0.30685282
]
}
]
}
}
You can run the code in your browser here:
http://sense.qbox.io/gist/4f72c182db2017ac7d32077af16cbc3528cb79f0