Elasticsearch - How to limit the size of each combined query? - json

Here is my Mapping
{
"state":"open",
"settings":{
"index":{
"creation_date":"1453816191454",
"number_of_shards":"5",
"number_of_replicas":"1",
"version":{
"created":"1070199"
},
"uuid":"TfMJ4M0wQDedYSQuBz5BjQ"
}
},
"mappings":{
"Product":{
"properties":{
"index":"not_analyzed",
"store":true,
"type":"string"
},
"ProductName":{
"type":"nested",
"properties":{
"Name":{
"store":true,
"type":"string"
}
}
},
"ProductCode":{
"type":"string"
},
"Number":{
"index":"not_analyzed",
"store":true,
"type":"string"
},
"id":{
"index":"no",
"store":true,
"type":"integer"
},
"ShortDescription":{
"store":true,
"type":"string"
},
"Printer":{
"_routing":{
"required":true
},
"_parent":{
"type":"Product"
},
"properties":{
"properties":{
"RelativeUrl":{
"index":"no",
"store":true,
"type":"string"
}
}
},
"PrinterId":{
"index":"no",
"store":true,
"type":"integer"
},
"Name":{
"store":true,
"type":"string"
}
}
},
"aliases":[]
}
}
I would like to query mainly Products: if the Products query has 20 results, then return those 20 products, but if no Products match, return printers plus the products that have matching printers (children).
When I execute this query for key=tn-200, it returns 20 products, and for key=hl-2230 it returns only printers. It works as expected, because hl-2230 doesn't have any matching products.
{
"query": {
"bool": {
"should": [{
"query_string": {
"default_field": "_all",
"query": "key"
}
}],
"must_not": [],
"must": []
}
},
"from": 0,
"size": 20,
"sort": [],
"aggs": {}
}
When I execute this query for hl-2230, it returns the products whose child printer matches hl-2230. This also works as expected.
{
"query": {
"has_child": {
"type": "Printer",
"query": {
"match": {
"Name": "HL-2230"
}
}
}
},
"from": 0,
"size": 20,
"sort": [],
"aggs": {}
}
Now my question is: how do I combine those? I tried to use a combined bool query with limit filters, but when I search for hl-2230, it only returns products and never returns any printers, as if the "should" part were inactive and only the "must" part were executed. I say this because if I set "value" : 1 for the must query, I get 5 results (5 shards), and with "value" : 2, I get 10 results.
I am also not sure whether the limit filter is the right way to go. Please advise me.
Thanks.
{
"query": {
"bool": {
"should": [{
"filtered" : {
"filter" : {
"limit" : {
"value" : 20
}
},
"query": {
"multi_match": {
"type": "best_fields",
"query": "hl-2230",
"fields": [
"ManufactureNumber^5",
"Number^4",
"Name^3"
]
}
}
}
}],
"must": [{
"filtered" : {
"filter" : {
"limit" : {
"value" : 1
}
},
"query": {
"has_child": {
"type": "Printer",
"query": {
"match": {
"Name": "HL-2230"
}
}
}
}
}
}]
}
},
"from": 0,
"size": 20,
"sort": [],
"aggs": {}
}

Please try this:
{
"query": {
"bool": {
"should": [
{
"multi_match": {
"type": "best_fields",
"query": "hl-2230",
"fields": [
"ManufactureNumber^5",
"Number^4",
"Name^3"
]
}
},
{
"has_child": {
"type": "Printer",
"query": {
"match": {
"Name": "HL-2230"
}
}
}
}
]
}
},
"size": 20,
"sort": [],
"aggs": {}
}
Hope this helps.

Related

Error in Term Parsing in Elastic search question

I have the following query:
{
"aggs": {
"groupby": {
"terms": {
"field": "AMAZING LONG NAME THAT MAKES NO SENSE",
"missing": "",
"order": [
{
"_term": "asc"
}
],
"size": 10038
}
}
},
"query": {
"bool": {
"filter": [
{
"bool": {
"must": [
{
"term": {
"match": {
"AMAZING LONG NAME THAT MAKES NO SENSE": "Term1"
}
}
}
]
}
}
]
}
},
"size": 10
}
And it raises a parsing_exception
{
"error": {
"root_cause": [
{
"type": "parsing_exception",
"reason": "[term] query does not support [AMAZING LONG NAME THAT MAKES NO SENSE]",
"line": 1,
"col": 235
}
],
"type": "x_content_parse_exception",
"reason": "[1:235] [bool] failed to parse field [filter]",
"caused_by": {
"type": "x_content_parse_exception",
"reason": "[1:235] [bool] failed to parse field [must]",
"caused_by": {
"type": "parsing_exception",
"reason": "[term] query does not support [AMAZING LONG NAME THAT MAKES NO SENSE]",
"line": 1,
"col": 235
}
}
},
"status": 400
}
My question is: should the field name be entered in place of "match"?
The term query syntax can be corrected as below:
POST demoindex/_search
{
"aggs": {
"groupby": {
"terms": {
"field": "AMAZING LONG NAME THAT MAKES NO SENSE",
"missing": "",
"order": [
{
"_term": "asc"
}
],
"size": 10038
}
}
},
"query": {
"bool": {
"filter": [
{
"bool": {
"must": [
{
"term": {
"AMAZING LONG NAME THAT MAKES NO SENSE": {
"value": "Term1"
}
}
}
]
}
}
]
}
},
"size": 10
}
The term query syntax is as below:
query -> term -> fieldname (to perform exact match on) -> value

Elasticsearch: Aggregation of null fields in a facet bucket

I'm trying to implement facets with a date range aggregation in the current version of Amazon Elasticsearch Service (version 7.10). The key I want the article documents to be grouped by is publishedAt, which is a date. I want one bucket where publishedAt is in the past, which means the article is published; one where it is in the future, which means it is scheduled; and one for all articles without a publishedAt, which are drafts. The published and scheduled buckets are working as they should. For drafts I can't enter a filter or date range, because the values are null. So I want to make use of the "Missing Values" feature. This should treat the documents with publishedAt = null as if they had the date given in the missing field. Unfortunately it has no effect on the results, even if I change the missing date so that it matches the published or scheduled range.
My request:
GET https://es.amazonaws.com/articles/_search
{
"size": 10,
"aggs": {
"facet_bucket_all": {
"aggs": {
"channel": {
"terms": {
"field": "channel.keyword",
"size": 5
}
},
"brand": {
"terms": {
"field": "brand.keyword",
"size": 5
}
},
"articleStatus": {
"date_range": {
"field": "publishedAt",
"format": "dd-MM-yyyy",
"missing": "01-07-1886",
"ranges": [
{ "key": "published", "from": "now-99y/M", "to": "now/M" },
{ "key": "scheduled", "from": "now+1s/M", "to": "now+99y/M" },
{ "key": "drafts", "from": "01-01-1886", "to": "31-12-1886" }
]
}
}
},
"filter": {
"bool": {
"must": []
}
}
},
"facet_bucket_publishedAt": {
"aggs": {},
"filter": {
"bool": {
"must": []
}
}
},
"facet_bucket_author": {
"aggs": {
"author": {
"terms": {
"field": "author",
"size": 10
}
}
},
"filter": {
"bool": {
"must": []
}
}
}
},
"query": {
"bool": {
"filter": [
{
"range": {
"publishedAt": {
"lte": "2021-08-09T09:52:19.975Z"
}
}
}
]
}
},
"from": 0,
"sort": [
{
"_score": "desc"
}
]
}
And in the result, the drafts are empty:
"articleStatus": {
"buckets": [
{
"key": "published",
"from": -1.496448E12,
"from_as_string": "01-08-1922",
"to": 1.627776E12,
"to_as_string": "01-08-2021",
"doc_count": 47920
},
{
"key": "scheduled",
"from": 1.627776E12,
"from_as_string": "01-08-2021",
"to": 4.7519136E12,
"to_as_string": "01-08-2120",
"doc_count": 3
},
{
"key": "drafts",
"from": 1.67252256E13,
"from_as_string": "01-01-1886",
"to": 1.67566752E13,
"to_as_string": "31-12-1886",
"doc_count": 0
}
]
}
SearchKit added this part to the query:
"query": {
"bool": {
"filter": [
{
"range": {
"publishedAt": {
"lte": "2021-08-09T09:52:19.975Z"
}
}
}
]
}
}
This had to be removed, because it filters out the documents with null values before the missing setting can do its job.
Now I get the correct result:
"articleStatus": {
"buckets": [
{
"key": "drafts",
"from": -2.650752E12,
"from_as_string": "01-01-1886",
"to": -2.6193024E12,
"to_as_string": "31-12-1886",
"doc_count": 7
},
{
"key": "published",
"from": -1.496448E12,
"from_as_string": "01-08-1922",
"to": 1.627776E12,
"to_as_string": "01-08-2021",
"doc_count": 47920
},
{
"key": "scheduled",
"from": 1.627776E12,
"from_as_string": "01-08-2021",
"to": 4.7519136E12,
"to_as_string": "01-08-2120",
"doc_count": 3
}
]
}

Json object in URI

I am trying to query my Elasticsearch server from Python. If I hard-code the URL and query in my Python script (search()) as shown below, it works fine with no issues. But if I store the URL and query in a separate XML file (property file), I don't get the results; instead I get the error below:
text '{"error":{"root_cause":[{"type":"parsing_exception","reason":"Expected [START_OBJECT] but found [VALUE_STRING]","line":1,"col":1}],"type":"parsing_exception","reason":"Expected [START_OBJECT] but found [VALUE_STRING]","line":1,"col":1},"status":400}' str
Here is my code I am using
def search():
url="http://0.0.0.0/logstash-pse*/_search/"
query={ "size": 0, "aggs": { "2": { "date_histogram": { "field": "#timestamp", "interval": "30m", "time_zone": "America/Chicago", "min_doc_count": 1 }, "aggs": { "3": { "terms": { "field": "queryname.keyword", "size": 100, "order": { "1.90": "desc" } }, "aggs": { "1": { "percentiles": { "field": "queryResponseTime", "percents": [ 90 ], "keyed": "false" } } } } } } }, "query": { "bool": { "must": [ { "query_string": { "query": "path: \"/store_locator/\"", "analyze_wildcard": "true" } }, { "query_string": { "analyze_wildcard": "true", "query": "*" } }, { "range": { "#timestamp": { "gte": 1527181463371, "lte": 1527267863371, "format": "epoch_millis" } } } ], "must_not": [] } }, "highlight": { "pre_tags": [ "#kibana-highlighted-field#" ], "post_tags": [ "#/kibana-highlighted-field#" ], "fields": { "*": { "highlight_query": { "bool": { "must": [ { "query_string": { "query": "path: \"/store_locator/\"", "analyze_wildcard": "true", "all_fields": "true" } }, { "query_string": { "analyze_wildcard": "true", "query": "*", "all_fields": "true" } }, { "range": { "#timestamp": { "gte": 1527181463371, "lte": 1527267863371, "format": "epoch_millis" } } } ], "must_not": [] } } } }, "fragment_size": 2147483647 }, "_source": { "excludes": [] }, "version": "true"}
response = requests.post(url, auth=(user, password), verify=False,json=query)
XML property file I am using like the below:
<custom>
<url>the above url goes here</url>
<query> above query </query>
</custom>
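Any idea what I am missing? Much appreciated.
I was able to figure it out with a little experimentation on my own. In case someone else is looking for the answer:
The query read from the XML file is a string, not a dict, so it has to be parsed with json.loads before being passed as json=. I just used the below: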
response = requests.post(url, auth=(user, password), verify=False,json=json.loads(query))

AND query in Elasticsearch

I'm trying to filter my query by 2 fields, but I keep getting an error. I'm using the AND query as suggested by the Elasticsearch docs (it's actually a 'bool' query), here-
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-post-filter.html
GET /index_v1/user/_search
{
"query": {
"bool": {
"filter": {
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }}
}
}
}
}
This works-
GET /index_v1/user/_search
{
"query": {
"filtered": {
"query": {
"match": {
"id": "101"
}
}
}
}
}
and returns this-
{
"took": 24,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 3.442347,
"hits": [
{
"_index": "index_v1",
"_type": "user",
"_id": "1",
"_score": 3.442347,
"_source": {
"id": "101",
"firstName": "John",
"guid": "1001",
"lastName": "Doe",
"email": "john.doe#company.com",
"entitlements": {
"id": "en2"
}
}
},
{
"_index": "index_v1",
"_type": "user",
"_id": "2",
"_score": 3.140066,
"_source": {
"id": "101",
"firstName": "John",
"guid": "1001",
"lastName": "Doe",
"email": "john.doe#company.com",
"tenants": [
{
"id": "12345",
"roles": [
"PrimaryAdmin"
]
}
],
"entitlements": {
"id": "en2"
}
}
}
]
}
}
Here's the mapping document-
{
"index_v1": {
"mappings": {
"user": {
"properties": {
"email": {
"type": "string"
},
"entitlements": {
"properties": {
"id": {
"type": "string"
}
}
},
"firstName": {
"type": "string"
},
"guid": {
"type": "string"
},
"id": {
"type": "string"
},
"lastName": {
"type": "string"
},
"tenants": {
"properties": {
"id": {
"type": "string"
},
"roles": {
"type": "string"
}
}
}
}
}
}
}
}
Also, how can I add this to AND condition
["tenants"]["id"]="12345"
You have to run a filtered query to use filters. The relevant example you'll want is here.
GET /index_v1/user/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": [
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }},
{ "term": { "tenants.id": "12345" }}
]
}
}
}
}
That should be roughly it, though I'm sure you'll have to tweak it (I'm a little rusty).
In order for the id fields to match exactly, you'll want to set those fields to be analyzed as keywords in the mapping, otherwise ES will try to get smart with it and give you unexpected results.
The query posted by Nick Larson should work fine, but as for exactly what is wrong with your query: you are using curly brackets where you should be using square brackets (it's actually invalid JSON syntax in its current form). "filter" should be an array, so you have to use square brackets:
GET /index_v1/user/_search
{
"query": {
"bool": {
"filter": [
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }}
]
}
}
}

Elasticsearch combined query and filter not giving correct results

I'm trying to make a search page with extra filter items, but I can't get my query to work the way I want it to.
Here's the query example:
{
"size": 25,
"from": 0,
"sort": {
"_score": {
"order": "asc"
}
},
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"year": "2015"
}
}
]
}
},
"query": {
"match": {
"title": "Sense"
}
}
}
}
}
I want only results that are from 2015. Searching for the title 'Sense' comes up with nothing, even though there is a row with the title 'Sense8'. If I search for 'Sense8', it returns the correct data, but searching for 'Sense' does not.
What am I doing wrong?
Thanks
You probably need to use an ngram or edge ngram analyzer in your mapping. I wrote a blog post about using ngrams for autocomplete on the Qbox blog that goes through it in some detail, but here is some code that might give you what you want:
PUT /test_index
{
"settings": {
"analysis": {
"filter": {
"ngram_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 20,
"token_chars": [
"letter",
"digit",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"ngram_filter"
]
},
"whitespace_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding"
]
}
}
}
},
"mappings": {
"doc": {
"properties": {
"year":{
"type": "string"
},
"title":{
"type": "string",
"index_analyzer": "ngram_analyzer",
"search_analyzer": "whitespace_analyzer"
}
}
}
}
}
POST /test_index/_bulk
{"index":{"_index":"test_index","_type":"doc","_id":1}}
{"year": "2015","title":"Sense8"}
{"index":{"_index":"test_index","_type":"doc","_id":2}}
{"year": "2014","title":"Something else"}
POST /test_index/_search
{
"size": 25,
"from": 0,
"sort": {
"_score": {
"order": "asc"
}
},
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"year": "2015"
}
}
]
}
},
"query": {
"match": {
"title": "Sense"
}
}
}
}
}
...
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": null,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 0.30685282,
"_source": {
"year": "2015",
"title": "Sense8"
},
"sort": [
0.30685282
]
}
]
}
}
You can run the code in your browser here:
http://sense.qbox.io/gist/4f72c182db2017ac7d32077af16cbc3528cb79f0