Elasticsearch aggregation - json

I'm looking for a way to merge Elasticsearch hits into one. Basically, I want to do the equivalent of
SELECT COUNT(*) WHERE TOTO = 1 AND TATA = 2 AND TITI = 3
I've managed to handle the WHERE part using filters, but I get all the hits as separate entities,
so what I have is the equivalent of
SELECT TOTO, TATA, TITI WHERE TOTO = 1 AND TATA = 2 AND TITI = 3
How can I get a single hit containing only the COUNT(*) value?
Environment
{
"my_element": {
"mappings": {
"test": {
"properties": {
"baskets": {
"type": "nested",
"properties": {
"basket_id": {
"type": "string"
},
"num_basket": {
"type": "integer"
},
"tp_basket": {
"type": "string"
}
}
},
"test_id": {
"type": "string"
},
"test_name": {
"type": "string"
}
}
}
}
}
}
So I want to count how many test elements have a basket_id of X and a num_basket of 3 (if they do, that means the baskets are identical, so I also want to show the baskets field).
{
"fields": [
"bucket_list",
"baskets.basket_id",
"baskets.num_basket"
],
"query": {
"filtered": {
"filter": {
"and": {
"filter": [
{
"nested": {
"path": "baskets",
"filter": {
"and": {
"filters": [
{
"or": {
"filters": [
{
"bool": {
"must": [
{
"term": {
"baskets.basket_id": "40"
}
},
{
"term": {
"baskets.num_basket": "1"
}
},
{
"term": {
"baskets.tp_basket": "1"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"baskets.basket_id": "41"
}
},
{
"term": {
"baskets.num_basket": "1"
}
},
{
"term": {
"baskets.tp_basket": "1"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"baskets.basket_id": "342"
}
},
{
"term": {
"baskets.num_basket": "1"
}
},
{
"term": {
"baskets.tp_basket": "1"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"baskets.basket_id": "342"
}
},
{
"term": {
"baskets.num_basket": "1"
}
},
{
"term": {
"baskets.tp_basket": "1"
}
}
]
}
}
]
}
}
]
}
}
}
},
{
"nested": {
"path": "baskets",
"filter": {
"and": {
"filters": [
{
"bool": {
"must": [
{
"term": {
"baskets.basket_id": "15"
}
},
{
"term": {
"baskets.num_basket": "2"
}
}
]
}
}
]
}
}
}
},
{
"nested": {
"path": "baskets",
"filter": {
"and": {
"filters": [
{
"bool": {
"must": [
{
"term": {
"baskets.basket_id": "15"
}
},
{
"term": {
"baskets.num_basket": "3"
}
}
]
}
}
]
}
}
}
}
]
}
}
}
}
}
And the result:
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 2,
"successful": 2,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "my_element",
"_type": "test",
"_id": "sMHPv3i4RTqNCIChGh4Iew",
"_score": 1,
"fields": {
"baskets.basket_id": [
"15",
"15",
"15"
],
"baskets.num_basket": [
2,
3,
1
]
}
},
{
"_index": "my_element",
"_type": "test",
"_id": "KL3U-g-7RtuusNV8hi9YHQ",
"_score": 1,
"fields": {
"baskets.basket_id": [
"15",
"15",
"15"
],
"baskets.num_basket": [
1,
2,
3
]
}
}
]
}
}

I am probably answering really late, but for those who are still looking for the answer: this query can be written in a very simple way, as follows.
GET _count
{
"query":
{
"bool" : {
"must" : [ {
"term" : {
"basket_id" : "X"
}
}, {
"term" : {
"num_basket" : 3
}
} ]
}
}
}
The above query will give you the following result.
{
"count": 6,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
}
}
You can read the count from the above response, which is your desired result.
Hope this was helpful.

When the search results are returned from Elasticsearch, you should see a total field; this holds the count of matching documents.
For example, see "total" : 2 below:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 12,
"successful" : 12,
"failed" : 0
},
"hits" : {
"total" : 2,
"max_score" : 2.098612,
"hits" : [ {
"_index" : "mytest",
"_type" : "message",
"_id" : "P9wGgJHjQmK8GUvw8M5Q8A",
"_score" : 2.098612,
"fields" : {
"tata" : [ "1" ],
"toto" : [ "1" ],
"titi" : [ "2" ]
}
}, {
"_index" : "mytest",
"_type" : "message",
"_id" : "M26ychoyRR6HkordRdS_HA",
"_score" : 0.30685282,
"fields" : {
"tata" : [ "1" ],
"toto" : [ "1" ],
"titi" : [ "2" ]
}
} ]
}
}

Related

Elasticsearch returns broken HTML tags when I use Highlighting

I have an HTML string in content like :
"content": "<h3>The Matrix has you </h3>follow the white rabbit."
I use "fragment_size" : 150 to control the size of the highlighted fragment in characters, but it returns a substring with broken HTML tags:
"highlight": {
"content": [
"/%D8%A2%D8%B2%D8%A7%D8%AF">The <em>Matrix</em> has"
]
}
How can I fix it in query DSL based on JSON?
{
"query": {
"filtered": {
"query": {
"multi_match": {
"query": "matrix",
"fields": ["title","content"]
}
},
"filter": {
"term": { "content_type": "page" }
}
}
},
"highlight" : {
"order" : "score",
"fields" : {
"content" : {"fragment_size" : 150, "number_of_fragments" : 3}
}
}
}
And here is a sample response:
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.98773545,
"hits": [
{
"_index": "myindex",
"_type": "post",
"_id": "101",
"_score": 0.024953224,
"_source": {
"ID": 101,
"content_type": "page",
"date": "1999-02-18 14:32:21",
"title": "Wake up, Neo",
"content": "<h3>The Matrix has you </h3>follow the white rabbit."
},
"highlight": {
"content": [
"/%D8%A2%D8%B2%D8%A7%D8%AF">the <em>matrix</em> has"
]
}
}
]
}
}
I haven't tried it, but I think you should set the encoder to html in the highlight section.
{
"query": {
"filtered": {
"query": {
"multi_match": {
"query": "matrix",
"fields": ["title","content"]
}
},
"filter": {
"term": { "content_type": "page" }
}
}
},
"highlight" : {
"order" : "score",
"fields" : {
"content" : {"fragment_size" : 150, "number_of_fragments" : 3}
},
"encoder": "html"
}
}
See: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-highlighting.html

Elasticsearch filtering nested object

I have a simplified object that looks something like this:
"name" : "Partner Name",
"features" : [
{
"val" : "Family",
"key" : "Type"
},
{
"val" : "Paris",
"key" : "City"
}
],
"variants" : [
{
"name" : "Activity 1 Name",
"description" : "Quick description",
"price" : 20
}
]
I want to filter by the City and Type keys. My current query filters by price but I can't get it working for City or Type. Adding more terms to the filter array didn't do the trick.
'query':{
'filtered':{
'query':{
'query_string':{
'query':query
}
},
'filter': {
'bool':{
'filter': [{
'range': {
'variants.price': {
'gte': 0
}
}
},
{
'range': {
'variants.price': {
'lte': 50
}
}
},
{
'term': {
'active': true
}
}
]
}
}
}
}
Any help would be appreciated. Thanks!
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"filter": [
{
"range": {
"variants.price": {
"gte": 0
}
}
},
{
"range": {
"variants.price": {
"lte": 50
}
}
},
{
"nested": {
"path": "features",
"query": {
"bool": {
"should": [
{"term":{"features.key":"type"}},
{"term":{"features.key":"city"}}
]
}
}
}
}
]
}
}
}
}
}

Elastic Search Extracting Inner Elements

I am using Elasticsearch with Scala and REST and have the following data structure (as a JSON input file):
{
"bookTitle" : "textbook",
"bookAuthors" : [
{
"authorId" : "01",
"authorName" : "author1"
},
{
"authorId" : "02",
"authorName" : "author2"
}
]
}
The data mappings used by this collection:
{
"properties" : {
"book": {
"properties": {
"bookTitle": {
"type": "string"
},
"bookAuthors": {
"type": "nested",
"properties": {
"authorId ": {
"type":"string"
},
"authorName" : {
"type": "string"
}
}
}
}
}
}
}
I would like to be able to query by the author id and get only the single author that matches. Up until now I have managed to query by the authorId but I keep getting the entire book document with both authors being displayed; I also tried selecting only the fields specific to the bookAuthors to be displayed, but the results were the same.
Current situation:
get the author name where the authorId is 01 => returns [author1,author2]
Required Query:
get the author name where the authorId is 01 => return [author1]
In Elasticsearch 1.5.2 you can achieve this using inner hits.
For example:
1) Create the index with the mapping
put mybooks
{
"mappings": {
"book": {
"properties": {
"bookTitle": {
"type": "string"
},
"bookAuthors": {
"type": "nested",
"properties": {
"authorId ": {
"type": "string"
},
"authorName": {
"type": "string"
}
}
}
}
}
}
}
2) Index Documents
put mybooks/book/1
{
"bookTitle": "book1",
"bookAuthors": [
{
"authorId": "01",
"authorName": "author1"
},
{
"authorId": "02",
"authorName": "author2"
}
]
}
put mybooks/book/2
{
"bookTitle" : "book2",
"bookAuthors" : [
{
"authorId" : "03",
"authorName" : "author1"
},
{
"authorId" : "02",
"authorName" : "author2"
}
]
}
3)Query
post mybooks/_search
{
"_source": [
"bookTitle"
],
"query": {
"nested": {
"path": "bookAuthors",
"query": {
"match": {
"bookAuthors.authorId": "02"
}
},
"inner_hits": {
"_source" :["authorName"]
}
}
}
}
4) Result
"hits": [
{
"_index": "mybooks",
"_type": "book",
"_id": "1",
"_score": 1.4054651,
"_source": {
"bookTitle": "book1"
},
"inner_hits": {
"bookAuthors": {
"hits": {
"total": 1,
"max_score": 1.4054651,
"hits": [
{
"_index": "mybooks",
"_type": "book",
"_id": "1",
"_nested": {
"field": "bookAuthors",
"offset": 1
},
"_score": 1.4054651,
"_source": {
"authorName": "author2"
}
}
]
}
}
}
},
{
"_index": "mybooks",
"_type": "book",
"_id": "2",
"_score": 1.4054651,
"_source": {
"bookTitle": "book2"
},
"inner_hits": {
"bookAuthors": {
"hits": {
"total": 1,
"max_score": 1.4054651,
"hits": [
{
"_index": "mybooks",
"_type": "book",
"_id": "2",
"_nested": {
"field": "bookAuthors",
"offset": 1
},
"_score": 1.4054651,
"_source": {
"authorName": "author2"
}
}
]
}
}
}
}
]

Elasticsearch combined query and filter not giving correct results

I'm trying to make a search page with extra filter items, but I can't get my query to work the way I want.
Here's the query example:
{
"size": 25,
"from": 0,
"sort": {
"_score": {
"order": "asc"
}
},
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"year": "2015"
}
}
]
}
},
"query": {
"match": {
"title": "Sense"
}
}
}
}
}
I want only results that are from 2015. Searching for the title 'Sense' comes up with nothing, even though there is a row with the title 'Sense8'. If I search for 'Sense8', it returns the correct data, but 'Sense' does not.
What am I doing wrong?
Thanks
You probably need to use an ngram or edge ngram analyzer in your mapping. I wrote a blog post about using ngrams for autocomplete on the Qbox blog that goes through it in some detail, but here is some code that might give you what you want:
PUT /test_index
{
"settings": {
"analysis": {
"filter": {
"ngram_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 20,
"token_chars": [
"letter",
"digit",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"ngram_filter"
]
},
"whitespace_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding"
]
}
}
}
},
"mappings": {
"doc": {
"properties": {
"year":{
"type": "string"
},
"title":{
"type": "string",
"index_analyzer": "ngram_analyzer",
"search_analyzer": "whitespace_analyzer"
}
}
}
}
}
POST /test_index/_bulk
{"index":{"_index":"test_index","_type":"doc","_id":1}}
{"year": "2015","title":"Sense8"}
{"index":{"_index":"test_index","_type":"doc","_id":2}}
{"year": "2014","title":"Something else"}
POST /test_index/_search
{
"size": 25,
"from": 0,
"sort": {
"_score": {
"order": "asc"
}
},
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"year": "2015"
}
}
]
}
},
"query": {
"match": {
"title": "Sense"
}
}
}
}
}
...
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": null,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 0.30685282,
"_source": {
"year": "2015",
"title": "Sense8"
},
"sort": [
0.30685282
]
}
]
}
}
You can run the code in your browser here:
http://sense.qbox.io/gist/4f72c182db2017ac7d32077af16cbc3528cb79f0

How can I get all the documents which have at least the properties specified in an Elasticsearch query?

Is it possible to select items from the index which match multiple values for a certain sub-item? This may not be very clear, so I have added more details below.
I have the following index:
{
"mappings" : {
"entity" : {
"properties" : {
"name" : {"type" : "string"},
"features" : {
"type" : "nested",
"include_in_parent" : false,
"properties" : {
"id" : {"type" : "integer"},
"value_int" : {"type" : "integer"},
"value_text" : {"type" : "string"},
"value_decimal" : {"type" : "integer"}
}
}
}
}
},
"settings" : {
"number_of_shards" : 1,
"number_of_replicas" : 0
}
}
Some items from the index
{
"name" : "Bazar",
"features" : [
{
"id" : 1,
"value_text" : null,
"value_decimal" : null,
"value_int": 51
},
{
"id" : 9,
"value_text" : "Amsterdam",
"value_decimal" : null,
"value_int": null
}
]
}
{
"name" : "Bazar Test",
"features" : [
{
"id" : 1,
"value_text" : null,
"value_decimal" : null,
"value_int": 52
},
{
"id" : 9,
"value_text" : "Leiden",
"value_decimal" : null,
"value_int": null
}
]
}
{
"name" : "Bazar no city",
"features" : [
{
"id" : 1,
"value_text" : null,
"value_decimal" : null,
"value_int": 51
}
]
}
What I need is a way to find just the items which have both features.id = 1 and features.id = 9 (e.g. the "Bazar" and "Bazar Test" items).
The query I have so far is
{
"query" : {
"nested" : {
"path" : "features",
"query" : {
"bool" : {
"must" : [
{ "terms" : { "features.id" : [1, 9]} }
]
}
}
}
}
}
The problem with this query is that it selects the items which have features.id = 1 OR features.id = 9 so all the items are returned.
Edit
Tried a new query
{
"query" : {
"nested" : {
"path" : "features",
"query" : {
"bool" : {
"must" : [
{ "terms" : {
"features.id" : [1, 9],
"minimum_should_match": 2
}
}
]
}
}
}
}
}
But I got no results.
Edit:
After I combined the answers, I managed to get it working.
Thank you for help :)
This is my query (a bit modified)
{
"from": 0,
"size": 20,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"match_phrase_prefix": {
"title": {
"query": "deli",
"max_expansions": 5
}
}
},
{
"match": {
"entity_type_id": 5
}
}
]
}
},
"filter": {
"and": {
"filters": [
{
"nested": {
"path": "features",
"query": {
"bool": {
"must": [
{
"match": {
"features.id": 31
}
},
{
"match": {
"features.value_int": {
"query": [
56, 57
],
"operator": "and"
}
}
}
]
}
}
}
}
]
}
}
}
}
}
Thank you.
The match query supports a Boolean operator parameter. You should also wrap the query in a nested query, as the features field is nested in your mapping.
Try this query:
{
"query": {
"nested": {
"query": {
"match": {
"features.id": {
"query": "1 9",
"operator": "and"
}
}
},
"path": "features"
}
}
}
Nested documents are more difficult to query. This should be what you want:
{
"query": {
"filtered": {
"filter": {
"and": {
"filters": [
{
"nested": {
"path": "features",
"query": {
"term": {
"features.id": {
"value": "1"
}
}
}
}
},
{
"nested": {
"path": "features",
"query": {
"term": {
"features.id": {
"value": "9"
}
}
}
}
}
]
}
}
}
}
}