Elastic Search Extracting Inner Elements - json

I am using elastic search with Scala and REST and have the following data structure: (as a JSON input file)
{
"bookTitle" : "textbook",
"bookAuthors" : [
{
"authorId" : "01",
"authorName" : "author1"
},
{
"authorId" : "02",
"authorName" : "author2"
},
]
}
The data mappings used by this collection:
{
"properties" : {
"book": {
"properties": {
"bookTitle": {
"type": "string"
},
"bookAuthors": {
"type": "nested",
"properties": {
"authorId ": {
"type":"string"
},
"authorName" : {
"type": "string"
}
}
}
}
}
}
}
I would like to be able to query by the author id and get only the single author that matches. Up until now I have managed to query by the authorId but I keep getting the entire book document with both authors being displayed; I also tried selecting only the fields specific to the bookAuthors to be displayed, but the results were the same.
Current situation:
get the author name where the authorId is 01 => returns [author1,author2]
Required Query:
get the author name where the authorId is 01 => return [author1]

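In Elasticsearch 1.5.2 you can achieve this using inner hits, which return only the nested objects that matched the query.
For example:
1) Create the index with a nested mapping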
put mybooks
{
"mappings": {
"book": {
"properties": {
"bookTitle": {
"type": "string"
},
"bookAuthors": {
"type": "nested",
"properties": {
"authorId ": {
"type": "string"
},
"authorName": {
"type": "string"
}
}
}
}
}
}
}
2) Index Documents
put mybooks/book/1
{
"bookTitle": "book1",
"bookAuthors": [
{
"authorId": "01",
"authorName": "author1"
},
{
"authorId": "02",
"authorName": "author2"
}
]
}
put mybooks/book/2
{
"bookTitle" : "book2",
"bookAuthors" : [
{
"authorId" : "03",
"authorName" : "author1"
},
{
"authorId" : "02",
"authorName" : "author2"
}
]
}
3)Query
post mybooks/_search
{
"_source": [
"bookTitle"
],
"query": {
"nested": {
"path": "bookAuthors",
"query": {
"match": {
"bookAuthors.authorId": "02"
}
},
"inner_hits": {
"_source" :["authorName"]
}
}
}
}
4) Result
"hits": [
{
"_index": "mybooks",
"_type": "book",
"_id": "1",
"_score": 1.4054651,
"_source": {
"bookTitle": "book1"
},
"inner_hits": {
"bookAuthors": {
"hits": {
"total": 1,
"max_score": 1.4054651,
"hits": [
{
"_index": "mybooks",
"_type": "book",
"_id": "1",
"_nested": {
"field": "bookAuthors",
"offset": 1
},
"_score": 1.4054651,
"_source": {
"authorName": "author2"
}
}
]
}
}
}
},
{
"_index": "mybooks",
"_type": "book",
"_id": "2",
"_score": 1.4054651,
"_source": {
"bookTitle": "book2"
},
"inner_hits": {
"bookAuthors": {
"hits": {
"total": 1,
"max_score": 1.4054651,
"hits": [
{
"_index": "mybooks",
"_type": "book",
"_id": "2",
"_nested": {
"field": "bookAuthors",
"offset": 1
},
"_score": 1.4054651,
"_source": {
"authorName": "author2"
}
}
]
}
}
}
}
]

Related

acronyms on Elastic Search request

I have an Elasticsearch request and I'm trying to add a list of acronyms (or synonyms) to it, but I can't figure out where to place it.
Let's say the synonyms list is {"HR": "Human Ressources", "AWS": "Amazon Web Service"}
The request is the following:
{
"query": {
"bool": {
"filter": [
{
"terms": {
"observatory": [
"rome",
"meban",
"emass",
"cigref",
"opiiec",
"null"
]
}
},
{
"terms": {
"referentiel_id": [
"null",
42,
48,
52
]
}
}
],
"must": {
"match": {
"skill": {
"query": "*dactif*",
"fuzziness": "AUTO"
}
}
}
}
}
}
You can use the synonym token filter to handle synonyms in your search query.
Below is a working example with index mapping, index data, search query, and search result.
Index Mapping:
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"synonym": {
"tokenizer": "whitespace",
"filter": [
"synonym"
]
}
},
"filter": {
"synonym": {
"type": "synonym",
"synonyms": [
"HR, Human Ressources",
"AWS, Amazon Web Service"
]
}
}
}
}
},
"mappings": {
"properties": {
"observatory": {
"type": "text",
"analyzer": "synonym"
}
}
}
}
Index Data:
{
"observatory":"HR"
}
{
"observatory":"Human Ressources"
}
Search Query:
{
"query": {
"bool": {
"should": [
{
"match": {
"observatory": {
"query": "HR"
}
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "67707925",
"_type": "_doc",
"_id": "1",
"_score": 0.487735,
"_source": {
"observatory": "Human Ressources"
}
},
{
"_index": "67707925",
"_type": "_doc",
"_id": "2",
"_score": 0.487735,
"_source": {
"observatory": "HR"
}
}
]

How to match on multiple fields per array item in elastic search

I am trying to create an elastic search query to match multiple fields inside of an object inside of an array.
For example, the Elastic Search structure I am querying against is similar to the following:
"hits": [
{
"_index": "titles",
"_type": "title",
...
"_source": {
...
"genres": [
{
"code": "adventure",
"priority": 1
},
{
"code": "action",
"priority": 2
},
{
"code": "horror",
"priority": 3
}
],
...
},
...
]
And what I am trying to do is match on titles with specific genre/priority pairings. For example, I am trying to match all titles with code=action and priority=1, but my query is returning too many results. The above title is hit during this example due to the fact that the genre list contains both a genre with code=action AND another genre that matches priority=1. My query is similar to the following:
"query": {
"bool": {
"filter": [
{
"bool": {
"must":[
{"term": {
"genres.code": {
"value": "action",
"boost": 1.0
}
}},
{"term": {
"genres.priority": {
"value": 1,
"boost": 1.0
}
}}
]
}
},
...
}
Is there any way to form the query in order to match a title with a single genre containing both priority=1 AND code=action?
I have recreated your problem. I added the following mapping:
PUT titles
{
"mappings": {
"title": {
"properties": {
"author": {
"type": "text"
},
"genres": {
"type": "nested"
}
}
}
}
}
Then I added values to the index. This is what was inserted:
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "titles",
"_type": "title",
"_id": "2",
"_score": 1,
"_source": {
"author": "Author 1",
"genres": [
{
"code": "adventure",
"priority": 2
},
{
"code": "action",
"priority": 3
},
{
"code": "horror",
"priority": 1
}
]
}
},
{
"_index": "titles",
"_type": "title",
"_id": "1",
"_score": 1,
"_source": {
"author": "Author 2",
"genres": [
{
"code": "adventure",
"priority": 3
},
{
"code": "action",
"priority": 1
},
{
"code": "horror",
"priority": 2
}
]
}
},
{
"_index": "titles",
"_type": "title",
"_id": "3",
"_score": 1,
"_source": {
"author": "Author 3",
"genres": [
{
"code": "adventure",
"priority": 3
},
{
"code": "action",
"priority": 1
},
{
"code": "horror",
"priority": 2
}
]
}
}
]
}
My query is:
GET titles/title/_search
{
"query": {
"nested": {
"path": "genres",
"query": {
"bool": {
"must": [
{
"term": {
"genres.code": {
"value": "horror"
}
}
},
{
"term": {
"genres.priority": {
"value": 1
}
}
}
]
}
}
}
}
}
The query returns
"_source": {
"author": "Author 1",
"genres": [
{
"code": "adventure",
"priority": 2
},
{
"code": "action",
"priority": 3
},
{
"code": "horror",
"priority": 1
}
]
}
This title is the only one that has code = 'horror' and priority = 1.

AND query in Elasticsearch

I'm trying to filter my query by 2 fields, but I keep getting an error. I'm using the AND query as suggested by the Elasticsearch docs (it's actually a 'bool' query), here-
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-post-filter.html
GET /index_v1/user/_search
{
"query": {
"bool": {
"filter": {
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }}
}
}
}
}
This works-
GET /index_v1/user/_search
{
"query": {
"filtered": {
"query": {
"match": {
"id": "101"
}
}
}
}
}
and returns this-
{
"took": 24,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 3.442347,
"hits": [
{
"_index": "index_v1",
"_type": "user",
"_id": "1",
"_score": 3.442347,
"_source": {
"id": "101",
"firstName": "John",
"guid": "1001",
"lastName": "Doe",
"email": "john.doe#company.com",
"entitlements": {
"id": "en2"
}
}
},
{
"_index": "index_v1",
"_type": "user",
"_id": "2",
"_score": 3.140066,
"_source": {
"id": "101",
"firstName": "John",
"guid": "1001",
"lastName": "Doe",
"email": "john.doe#company.com",
"tenants": [
{
"id": "12345",
"roles": [
"PrimaryAdmin"
]
}
],
"entitlements": {
"id": "en2"
}
}
}
]
}
}
Here's the mapping document-
{
"index_v1": {
"mappings": {
"user": {
"properties": {
"email": {
"type": "string"
},
"entitlements": {
"properties": {
"id": {
"type": "string"
}
}
},
"firstName": {
"type": "string"
},
"guid": {
"type": "string"
},
"id": {
"type": "string"
},
"lastName": {
"type": "string"
},
"tenants": {
"properties": {
"id": {
"type": "string"
},
"roles": {
"type": "string"
}
}
}
}
}
}
}
}
Also, how can I add this to the AND condition?
["tenants"]["id"]="12345"
You have to run a filtered query to use filters. The relevant example you'll want is here.
GET /index_v1/user/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": [
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }},
{ "term": { "tenants.id": "12345" }}
]
}
}
}
}
That should be roughly it, though I'm sure you'll have to tweak it (I'm a little rusty).
In order for the id fields to match exactly, you'll want to set those fields to be analyzed as keywords in the mapping, otherwise ES will try to get smart with it and give you unexpected results.
The query posted by Nick Larson should work fine, but as for exactly what is wrong with your query: you are using curly brackets where you should be using square brackets (it's actually invalid JSON syntax in its current form). "filter" should be an array, so you have to use square brackets:
GET /index_v1/user/_search
{
"query": {
"bool": {
"filter": [
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }}
]
}
}
}

How to query nested structure in elasticsearch

Below are two mocked records from my Elasticsearch index. I have millions of records in my ES. I am trying to query ES to get all the records that have a non-empty/non-null "tags" field. If a record doesn't have a tag (like the second record below), then I don't want to pull it from ES.
If "books" were not nested, then from googling around it seems the query below would have worked -
curl -XGET 'host:port/book_indx/book/_search?' -d '{
"query" : {"filtered" : {"filter" : {"exists" :{"field" : "_source"}}}}
}'
However I am not finding a solution to query the nested structure. I tried the below with no luck -
{"query" : {"filtered" : {"filter" : {"exists" :{"field" : "_source.tags"}}}}}
{"query" : {"filtered" : {"filter" : {"exists" :{"field" : "_source":{"tags"}}}}}}
Any suggestions are really appreciated here! Thanks in advance.
{
"_shards": {
"failed": 0,
"successful": 12,
"total": 12
},
"hits": {
"hits": [
{
"_id": "book1",
"_index": "book",
"_source": {
"book_name": "How to Get Organized",
"publication_date": "2014-02-24T16:50:39+0000",
"tags": [
{
"category": "self help",
"topics": [
{
"name": "time management",
"page": 6198
},
{
"name": "calendar",
"page": 10
}
],
"id": "WEONWOIR234LI",
}
],
"last_updated": "2015-11-11T16:28:32.308+0000"
},
"_type": "book"
},
{
"_id": "book2",
"_index": "book",
"_source": {
"book_name": "How to Cook",
"publication_date": "2014-02-24T16:50:39+0000",
"tags": [],
"last_updated": "2015-11-11T16:28:32.308+0000"
},
"_type": "book"
}
],
"total": 1
},
"timed_out": false,
"took": 80
}
Mapping -
"book": {
"_id": {
"path": "message_id"
},
"properties": {
"book_name": {
"index": "not_analyzed",
"type": "string"
},
"publication_date": {
"format": "date_time||date_time_no_millis",
"type": "date"
},
"tags": {
"properties": {
"category": {
"index": "not_analyzed",
"type": "string"
},
"topic": {
"properties": {
"name": {
"index": "not_analyzed",
"type": "string"
},
"page": {
"index": "no",
"type": "integer"
}
}
},
"id": {
"index": "not_analyzed",
"type": "string"
}
},
"type": "nested"
},
"last_updated": {
"format": "date_time||date_time_no_millis",
"type": "date"
}
}
}
Since your tags field has a nested type, you need to use a nested filter in order to query it.
The following filtered query will correctly return only the first document above (i.e. with id book1):
{
"query": {
"filtered": {
"filter": {
"nested": {
"path": "tags",
"filter": {
"exists": {
"field": "tags"
}
}
}
}
}
}
}

Elasticsearch combined query and filter not giving correct results

I'm trying to make a search page with extra filter items, but I can't get my query to work the way I want it to.
Here's the query example:
{
"size": 25,
"from": 0,
"sort": {
"_score": {
"order": "asc"
}
},
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"year": "2015"
}
}
]
}
},
"query": {
"match": {
"title": "Sense"
}
}
}
}
}
I want only results that are from 2015. Searching for the title 'Sense' comes up with nothing, even though there is a row with the title 'Sense8'. If I search for 'Sense8', it returns the correct data, but 'Sense' does not.
What am I doing wrong?
Thanks
You probably need to use an ngram or edge ngram analyzer in your mapping. I wrote a blog post about using ngrams for autocomplete on the Qbox blog that goes through it in some detail, but here is some code that might give you what you want:
PUT /test_index
{
"settings": {
"analysis": {
"filter": {
"ngram_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 20,
"token_chars": [
"letter",
"digit",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"ngram_filter"
]
},
"whitespace_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding"
]
}
}
}
},
"mappings": {
"doc": {
"properties": {
"year":{
"type": "string"
},
"title":{
"type": "string",
"index_analyzer": "ngram_analyzer",
"search_analyzer": "whitespace_analyzer"
}
}
}
}
}
POST /test_index/_bulk
{"index":{"_index":"test_index","_type":"doc","_id":1}}
{"year": "2015","title":"Sense8"}
{"index":{"_index":"test_index","_type":"doc","_id":2}}
{"year": "2014","title":"Something else"}
POST /test_index/_search
{
"size": 25,
"from": 0,
"sort": {
"_score": {
"order": "asc"
}
},
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"year": "2015"
}
}
]
}
},
"query": {
"match": {
"title": "Sense"
}
}
}
}
}
...
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": null,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 0.30685282,
"_source": {
"year": "2015",
"title": "Sense8"
},
"sort": [
0.30685282
]
}
]
}
}
You can run the code in your browser here:
http://sense.qbox.io/gist/4f72c182db2017ac7d32077af16cbc3528cb79f0