How to query nested structure in elasticsearch - json

Below are two mocked records from my Elasticsearch index. I have millions of records in my ES. I am trying to query ES to get all the records that have a non-empty/non-null "tags" field. If a record doesn't have a tag (like the second record below), then I don't want to pull it from ES.
If "tags" were not nested, then from googling around it seems the query below would have worked -
curl -XGET 'host:port/book_indx/book/_search?' -d '{
"query" : {"filtered" : {"filter" : {"exists" :{"field" : "_source"}}}}
}'
However, I cannot find a way to query the nested structure. I tried the queries below with no luck -
{"query" : {"filtered" : {"filter" : {"exists" :{"field" : "_source.tags"}}}}}
{"query" : {"filtered" : {"filter" : {"exists" :{"field" : "_source":{"tags"}}}}}}
Any suggestions are really appreciated here! Thanks in advance.
{
"_shards": {
"failed": 0,
"successful": 12,
"total": 12
},
"hits": {
"hits": [
{
"_id": "book1",
"_index": "book",
"_source": {
"book_name": "How to Get Organized",
"publication_date": "2014-02-24T16:50:39+0000",
"tags": [
{
"category": "self help",
"topics": [
{
"name": "time management",
"page": 6198
},
{
"name": "calendar",
"page": 10
}
],
"id": "WEONWOIR234LI",
}
],
"last_updated": "2015-11-11T16:28:32.308+0000"
},
"_type": "book"
},
{
"_id": "book2",
"_index": "book",
"_source": {
"book_name": "How to Cook",
"publication_date": "2014-02-24T16:50:39+0000",
"tags": [],
"last_updated": "2015-11-11T16:28:32.308+0000"
},
"_type": "book"
}
],
"total": 1
},
"timed_out": false,
"took": 80
}
Mapping -
"book": {
"_id": {
"path": "message_id"
},
"properties": {
"book_name": {
"index": "not_analyzed",
"type": "string"
},
"publication_date": {
"format": "date_time||date_time_no_millis",
"type": "date"
},
"tags": {
"properties": {
"category": {
"index": "not_analyzed",
"type": "string"
},
"topic": {
"properties": {
"name": {
"index": "not_analyzed",
"type": "string"
},
"page": {
"index": "no",
"type": "integer"
}
}
},
"id": {
"index": "not_analyzed",
"type": "string"
}
},
"type": "nested"
},
"last_updated": {
"format": "date_time||date_time_no_millis",
"type": "date"
}
}
}

Since your tags field has a nested type, you need to use a nested filter in order to query it.
The following filtered query will correctly return only the first document above (i.e. the one with id book1):
{
"query": {
"filtered": {
"filter": {
"nested": {
"path": "tags",
"filter": {
"exists": {
"field": "tags"
}
}
}
}
}
}
}

Related

Json schema for recursive key

The JSON data is given below; the student names repeat as keys in many objects — around 100 of them (only 3 are shown). Is there a way to define the keys and values once under "$defs" to simplify the schema?
{
"student_id": {
"Alice": 0,
"Bob": 1,
"Charlie": 2,
"Derek": 3,
"Emily": 4,
"Florence": 5
},
"project": {
"Alice": "Science",
"Bob": "Math",
"Charlie": "Science",
"Derek": "Science",
"Emily": "Math",
"Florence": "Math"
},
"summer_camp": {
"Alice": true,
"Bob": false,
"Charlie": true,
"Derek": false,
"Emily": true,
"Florence": false
},
"Data":[
"student_id",
"project",
"summer_camp"
]
}
You can specify the property names in a reusable definition:
{
"$defs": {
"property_names_students": {
"propertyNames": {
"enum": [
"Alice",
"Bob",
...
]
]
}
},
"type": "object",
"properties": {
"student_id": {
"$ref": "#/$defs/property_names_students",
"additionalProperties": {
"type": "integer"
}
},
"project": {
"$ref": "#/$defs/property_names_students",
"additionalProperties": {
"enum": ["Science", "Math", ... ]
}
},
...
}
}

Import JSON with objects as nested to Elastic Search

I have a log with thousands of records of aggregated data in JSON:
{
"count": 25,
"domain": "domain.tld",
"geoips": {
"AU": 5,
"NZ": 20
},
"ips": {
"1.2.3.4": 5,
"1.2.3.5": 1,
"1.2.3.6": 1,
"1.2.3.7": 1,
"1.2.3.8": 1,
"1.2.3.9": 9,
"1.2.3.10": 7
},
"subdomains": {
"a.domain.tld": 1,
"b.domain.tld": 1,
"c.domain.tld": 1,
"domain.tld": 22
},
"tld": "tld",
"types": {
"1": 3,
"43": 22
}
}
and I have this mapping in ES:
"mappings": {
"properties": {
"count": {
"type": "long"
},
"domain": {
"type": "keyword"
},
"ips": {
"type": "nested",
"properties": {
"key": {
"type": "keyword"
},
"val": {
"type": "long"
}
}
},
"geoips": {
"type": "nested",
"properties": {
"key": {
"type": "keyword"
},
"val": {
"type": "long"
}
}
},
"subdomains": {
"type": "nested",
"properties": {
"key": {
"type": "keyword"
},
"val": {
"type": "long"
}
}
},
"tld": {
"type": "keyword"
},
"types": {
"type": "nested",
"properties": {
"key": {
"type": "keyword"
},
"val": {
"type": "long"
}
}
}
}
}
Is there a simple way to import these lines into ES as nested objects? If I use a bulk insert without modification, ES will modify the mapping by adding a new field for each IP/subdomain/GeoIP instead of adding it as a simple key/val object.
Or is the only way to regenerate the JSON into key/val nested fields?
Your mapping is already very good, but the data doesn't fit it, since the nested data type expects an array of objects, not a single object. So you'll need to transform your nested objects into arrays of key-value pairs like so:
...
"ips": [
{
"key": "1.2.3.4",
"val": 5
},
{
"key": "1.2.3.5",
"val": 1
},
...
],
"subdomains": [
{
"key": "a.domain.tld",
"val": 1
},
{
"key": "b.domain.tld",
"val": 1
},
...
]
...

How to match on multiple fields per array item in elastic search

I am trying to create an elastic search query to match multiple fields inside of an object inside of an array.
For example, the Elastic Search structure I am querying against is similar to the following:
"hits": [
{
"_index": "titles",
"_type": "title",
...
"_source": {
...
"genres": [
{
"code": "adventure",
"priority": 1
},
{
"code": "action",
"priority": 2
},
{
"code": "horror",
"priority": 3
}
],
...
},
...
]
And what I am trying to do is match on titles with specific genre/priority pairings. For example, I am trying to match all titles with code=action and priority=1, but my query is returning too many results. The above title is hit during this example due to the fact that the genre list contains both a genre with code=action AND another genre that matches priority=1. My query is similar to the following:
"query": {
"bool": {
"filter": [
{
"bool": {
"must":[
{"term": {
"genres.code": {
"value": "action",
"boost": 1.0
}
}},
{"term": {
"genres.priority": {
"value": 1,
"boost": 1.0
}
}}
]
}
},
...
}
Is there any way to form the query in order to match a title with a single genre containing both priority=1 AND code=action?
I have recreated your problem. I added the following mapping
PUT titles
{
"mappings": {
"title": {
"properties": {
"author": {
"type": "text"
},
"genres": {
"type": "nested"
}
}
}
}
}
Then I added values to the index. This was what was inserted
"hits": {
"total": 3,
"max_score": 1,
"hits": [
{
"_index": "titles",
"_type": "title",
"_id": "2",
"_score": 1,
"_source": {
"author": "Author 1",
"genres": [
{
"code": "adventure",
"priority": 2
},
{
"code": "action",
"priority": 3
},
{
"code": "horror",
"priority": 1
}
]
}
},
{
"_index": "titles",
"_type": "title",
"_id": "1",
"_score": 1,
"_source": {
"author": "Author 2",
"genres": [
{
"code": "adventure",
"priority": 3
},
{
"code": "action",
"priority": 1
},
{
"code": "horror",
"priority": 2
}
]
}
},
{
"_index": "titles",
"_type": "title",
"_id": "3",
"_score": 1,
"_source": {
"author": "Author 3",
"genres": [
{
"code": "adventure",
"priority": 3
},
{
"code": "action",
"priority": 1
},
{
"code": "horror",
"priority": 2
}
]
}
}
]
}
My query is:
GET titles/title/_search
{
"query": {
"nested": {
"path": "genres",
"query": {
"bool": {
"must": [
{
"term": {
"genres.code": {
"value": "horror"
}
}
},
{
"term": {
"genres.priority": {
"value": 1
}
}
}
]
}
}
}
}
}
The query returns
"_source": {
"author": "Author 1",
"genres": [
{
"code": "adventure",
"priority": 2
},
{
"code": "action",
"priority": 3
},
{
"code": "horror",
"priority": 1
}
]
}
This title is the only one that has code = 'horror' and priority = 1.

AND query in Elasticsearch

I'm trying to filter my query by 2 fields, but I keep getting an error. I'm using the AND query as suggested by the Elasticsearch docs (it's actually a 'bool' query), here-
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-post-filter.html
GET /index_v1/user/_search
{
"query": {
"bool": {
"filter": {
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }}
}
}
}
}
This works-
GET /index_v1/user/_search
{
"query": {
"filtered": {
"query": {
"match": {
"id": "101"
}
}
}
}
}
and returns this-
{
"took": 24,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 3.442347,
"hits": [
{
"_index": "index_v1",
"_type": "user",
"_id": "1",
"_score": 3.442347,
"_source": {
"id": "101",
"firstName": "John",
"guid": "1001",
"lastName": "Doe",
"email": "john.doe#company.com",
"entitlements": {
"id": "en2"
}
}
},
{
"_index": "index_v1",
"_type": "user",
"_id": "2",
"_score": 3.140066,
"_source": {
"id": "101",
"firstName": "John",
"guid": "1001",
"lastName": "Doe",
"email": "john.doe#company.com",
"tenants": [
{
"id": "12345",
"roles": [
"PrimaryAdmin"
]
}
],
"entitlements": {
"id": "en2"
}
}
}
]
}
}
Here's the mapping document-
{
"index_v1": {
"mappings": {
"user": {
"properties": {
"email": {
"type": "string"
},
"entitlements": {
"properties": {
"id": {
"type": "string"
}
}
},
"firstName": {
"type": "string"
},
"guid": {
"type": "string"
},
"id": {
"type": "string"
},
"lastName": {
"type": "string"
},
"tenants": {
"properties": {
"id": {
"type": "string"
},
"roles": {
"type": "string"
}
}
}
}
}
}
}
}
Also, how can I add this to the AND condition
["tenants"]["id"]="12345"
You have to run a filtered query to use filters. The relevant example you'll want is here.
GET /index_v1/user/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": [
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }},
{ "term": { "tenants.id": "12345" }}
]
}
}
}
}
That should be roughly it, though I'm sure you'll have to tweak it (I'm a little rusty).
In order for the id fields to match exactly, you'll want to set those fields to be analyzed as keywords in the mapping, otherwise ES will try to get smart with it and give you unexpected results.
The query posted by Nick Larson should work fine, but as far as exactly what is wrong with your query, you are using curly brackets where you should be using square brackets (it's actually invalid JSON syntax in its current form). "filter" should be an array, so you have to use square brackets:
GET /index_v1/user/_search
{
"query": {
"bool": {
"filter": [
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }}
]
}
}
}

Elasticsearch combined query and filter not giving correct results

I'm trying to make a search page with extra filter items, but I can't get my query to work the way I want it to.
Here's the query example:
{
"size": 25,
"from": 0,
"sort": {
"_score": {
"order": "asc"
}
},
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"year": "2015"
}
}
]
}
},
"query": {
"match": {
"title": "Sense"
}
}
}
}
}
I want only results that are from 2015. Searching for the title 'Sense' comes up with nothing, even though there is a row with the title 'Sense8'. If I search for 'Sense8', it returns the correct data, but not for 'Sense'.
What am I doing wrong?
Thanks
You probably need to use an ngram or edge ngram analyzer in your mapping. I wrote a blog post about using ngrams for autocomplete on the Qbox blog that goes through it in some detail, but here is some code that might give you what you want:
PUT /test_index
{
"settings": {
"analysis": {
"filter": {
"ngram_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 20,
"token_chars": [
"letter",
"digit",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"ngram_filter"
]
},
"whitespace_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding"
]
}
}
}
},
"mappings": {
"doc": {
"properties": {
"year":{
"type": "string"
},
"title":{
"type": "string",
"index_analyzer": "ngram_analyzer",
"search_analyzer": "whitespace_analyzer"
}
}
}
}
}
POST /test_index/_bulk
{"index":{"_index":"test_index","_type":"doc","_id":1}}
{"year": "2015","title":"Sense8"}
{"index":{"_index":"test_index","_type":"doc","_id":2}}
{"year": "2014","title":"Something else"}
POST /test_index/_search
{
"size": 25,
"from": 0,
"sort": {
"_score": {
"order": "asc"
}
},
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"year": "2015"
}
}
]
}
},
"query": {
"match": {
"title": "Sense"
}
}
}
}
}
...
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": null,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 0.30685282,
"_source": {
"year": "2015",
"title": "Sense8"
},
"sort": [
0.30685282
]
}
]
}
}
You can run the code in your browser here:
http://sense.qbox.io/gist/4f72c182db2017ac7d32077af16cbc3528cb79f0