I have an Elasticsearch query and I'm trying to add a list of acronyms (or synonyms) to it, but I can't figure out where to place it.
Let's say the synonyms list is {"HR": "Human Ressources", "AWS": "Amazon Web Service"}
The request is the following:
{
"query": {
"bool": {
"filter": [
{
"terms": {
"observatory": [
"rome",
"meban",
"emass",
"cigref",
"opiiec",
"null"
]
}
},
{
"terms": {
"referentiel_id": [
"null",
42,
48,
52
]
}
}
],
"must": {
"match": {
"skill": {
"query": "*dactif*",
"fuzziness": "AUTO"
}
}
}
}
}
}
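You can use a synonym token filter to handle synonyms in your search query. The synonyms are not placed in the search request itself; they are defined in the index settings as part of an analyzer, and that analyzer is then applied to the field in the mapping.
Here is a working example with the index mapping, index data, search query and search result.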
Index Mapping:
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"synonym": {
"tokenizer": "whitespace",
"filter": [
"synonym"
]
}
},
"filter": {
"synonym": {
"type": "synonym",
"synonyms": [
"HR, Human Ressources",
"AWS, Amazon Web Service"
]
}
}
}
}
},
"mappings": {
"properties": {
"observatory": {
"type": "text",
"analyzer": "synonym"
}
}
}
}
Index Data:
{
"observatory":"HR"
}
{
"observatory":"Human Ressources"
}
Search Query:
{
"query": {
"bool": {
"should": [
{
"match": {
"observatory": {
"query": "HR"
}
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "67707925",
"_type": "_doc",
"_id": "1",
"_score": 0.487735,
"_source": {
"observatory": "Human Ressources"
}
},
{
"_index": "67707925",
"_type": "_doc",
"_id": "2",
"_score": 0.487735,
"_source": {
"observatory": "HR"
}
}
]
Related
I am trying to query my Elasticsearch server from Python. If I hard-code the URL and the query in my Python script (the search() function below), it works fine. However, if I store the URL and the query in a separate XML file (a property file) and read them from there, I don't get any results; instead I get the error below:
text '{"error":{"root_cause":[{"type":"parsing_exception","reason":"Expected [START_OBJECT] but found [VALUE_STRING]","line":1,"col":1}],"type":"parsing_exception","reason":"Expected [START_OBJECT] but found [VALUE_STRING]","line":1,"col":1},"status":400}' str
Here is my code I am using
def search():
url="http://0.0.0.0/logstash-pse*/_search/"
query={ "size": 0, "aggs": { "2": { "date_histogram": { "field": "#timestamp", "interval": "30m", "time_zone": "America/Chicago", "min_doc_count": 1 }, "aggs": { "3": { "terms": { "field": "queryname.keyword", "size": 100, "order": { "1.90": "desc" } }, "aggs": { "1": { "percentiles": { "field": "queryResponseTime", "percents": [ 90 ], "keyed": "false" } } } } } } }, "query": { "bool": { "must": [ { "query_string": { "query": "path: \"/store_locator/\"", "analyze_wildcard": "true" } }, { "query_string": { "analyze_wildcard": "true", "query": "*" } }, { "range": { "#timestamp": { "gte": 1527181463371, "lte": 1527267863371, "format": "epoch_millis" } } } ], "must_not": [] } }, "highlight": { "pre_tags": [ "#kibana-highlighted-field#" ], "post_tags": [ "#/kibana-highlighted-field#" ], "fields": { "*": { "highlight_query": { "bool": { "must": [ { "query_string": { "query": "path: \"/store_locator/\"", "analyze_wildcard": "true", "all_fields": "true" } }, { "query_string": { "analyze_wildcard": "true", "query": "*", "all_fields": "true" } }, { "range": { "#timestamp": { "gte": 1527181463371, "lte": 1527267863371, "format": "epoch_millis" } } } ], "must_not": [] } } } }, "fragment_size": 2147483647 }, "_source": { "excludes": [] }, "version": "true"}
response = requests.post(url, auth=(user, password), verify=False,json=query)
XML property file I am using like the below:
<custom>
<url>the above url goes here</url>
<query> above query </query>
</custom>
Any idea what I am missing? Any help is much appreciated.
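I was able to figure it out after some experimenting. In case someone else is looking for the answer: the query read from the XML file is a plain string, so passing it as json=query sends a JSON string instead of a JSON object, which is why Elasticsearch reports "Expected [START_OBJECT] but found [VALUE_STRING]".
Parsing the string into a dict with json.loads before passing it fixes the problem: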
response = requests.post(url, auth=(user, password), verify=False,json=json.loads(query))
I'm using the ELK stack and I'm trying to find out how to visualize all logs except those from specific IP ranges (for example 10.0.0.0/8). Is there any way to negate this filter query?
{"wildcard":{"src_address":"10.*"}}
I put it in Buckets -> Split Bars -> Aggregation -> Filters, and I would like to negate this query so that I get all logs except those from 10.0.0.0/8.
This is the whole JSON request:
{
"query": {
"filtered": {
"query": {
"query_string": {
"query": "low_level_category:\"user_authentication_failure\" AND NOT src_address:\"10.*\"",
"analyze_wildcard": true
}
},
"filter": {
"bool": {
"must": [
{
"range": {
"#timestamp": {
"gte": 1474384885044,
"lte": 1474989685044,
"format": "epoch_millis"
}
}
}
],
"must_not": []
}
}
}
},
"size": 0,
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"interval": "3h",
"time_zone": "Europe/Berlin",
"min_doc_count": 200,
"extended_bounds": {
"min": 1474384885043,
"max": 1474989685043
}
},
"aggs": {
"3": {
"terms": {
"field": "src_address.raw",
"size": 5,
"order": {
"_count": "desc"
}
}
}
}
}
}
}
Thanks
You can input this in the Kibana search box and it should get you what you need:
NOT src_address:10.*
I'm trying to filter my query by two fields, but I keep getting an error. I'm using the AND query as suggested by the Elasticsearch docs (it's actually a 'bool' query), here:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-post-filter.html
GET /index_v1/user/_search
{
"query": {
"bool": {
"filter": {
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }}
}
}
}
}
This works-
GET /index_v1/user/_search
{
"query": {
"filtered": {
"query": {
"match": {
"id": "101"
}
}
}
}
}
and returns this-
{
"took": 24,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 3.442347,
"hits": [
{
"_index": "index_v1",
"_type": "user",
"_id": "1",
"_score": 3.442347,
"_source": {
"id": "101",
"firstName": "John",
"guid": "1001",
"lastName": "Doe",
"email": "john.doe#company.com",
"entitlements": {
"id": "en2"
}
}
},
{
"_index": "index_v1",
"_type": "user",
"_id": "2",
"_score": 3.140066,
"_source": {
"id": "101",
"firstName": "John",
"guid": "1001",
"lastName": "Doe",
"email": "john.doe#company.com",
"tenants": [
{
"id": "12345",
"roles": [
"PrimaryAdmin"
]
}
],
"entitlements": {
"id": "en2"
}
}
}
]
}
}
Here's the mapping document-
{
"index_v1": {
"mappings": {
"user": {
"properties": {
"email": {
"type": "string"
},
"entitlements": {
"properties": {
"id": {
"type": "string"
}
}
},
"firstName": {
"type": "string"
},
"guid": {
"type": "string"
},
"id": {
"type": "string"
},
"lastName": {
"type": "string"
},
"tenants": {
"properties": {
"id": {
"type": "string"
},
"roles": {
"type": "string"
}
}
}
}
}
}
}
}
Also, how can I add this to the AND condition?
["tenants"]["id"]="12345"
You have to run a filtered query to use filters. The relevant example you'll want is here.
GET /index_v1/user/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"and": [
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }},
{ "term": { "tenants.id": "12345" }}
]
}
}
}
}
That should be roughly it, though I'm sure you'll have to tweak it (I'm a little rusty).
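In order for the id fields to match exactly, you'll want to map those fields so that they are not analyzed (for `string` fields, set "index": "not_analyzed" in the mapping; newer Elasticsearch versions use the `keyword` type for this). Otherwise ES will tokenize and lowercase the values at index time, and `term` filters, which are not analyzed, may give you unexpected results.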
The query posted by Nick Larson should work fine, but as for what exactly is wrong with your query: you are using curly brackets where you should be using square brackets (in its current form it is actually invalid JSON). "filter" should be an array here, so you have to use square brackets:
GET /index_v1/user/_search
{
"query": {
"bool": {
"filter": [
{ "term": { "id": "101" }},
{ "term": { "firstName": "John" }}
]
}
}
}
I'm trying to make a search page with extra filter items, but I can't get my query to work the way I want it to.
Here's the query example:
{
"size": 25,
"from": 0,
"sort": {
"_score": {
"order": "asc"
}
},
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"year": "2015"
}
}
]
}
},
"query": {
"match": {
"title": "Sense"
}
}
}
}
}
I only want results that are from 2015. Searching for the title 'Sense' returns nothing, even though there is a row with the title 'Sense8'. If I search for 'Sense8', it returns the correct data, but 'Sense' does not.
What am I doing wrong?
Thanks
You probably need to use an ngram or edge ngram analyzer in your mapping, so that partial terms such as 'Sense' are indexed for 'Sense8'. I wrote a blog post about using ngrams for autocomplete on the Qbox blog that goes through it in some detail, but here is some code that might give you what you want:
PUT /test_index
{
"settings": {
"analysis": {
"filter": {
"ngram_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 20,
"token_chars": [
"letter",
"digit",
"punctuation",
"symbol"
]
}
},
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"ngram_filter"
]
},
"whitespace_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding"
]
}
}
}
},
"mappings": {
"doc": {
"properties": {
"year":{
"type": "string"
},
"title":{
"type": "string",
"index_analyzer": "ngram_analyzer",
"search_analyzer": "whitespace_analyzer"
}
}
}
}
}
POST /test_index/_bulk
{"index":{"_index":"test_index","_type":"doc","_id":1}}
{"year": "2015","title":"Sense8"}
{"index":{"_index":"test_index","_type":"doc","_id":2}}
{"year": "2014","title":"Something else"}
POST /test_index/_search
{
"size": 25,
"from": 0,
"sort": {
"_score": {
"order": "asc"
}
},
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"year": "2015"
}
}
]
}
},
"query": {
"match": {
"title": "Sense"
}
}
}
}
}
...
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": null,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 0.30685282,
"_source": {
"year": "2015",
"title": "Sense8"
},
"sort": [
0.30685282
]
}
]
}
}
You can run the code in your browser here:
http://sense.qbox.io/gist/4f72c182db2017ac7d32077af16cbc3528cb79f0
I have two indexes, index1 and index2, and both have two types, type1 and type2, with the same names in Elasticsearch. (Please assume that we have a valid business reason behind this.)
I would like to search only type1 in index1 and type2 in index2.
Here is my query:
POST _search
{
"query": {
"indices": {
"indices": ["index1","index2"],
"query": {
"filtered":{
"query":{
"multi_match": {
"query": "test",
"type": "cross_fields",
"fields": ["_all"]
}
},
"filter":{
"or":{
"filters":[
{
"terms":{
"_index":["index1"], // how can i make this work?
"_type": ["type1"]
}
},
{
"terms":{
"_index":["index2"], // how can i make this work?
"_type": ["type2"]
}
}
]
}
}
}
},
"no_match_query":"none"
}
}
}
You can combine the `indices` and `type` filters inside a `bool` filter to filter on both index and type.
The query would look something along these lines:
POST index1,index2/_search
{
"query": {
"filtered": {
"query": {
"multi_match": {
"query": "test",
"type": "cross_fields",
"fields": [
"_all"
]
}
},
"filter": {
"bool": {
"should": [
{
"indices": {
"index": "index1",
"filter": {
"type": {
"value": "type1"
}
},
"no_match_filter": "none"
}
},
{
"indices": {
"index": "index2",
"filter": {
"type": {
"value": "type2"
}
},
"no_match_filter": "none"
}
}
]
}
}
}
}
}
Passing the index names in the URL (for example, index1,index2/_search) is good practice; otherwise you risk executing the query across all indices in the cluster.