Elastic query to show exact match OR other fields if not found - json

I need some help rewriting my elasticsearch query.
What i need is:
1- to show a single record if there is an exact match on the two fields verb and sessionid.raw (partial matches are not accepted).
"must": [
{ "match" : { "verb" : "login" } },
{ "term" : { "sessionid.raw" : strSessionID } },
]
OR
2- to show the top 5 records (sorted by _score DESC and #timestamp ASC) that match some other fields, giving a boost if the records are between the specified time range.
"must": [
{ "match" : { "verb" : "login" } },
{ "term" : { "pid" : strPID } },
],
"should": [
{ "match" : { "user.raw" : strUser } },
{ "range" : { "#timestamp" : {
"from" : QueryFrom,
"to" : QueryTo,
"format" : DateFormatElastic,
"time_zone" : "America/Sao_Paulo",
"boost" : 2 }
} },
]
The code below is almost doing what i want.
Right now it boosts sessionid.raw to the top if found, but the remaining records are not being discarded.
var objQueryy = {
"fields" : [ "#timestamp", "program", "pid", "sessionid.raw", "user", "frontendip", "frontendname", "_score" ],
"size" : ItemsPerPage,
"sort" : [ { "_score" : { "order": "desc" } }, { "#timestamp" : { "order" : "asc" } } ],
"query" : {
"bool": {
"must": [
{ "match" : { "verb" : "login" } },
{ "term" : { "pid" : strPID } },
{ "bool": {
"should": [
{ "match" : { "user.raw" : strUser } },
{ "match" : { "sessionid.raw": { "query": strSessionID, "boost" : 3 } } },
{ "range" : { "#timestamp" : { "from": QueryFrom, "to": QueryTo, "format": DateFormatElastic, "time_zone": "America/Sao_Paulo" } } },
],
}},
],
},
},
}

Elasticsearch cannot "prune" your secondary results for you when an exact match is also found.
You would have to implement this discarding functionality on the client side after all results had been returned.
You may find the cleanest implementation is to execute your two search strategies separately. Your search client would:
Run the first (exact match) query
Run the second (expanded) query only if no results found

Related

Nested inner hits in FreeText search use case

I building a standard free text search on a site that sells cars.
In the search box the user can enter a search word that are passed on to the query where it is used to match both nested and non-nested properties.
I'm using inner_hits to limit the number of variants returned by the query (in this sample variants is not remove from _source)
When matching on a nested property color the inner_hits collection contains the correct variant as expected.
However when matching on a non-nested property title the inner_hits collection is empty. I understand why it's empty.
Can you suggest a better way to structure the query?
Another option would be to always just return at least 1 variant - but how can the be achieved?
Mappings
PUT test
{
"mappings": {
"car": {
"properties": {
"variants": {
"type": "nested"
}
}
}
}
}
Insert data
PUT test/car/1
{
"title": "VW Golf",
"variants": [
{
"color": "red",
"forsale": true
},
{
"color": "blue",
"forsale": false
}
]
}
Query by color
GET test/_search
{
"query": {
"nested": {
"path": "variants",
"query": {
"match": {
"variants.color": "blue"
}
},
"inner_hits": {}
}
}
}
Color query: works as expected!
"hits" : [
{
"_source" : {
"title" : "VW Golf",
"variants" : [
{
"color" : "red",
"forsale" : true
},
{
"color" : "blue",
"forsale" : false
}
]
},
"inner_hits" : {
"variants" : {
"hits" : {
"total" : 1,
"hits" : [
{
"_nested" : {
"field" : "variants",
"offset" : 1
},
"_source" : {
"color" : "blue",
"forsale" : false
}
}
]
}
}
}
}
]
Query by brand
GET test/_search
{
"query": {
"bool": {
"should": [
{
"match": {
"title": "golf"
}
},
{
"nested": {
"path": "variants",
"query": {
"match": {
"variants.color": "golf"
}
},
"inner_hits": {}
}
}
]
}
}
}
Brand query result :-(
"hits" : [
{
"_source" : {
"title" : "VW Golf",
"variants" : [
{
"color" : "red",
"forsale" : true
},
{
"color" : "blue",
"forsale" : false
}
]
},
"inner_hits" : {
"variants" : {
"hits" : {
"total" : 0,
"hits" : [ ]
}
}
}
}
You already know it but inner_hits returns an empty array because no nested documents matched in the nested query.
A simple solution is to change the query such that the nested query will always match. This can be done by wrapping the nested query into a bool query and add a match_all query.
If you set the boost of the match_all query to 0, it will not contribute to the score. Consequently, if a nested document match it will be first.
Now the inner hits will not be empty, but there is a second problem, all the documents will match. You can either:
set a min_score with a very small value (e.g., 0.00000001) to discard document with a score of 0
duplicate the original nested query and use a minimum_should_match at 2.
{
"query": {
"bool": {
// Ensure that at least 1 of the first 2 queries will match
// The third query will always match
"minimum_should_match": 2,
"should": [
{
"match": {
"title": <SEARCH_TERM>
}
},
{
"nested": {
"path": "variants",
"query": {
"match": {
"variants.color": <SEARCH_TERM>
}
}
}
},
{
"nested": {
"path": "variants",
"query": {
"bool": {
"should": [
{
"match": {
"variants.color": <SEARCH_TERM>
}
},
{
// Disable scoring
"match_all": { "boost": 0 }
}
]
}
},
"inner_hits": {}
}
}
]
}
}
}
One way to do it is using a script_fields clause.
You would write a little script in painless that would do the following:
store the List you get from variants in a variable
then iterate over the Maps in this List
if the Map has
color blue you return the Map . (If none evaluate to true you return an empty
Map). This would create an additional field per searchresult with only those variants where the color is blue.
One important drawback is that this is a very heavy operation, especially if you have many records.
You can take this approach if it is something only you will ever do, maybe a few times a year outside peak hours. If your use case is something with regular use and to be performed by many users, I would change the mapping, return variants as a whole or choose some other solution.

Retrieve item list by checking multiple attribute values in MongoDB in golang

This question based on MongoDB,How to retrieve selected items retrieve by selecting multiple condition.It is like IN condition in Mysql
SELECT * FROM venuelist WHERE venueid IN (venueid1, venueid2)
I have attached json data structure that I have used.[Ref: JSON STRUCTUE OF MONGODB ].
As an example, it has a venueList then inside the venue list, It has several attribute venue id and sum of user agents name and total count as value.user agents mean user Os,browser and device information. In this case I used os distribution.In that case i was count linux,ubuntu count on particular venueid.
it is like that,
"sum" : [
{
"name" : "linux",
"value" : 12
},
{
"name" : "ubuntu",
"value" : 4
}
],
Finally I want to get count of all linux user count by selecting venueid list in one find query in MongoDB.
As example, I want to select all count of linux users by conditioning if venue id VID1212 or VID4343
Ref: JSON STRUCTUE OF MONGODB
{
"_id" : ObjectId("57f940c4932a00aba387b0b0"),
"tenantID" : 1,
"date" : "2016-10-09 00:23:56",
"venueList" : [
{
"id" : “VID1212”,
"sum" : [
{
"name" : "linux",
"value" : 12
},
{
"name" : "ubuntu",
"value" : 4
}
],
“ssidList” : [ // this is list of ssid’s in venue
{
"id" : “SSID1212”,
"sum" : [
{
"name" : "linux",
"value" : 8
},
{
"name" : "ubuntu",
"value" : 6
}
],
“macList” : [ // this is mac list inside particular ssid ex: this is mac list inside the SSID1212
{
"id" : “12:12:12:12:12:12”,
"sum" : [
{
"name" : "linux",
"value" : 12
},
{
"name" : "ubuntu",
"value" : 1
}
]
}
]
}
]
},
{
"id" : “VID4343”,
"sum" : [
{
"name" : "linux",
"value" : 2
}
],
"ssidList" : [
{
"id" : “SSID4343”,
"sum" : [
{
"name" : "linux",
"value" : 2
}
],
"macList" : [
{
"id" : “43:43:43:43:43:34”,
"sum" : [
{
"name" : "linux",
"value" : 2
}
]
}
]
}
]
}
]
}
I am using golang as language to manipulation data with mongoldb using mgo.v2 package
expected out put is :
output
linux : 12+2 = 14
ubuntu : 4+0 = 4
Don't consider inner list in venuelist.
You'd need to use the aggregation framework where you would run an aggregation pipeline that first filters the documents in the collection based on
the venueList ids using the $match operator.
The second pipeline would entail flattening the venueList and sum subdocument arrays in order for the data in the documents to be processed further down the pipeline as denormalised entries. The $unwind operator is useful here.
A further filter using $match is necessary after unwinding so that only the documents you want to aggregate are allowed into the next pipeline.
The main pipeline would be the $group operator stage which aggregates the filtered documents to create the desired sums using the accumulator operator $sum. For the desired result, you would need to use a tenary operator like $cond to create the independent count fields since that will feed the number of documents to the $sum expression depending on the name value.
Putting this altogether, consider running the following pipeline:
db.collection.aggregate([
{ "$match": { "venueList.id": { "$in": ["VID1212", "VID4343"] } } },
{ "$unwind": "$venueList" },
{ "$match": { "venueList.id": { "$in": ["VID1212", "VID4343"] } } },
{ "$unwind": "$venueList.sum" },
{
"$group": {
"_id": null,
"linux": {
"$sum": {
"$cond": [
{ "$eq": [ "$venueList.sum.name", "linux" ] },
"$venueList.sum.value", 0
]
}
},
"ubuntu": {
"$sum": {
"$cond": [
{ "$eq": [ "$venueList.sum.name", "ubuntu" ] },
"$venueList.sum.value", 0
]
}
}
}
}
])
For usage with mGo, you can convert the above pipeline using the guidance in http://godoc.org/labix.org/v2/mgo#Collection.Pipe
For a more flexible and better performant alternative which executes much faster than the above, and also takes into consideration unknown values for the sum list, run the alternative pipeline as follows
db.collection.aggregate([
{ "$match": { "venueList.id": { "$in": ["VID1212", "VID4343"] } } },
{ "$unwind": "$venueList" },
{ "$match": { "venueList.id": { "$in": ["VID1212", "VID4343"] } } },
{ "$unwind": "$venueList.sum" },
{
"$group": {
"_id": "$venueList.sum.name",
"count": { "$sum": "$venueList.sum.value" }
}
},
{
"$group": {
"_id": null,
"counts": {
"$push": {
"name": "$_id",
"count": "$count"
}
}
}
}
])

Conversion from sql to elastic search query

I want to convet the foll. sql query to elastic json query
select count(distinct(fk_id)),city_id from table
where status1 != "xyz" and satus2 = "abc" and
cr_date >="date1" and cr_date<="date2" group by city_id
Also is there any way of writing nested queries in elastic.
select * from table where status in (select status from table2)
The first query can be translated like this in the Elasticsearch query DSL:
curl -XPOST localhost:9200/table/_search -d '{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"status2": "abc"
}
},
{
"range": {
"cr_date": {
"gt": "date1", <--- don't forget to change the date
"lt": "date2" <--- don't forget to change the date
}
}
}
],
"must_not": [
{
"term": {
"status1": "xyz"
}
}
]
}
}
}
},
"aggs": {
"by_cities": {
"terms": {
"field": "city_id"
},
"aggs": {
"fk_count": {
"cardinality": {
"field": "fk_id"
}
}
}
}
}
}'
Using Sql API In Elastic search, we can write queries and also we can translate them to elastic query
POST /_sql/translate
{
"query": "SELECT * FROM customer where address.Street='JanaChaitanya Layout' and Name='Pavan Kumar'"
}
Response for this is
{
"size" : 1000,
"query" : {
"bool" : {
"must" : [
{
"term" : {
"address.Street.keyword" : {
"value" : "JanaChaitanya Layout",
"boost" : 1.0
}
}
},
{
"term" : {
"Name.keyword" : {
"value" : "Pavan Kumar",
"boost" : 1.0
}
}
}
],
"adjust_pure_negative" : true,
"boost" : 1.0
}
},
"_source" : {
"includes" : [
"Name",
"address.Area",
"address.Street"
],
"excludes" : [ ]
},
"docvalue_fields" : [
{
"field" : "Age"
}
],
"sort" : [
{
"_doc" : {
"order" : "asc"
}
}
]
}
Now we can use this result to query elastic search
For further details please go through this article
https://xyzcoder.github.io/elasticsearch/2019/06/25/making-use-of-sql-rest-api-in-elastic-search-to-write-queries-easily.html

Nested filter numerical range

I have the following json object:
{
"Title": "Terminator,
"Purchases": [
{"Country": "US", "Site": "iTunes", "Price": 4.99},
{"Country": "FR", "Site": "Google", "Price": 5.99}
]
}
I want to be able to find an object specifying a Country+Site+PriceRange. For example, the above should return True on Country=US&Price<5.00, but should return False on Country=FR&Price<5.00. How would the index and query look to do this? Here is another answer that this is a follow-up question to: Search within array object.
Simply add a Range query to your Bool query logic tree. This will return documents that match US for country and have the Price field with a numeric value less than 5.
{ "query":
{ "nested" : {
"path" : "Purchases",
"score_mode" : "avg",
"query" : {
"bool" : {
"must" : [
{
"match" : {"Purchases.Country" : "US"}
},
{
"range" : "Purchases.Price":
{
"lte": 5
}
}
]
}
}
}
}
}

Finding JSON objects in mongoDB

I'm trying to find objects using the built it queries and It just doesn't work..
My JSON file is something like this:
{ "Text1":
{
"id":"2"
},
"Text2":
{
"id":"2,3"
},
"Text3":
{
"id":"1"
}
}
And I write this db.myCollection.find({"id":2})
And it doesn't find anything.
When I write db.myCollection.find() it shows all the data as it should.
Anyone knows how to do it correctly?
Its hard to change the data-structure but as you want just your matching sub-document and you don't know where is your target sub-document (for example the query should be on Text1 or Text2 , ...) there is a good data structure for this:
{
"_id" : ObjectId("548dd9261a01c68fab8d67d7"),
"pair" : [
{
"id" : "2",
"key" : "Text1"
},
{
"id" : [
"2",
"3"
],
"key" : "Text2"
},
{
"id" : "1",
"key" : "Text3"
}
]
}
and your query is:
db.myCollection.findOne({'pair.id' : "2"} , {'pair.$':1, _id : -1}).pair // there is better ways (such as aggregation instead of above query)
as result you will have:
{
"0" : {
"id" : "2",
"key" : "Text1"
}
}
Update 1 (newbie way)
If you want all the document not just one use this
var result = [];
db.myCollection.find({'pair.id' : "2"} , {'pair.$':1, _id : -1}).forEach(function(item)
{
result.push(item.pair);
});
// the output will be in result
Update 2
Use this query to get all sub-documents
db.myCollection.aggregate
(
{ $unwind: '$pair' },
{ $match : {'pair.id' : "2"} }
).result
it produce output as
{
"0" : {
"_id" : ObjectId("548deb511a01c68fab8d67db"),
"pair" : {
"id" : "2",
"key" : "Text1"
}
},
"1" : {
"_id" : ObjectId("548deb511a01c68fab8d67db"),
"pair" : {
"id" : [
"2",
"3"
],
"key" : "Text2"
}
}
}
Since your are query specify a field in a subdocument this is what will work. see .find() documentation.
db.myCollection.find({"Text1.id" : "2"}, {"Text1.id": true})
{ "_id" : ObjectId("548dd798e2fa652e675af11d"), "Text1" : { "id" : "2" } }
If the query is on "Text1" or "Text2" the best thing to do here as mention in the accepted answer is changing you document structure. This can be easily done using the "Bulk" API.
var bulk = db.mycollection.initializeOrderedBulkOp(),
count = 0;
db.mycollection.find().forEach(function(doc) {
var pair = [];
for(var key in doc) {
if(key !== "_id") {
var id = doc[key]["id"].split(/[, ]/);
pair.push({"key": key, "id": id});
}
}
bulk.find({"_id": doc._id}).replaceOne({ "pair": pair });
count++; if (count % 300 == 0){
// Execute per 300 operations and re-Init
bulk.execute();
bulk = db.mycollection.initializeOrderedBulkOp();
}
})
// Clean up queues
if (count % 300 != 0 )
bulk.execute();
Your document now look like this:
{
"_id" : ObjectId("55edddc6602d0b4fd53a48d8"),
"pair" : [
{
"key" : "Text1",
"id" : [
"2"
]
},
{
"key" : "Text2",
"id" : [
"2",
"3"
]
},
{
"key" : "Text3",
"id" : [
"1"
]
}
]
}
Running the following query:
db.mycollection.aggregate([
{ "$project": {
"pair": {
"$setDifference": [
{ "$map": {
"input": "$pair",
"as": "pr",
"in": {
"$cond": [
{ "$setIsSubset": [ ["2"], "$$pr.id" ]},
"$$pr",
false
]
}
}},
[false]
]
}
}}
])
returns:
{
"_id" : ObjectId("55edddc6602d0b4fd53a48d8"),
"pair" : [
{
"key" : "Text1",
"id" : [
"2"
]
},
{
"key" : "Text2",
"id" : [
"2",
"3"
]
}
]
}