Mongodb aggregate, match, group and sort on complex JSON - json

I receive a large amount of JSON (flight data), and I've been thinking about using MongoDB as the tool to query/filter it before returning it to the UI.
Here's the json response from Sabre (flight search)
{
"OTA_AirLowFareSearchRS": {
"PricedItineraries": {
"PricedItinerary": [
{
"SequenceNumber": 1,
"AirItinerary": {
"OriginDestinationOptions": {
"OriginDestinationOption": [
{
"FlightSegment": [
{
"DepartureDateTime": "2015-11-12T11:40:00",
"ArrivalDateTime": "2015-11-12T17:35:00",
"FlightNumber": "1980"
},
{
"DepartureDateTime": "2015-11-12T19:35:00",
"ArrivalDateTime": "2015-11-13T02:00:00",
"FlightNumber": "760"
}
]
},
{
"FlightSegment": [
{
"DepartureDateTime": "2015-11-19T08:25:00",
"ArrivalDateTime": "2015-11-19T11:40:00",
"FlightNumber": "763"
},
{
"DepartureDateTime": "2015-11-19T12:55:00",
"ArrivalDateTime": "2015-11-19T15:05:00",
"FlightNumber": "1985"
}
]
}
]
}
},
"AirItineraryPricingInfo": [
{
"ItinTotalFare": {
"TotalFare": {
"Amount": 269.56,
"CurrencyCode": "GBP"
}
}
}
]
},
{
"SequenceNumber": 2,
"AirItinerary": {
"OriginDestinationOptions": {
"OriginDestinationOption": [
{
"FlightSegment": [
{
"DepartureDateTime": "2015-11-12T16:45:00",
"ArrivalDateTime": "2015-11-12T22:40:00",
"FlightNumber": "1986"
},
{
"DepartureDateTime": "2015-11-13T00:40:00",
"ArrivalDateTime": "2015-11-13T07:10:00",
"FlightNumber": "762"
}
]
},
{
"ElapsedTime": 640,
"FlightSegment": [
{
"DepartureDateTime": "2015-11-19T08:25:00",
"ArrivalDateTime": "2015-11-19T11:40:00",
"FlightNumber": "763"
},
{
"DepartureDateTime": "2015-11-19T12:55:00",
"ArrivalDateTime": "2015-11-19T15:05:00",
"FlightNumber": "1985"
}
]
}
]
}
},
"AirItineraryPricingInfo": [
{
"ItinTotalFare": {
"TotalFare": {
"Amount": 269.56,
"CurrencyCode": "GBP"
}
}
}
]
},
{
"SequenceNumber": 6,
"AirItinerary": {
"OriginDestinationOptions": {
"OriginDestinationOption": [
{
"FlightSegment": [
{
"DepartureDateTime": "2015-11-12T11:40:00",
"ArrivalDateTime": "2015-11-12T17:35:00",
"FlightNumber": "1980"
},
{
"DepartureDateTime": "2015-11-12T19:35:00",
"ArrivalDateTime": "2015-11-13T02:00:00",
"FlightNumber": "760"
}
]
},
{
"FlightSegment": [
{
"DepartureDateTime": "2015-11-19T03:15:00",
"ArrivalDateTime": "2015-11-19T06:30:00",
"FlightNumber": "761"
},
{
"DepartureDateTime": "2015-11-19T12:55:00",
"ArrivalDateTime": "2015-11-19T15:05:00",
"FlightNumber": "1985"
}
]
}
]
}
},
"AirItineraryPricingInfo": [
{
"ItinTotalFare": {
"TotalFare": {
"Amount": 269.56
}
}
}
]
}
]
}
}
}
I've been trying to get this into the view that I want, but I'm struggling with the complexity of all the nested arrays. My question is: how can I achieve a desired result like this:
{
'Price': 269.56, <-- //Group on price (TotalFare)
'Outbound': [{
<Outbound Flights> <--- //$push? flights at array position [0] of OriginDestinationOption
}],
'Inbound': [{
<Inbound Flights> <-- // flights at array position [1] of OriginDestinationOption
}]
},
...
The locations of the data for these in the JSON is:
Price: OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary[x].AirItineraryPricingInfo[0].ItinTotalFare.TotalFare.Amount;
Outbound: OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary[x].AirItinerary.OriginDestinationOptions.OriginDestinationOption[0]
Inbound: OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary[x].AirItinerary.OriginDestinationOptions.OriginDestinationOption[1]

With the current MongoDB release, the only way you can get results which are closer to what you want is by using the aggregation framework. Working on the premise that the OriginDestinationOption array will have two elements, you'd need the $first and $last operators to select the first and last elements in the array after the $unwind operator. For now (based on the above assumptions) you may have to make do with running this pipeline:
db.flights.aggregate([
{ "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary" },
{ "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo" },
{ "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption" },
{
"$project": {
"Price": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo.ItinTotalFare.TotalFare.Amount",
"DestinationOptions": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption"
}
},
{
"$group": {
"_id": "$Price",
"Outbound" : { "$first": "$DestinationOptions" },
"Inbound" : { "$last": "$DestinationOptions" }
}
}
])
which will yield the result (from the sample data):
/* 1 */
{
"result" : [
{
"_id" : 269.56,
"Outbound" : {
"ElapsedTime" : 620,
"FlightSegment" : [
{
"DepartureDateTime" : "2015-11-12T11:40:00",
"ArrivalDateTime" : "2015-11-12T17:35:00",
"StopQuantity" : 0,
"FlightNumber" : "1980",
"ElapsedTime" : 235,
"DepartureAirport" : {
"LocationCode" : "LHR",
"TerminalID" : "2",
"content" : ""
},
"ArrivalAirport" : {
"LocationCode" : "IST",
"TerminalID" : "I",
"content" : ""
},
"OperatingAirline" : {
"Code" : "TK",
"FlightNumber" : "1980",
"content" : ""
}
},
{
"DepartureDateTime" : "2015-11-12T19:35:00",
"ArrivalDateTime" : "2015-11-13T02:00:00",
"StopQuantity" : 0,
"FlightNumber" : "760",
"ResBookDesigCode" : "W",
"ElapsedTime" : 265,
"DepartureAirport" : {
"LocationCode" : "IST",
"TerminalID" : "I",
"content" : ""
},
"ArrivalAirport" : {
"LocationCode" : "DXB",
"TerminalID" : "1",
"content" : ""
},
"OperatingAirline" : {
"Code" : "TK",
"FlightNumber" : "760",
"content" : ""
},
"Equipment" : [
{
"AirEquipType" : "343",
"content" : ""
}
],
"MarketingAirline" : {
"Code" : "TK",
"content" : ""
},
"MarriageGrp" : "I",
"DepartureTimeZone" : {
"GMTOffset" : 2
},
"ArrivalTimeZone" : {
"GMTOffset" : 4
},
"TPA_Extensions" : {
"eTicket" : {
"Ind" : true
}
}
}
]
},
"Inbound" : {
"ElapsedTime" : 730,
"FlightSegment" : [
{
"DepartureDateTime" : "2015-11-19T08:25:00",
"ArrivalDateTime" : "2015-11-19T11:40:00",
"StopQuantity" : 0,
"FlightNumber" : "763",
"ResBookDesigCode" : "W",
"ElapsedTime" : 315,
"DepartureAirport" : {
"LocationCode" : "DXB",
"TerminalID" : "1",
"content" : ""
},
"ArrivalAirport" : {
"LocationCode" : "IST",
"TerminalID" : "I",
"content" : ""
},
"OperatingAirline" : {
"Code" : "TK",
"FlightNumber" : "763",
"content" : ""
},
"Equipment" : [
{
"AirEquipType" : "330",
"content" : ""
}
],
"MarketingAirline" : {
"Code" : "TK",
"content" : ""
},
"MarriageGrp" : "O",
"DepartureTimeZone" : {
"GMTOffset" : 4
},
"ArrivalTimeZone" : {
"GMTOffset" : 2
},
"TPA_Extensions" : {
"eTicket" : {
"Ind" : true
}
}
},
{
"DepartureDateTime" : "2015-11-19T14:25:00",
"ArrivalDateTime" : "2015-11-19T16:35:00",
"StopQuantity" : 0,
"FlightNumber" : "1971",
"ResBookDesigCode" : "W",
"ElapsedTime" : 250,
"DepartureAirport" : {
"LocationCode" : "IST",
"TerminalID" : "I",
"content" : ""
},
"ArrivalAirport" : {
"LocationCode" : "LHR",
"TerminalID" : "2",
"content" : ""
},
"OperatingAirline" : {
"Code" : "TK",
"FlightNumber" : "1971",
"content" : ""
},
"Equipment" : [
{
"AirEquipType" : "32B",
"content" : ""
}
],
"MarketingAirline" : {
"Code" : "TK",
"content" : ""
},
"MarriageGrp" : "I",
"DepartureTimeZone" : {
"GMTOffset" : 2
},
"ArrivalTimeZone" : {
"GMTOffset" : 0
},
"TPA_Extensions" : {
"eTicket" : {
"Ind" : true
}
}
}
]
}
}
],
"ok" : 1
}
However, with the future releases (MongoDB 3.2 and newer), there are two operators $slice and $arrayElemAt which will work for you to produce the desired result. The $slice operator returns a subset of an array and the $arrayElemAt returns the element at the specified array index.
Thus you will implement the pipeline as follows -
Case 1. Using the $slice operator:
// Groups the origin/destination options by total fare and then selects the
// legs by array position using the $slice aggregation operator (MongoDB 3.2+).
db.flights.aggregate([
    // Flatten the nested arrays: one document per priced itinerary,
    // per pricing-info entry, per origin/destination option.
    { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary" },
    { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo" },
    { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption" },
    {
        // Keep only the fare amount and the (now single) option.
        "$project": {
            "Price": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo.ItinTotalFare.TotalFare.Amount",
            "DestinationOptions": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption"
        }
    },
    {
        // Collect the options back into one array per distinct price.
        "$group": {
            "_id": "$Price",
            "DestinationOptions" : { "$push": "$DestinationOptions" }
        }
    },
    {
        "$project": {
            // Position 0 is the outbound leg and position 1 the inbound leg:
            // in the sample data option [0] departs on 2015-11-12 and
            // option [1] departs on 2015-11-19 (the return).
            "Outbound": { "$slice": [ "$DestinationOptions", 0, 1 ] },
            "Inbound": { "$slice": [ "$DestinationOptions", 1, 1 ] },
            "Price": "$_id",
            "_id": 0
        }
    }
])
Case 2. Using the $arrayElemAt operator:
// Groups the origin/destination options by total fare and then selects the
// legs by array position using the $arrayElemAt operator (MongoDB 3.2+).
db.flights.aggregate([
    // Flatten the nested arrays: one document per priced itinerary,
    // per pricing-info entry, per origin/destination option.
    { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary" },
    { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo" },
    { "$unwind": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption" },
    {
        // Keep only the fare amount and the (now single) option.
        "$project": {
            "Price": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItineraryPricingInfo.ItinTotalFare.TotalFare.Amount",
            "DestinationOptions": "$OTA_AirLowFareSearchRS.PricedItineraries.PricedItinerary.AirItinerary.OriginDestinationOptions.OriginDestinationOption"
        }
    },
    {
        // Collect the options back into one array per distinct price.
        "$group": {
            "_id": "$Price",
            "DestinationOptions" : { "$push": "$DestinationOptions" }
        }
    },
    {
        "$project": {
            // Index 0 is the outbound leg and index 1 the inbound leg:
            // in the sample data option [0] departs on 2015-11-12 and
            // option [1] departs on 2015-11-19 (the return).
            "Outbound": { "$arrayElemAt": [ "$DestinationOptions", 0 ] },
            "Inbound": { "$arrayElemAt": [ "$DestinationOptions", 1 ] },
            "Price": "$_id",
            "_id": 0
        }
    }
])

Related

Query with aggregate, lookup and pipeline in Mongodb

I have to manage a wall with comments, each wall have many parent comments and each parent comment have child comments.
my collection walls is like this
groupId : {type: Schema.Types.ObjectId, ref: 'groups', unique: true},
comments : [{
commentId : {type: Schema.Types.ObjectId, ref: 'comments'},
user : {type: Schema.Types.ObjectId, ref: 'users'},
}],
and the collection comments is like this
text : String,
parentCommentId : {type: Schema.Types.ObjectId, ref: 'comments', default : null},
I want to display my wall by parent comments, each child comment under its parent comment.
i tried this query but it didn't return any results
db.getCollection('walls').aggregate([
{$match: {groupId: ObjectId("5e8c5caa75b1cd342a1175eb")}},
{
"$lookup": {
from: "comments",
let: { item: "$comments.commentId" },
pipeline: [
{ $match:
{ $expr: { $eq: [ "$parentCommentId", "$$item" ] }
}
},
{ $project: {
"_id": 1,
"parentCommentId": 1,
"text": 1
} }
],
as: "comments"
}
},
{
$project: {
groupId: 1,
"comments":1,
date: 1
}
}
])
data in walls
{
"_id" : ObjectId("5e95b4b49d3e303d667a8b71"),
"groupId" : ObjectId("5e8c5caa75b1cd342a1175eb"),
"comments" : [
{
"_id" : ObjectId("5e95b4b49d3e303d667a8b72"),
"commentId" : ObjectId("5e95b4b49d3e303d667a8b70")
},
{
"_id" : ObjectId("5e95b4ef80ae1244693aa857"),
"commentId" : ObjectId("5e95b4ef80ae1244693aa856")
},
{
"_id" : ObjectId("5e95b51080ae1244693aa859"),
"commentId" : ObjectId("5e95b51080ae1244693aa858")
},
{
"_id" : ObjectId("5e95b51d80ae1244693aa85b"),
"commentId" : ObjectId("5e95b51d80ae1244693aa85a")
},
{
"_id" : ObjectId("5e95b53580ae1244693aa85e"),
"commentId" : ObjectId("5e95b53580ae1244693aa85c")
}
],
}
data in comments
{
"_id" : ObjectId("5e95b4b49d3e303d667a8b70"),
"parentCommentId" : null,
"text" : "Hello parent 1"
}
{
"_id" : ObjectId("5e95b4ef80ae1244693aa856"),
"parentCommentId" : null,
"text" : "Hello parent 2",
"date" : ISODate("2020-04-14T13:04:47.860Z")
}
{
"_id" : ObjectId("5e95b51080ae1244693aa858"),
"parentCommentId" : ObjectId("5e95b4b49d3e303d667a8b70"),
"text" : "Hello child 1 parent 1"
}
{
"_id" : ObjectId("5e95b51d80ae1244693aa85a"),
"parentCommentId" : ObjectId("5e95b4b49d3e303d667a8b70"),
"text" : "Hello child 2 parent 1"
}
{
"_id": "5e95b53580ae1244693aa85c",
"parentCommentId": "5e95b4ef80ae1244693aa856",
"text": "Hello child 1 parent 2",
}
desired result
{
  "success": true,
  "data": [
    {
      "_id": "5e95b4b49d3e303d667a8b71",
      "groupId": "5e8c5caa75b1cd342a1175eb",
      "comments": [
        {
          "_id": "5e95b4b49d3e303d667a8b70",
          "parentCommentId": null,
          "text": "Hello parent 1",
          "childs": [
            {
              "_id": "5e95b51080ae1244693aa858",
              "parentCommentId": "5e95b4b49d3e303d667a8b70",
              "text": "Hello child 1 parent 1"
            },
            {
              "_id": "5e95b51d80ae1244693aa85a",
              "parentCommentId": "5e95b4b49d3e303d667a8b70",
              "text": "Hello child 2 parent 1"
            }
          ]
        },
        {
          "_id": "5e95b4ef80ae1244693aa856",
          "parentCommentId": null,
          "text": "Hello parent 2",
          "childs": [
            {
              "_id": "5e95b53580ae1244693aa85c",
              "parentCommentId": "5e95b4ef80ae1244693aa856",
              "text": "Hello child 1 parent 2"
            }
          ]
        }
      ]
    }
  ]
}
How can i modify my query ? thank you.
You can use below aggregation
db.walls.aggregate([
{ "$lookup": {
"from": "comments",
"let": { "commentIds": "$comments.commentId" },
"pipeline": [
{ "$match": {
"$expr": { "$in": ["$_id", "$$commentIds"] },
"parentCommentId": null
}},
{ "$sort": { "text": -1 }},
{ "$graphLookup": {
"from": "comments",
"startWith": "$_id",
"connectFromField": "parentCommentId",
"connectToField": "parentCommentId",
"as": "childs"
}}
],
"as": "comments"
}}
])
MongoPlayground

Elasticsearch filtering nested object

I have a simplified object that looks something like this:
"name" : "Partner Name",
"features" : [
{
"val" : "Family",
"key" : "Type"
},
{
"val" : "Paris",
"key" : "City"
}
],
"variants" : [
{
"name" : "Activity 1 Name",
"description" : "Quick description",
"price" : 20
}
]
I want to filter by the City and Type keys. My current query filters by price but I can't get it working for City or Type. Adding more terms to the filter array didn't do the trick.
'query':{
'filtered':{
'query':{
'query_string':{
'query':query
}
},
'filter': {
'bool':{
'filter': [{
'range': {
'variants.price': {
'gte': 0
}
}
},
{
'range': {
'variants.price': {
'lte': 50
}
}
},
{
'term': {
'active': true
}
}
]
}
}
}
}
Any help would be appreciated. Thanks!
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"filter": [
{
"range": {
"variants.price": {
"gte": 0
}
}
},
{
"range": {
"variants.price": {
"lte": 50
}
}
},
{
"nested": {
"path": "features",
"query": {
"bool": {
"should": [
{"term":{"features.key":"type"}},
{"term":{"features.key":"city"}}
]
}
}
}
}
]
}
}
}
}
}

How to unwind single document and then process into single result in an array

I have had some great help from @Joao and @Blakes Seven in order to get me as far as I've got. Awesome, thanks guys very much.
What I have a problem with is going from my original simple example and applying it to my real life scenario.
Where I've got to is two separate scripts which work perfectly by themselves, but when I try to bring the two together, it only does the first part without applying the second part; it's my lack of experience in Mongo letting me down here.
So, I am able to get a name value pair from the first array set within a document using the following code:
db.raw_originBusinessData.aggregate([
{ "$match": {objectOriginAPI : "Profit & Loss"}}
,{ "$unwind": "$objectRawOriginData.Reports" }
,{ "$unwind": "$objectRawOriginData.Reports.Rows" }
,{ "$unwind": "$objectRawOriginData.Reports.Rows.Rows" }
,{ "$group": {"_id": "$_id","first": { "$first": "$objectRawOriginData.Reports.Rows.Rows.Cells.Value" }
, "temp": { "$push": "$objectRawOriginData.Reports.Rows.Rows.Cells.Value" }
}},
{ "$unwind": "$temp" }
,{"$skip":1}
,{ "$group": {"_id": "$_id", "AccountBalance":{ "$first": "$first" }
}}
])
This gives me the result below, which I'm happy with except for the fact I am not able to name the two values in the Account Balances array.
Mission 1: I want AccountBalance to be an array and the first position has two values: "AccountName" : "Sales", "AccountValue" : 5428.64.
{
"result" : [
{
"_id" : ObjectId("564d12da1506995581569428"),
"AccountBalance" : [
"Sales",
"5428.64"
]
}
],
"ok" : 1.0000000000000000
}
The second part of the problem is that it is only processing one set of values whereas, the document I am processing on has 9 sets to do. I have run the following $unwind on the document and it has perfectly split them into 9 results:
db.raw_originBusinessData.aggregate([
// find document
{ "$match": {objectOriginAPI : "Profit & Loss"}}
// unwind into multiple documents
,{ "$unwind": "$objectRawOriginData.Reports" }
,{ "$unwind": "$objectRawOriginData.Reports.Rows" }
,{ "$unwind": "$objectRawOriginData.Reports.Rows.Rows" }
])
So I get 9 results perfect. Mission 2: What I want to do is to combine this with the script I showed above. I tried the following but it does absolutely no more than the script just above.
db.raw_originBusinessData.aggregate([
// find document
{ "$match": {objectOriginAPI : "Profit & Loss"}}
// unwind into multiple documents
,{ "$unwind": "$objectRawOriginData.Reports" }
,{ "$unwind": "$objectRawOriginData.Reports.Rows" }
,{ "$unwind": "$objectRawOriginData.Reports.Rows.Rows" }
]
// process each document
,{ "$unwind": "$objectRawOriginData.Reports" }
, {"$unwind": "$objectRawOriginData.Reports.Rows" }
, {"$unwind": "$objectRawOriginData.Reports.Rows.Rows" }
, {"$group": {"_id": "$_id","first": { "$first": "$objectRawOriginData.Reports.Rows.Rows.Cells.Value" }
, "a": { "$push": "$objectRawOriginData.Reports.Rows.Rows.Cells.Value" }
}},
{ "$unwind": "$a" }
,{"$skip":1}
,{ "$group": {"_id": "$_id", "AccountBalance":{ "$first": "$first" }
}}
)
Desired Outcome
What I want to get is the following. This is combination of mission 1 and 2.
{
"result" : [
{
"_id" : ObjectId("564d12da1506995581569428"),
"AccountBalance" : [
{"AccountName" : "Sales",
"AccountValue" : "5428.64"},
{"AccountName" : "Total Income",
"AccountValue" : "5428.64"},
{"AccountName" : "Cost of Sales",
"AccountValue" : "100.00"},
{"AccountName" : "Total Cost of Sales",
"AccountValue" : "100.00"},
{"AccountName" : "Gross Profit",
"AccountValue" : "5328.64"},
{"AccountName" : "Advertising",
"AccountValue" : "100.00"},
{"AccountName" : "General Expenses",
"AccountValue" : "100.00"},
{"AccountName" : "Total Operating Expenses",
"AccountValue" : "200.00"},
{"AccountName" : "Net Profit",
"AccountValue" : "5128.64"}
]
}
],
"ok" : 1.0000000000000000
}
The document that I am using as a source is from Xero API - it's one of their reports. It has the same pattern as most reports exported in JSON like Oracle. Below is the actual report so that you have it for reference.
Thanks a million guys, very much appreciated!
{
"_id" : ObjectId("564d12da1506995581569428"),
"objectClass" : "Origin Data",
"objectCategory" : "Application",
"objectType" : "Customer",
"connection_id" : "562033dfca91840cd0c7c54f",
"connectionName" : "Building Accounts",
"entity_id" : "564149bcca9183a8d0c7c83c",
"objectCreationDate" : "2015-11-19 14:43:40",
"objectCycleID" : "12345678",
"objectStatus" : "PROCESSED",
"objectOrigin" : "Xero",
"objectOriginAPI" : "Profit & Loss",
"objectOriginService" : "Xero API - Profit & Loss v 1.0.0.8",
"objectRawOriginData" : {
"Id" : "d6e7fb37-9f2e-45ae-b0a4-de62aa95a783",
"Status" : "OK",
"ProviderName" : "Xero API Previewer",
"DateTimeUTC" : "/Date(1443405874333)/",
"Reports" : [
{
"ReportID" : "ProfitAndLoss",
"ReportName" : "Profit and Loss",
"ReportType" : "ProfitAndLoss",
"ReportTitles" : [
"Profit & Loss",
"Paddy's markets",
"28 September 2014 to 28 September 2015"
],
"ReportDate" : "28 September 2015",
"UpdatedDateUTC" : "/Date(1443405874333)/",
"Fields" : [],
"Rows" : [
{
"RowType" : "Header",
"Cells" : [
{
"Value" : ""
},
{
"Value" : "28 Sep 15"
}
]
},
{
"RowType" : "Section",
"Title" : "Income",
"Rows" : [
{
"RowType" : "Row",
"Cells" : [
{
"Value" : "Sales",
"Attributes" : [
{
"Value" : "a7e3f9e4-6f63-4b25-ae36-107131e8b9be",
"Id" : "account"
}
]
},
{
"Value" : "5428.64",
"Attributes" : [
{
"Value" : "a7e3f9e4-6f63-4b25-ae36-107131e8b9be",
"Id" : "account"
}
]
}
]
},
{
"RowType" : "SummaryRow",
"Cells" : [
{
"Value" : "Total Income"
},
{
"Value" : "5428.64"
}
]
}
]
},
{
"RowType" : "Section",
"Title" : "Less Cost of Sales",
"Rows" : [
{
"RowType" : "Row",
"Cells" : [
{
"Value" : "Cost of Sales",
"Attributes" : [
{
"Value" : "f78f7118-ad98-4862-a63c-39dd3c5ace8a",
"Id" : "account"
}
]
},
{
"Value" : "100.00",
"Attributes" : [
{
"Value" : "f78f7118-ad98-4862-a63c-39dd3c5ace8a",
"Id" : "account"
}
]
}
]
},
{
"RowType" : "SummaryRow",
"Cells" : [
{
"Value" : "Total Cost of Sales"
},
{
"Value" : "100.00"
}
]
}
]
},
{
"RowType" : "Section",
"Title" : "",
"Rows" : [
{
"RowType" : "Row",
"Cells" : [
{
"Value" : "Gross Profit"
},
{
"Value" : "5328.64"
}
]
}
]
},
{
"RowType" : "Section",
"Title" : "Less Operating Expenses",
"Rows" : [
{
"RowType" : "Row",
"Cells" : [
{
"Value" : "Advertising",
"Attributes" : [
{
"Value" : "67466588-132b-48ce-b897-0ceabffd7a9d",
"Id" : "account"
}
]
},
{
"Value" : "100.00",
"Attributes" : [
{
"Value" : "67466588-132b-48ce-b897-0ceabffd7a9d",
"Id" : "account"
}
]
}
]
},
{
"RowType" : "Row",
"Cells" : [
{
"Value" : "General Expenses",
"Attributes" : [
{
"Value" : "fdb25d7a-2fc8-406a-bf4b-6b4e8014b8cb",
"Id" : "account"
}
]
},
{
"Value" : "100.00",
"Attributes" : [
{
"Value" : "fdb25d7a-2fc8-406a-bf4b-6b4e8014b8cb",
"Id" : "account"
}
]
}
]
},
{
"RowType" : "SummaryRow",
"Cells" : [
{
"Value" : "Total Operating Expenses"
},
{
"Value" : "200.00"
}
]
}
]
},
{
"RowType" : "Section",
"Title" : "",
"Rows" : [
{
"RowType" : "Row",
"Cells" : [
{
"Value" : "Net Profit"
},
{
"Value" : "5128.64"
}
]
}
]
}
]
}
]
}
}
done it, see below, thanks:
db.raw_originBusinessData.aggregate([
{ "$match": {objectOriginAPI : "Profit & Loss"}}
,{ "$unwind": "$objectRawOriginData.Reports" }
,{ "$unwind": "$objectRawOriginData.Reports.Rows" }
,{ "$unwind": "$objectRawOriginData.Reports.Rows.Rows" }
,{ "$group": {"_id": "$_id","accountBalances": { "$push": "$objectRawOriginData.Reports.Rows.Rows.Cells.Value" }
}},
])

How can I get all the documents which have at least the properties specified in an Elasticsearch query?

Is it possible to select an item from the index which matches multiple values for a certain sub-item? I think this is not so clear, so I have added more details below.
I have the following index:
{
"mappings" : {
"entity" : {
"properties" : {
"name" : {"type" : "string"},
"features" : {
"type" : "nested",
"include_in_parent" : false,
"properties" : {
"id" : {"type" : "integer"},
"value_int" : {"type" : "integer"},
"value_text" : {"type" : "string"},
"value_decimal" : {"type" : "integer"}
}
}
}
}
},
"settings" : {
"number_of_shards" : 1,
"number_of_replicas" : 0
}
}
Some items from the index
{
"name" : "Bazar",
"features" : [
{
"id" : 1,
"value_text" : null,
"value_decimal" : null,
"value_int": 51
},
{
"id" : 9,
"value_text" : "Amsterdam",
"value_decimal" : null,
"value_int": null
}
]
}
{
"name" : "Bazar Test",
"features" : [
{
"id" : 1,
"value_text" : null,
"value_decimal" : null,
"value_int": 52
},
{
"id" : 9,
"value_text" : "Leiden",
"value_decimal" : null,
"value_int": null
}
]
}
{
"name" : "Bazar no city",
"features" : [
{
"id" : 1,
"value_text" : null,
"value_decimal" : null,
"value_int": 51
},
]
}
What I need is a way to find just the items which have both features.id = 1 and features.id = 9 (ex: "Bazar" and "Bazar Test" items).
The query I got so far is
{
"query" : {
"nested" : {
"path" : "features",
"query" : {
"bool" : {
"must" : [
{ "terms" : { "features.id" : [1, 9]} }
]
}
}
}
}
}
The problem with this query is that it selects the items which have features.id = 1 OR features.id = 9 so all the items are returned.
Edit
Tried a new query
{
"query" : {
"nested" : {
"path" : "features",
"query" : {
"bool" : {
"must" : [
{ "terms" : {
"features.id" : [1, 9],
"minimum_should_match": 2
}
}
]
}
}
}
}
}
But I got no results.
Edit:
After I combined the answers, I managed to get it working.
Thank you for help :)
This is my query (a bit modified)
{
"from": 0,
"size": 20,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"match_phrase_prefix": {
"title": {
"query": "deli",
"max_expansions": 5
}
}
},
{
"match": {
"entity_type_id": 5
}
}
]
}
},
"filter": {
"and": {
"filters": [
{
"nested": {
"path": "features",
"query": {
"bool": {
"must": [
{
"match": {
"features.id": 31
}
},
{
"match": {
"features.value_int": {
"query": [
56, 57
],
"operator": "and"
}
}
}
]
}
}
}
}
]
}
}
}
}
}
Thank you.
The match query supports a Boolean operator parameter. You should also wrap the query in a nested query, as the features field is nested in your mapping.
Try this query:
{
"query": {
"nested": {
"query": {
"match": {
"features.id": {
"query": "1 9",
"operator": "and"
}
}
},
"path": "features"
}
}
}
Nested documents are more difficult to query. This should be what you want:
{
"query": {
"filtered": {
"filter": {
"and": {
"filters": [
{
"nested": {
"path": "features",
"query": {
"term": {
"features.id": {
"value": "1"
}
}
}
}
},
{
"nested": {
"path": "features",
"query": {
"term": {
"features.id": {
"value": "9"
}
}
}
}
}
]
}
}
}
}
}

Elasticsearch aggregation

I'm looking for how to merge Elasticsearch hits into one. Basically, I want to do the equivalent of
SELECT COUNT(*) WHERE TOTO = 1 AND TATA = 2 AND TITI = 3
I've managed to deal with the WHERE part using filters, but I get all the hits as separate entities,
so what I have is
SELECT TOTO, TATA, TITI WHERE TOTO = 1 AND TATA = 2 AND TITI = 3
How do I manage to get only one hit containing the COUNT(*) value ?
Environment
{
"my_element": {
"mappings": {
"test": {
"properties": {
"baskets": {
"type": "nested",
"properties": {
"basket_id": {
"type": "string"
},
"num_basket": {
"type": "integer"
},
"tp_basket": {
"type": "string"
}
}
},
"test_id": {
"type": "string"
},
"test_name": {
"type": "string"
}
}
}
}
}
}
So I want to count how many test elements have a basket_id of X and a num_basket of 3 (if they do, that means the baskets are identical, so I also want to show the baskets field)
{
"fields": [
"bucket_list",
"baskets.basket_id",
"baskets.num_basket"
],
"query": {
"filtered": {
"filter": {
"and": {
"filter": [
{
"nested": {
"path": "baskets",
"filter": {
"and": {
"filters": [
{
"or": {
"filters": [
{
"bool": {
"must": [
{
"term": {
"baskets.basket_id": "40"
}
},
{
"term": {
"baskets.num_basket": "1"
}
},
{
"term": {
"baskets.tp_basket": "1"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"baskets.basket_id": "41"
}
},
{
"term": {
"baskets.num_basket": "1"
}
},
{
"term": {
"baskets.tp_basket": "1"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"baskets.basket_id": "342"
}
},
{
"term": {
"baskets.num_basket": "1"
}
},
{
"term": {
"baskets.tp_basket": "1"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"baskets.basket_id": "342"
}
},
{
"term": {
"baskets.num_basket": "1"
}
},
{
"term": {
"baskets.tp_basket": "1"
}
}
]
}
}
]
}
}
]
}
}
}
},
{
"nested": {
"path": "baskets",
"filter": {
"and": {
"filters": [
{
"bool": {
"must": [
{
"term": {
"baskets.basket_id": "15"
}
},
{
"term": {
"baskets.num_basket": "2"
}
}
]
}
}
]
}
}
}
},
{
"nested": {
"path": "baskets",
"filter": {
"and": {
"filters": [
{
"bool": {
"must": [
{
"term": {
"baskets.basket_id": "15"
}
},
{
"term": {
"baskets.num_basket": "3"
}
}
]
}
}
]
}
}
}
}
]
}
}
}
}
}
and result
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 2,
"successful": 2,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "my_element",
"_type": "test",
"_id": "sMHPv3i4RTqNCIChGh4Iew",
"_score": 1,
"fields": {
"baskets.basket_id": [
"15",
"15",
"15"
],
"baskets.num_basket": [
2,
3,
1
]
}
},
{
"_index": "my_element",
"_type": "test",
"_id": "KL3U-g-7RtuusNV8hi9YHQ",
"_score": 1,
"fields": {
"baskets.basket_id": [
"15",
"15",
"15"
],
"baskets.num_basket": [
1,
2,
3
]
}
}
]
}
}
I am probably answering really late, but this is for those who are still looking for the answer. This query can be written in a very simple way, as follows.
GET _count
{
"query":
{
"bool" : {
"must" : [ {
"term" : {
"basket_id" : "X"
}
}, {
"term" : {
"num_basket" : 3
}
} ]
}
}
}
The above query will give you following result.
{
"count": 6,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
}
}
You can read the count from above response, which is your desired result.
Hope this was helpful.
When the search results are returned from Elasticsearch you should see a total field - this has the count of matching documents.
for example, see "total" : 2, below:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 12,
"successful" : 12,
"failed" : 0
},
"hits" : {
"total" : 2,
"max_score" : 2.098612,
"hits" : [ {
"_index" : "mytest",
"_type" : "message",
"_id" : "P9wGgJHjQmK8GUvw8M5Q8A",
"_score" : 2.098612,
"fields" : {
"tata" : [ "1" ],
"toto" : [ "1" ],
"titi" : [ "2" ]
}
}, {
"_index" : "mytest",
"_type" : "message",
"_id" : "M26ychoyRR6HkordRdS_HA",
"_score" : 0.30685282,
"fields" : {
"tata" : [ "1" ],
"toto" : [ "1" ],
"titi" : [ "2" ]
}
} ]
}
}