Find the average value in MongoDB from JSON

Find the average value in MongoDB from JSON - json

In my MongoDB (export from JSON file) I have database "dab" with structure like this:
id:"1"
datetime:"2020-05-08 5:09:56"
name:"namea"
lat:55.826738
lon:45.0423412
analysis:"[{"0":0.36965591924860347},{"5":0.10391287134268598},{"10":0.086884394..."
I'm using that db for spark analysis via MongoDB-Spark Connector.
My problem is the field "analysis": I need the average of the values across all intervals ("0", "5", "10", ..., "1000"). That is, I have to sum 0.36965591924860347 + 0.10391287134268598 + 0.086884394 + ..., divide the sum by the number of intervals (I have 200 intervals in every column), and finally multiply the result by 100.

My solution would be this one:
db.collection.aggregate([
{
$set: {
analysis: {
$map: {
input: "$analysis",
in: { $objectToArray: "$$this" }
}
}
}
},
{
$set: {
analysis: {
$map: {
input: "$analysis",
in: { $first: "$$this.v" }
}
}
}
},
{ $set: { average: { $multiply: [ { $avg: "$analysis" }, 100 ] } } }
])
Mongo playground

You can use $reduce on that array to sum the values, then divide by the number of elements, and then multiply by 100.
db.collection.aggregate([
{
"$addFields": {
"average": {
"$multiply": [
{
"$divide": [
{
"$reduce": {
"input": "$analysis",
"initialValue": 0,
"in": {
"$let": {
"vars": {
"sum": "$$value",
"data": "$$this"
},
"in": {
"$add": [
"$$sum",
{
"$arrayElemAt": [
{
"$arrayElemAt": [
{
"$map": {
"input": {
"$objectToArray": "$$data"
},
"as": "m",
"in": [
"$$m.k",
"$$m.v"
]
}
},
0
]
},
1
]
}
]
}
}
}
}
},
{
"$size": "$analysis"
}
]
},
100
]
}
}
}
])
You can test the code here
But this code has one problem: you save the data in documents, and MongoDB
doesn't have a function like get(document,$$k). The new MongoDB v5.0 has $getField, but it still accepts only constants, not variables.
I mean that in your case we can't do getField(doc,"5").
So we have to pay the cost of converting each document to an array.

Related

JMESPATH query expression for matching Value in an Array

I have the following JSON
{
"Record": [
{
"FirstName": "John",
"LastName": "Smith",
"City": "Chicago",
"Possessions": [
{
"Item": "TV"
},
{
"Item": "XBOX-S"
},
{
"Item": "DVR"
},
{
"Item": "Setup Box"
}
]
},
{
"FirstName": "Jane",
"LastName": "Doe",
"City": "Seattle",
"Possessions": [
{
"Item": "DVR"
},
{
"Item": "PS5"
},
{
"Item": "FireStick"
}
]
},
{
"FirstName": "Jane",
"LastName": "Lee",
"City": "Dallas",
"Possessions": [
{
"Item": "TV"
},
{
"Item": "PS5"
},
{
"Item": "FireStick"
}
]
}
]
}
How do I get a table of First Name and Last Name by matching a particular possession ("TV") using JMESPATH query?
I need to match the Value in the Array Possessions for each entry in the Record
I have tried using the following query, but it does not work,
Record[?contains(Possessions[].Item, `TV`) == `true`]
If anyone knows how to make this query work, or can point me in the right direction, it would be greatly appreciated.

How to do custom window function on JSON object with pandas?

I have a rather nested JSON object below, and I am trying to find the user (i.e. 'profileId') with the most events (i.e. the length of the 'parameters' key).
I have the code below to get the length of the parameters, but I now need that calculation to be correct for each record; the way I have it set up now assigns the same value to every record. I looked into pandas window functions https://pandas.pydata.org/docs/user_guide/window.html but am having trouble getting to the correct outcome.
response = response.json()
df = pd.json_normalize(response['items'])
df['calcfield'] = len(df["events"].iloc[0][0].get('parameters'))
the output of df['arrayfield'] is below:
[
{
"type":"auth",
"name":"activity",
"parameters":[
{
"name":"api_name",
"value":"admin"
},
{
"name":"method_name",
"value":"directory.users.list"
},
{
"name":"client_id",
"value":"722230783769-dsta4bi9fkom72qcu0t34aj3qpcoqloq.apps.googleusercontent.com"
},
{
"name":"num_response_bytes",
"intValue":"7158"
},
{
"name":"product_bucket",
"value":"GSUITE_ADMIN"
},
{
"name":"app_name",
"value":"Untitled project"
},
{
"name":"client_type",
"value":"WEB"
}
]
}
] }, {
"kind":"admin#reports#activity",
"id":{
"time":"2022-05-05T23:58:48.914Z",
"uniqueQualifier":"-4002873813067783265",
"applicationName":"token",
"customerId":"C02f6wppb"
},
"etag":"\"5T53xK7dpLei95RNoKZd9uz5Xb8LJpBJb72fi2HaNYM/9DTdB8t7uixvUbjo4LUEg53_gf0\"",
"actor":{
"email":"nancy.admin#hyenacapital.net",
"profileId":"100230688039070881323"
},
"ipAddress":"54.80.168.30",
"events":[
{
"type":"auth",
"name":"activity",
"parameters":[
{
"name":"api_name",
"value":"gmail"
},
{
"name":"method_name",
"value":"gmail.users.messages.list"
},
{
"name":"client_id",
"value":"927538837578.apps.googleusercontent.com"
},
{
"name":"num_response_bytes",
"intValue":"2"
},
{
"name":"product_bucket",
"value":"GMAIL"
},
{
"name":"app_name",
"value":"Zapier"
},
{
"name":"client_type",
"value":"WEB"
}
]
ORIGINAL JSON BLOB I READ IN
{
"kind":"admin#reports#activities",
"etag":"\"5g8\"",
"nextPageToken":"A:1651795128914034:-4002873813067783265:151219070090:C02f6wppb",
"items":[
{
"kind":"admin#reports#activity",
"id":{
"time":"2022-05-05T23:59:39.421Z",
"uniqueQualifier":"5526793068617678141",
"applicationName":"token",
"customerId":"cds"
},
"etag":"\"jkYcURYoi8\"",
"actor":{
"email":"blah#blah.net",
"profileId":"1323"
},
"ipAddress":"107.178.193.87",
"events":[
{
"type":"auth",
"name":"activity",
"parameters":[
{
"name":"api_name",
"value":"admin"
},
{
"name":"method_name",
"value":"directory.users.list"
},
{
"name":"client_id",
"value":"722230783769-dsta4bi9fkom72qcu0t34aj3qpcoqloq.apps.googleusercontent.com"
},
{
"name":"num_response_bytes",
"intValue":"7158"
},
{
"name":"product_bucket",
"value":"GSUITE_ADMIN"
},
{
"name":"app_name",
"value":"Untitled project"
},
{
"name":"client_type",
"value":"WEB"
}
]
}
]
},
{
"kind":"admin#reports#activity",
"id":{
"time":"2022-05-05T23:58:48.914Z",
"uniqueQualifier":"-4002873813067783265",
"applicationName":"token",
"customerId":"df"
},
"etag":"\"5T53xK7dpLei95RNoKZd9uz5Xb8LJpBJb72fi2HaNYM/9DTdB8t7uixvUbjo4LUEg53_gf0\"",
"actor":{
"email":"blah.blah#bebe.net",
"profileId":"1324"
},
"ipAddress":"54.80.168.30",
"events":[
{
"type":"auth",
"name":"activity",
"parameters":[
{
"name":"api_name",
"value":"gmail"
},
{
"name":"method_name",
"value":"gmail.users.messages.list"
},
{
"name":"client_id",
"value":"927538837578.apps.googleusercontent.com"
},
{
"name":"num_response_bytes",
"intValue":"2"
},
{
"name":"product_bucket",
"value":"GMAIL"
},
{
"name":"client_type",
"value":"WEB"
}
]
}
]
}
]
}

Use:
df.groupby('actor.profileId')['events'].apply(lambda x: [len(x.iloc[i][0]['parameters']) for i in range(len(x))])
which returns, for each profileId, a list with the number of parameters in each of its records. Output for the sample data:
actor.profileId
1323 [7]
1324 [7]
Name: events, dtype: object

It's not entirely clear what you are asking, and df['arrayfield'] isn't in the example you provided. However, if you look at the events column after json_normalize, you can use the following line to pull out the length of each parameters key. The blob you gave as an example was set to response...
df = pd.json_normalize(response['items'])
df['calcfield'] = df['events'].str[0].str.get('parameters').str.len()
Because each parameters key has 7 elements, it's tough to say whether this is what you really want.

Gatsby graphql filter statement is not working as expected

I have a query (based on JSON input) defined in Gatsby's GraphiQL IDE which looks like this; it is meant to return only those items which have a value of less than 10 in playtime_forever:
allContentJson(
filter: {response: {games: {elemMatch: {playtime_forever: {lt: 10}}}}}
) {
edges {
node {
response {
games {
playtime_forever
}
}
}
}
}
}
However running this query I also receive those which are greater than 10:
{
"data": {
"allContentJson": {
"edges": [
{
"node": {
"response": {
"games": [
{
"playtime_forever": 0
},
{
"playtime_forever": 93
},
{
"playtime_forever": 5302
}
]
},
}
}
]
}
},
"extensions": {}
}
I have a feeling that this is how elemMatch is supposed to work. Is there any other approach (possibly by defining types)?

Removing a specific attribute in an array of nested documents

Excuse my English, I'm from Russia.
I asked this question in the Russian version SO, but they still haven't answered it.
There is a record collection that stores archival files. Here is its simplified structure (I omitted most of the attributes):
{
"_id": 1,
"tomes": [
{
"number":1,
"archive_number":1
},
{
"number":2,
"archive_number":1
}
]
}
{
"_id": 2,
"tomes": [
{
"number":1,
"archive_number":1
},
{
"number":2,
"archive_number":1
},
{
"number":3,
"archive_number":1
}
]
}
I need to remove the archive_number attribute from each of the nested documents of the tomes array for all documents in the record collection.
After deletion, the structure should look like this:
{
"_id": 1,
"tomes": [
{
"number":1,
},
{
"number":2,
}
]
}
{
"_id": 2,
"tomes": [
{
"number":1,
},
{
"number":2,
},
{
"number":3,
}
]
}
I was able to write a query like this:
db.record.update(
{
"tomes": {
$elemMatch:{
"archive_number":{$exists:true}
}
}
},
{
$unset: {
"tomes.$.archive_number":1
}
},
false, true
)
But this query only removes the archive_number attribute on one volume per archive case. I.e., after launch, we will see the following picture:
{
"_id": 1,
"tomes": [
{
"number":1,
},
{
"number":2,
"archive_number":1
}
]
}
{
"_id": 2,
"tomes": [
{
"number":1,
},
{
"number":2,
"archive_number":1
},
{
"number":3,
"archive_number":1
}
]
}
Can you please tell me how to remove the attribute from all volumes? I don't know how to correct the query, and I can't figure it out anymore.

Solution 1
With $[<identifier>] (filtered positional operator) and arrayFilters to update the document(s) in the array.
db.collection.update({
"tomes": {
$elemMatch: {
"archive_number": {
$exists: true
}
}
}
},
{
$unset: {
"tomes.$[tome].archive_number": 1
}
},
{
arrayFilters: [
{
"tome.archive_number": {
$exists: true
}
}
],
multi: true
})
Sample Mongo Playground (Solution 1)
Solution 2
With $[] (all positional operator).
The all positional operator $[] indicates that the update operator should modify all elements in the specified array field.
db.collection.update({
"tomes": {
$elemMatch: {
"archive_number": {
$exists: true
}
}
}
},
{
$unset: {
"tomes.$[].archive_number": 1
}
},
{
multi: true
})
Sample Mongo Playground (Solution 2)
References
How the arrayFilters Parameter Works in MongoDB

Presto Json Parsing

I have a JSON field (sample attached) and I need to extract the values in the ProvisioningSystem path, but it works only if I hardcode the array location. How can I extract the value without hardcoding? Thanks in advance!
Code:
TRANSFORM(CAST(JSON_EXTRACT(order_json, '$.Order.Accounts.Account') AS ARRAY), x -> JSON_EXTRACT_SCALAR(x,'$.ProvisioningSystems.ProvisioningSystem[1].SystemName'))
Json:
{
"Order":
{
"Accounts": {
"Account": [
{
"ProvisioningSystems": {},
},
{
"ProvisioningSystems": {
"ProvisioningSystem": [
{
"SystemOrderRef": "12345",
"SystemName": "Testsystem",
"SystemOrderRefType": "Provision"
}
]
},
}
]
},
}
}
}

We Keep Coding

html mysql json google-apps-script actionscript-3 ms-access google-chrome google-maps reporting-services sql-server-2008

Find the average value in MongoDB from JSON - json

Related

JMESPATH query expression for matching Value in an Array

How to do custom window function on JSON object with pandas?

Gatsby graphql filter statement is not working as expected

Removing a specific attribute in an array of nested documents

Presto Json Parsing

Categories

Resources