Mongodb insert with multiple conditions - json

I'm having multiple documents in a collection, each document has this data structure :
{
_id: "some object id",
data1: [
{
data2_id : 13233,
data2: [
{
sub_data1: "text1",
sub_data2: "text2",
sub_data3: "text3",
},
{
sub_data1: "text4",
sub_data2: "text5",
sub_data3: "text6",
}
]
},
{
data2_id : 53233,
data2: [
{
sub_data1: "text4",
sub_data2: "text5",
sub_data3: "text6",
}
...
]
},
{
data2_id : 56233,
data2: [
{
sub_data1: "text7",
sub_data2: "text8",
sub_data3: "text9",
}
...
]
},
{
data2_id : 53236,
data2: [
{
sub_data1: "text10",
sub_data2: "text22",
sub_data3: "text33",
}
...
]
}
]
}
I'd like to update to a set of ids that maches some condition, update only the sub object within the document.
I've tries this:
db.collection.update({
"$and": [
{
"_id": {
"$in": [
{
"$id": "54369aca9bc25af3ca8b4568"
},
{
"$id": "54369aca9bc25af3ca8b4562"
}
]
}
},
{
"data1.data2": {
"$elemMatch": {
"sub_data1": "text4",
"sub_data2": "text5"
}
}
}
]
},
{
"data1.data2.$.sub_data3" : "text updated"
}
)
But I get the following error:
Update of data into MongoDB failed: dev.**.com:27017: cannot use the part (data2 of data1.data2.0.sub_data3) to traverse the element...
Any Ideas?

There is an open issue here that imposes a limitation when trying to update elements of an array nested within another array.
Besides, there are some improvements you can do here:
For your query you don't need the $and
db.collection.update(
{
"_id": {
"$in": [
{"$id": "54369aca9bc25af3ca8b4568"},
{"$id": "54369aca9bc25af3ca8b4562"}
]},
"data1.data2": {
"$elemMatch": {
"sub_data1": "text4",
"sub_data2": "text5"
}
},{..update...})
You might want to use $set:
db.collection.update(query,{ $set:{"name": "Mike"} })
Otherwise, you might lose the rest of the data within your document.

Related

How to do custom window function on JSON object with pandas?

I have a rather nested JSON object below, and I am trying to calculate the user (ie 'profileId') with the most events (ie length of 'parameters' key.
I have the code below to get the length of the parameter, but I am trying to now have that calculation be correct for each record, as they way I have it set now would set it the same value for each record - I looked into pandas window functions https://pandas.pydata.org/docs/user_guide/window.html but am having trouble getting to the correct outcome.
response = response.json()
df = pd.json_normalize(response['items'])
df['calcfield'] = len(df["events"].iloc[0][0].get('parameters'))
the output of df['arrayfield'] is below:
[
{
"type":"auth",
"name":"activity",
"parameters":[
{
"name":"api_name",
"value":"admin"
},
{
"name":"method_name",
"value":"directory.users.list"
},
{
"name":"client_id",
"value":"722230783769-dsta4bi9fkom72qcu0t34aj3qpcoqloq.apps.googleusercontent.com"
},
{
"name":"num_response_bytes",
"intValue":"7158"
},
{
"name":"product_bucket",
"value":"GSUITE_ADMIN"
},
{
"name":"app_name",
"value":"Untitled project"
},
{
"name":"client_type",
"value":"WEB"
}
]
}
] }, {
"kind":"admin#reports#activity",
"id":{
"time":"2022-05-05T23:58:48.914Z",
"uniqueQualifier":"-4002873813067783265",
"applicationName":"token",
"customerId":"C02f6wppb"
},
"etag":"\"5T53xK7dpLei95RNoKZd9uz5Xb8LJpBJb72fi2HaNYM/9DTdB8t7uixvUbjo4LUEg53_gf0\"",
"actor":{
"email":"nancy.admin#hyenacapital.net",
"profileId":"100230688039070881323"
},
"ipAddress":"54.80.168.30",
"events":[
{
"type":"auth",
"name":"activity",
"parameters":[
{
"name":"api_name",
"value":"gmail"
},
{
"name":"method_name",
"value":"gmail.users.messages.list"
},
{
"name":"client_id",
"value":"927538837578.apps.googleusercontent.com"
},
{
"name":"num_response_bytes",
"intValue":"2"
},
{
"name":"product_bucket",
"value":"GMAIL"
},
{
"name":"app_name",
"value":"Zapier"
},
{
"name":"client_type",
"value":"WEB"
}
]
ORIGINAL JSON BLOB I READ IN
{
"kind":"admin#reports#activities",
"etag":"\"5g8\"",
"nextPageToken":"A:1651795128914034:-4002873813067783265:151219070090:C02f6wppb",
"items":[
{
"kind":"admin#reports#activity",
"id":{
"time":"2022-05-05T23:59:39.421Z",
"uniqueQualifier":"5526793068617678141",
"applicationName":"token",
"customerId":"cds"
},
"etag":"\"jkYcURYoi8\"",
"actor":{
"email":"blah#blah.net",
"profileId":"1323"
},
"ipAddress":"107.178.193.87",
"events":[
{
"type":"auth",
"name":"activity",
"parameters":[
{
"name":"api_name",
"value":"admin"
},
{
"name":"method_name",
"value":"directory.users.list"
},
{
"name":"client_id",
"value":"722230783769-dsta4bi9fkom72qcu0t34aj3qpcoqloq.apps.googleusercontent.com"
},
{
"name":"num_response_bytes",
"intValue":"7158"
},
{
"name":"product_bucket",
"value":"GSUITE_ADMIN"
},
{
"name":"app_name",
"value":"Untitled project"
},
{
"name":"client_type",
"value":"WEB"
}
]
}
]
},
{
"kind":"admin#reports#activity",
"id":{
"time":"2022-05-05T23:58:48.914Z",
"uniqueQualifier":"-4002873813067783265",
"applicationName":"token",
"customerId":"df"
},
"etag":"\"5T53xK7dpLei95RNoKZd9uz5Xb8LJpBJb72fi2HaNYM/9DTdB8t7uixvUbjo4LUEg53_gf0\"",
"actor":{
"email":"blah.blah#bebe.net",
"profileId":"1324"
},
"ipAddress":"54.80.168.30",
"events":[
{
"type":"auth",
"name":"activity",
"parameters":[
{
"name":"api_name",
"value":"gmail"
},
{
"name":"method_name",
"value":"gmail.users.messages.list"
},
{
"name":"client_id",
"value":"927538837578.apps.googleusercontent.com"
},
{
"name":"num_response_bytes",
"intValue":"2"
},
{
"name":"product_bucket",
"value":"GMAIL"
},
{
"name":"client_type",
"value":"WEB"
}
]
}
]
}
]
}
Use:
df.groupby('actor.profileId')['events'].apply(lambda x: [len(x.iloc[i][0]['parameters']) for i in range(len(x))])
which returns the list of each profileid count of parameters. Output and the sample data:
actor.profileId
1323 [7]
1324 [7]
Name: events, dtype: object
It's not entirely clear what you asking and df['arrayfield'] isn't in your example provided. However, if you look at the events column after json_normalize, you can use the following line to pull out the length of each parameters key. The blob you gave as an example was set to response...
df = pd.json_normalize(response['items'])
df['calcfield'] = df['events'].str[0].str.get('parameters').str.len()
Becauase each parameters key has 7 elements, it's tough to say this is what you really want.

Removing a specific attribute in an array of nested documents

Excuse my English, I'm from Russia.
I asked this question in the Russian version SO, but they still haven't answered it.
There is a record collection that stores archival files. Here is its simplified structure (I omitted most of the attributes):
{
"_id": 1,
"tomes": [
{
"number":1,
"archive_number":1
},
{
"number":2,
"archive_number":1
}
]
}
{
"_id": 2,
"tomes": [
{
"number":1,
"archive_number":1
},
{
"number":2,
"archive_number":1
},
{
"number":3,
"archive_number":1
}
]
}
I need to remove the archive_number attribute from each of the nested documents of the tomes array for all documents in the record collection.
After deletion, the structure should look like this:
{
"_id": 1,
"tomes": [
{
"number":1,
},
{
"number":2,
}
]
}
{
"_id": 2,
"tomes": [
{
"number":1,
},
{
"number":2,
},
{
"number":3,
}
]
}
I was able to write a query like this:
db.record.update(
{
"tomes": {
$elemMatch:{
"archive_number":{$exists:true}
}
}
},
{
$unset: {
"tomes.$.archive_number":1
}
},
false, true
)
But this query only removes the archive_number attribute on one volume per archive case. I.e., after launch, we will see the following picture:
{
"_id": 1,
"tomes": [
{
"number":1,
},
{
"number":2,
"archive_number":1
}
]
}
{
"_id": 2,
"tomes": [
{
"number":1,
},
{
"number":2,
"archive_number":1
},
{
"number":3,
"archive_number":1
}
]
}
Can you please tell me how to delete all volumes? I don’t know how to correct the request, but my head doesn’t understand anymore.
Solution 1
With $[<indentifier>] (filtered positional operator) and arrayFilters to update the document(s) in the array.
db.collection.update({
"tomes": {
$elemMatch: {
"archive_number": {
$exists: true
}
}
}
},
{
$unset: {
"tomes.$[tome].archive_number": 1
}
},
{
arrayFilters: [
{
"tome.archive_number": {
$exists: true
}
}
],
multi: true
})
Sample Mongo Playground (Solution 1)
Solution 2
With $[] (all positional operator).
The all positional operator $[] indicates that the update operator should modify all elements in the specified array field.
db.collection.update({
"tomes": {
$elemMatch: {
"archive_number": {
$exists: true
}
}
}
},
{
$unset: {
"tomes.$[].archive_number": 1
}
},
{
multi: true
})
Sample Mongo Playground (Solution 2)
References
How the arrayFilters Parameter Works in MongoDB

Mongodb query on triple nested array of object

I'm having some problem to write a query to return a triple nested value from a document. The documents I'm using are structured like this
{
"areaname": "name1",
"places": [
{
"placename": "place1",
"objects": [
{
"objname": "obj1",
"tags": [
"tag1",
"tag2"
]
},
{
"objname": "obj2",
"tags": [
"tag6",
"tag7"
]
}
]
},
{
"placename": "place2",
"objects": [
{
"objname": "obj45",
"tags": [
"tag46",
"tag34"
]
},
{
"objname": "obj77",
"tags": [
"tag56",
"tag11"
]
}
]
}
]
}
It is quite simple actually but I can't find a solution to a simple query like:
"return the objname of the object that contains tag1 inside their tag"
So for the give document if I use "tag1" as a parameter it is expected for the query to return "obj1"
It should give me the same result if I use "tag2" as a parameter
Other example: using "tag56" it should return only "obj77"
Right now i have no problem returning the whole document using the dot-notation or top level field such as areaname or others
db.users.find( {"places.objects.tags":"tag1"}, { areaname: 1, _id:0 } )
Is this even possible?
Keeping it simple:
[
{
"$match" : {
"places.objects.tags" : "tag1"
}
},
{
"$unwind" : "$places"
},
{
"$unwind" : "$places.objects"
},
{
"$match" : {
"places.objects.tags" : "tag1"
}
},
{
"$group" : {
"_id" : "$_id",
"obj_names" : {
"$push" : "$places.objects.objname"
}
}
}
],
You should add any other fields you want to keep to the group stage,
this can also be done without the double $unwind stage but i choose this for read-ability.

Return selected JSON object from mongo find method

Here is the sample JSON
Sample JSON:
[
{
"_id": "123456789",
"YEAR": "2019",
"VERSION": "2019.Version",
"QUESTION_GROUPS": [
{
"QUESTIONS": [
{
"QUESTION_NAME": "STATE_CODE",
"QUESTION_VALUE": "MH"
},
{
"QUESTION_NAME": "COUNTY_NAME",
"QUESTION_VALUE": "IN"
}
]
},
{
"QUESTIONS": [
{
"QUESTION_NAME": "STATE_CODE",
"QUESTION_VALUE": "UP"
},
{
"QUESTION_NAME": "COUNTY_NAME",
"QUESTION_VALUE": "IN"
}
]
}
]
}
]
Query that am using :
db.collection.find({},
{
"QUESTION_GROUPS.QUESTIONS.QUESTION_NAME": "STATE_CODE"
})
My requirement is retrive all QUESTION_VALUE whose QUESTION_NAME is equals to STATE_CODE.
Thanks in Advance.
If I get you well, What you are trying to do is something like:
db.collection.find(
{
"QUESTION_GROUPS.QUESTIONS.QUESTION_NAME": "STATE_CODE"
},
{
"QUESTION_GROUPS.QUESTIONS.QUESTION_VALUE": 1
})
Attention: you will get ALL the "QUESTION_VALUE" for ANY document which has a QUESTION_GROUPS.QUESTIONS.QUESTION_NAME with that value.
Attention 2: You will get also the _Id. It is by default.
In case you would like to skip those issues, you may need to use Aggregations, and unwind the "QUESTION_GROUPS"-> "QUESTIONS". This way you can skip both the irrelevant results, and the _id field.
It sounds like you want to unwind the arrays and grab only the question values back
Try this
db.collection.aggregate([
{
$unwind: "$QUESTION_GROUPS"
},
{
$unwind: "$QUESTION_GROUPS.QUESTIONS"
},
{
$match: {
"QUESTION_GROUPS.QUESTIONS.QUESTION_NAME": "STATE_CODE"
}
},
{
$project: {
"QUESTION_GROUPS.QUESTIONS.QUESTION_VALUE": 1
}
}
])

Type of "freeplay" (string) is not supported

I have a json file which looks like this
{
"language":[
{
"lang":"English"
},
{
"lang":"Polish"
},
{
"lang":"German"
},
{
"lang":"Swedish"
},
{
"lang":"Dutch"
},
{
"lang":"Finnish"
},
{
"lang":"Turkish"
}
],
"currency":[
{
"curr" : "dollar"
},
{
"curr" : "pound"
},
{
"curr" : "rupees"
},
{
"curr" : "euro"
},
{
"curr" : "euro"
}
],
"gamename":[
{
"gname":"poker"
},
{
"gname":"slot"
}
],
"freeplay": "false"
}
I installed json-server-init globally and then ran watch command which threw the following error
Type of "freeplay" (string) in linkto.json is not supported. Use
objects or arrays of objects.
Can someone help me in understanding what is wrong or what did I do wrong?
From my understanding of json-server, the value of each key must be a valid JSON object, which is not the case for a simple string.
For example, change the value (contents of other keys omitted) to:
{
"language":[
...
],
"currency":[
...
],
"gamename":[
...
],
"freeplay": {
"enabled": "false"
}
}
if you'd like the request to:
http://localhost:3000/freeplay
to return:
{
"enabled": "false"
}