I have a rather nested JSON object below, and I am trying to find the user (i.e. 'profileId') with the most events (i.e. the length of the 'parameters' key).
I have the code below to get the length of the parameters list, but I now need that calculation to be correct for each record; the way I have it set up now, it assigns the same value to every record. I looked into pandas window functions https://pandas.pydata.org/docs/user_guide/window.html but am having trouble getting to the correct outcome.
# Decode the response body as JSON.
response = response.json()
# Flatten the entries of response['items'] into a DataFrame.
df = pd.json_normalize(response['items'])
# NOTE: iloc[0][0] reads the first event of the FIRST row only, so the single
# resulting length is assigned to every row of 'calcfield'.
df['calcfield'] = len(df["events"].iloc[0][0].get('parameters'))
the output of df['arrayfield'] is below:
[
{
"type":"auth",
"name":"activity",
"parameters":[
{
"name":"api_name",
"value":"admin"
},
{
"name":"method_name",
"value":"directory.users.list"
},
{
"name":"client_id",
"value":"722230783769-dsta4bi9fkom72qcu0t34aj3qpcoqloq.apps.googleusercontent.com"
},
{
"name":"num_response_bytes",
"intValue":"7158"
},
{
"name":"product_bucket",
"value":"GSUITE_ADMIN"
},
{
"name":"app_name",
"value":"Untitled project"
},
{
"name":"client_type",
"value":"WEB"
}
]
}
] }, {
"kind":"admin#reports#activity",
"id":{
"time":"2022-05-05T23:58:48.914Z",
"uniqueQualifier":"-4002873813067783265",
"applicationName":"token",
"customerId":"C02f6wppb"
},
"etag":"\"5T53xK7dpLei95RNoKZd9uz5Xb8LJpBJb72fi2HaNYM/9DTdB8t7uixvUbjo4LUEg53_gf0\"",
"actor":{
"email":"nancy.admin#hyenacapital.net",
"profileId":"100230688039070881323"
},
"ipAddress":"54.80.168.30",
"events":[
{
"type":"auth",
"name":"activity",
"parameters":[
{
"name":"api_name",
"value":"gmail"
},
{
"name":"method_name",
"value":"gmail.users.messages.list"
},
{
"name":"client_id",
"value":"927538837578.apps.googleusercontent.com"
},
{
"name":"num_response_bytes",
"intValue":"2"
},
{
"name":"product_bucket",
"value":"GMAIL"
},
{
"name":"app_name",
"value":"Zapier"
},
{
"name":"client_type",
"value":"WEB"
}
]
ORIGINAL JSON BLOB I READ IN
{
"kind":"admin#reports#activities",
"etag":"\"5g8\"",
"nextPageToken":"A:1651795128914034:-4002873813067783265:151219070090:C02f6wppb",
"items":[
{
"kind":"admin#reports#activity",
"id":{
"time":"2022-05-05T23:59:39.421Z",
"uniqueQualifier":"5526793068617678141",
"applicationName":"token",
"customerId":"cds"
},
"etag":"\"jkYcURYoi8\"",
"actor":{
"email":"blah#blah.net",
"profileId":"1323"
},
"ipAddress":"107.178.193.87",
"events":[
{
"type":"auth",
"name":"activity",
"parameters":[
{
"name":"api_name",
"value":"admin"
},
{
"name":"method_name",
"value":"directory.users.list"
},
{
"name":"client_id",
"value":"722230783769-dsta4bi9fkom72qcu0t34aj3qpcoqloq.apps.googleusercontent.com"
},
{
"name":"num_response_bytes",
"intValue":"7158"
},
{
"name":"product_bucket",
"value":"GSUITE_ADMIN"
},
{
"name":"app_name",
"value":"Untitled project"
},
{
"name":"client_type",
"value":"WEB"
}
]
}
]
},
{
"kind":"admin#reports#activity",
"id":{
"time":"2022-05-05T23:58:48.914Z",
"uniqueQualifier":"-4002873813067783265",
"applicationName":"token",
"customerId":"df"
},
"etag":"\"5T53xK7dpLei95RNoKZd9uz5Xb8LJpBJb72fi2HaNYM/9DTdB8t7uixvUbjo4LUEg53_gf0\"",
"actor":{
"email":"blah.blah#bebe.net",
"profileId":"1324"
},
"ipAddress":"54.80.168.30",
"events":[
{
"type":"auth",
"name":"activity",
"parameters":[
{
"name":"api_name",
"value":"gmail"
},
{
"name":"method_name",
"value":"gmail.users.messages.list"
},
{
"name":"client_id",
"value":"927538837578.apps.googleusercontent.com"
},
{
"name":"num_response_bytes",
"intValue":"2"
},
{
"name":"product_bucket",
"value":"GMAIL"
},
{
"name":"client_type",
"value":"WEB"
}
]
}
]
}
]
}
Use:
# For each actor.profileId group, build a list with one entry per row:
# the length of 'parameters' in that row's first event (index 0 of 'events').
df.groupby('actor.profileId')['events'].apply(lambda x: [len(x.iloc[i][0]['parameters']) for i in range(len(x))])
which returns, for each profileId, the list of parameter counts. Output for the sample data:
actor.profileId
1323 [7]
1324 [7]
Name: events, dtype: object
It's not entirely clear what you are asking, and df['arrayfield'] isn't in the example you provided. However, if you look at the events column after json_normalize, you can use the following line to pull out the length of each parameters key. The blob you gave as an example was assigned to response...
# Flatten the entries of response['items'] into a DataFrame.
df = pd.json_normalize(response['items'])
# Per row: take element 0 of 'events', read its 'parameters' value, then take that value's length.
df['calcfield'] = df['events'].str[0].str.get('parameters').str.len()
Because each parameters key has 7 elements, it's tough to say whether this is what you really want.
In my MongoDB (export from JSON file) I have database "dab" with structure like this:
id:"1"
datetime:"2020-05-08 5:09:56"
name:"namea"
lat:55.826738
lon:45.0423412
analysis:"[{"0":0.36965591924860347},{"5":0.10391287134268598},{"10":0.086884394..."
I'm using that db for spark analysis via MongoDB-Spark Connector.
My problem is the field "analysis" - I need the average of the values across all intervals ("0", "5", "10", ..., "1000"), so I have to sum 0.36965591924860347 + 0.10391287134268598 + 0.086884394 + ..., divide by the number of intervals (I have 200 intervals in every column), and finally multiply the result by 100.
My solution would be this one:
// Pipeline: derive `average` = ($avg of the values held in `analysis`) * 100.
// NOTE(review): assumes `analysis` is an array of one-key objects such as {"0": 0.36} — confirm;
// the question displays the field as a quoted string.
db.collection.aggregate([
// Stage 1: replace each element of `analysis` with its $objectToArray form (k/v entries).
{
$set: {
analysis: {
$map: {
input: "$analysis",
in: { $objectToArray: "$$this" }
}
}
}
},
// Stage 2: for each element, keep only the first `v` from the entries produced by stage 1.
{
$set: {
analysis: {
$map: {
input: "$analysis",
in: { $first: "$$this.v" }
}
}
}
},
// Stage 3: average the values now stored in `analysis` and multiply by 100.
{ $set: { average: { $multiply: [ { $avg: "$analysis" }, 100 ] } } }
])
Mongo playground
You can use $reduce on that array to sum the values, then divide by the number of elements, and then multiply by 100.
// Pipeline: add `average` = (sum of the values in `analysis` / number of elements) * 100.
// NOTE(review): assumes `analysis` is an array of one-key objects such as {"0": 0.36} — confirm.
db.collection.aggregate([
{
"$addFields": {
"average": {
"$multiply": [
{
"$divide": [
{
// Numerator: fold `analysis` into a running sum, starting from 0.
"$reduce": {
"input": "$analysis",
"initialValue": 0,
"in": {
// Alias the accumulator ($$value) and the current element ($$this).
"$let": {
"vars": {
"sum": "$$value",
"data": "$$this"
},
"in": {
"$add": [
"$$sum",
{
// Outer $arrayElemAt picks index 1 (the value) of the [k, v] pair selected below.
"$arrayElemAt": [
{
// Inner $arrayElemAt picks the first [k, v] pair of the current element.
"$arrayElemAt": [
{
// Turn the current element into a list of [k, v] pairs.
"$map": {
"input": {
"$objectToArray": "$$data"
},
"as": "m",
"in": [
"$$m.k",
"$$m.v"
]
}
},
0
]
},
1
]
}
]
}
}
}
}
},
// Denominator: the number of elements in `analysis`.
{
"$size": "$analysis"
}
]
},
100
]
}
}
}
])
You can test the code here
But this code has one problem: you store the data as documents, and MongoDB doesn't have a function like get(document, $$k). The new MongoDB v5.0 has $getField, but it still accepts only constants, not variables.
I mean that in your case we can't do getField(doc, "5").
So we have the cost of converting each document to an array.
I have the following JavaScript object and I need to convert it to the PrimeNG tree format. Please help.
INPUT
{
"com": {
"ups": {
"demo": {
"a": 9
}
}
}
}
OUTPUT expected
[
{
"label": "COM",
"data": "COM",
"children": [{
"label": "ABC",
"data": "abc",
"children": [{ "label": "x", "data": "x", "children": [] }]
}]
}]
Working Example
validate(a) {
let newArr = [];
for (const key in a) {
if (key) {
newArr.push({data: key, label: key, childern: this.validate(a[key])});
}
}
return newArr;
}
// Sample input: each nesting level becomes one level of tree nodes.
const a = {
  com: {
    ups: {
      demo: { a: 9 },
    },
  },
};
// Print the tree built from the sample input.
console.log(this.validate(a));
Why is the JSON below wrong? I know I can have multiple elements with the same key in XML, but it seems that JSON doesn't allow it.
{
"BackupSettings": {
"Setting":
{
"id": "34345"
},
"Setting": {
"id": "16454"
}
}
}
Indeed, keys within an object are required to be unique in JSON. The canonical way of expressing your data in JSON would be to use an array. It could look something like the following:
{
"BackupSettings": {
"Settings": [
{
"id": "34345"
},
{
"id": "16454"
}
]
}
}
Or even:
{
"BackupSettings": [
{
"id": "34345"
},
{
"id": "16454"
}
]
}
I'm trying to retrieve random documents that contain #maga so I did the following query:
{
"_source": "text",
"query": {
"function_score": {
"query": {
"match": {
"text": "#maga"
}
},
"functions": [
{
"random_score": {}
}
]
}
}
}
The problem is that some returned documents don't contain #maga but just the token maga. Why is that? And how can I overcome this problem?