Delete empty and/or null values from a JSON - json

Given the following message:
[{
"Name1": "Value1",
"Name2": [{
"Name2_1": [],
"Name2_2": [{
"Name2_2_1": "Value2_2_1"}]
}, {
"Name2_3": [{
"Name2_3_1": 12300}
],
"Name2_4": [{
"Name2_4_1": "Value2_4_1"}]
}],
"Name3": [{
"Name3_1": "Value3_1"
}]
}, {
"Name1": "Value1",
"Name2": [{
"Name2_1": 1234,
"Name2_2": [{
"Name2_2_1": "Value2_2_1"
}],
"Name2_3": []
}],
"Name3": []
}]
I want to remove all the empty and/or null values using the Groovy programming language.

The following code:
import groovy.json.*
def data = '''\
[
{
"Name1": "Value1",
"Name2": [
{
"Name2_1": [],
"Name2_2": [
{
"Name2_2_1": "Value2_2_1"
}
]
},
{
"Name2_3": [
{
"Name2_3_1": 12300
}
],
"Name2_4": [
{
"Name2_4_1": "Value2_4_1"
}
]
}
],
"Name3": [
{
"Name3_1": "Value3_1"
}
]
},
{
"Name1": "Value1",
"Name2": [
{
"Name2_1": 1234,
"Name2_2": [
{
"Name2_2_1": "Value2_2_1"
}
],
"Name2_3": []
}
],
"Name3": []
}
]'''
def json = new JsonSlurper().parseText(data)
json = recursivelyRemoveEmpties(json)
println(JsonOutput.prettyPrint(JsonOutput.toJson(json)))
/**
 * Recursively removes null values, empty maps, empty lists and empty strings
 * from a parsed JSON structure of arbitrary depth.
 *
 * Children are cleaned before their parent is checked, so a container that
 * becomes empty once its own empties are removed is dropped as well.
 * Values such as 0 and false are kept: they are falsy under Groovy truth but
 * are real data, not "empty" values.
 *
 * @param item a Map, List or scalar as produced by JsonSlurper
 * @return a cleaned copy for maps and lists; scalars are returned unchanged
 */
def recursivelyRemoveEmpties(item) {
    switch (item) {
        case Map:
            return item.collectEntries { k, v ->
                [k, recursivelyRemoveEmpties(v)]
            }.findAll { k, v -> !isEmptyValue(v) }
        case List:
            return item.collect {
                recursivelyRemoveEmpties(it)
            }.findAll { v -> !isEmptyValue(v) }
        default:
            return item
    }
}

/**
 * Tells whether a value counts as "empty": null, an empty map/collection,
 * or a zero-length string. Numbers and booleans are never empty.
 */
def isEmptyValue(value) {
    if (value == null) {
        return true
    }
    if (value instanceof Map || value instanceof Collection) {
        return value.isEmpty()
    }
    if (value instanceof CharSequence) {
        return value.length() == 0
    }
    return false
}
defines a recursive method recursivelyRemoveEmpties which removes empty and null values from arbitrary depth in the data structure. When executed this prints:
─➤ groovy solution.groovy
[
{
"Name1": "Value1",
"Name2": [
{
"Name2_2": [
{
"Name2_2_1": "Value2_2_1"
}
]
},
{
"Name2_3": [
{
"Name2_3_1": 12300
}
],
"Name2_4": [
{
"Name2_4_1": "Value2_4_1"
}
]
}
],
"Name3": [
{
"Name3_1": "Value3_1"
}
]
},
{
"Name1": "Value1",
"Name2": [
{
"Name2_1": 1234,
"Name2_2": [
{
"Name2_2_1": "Value2_2_1"
}
]
}
]
}
]
Tested on:
─➤ groovy -v
Groovy Version: 3.0.6 JVM: 15.0.2 Vendor: Amazon.com Inc. OS: Linux

Related

How to merge multiple JSON files with the same structure into one JSON file (keeping that structure)

I need to merge file1.json file2.json (could be more) into onefile.json.
version always has the same value in all files. However, the vulnerabilities and dependency_files arrays differ between files and may contain duplicates, which I want to remove after the merge.
file1.json:
{
"version": "x.x.x",
"vulnerabilities": [
{
"id": "0000"
},
{
"id": "11111"
},
{
"id": "2222"
}
],
"dependency_files": [
{
"name": "name0000"
},
{
"name": "name1111"
},
{
"name": "name2222"
}
]
}
file2.json:
{
"version": "x.x.x",
"vulnerabilities": [
{
"id": "2222"
},
{
"id": "3333"
}
],
"dependency_files": [
{
"name": "name2222"
},
{
"name": "name3333"
}
]
}
onefile.json:
{
"version": "x.x.x",
"vulnerabilities": [
{
"id": "0000"
},
{
"id": "11111"
},
{
"id": "2222"
},
{
"id": "3333"
}
],
"dependency_files": [
{
"name": "name0000"
},
{
"name": "name1111"
},
{
"name": "name2222"
},
{
"name": "name3333"
}
]
}
I tried a lot with no luck
You could have a reduce on all files, initialized with the first, hence no need for the -n option:
jq '
  # Start from the first file (.) and fold every remaining file (inputs) into it.
  reduce inputs as {$vulnerabilities, $dependency_files} (.;
    # Append the incoming entries, then keep a single entry per id / name.
    # unique_by also returns the array sorted by that key.
    .vulnerabilities |= (. + $vulnerabilities | unique_by(.id))
    | .dependency_files |= (. + $dependency_files | unique_by(.name))
  )
' file*.json
{
"version": "x.x.x",
"vulnerabilities": [
{
"id": "0000"
},
{
"id": "11111"
},
{
"id": "2222"
},
{
"id": "3333"
}
],
"dependency_files": [
{
"name": "name0000"
},
{
"name": "name1111"
},
{
"name": "name2222"
},
{
"name": "name3333"
}
]
}
Demo
Using this python code
import json
def merge_dicts(*dicts, skip='version'):
    """Merge several parsed JSON documents that share the same structure.

    The value stored under ``skip`` is copied as-is (the last document wins).
    Every other key is expected to hold a list; the lists are concatenated
    across documents and duplicates are dropped, keeping the first occurrence
    and the original order.

    Args:
        *dicts: the documents to merge, in priority order.
        skip: name of the scalar key that must not be merged as a list.

    Returns:
        A new dict; the input documents and their lists are not modified.
    """
    merged = {}
    for item in dicts:
        for key, value in item.items():
            if key == skip:
                merged[skip] = value
                continue
            bucket = merged.setdefault(key, [])
            # Entries are dicts (unhashable), so membership is checked with
            # a linear scan instead of a set.
            for obj in value:
                if obj not in bucket:
                    bucket.append(obj)
    return merged
# Parse both input files, then write the merged document to data.json.
documents = []
for path in ("file1.json", "file2.json"):
    with open(path) as source:
        documents.append(json.load(source))

with open('data.json', 'w') as merge_file:
    json.dump(merge_dicts(*documents), merge_file, indent = 4)
Result
{
"version": "x.x.x",
"vulnerabilities": [
{
"id": "0000"
},
{
"id": "11111"
},
{
"id": "2222"
},
{
"id": "3333"
}
],
"dependency_files": [
{
"name": "name0000"
},
{
"name": "name1111"
},
{
"name": "name2222"
},
{
"name": "name3333"
}
]
}
This version supports any number of JSON files:
import os, json
def merge_dicts(*dicts, skip='version'):
    """Merge several parsed JSON documents that share the same structure.

    The value stored under ``skip`` is copied as-is (the last document wins).
    Every other key is expected to hold a list; the lists are concatenated
    across documents and duplicates are dropped, keeping the first occurrence
    and the original order.

    Args:
        *dicts: the documents to merge, in priority order.
        skip: name of the scalar key that must not be merged as a list.

    Returns:
        A new dict; the input documents and their lists are not modified.
    """
    merged = {}
    for item in dicts:
        for key, value in item.items():
            if key == skip:
                merged[skip] = value
                continue
            bucket = merged.setdefault(key, [])
            # Entries are dicts (unhashable), so membership is checked with
            # a linear scan instead of a set.
            for obj in value:
                if obj not in bucket:
                    bucket.append(obj)
    return merged
# Name of the merged result. It lives in the same directory as the inputs, so
# it must be excluded from the scan: otherwise a second run would merge the
# previous output back in.
output_name = 'data.json'

# os.listdir returns names in arbitrary order; sort them so the merge order
# (and therefore the order of the merged lists) is reproducible.
json_files = sorted(
    pos_json for pos_json in os.listdir('./')
    if pos_json.endswith('.json') and pos_json != output_name
)

a = []
for json_file in json_files:
    # The with-block closes the file; no explicit close() is needed.
    with open(json_file) as file_item:
        a.append(json.load(file_item))

with open(output_name, 'w') as merge_file:
    json.dump(merge_dicts(*a), merge_file, indent = 4)

select a particular field from nested subdocuments Couchbase

I have in a couchbase bucket documents having this structure:
"name": {
"grandfather": {
"parent1": {
"child1": [
{
.....
"uid": "value1",
},
{
"uid": "value2",
}
],
},
"parent2": {
"child2": [
{
"uid": "value3",
}
],
}
}
}
I would need a query that returns
{
{
"uid": "value1",
},
{
"uid": "value2",
}
{
"uid": "value3",
}
}
.. intuitively something like:
select grandfather.*.*.uid from name;
but this one doesn't work. If someone can help, thank you
Across all the documents
/* Inline sample document bound to the alias "doc", used below as the FROM source. */
WITH doc AS ( { "grandfather": { "parent1": { "child1": [ { "uid": "value1" }, { "uid": "value2" } ],
"child2": [{ "uid": "value3"}]
},
"parent2": { "child1": [ { "uid": "value4" }, { "uid": "value5" } ],
"child2": [{ "uid": "value6"}]
}
}
})
/* RAW returns each unnested element itself rather than wrapping it in a result object. */
SELECT RAW c1
FROM doc AS a
/* p: every value stored under grandfather (the parentN objects), whatever the field names are. */
UNNEST OBJECT_VALUES(a.grandfather) AS p
/* c: every value stored under a parent (the childN arrays). */
UNNEST OBJECT_VALUES(p) AS c
/* c1: every element of a child array, one result row per element. */
UNNEST c AS c1;
One entry per document
/* Inline sample document bound to the alias "doc", used below as the FROM source. */
WITH doc AS ( { "grandfather": { "parent1": { "child1": [ { "uid": "value1" }, { "uid": "value2" } ],
"child2": [{ "uid": "value3"}]
},
"parent2": { "child1": [ { "uid": "value4" }, { "uid": "value5" } ],
"child2": [{ "uid": "value6"}]
}
}
})
/* Outer ARRAY walks the name:value pairs of grandfather (pn:pv); the inner ARRAY walks the
   name:value pairs of each parent (cn:cv) and collects the child arrays. ARRAY_FLATTEN with
   depth 3 collapses the nested arrays into one "names" array, giving one row per document. */
SELECT ARRAY_FLATTEN(ARRAY (ARRAY cv FOR cn:cv IN pv END) FOR pn:pv IN a.grandfather END,3) AS names
FROM doc AS a;

Sort complex JSON object by specific property

How can I sort the given JSON object by the property count? I want to sort the entire sub-objects: the one with the highest count value should come first, and so on.
{
"Resource": [
{
"details": [
{
"value": "3.70"
},
{
"value": "3.09"
}
],
"work": {
"count": 1
}
},
{
"details": [
{
"value": "4"
},
{
"value": "5"
}
],
"work": {
"count": 2
}
},
{
"details": [
{
"value": "5"
},
{
"value": "5"
}
],
"work": "null"
}
]
}
You can try this example to sort your data:
import json

data = {
    "data": {
        "Resource": [
            {
                "details": [{"value": "3.70"}, {"value": "3.09"}],
                "work": {"count": 1},
            },
            {"details": [{"value": "4"}, {"value": "5"}], "work": {"count": 2}},
        ]
    }
}


def work_count(resource):
    """Sort key: the resource's work count, or -inf when it has none.

    A resource whose "work" is missing or is not an object (for example the
    string "null"), or whose "count" is not a number, gets -inf so that
    sorting does not raise and such entries group at the low end.
    """
    work = resource.get("work")
    if isinstance(work, dict) and isinstance(work.get("count"), (int, float)):
        return work["count"]
    return float("-inf")


# sort by 'work'/'count' (ascending; pass reverse=True to put the highest count first)
data["data"]["Resource"] = sorted(data["data"]["Resource"], key=work_count)

# sort by 'details'/'value' (values are numeric strings, so compare them as floats)
for r in data["data"]["Resource"]:
    r["details"] = sorted(r["details"], key=lambda k: float(k["value"]))

# pretty print:
print(json.dumps(data, indent=4))
Prints:
{
"data": {
"Resource": [
{
"details": [
{
"value": "3.09"
},
{
"value": "3.70"
}
],
"work": {
"count": 1
}
},
{
"details": [
{
"value": "4"
},
{
"value": "5"
}
],
"work": {
"count": 2
}
}
]
}
}

MongoDB reduce nested

I have a collection of a class that looks something like that:
[
{
"_id": 1,
"title": "dummy title",
"assignments": [
{
"_id": 1,
"name": "a1",
"members": [
{
"_id": 11,
"full_name": "john doe",
"aga": 18
},
{
"_id": 12,
"full_name": "john doe2",
"aga": 18
}
]
}
],
"settings": [
{
"type": "light",
"status": "enabled"
},
{
"type": "flare",
"status": "disabled"
},
{
"type": "toolbar",
"status": "enabled"
}
]
}
]
I have two nested arrays here: "assignments" (each of which has a nested "members" array) and "settings". The result I want should look something like this:
{
"_id": 1,
"title": "dummy title",
"assignments": [
{
"_id": 1,
"name": "a1",
"member_ids": [11, 12]
}
],
"active_settings": ["light", "toolbar"]
}
Meaning that in each "assignment" I should only return the ids of the members and not the whole member data, and in settings I should only return the types of the settings whose status is "enabled".
is it possible?
Playground here:
https://mongoplayground.net/p/Le4BdTm_gOv
You can use $map to process the assignments one by one; $mergeObjects merges the computed value back into the same assignment object.
// Aggregation pipeline with a single $project stage that reshapes each document.
[
{
$project: {
title: 1,
// Rebuild every assignment: keep all of its fields ("$$this") and merge in an
// object whose "members" holds only the member _id values. Because the merged
// key is also named "members", the original member objects are replaced by ids
// (the output field is "members", not "member_ids").
assignments: {
$map: {
input: "$assignments",
in: {
$mergeObjects: [
"$$this",
{
members: {
$map: {
input: "$$this.members",
in: "$$this._id"
}
}
}
]
}
}
},
// Fold over settings starting from []: when a setting's status equals
// "enabled", add its type to the accumulated array with $setUnion;
// otherwise carry the accumulated array ("$$value") forward unchanged.
active_settings: {
$reduce: {
input: "$settings",
initialValue: [],
in: {
$cond: [
{
$eq: [
"$$this.status",
"enabled"
]
},
{
$setUnion: [
"$$value",
[
"$$this.type"
]
]
},
"$$value"
]
}
}
}
}
}
]
Working Mongo playground
You can try,
get assignments member ids using $map and $reduce
get active_settings using $reduce
// Reshape each document: assignments keep only _id, name and the ids of their
// members; active_settings lists the type of every setting that is enabled.
db.collection.aggregate([
  {
    $project: {
      _id: 1,
      title: 1,
      assignments: {
        $map: {
          input: "$assignments",
          in: {
            _id: "$$this._id",
            name: "$$this.name",
            // Named member_ids to match the requested output shape.
            // Inside $reduce, "$$this" is the current member and "$$value"
            // is the array of ids collected so far.
            member_ids: {
              $reduce: {
                input: "$$this.members",
                initialValue: [],
                in: { $concatArrays: ["$$value", ["$$this._id"]] }
              }
            }
          }
        }
      },
      active_settings: {
        $reduce: {
          input: "$settings",
          initialValue: [],
          in: {
            // Append the type only when the setting is enabled; otherwise
            // carry the accumulated array forward unchanged.
            $cond: [
              { $eq: ["$$this.status", "enabled"] },
              { $concatArrays: ["$$value", ["$$this.type"]] },
              "$$value"
            ]
          }
        }
      }
    }
  }
])
Playground

Groovy: Convert Array to JSon

I am new to Groovy and am having trouble converting an array to JSON. The generated JSON should contain all the values from my array list, but it stores only the last one. Here is the code:
// Question code (kept as posted): builds a "cols"/"rows" JSON document from
// arraylist with JsonBuilder. As written, only the last entry ends up in "rows".
// presumably each inner pair is [hour label, visitor count] - inferred from the column ids
def arraylist = [["0",2],["1",8],["2",6],["3",8],["4",3]]
def arraysize = arraylist.size()
def builder = new groovy.json.JsonBuilder()
builder ({
cols([
{
"id" "hours"
"label" "Hours"
"type" "string"
},
{
"id" "visitor"
"label" "Visitors"
"type" "number"
}
])
rows([
// rows receives a list holding a single closure, and the loop below calls c(...)
// once per entry inside that one closure.
// NOTE(review): each c(...) call appears to overwrite the previous "c" value, which
// would explain why only the last entry survives - confirm against the JsonBuilder docs.
{
for( i in 0..< arraysize )
{
c([
{
"v" arraylist[i][0]
},
{
"v" arraylist[i][1]
}
])
}//for
}
])
})
println builder.toPrettyString()
Can try running the code here:
http://groovyconsole.appspot.com/
Expected output is here:
{
"cols": [
{
"id": "hours",
"label": "Hours",
"type": "string"
},
{
"id": "visitor",
"label": "Visitors",
"type": "number"
}
],
"rows": [
{
"c": [
{
"v": "0"
},
{
"v": 2
}
]
},
{
"c": [
{
"v": "1"
},
{
"v": 8
}
]
},
{
"c": [
{
"v": "2"
},
{
"v": 6
}
]
},
{
"c": [
{
"v": "3"
},
{
"v": 8
}
]
},
{
"c": [
{
"v": "4"
},
{
"v": 3
}
]
}
]
}
Something like this seems to give the result you wanted:
def arraylist = [["0",2],["1",8],["2",6],["3",8],["4",3]]

// Column descriptors, in output order.
def columns = [
    [ id: "hours", label: "Hours", type: "string" ],
    [ id: "visitor", label: "Visitors", type: "number" ]
]

// One row per input pair; every item of the pair becomes a { "v": item } cell.
def rowEntries = arraylist.collect { pair ->
    def cells = pair.collect { item -> [ v: item ] }
    [ c: cells ]
}

// Hand the finished map to the builder instead of using the closure DSL.
def builder = new groovy.json.JsonBuilder([ cols: columns, rows: rowEntries ])
println builder.toPrettyString()