Parse a nested json structure - json

I have a nested json file like below
{
"Message No": 1.0,
"abc": {
"action": {
"ab1": false,
"ab2": false
},
"val": "Global"
},
"tyu": {
"lmp": [{
"Currency": "USD",
"Amount": "32401.32"
}]
},
"Payments": {
"Array": ["Hi", "There"],
"Details": [{
"Date": "2019-04-11"
}]
}
}
I found a piece of code via Google that converts it into simple key-value pairs.
The code is like below
/**
 * Flattens a nested JSON-like map into `(path, value)` pairs; the path is the
 * chain of keys leading to the value, joined with `_`.
 *
 * Nested maps are descended into. A list is assumed to contain maps only: every
 * element is flattened under the list's own key (no index is added). Any other
 * value becomes a leaf, and a `null` value is rendered as the string "null".
 *
 * Known limitation: element types are erased at runtime, so the list case also
 * matches a list of plain values (e.g. `["Hi", "There"]`) and then fails when
 * its elements are treated as maps.
 *
 * @param m    the (sub)tree to flatten
 * @param tree keys of the ancestors of `m`, outermost first
 * @return one pair per leaf, keyed by the `_`-joined path to that leaf
 */
def Simply(m: Map[String, Any], tree: List[String] = List()): Iterable[(String, Any)] =
  // Work on a List so the result is a plain sequence of pairs and repeated
  // paths (e.g. from an array holding several objects) are all kept.
  m.toList.flatMap {
    // Checked first so that a null value can never be claimed by a later case.
    case (k, null) => List((tree :+ k).mkString("_") -> "null")
    case (k: String, v: Map[String, Any] @unchecked) => Simply(v, tree :+ k)
    case (k: String, v: List[Map[String, Any]] @unchecked) => v.flatMap(Simply(_, tree :+ k))
    case (k: String, v: Any) => List((tree :+ k).mkString("_") -> v)
  }
The code works fine but it can not process the Array element in the json
"Array": ["Hi", "There"],
I tried to put an extra condition like
case (k: String, v: List[String]) => List((tree :+ k.toString).mkString("_") ->v.mkString(","))
but then this condition is blocking the below case in the simply function
case (k: String, v: List[Map[String, Any]] @unchecked) => v.flatten(Simply(_, tree :+ k))
Please help me understand whether I am putting the condition in the wrong place, or whether I need to change the code in some other way.
Expected output
(tyu_lmp_Amount,32401.32)
(abc_action_ab1,false)
(Message No,1.0)
(abc_action_ab2,false)
(tyu_lmp_Currency,USD)
(Payments_Details_Date,2019-04-11)
(Payments_Array,{Hi, There})
(abc_val,Global)

I hope this is what you want:
package advanced
import org.json4s._
import org.json4s.jackson.JsonMethods._
/**
 * Runnable demo: parses a nested JSON document with json4s and prints it as
 * flat `(path, value)` pairs whose keys are the `_`-joined paths to each leaf.
 */
object JsonTest extends App {
  // Sample input: nested objects, an array of objects and an array of plain strings.
  val s =
    """
      |{
      | "Message No": 1.0,
      | "abc": {
      | "action": {
      | "ab1": false,
      | "ab2": false
      | },
      | "val": "Global"
      | },
      |
      | "tyu": {
      | "lmp": [{
      | "Currency": "USD",
      | "Amount": "32401.32"
      | }]
      | },
      |
      | "Payments": {
      | "Array": ["Hi", "There"],
      | "Details": [{
      | "Date": "2019-04-11"
      | }]
      | }
      |}
      |""".stripMargin

  /** Parses `jsonStr` with json4s and extracts it as a `Map[String, Any]`. */
  def jsonStrToMap(jsonStr: String): Map[String, Any] = {
    implicit val formats: Formats = org.json4s.DefaultFormats
    parse(jsonStr).extract[Map[String, Any]]
  }

  /**
   * Flattens a nested map into `(path, value)` pairs, joining keys with `_`.
   *
   *  - a nested map is descended into;
   *  - a non-empty list made up only of maps is flattened element by element
   *    under the list's own key (no index is added);
   *  - any other list becomes a single leaf whose value is its elements joined
   *    with `,`;
   *  - `null` becomes the string "null"; every other value is kept as is.
   *
   * @param m    the (sub)tree to flatten
   * @param tree keys of the ancestors of `m`, outermost first
   * @return one pair per leaf; repeated paths (several objects in one array)
   *         are all kept
   */
  def Simply(m: Map[String, Any], tree: List[String] = List()): Iterable[(String, Any)] = {
    def pathTo(key: String): String = (tree :+ key).mkString("_")

    // Flat-map over a List rather than over the Map itself: mapping a Map to
    // pairs rebuilds a Map, which would silently drop repeated paths.
    m.toList.flatMap {
      case (k, null) => List(pathTo(k) -> "null")
      case (k, v: Map[_, _]) => Simply(v.asInstanceOf[Map[String, Any]], tree :+ k)
      case (k, v: List[_]) =>
        // Element types are erased, so inspect the elements instead of relying
        // on a `List[String]` / `List[Map[...]]` type pattern.
        val objects = v.collect { case o: Map[_, _] => o.asInstanceOf[Map[String, Any]] }
        if (v.nonEmpty && objects.size == v.size) objects.flatMap(Simply(_, tree :+ k))
        else List(pathTo(k) -> v.mkString(","))
      case (k, v) => List(pathTo(k) -> v)
    }
  }

  val map = jsonStrToMap(s)
  println(Simply(map))
  // Prints one (path, value) pair per leaf, for example (the order follows the
  // iteration order of the parsed maps and may differ):
  // List((Message No,1.0), (abc_action_ab1,false), (abc_action_ab2,false), (abc_val,Global), (tyu_lmp_Currency,USD), (tyu_lmp_Amount,32401.32), (Payments_Array,Hi,There), (Payments_Details_Date,2019-04-11))
}
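Because of type erasure, a type pattern such as `v: List[String]` only checks at runtime that the value is a `List`; the element type is not checked. A `v: List[String]` case therefore swallows every list, including a list of objects, so the list-of-objects case has to be tested before it (here with a guard that inspects the first element).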

Related

How to find the difference/mismatch between two JSON file?

I have two json files, one is expected json and the another one is the result of GET API call. I need to compare and find out the mismatch in the file.
Expected Json:
{
"array": [
1,
2,
3
],
"boolean": true,
"null": null,
"number": 123,
"object": {
"a": "b",
"c": "d",
"e": "f"
},
"string": "Hello World"
}
Actual Json response:
{
"array": [
1,
2,
3
],
"boolean": true,
"null": null,
"number": 456,
"object": {
"a": "b",
"c": "d",
"e": "f"
},
"string": "India"
}
There are actually two mismatches: the number received is 456 and the string is "India".
Is there a way to compare and get these two mismatch as results.
This need to be implemented in gatling/scala.
You can use, for example, play-json library and recursively traverse both JSONs. For next input (a bit more sophisticated than yours input):
LEFT:
{
"array" : [ 1, 2, 4 ],
"boolean" : true,
"null" : null,
"number" : 123,
"object" : {
"a" : "b",
"c" : "d",
"e" : "f"
},
"string" : "Hello World",
"absent-in-right" : true,
"different-types" : 123
}
RIGHT:
{
"array" : [ 1, 2, 3 ],
"boolean" : true,
"null" : null,
"number" : 456,
"object" : {
"a" : "b",
"c" : "d",
"e" : "ff"
},
"string" : "India",
"absent-in-left" : true,
"different-types" : "YES"
}
It produces this output:
Next fields are absent in LEFT:
*\absent-in-left
Next fields are absent in RIGHT:
*\absent-in-right
'*\array\(2)' => 4 != 3
'*\number' => 123 != 456
'*\object\e' => f != ff
'*\string' => Hello World != India
Cannot compare JsNumber and JsString in '*\different-types'
Code:
// The LEFT and RIGHT documents shown above, parsed and cast to JsObject.
val left = Json.parse("""{"array":[1,2,4],"boolean":true,"null":null,"number":123,"object":{"a":"b","c":"d","e":"f"},"string":"Hello World","absent-in-right":true,"different-types":123}""").asInstanceOf[JsObject]
val right = Json.parse("""{"array":[1,2,3],"boolean":true,"null":null,"number":456,"object":{"a":"b","c":"d","e":"ff"},"string":"India","absent-in-left":true,"different-types":"YES"}""").asInstanceOf[JsObject]
// '*' - for the root node
// Start the comparison at the root with an empty ancestor path.
showJsDiff(left, right, "*", Seq.empty[String])
/**
 * Recursively compares two JSON values and prints every mismatch to stdout.
 *
 * Values of the same kind are compared by content: primitives by value, arrays
 * element by element (only when both have the same length), objects field by
 * field. Fields present on one side only are listed; values of different kinds
 * are reported as not comparable.
 *
 * The two sides are matched as a pair instead of comparing runtime classes, so
 * `true` and `false` are compared as booleans even though they are distinct
 * `JsBoolean` subclasses.
 *
 * @param left   value from the first document
 * @param right  value from the second document
 * @param parent name of the node being compared (`"*"` for the root)
 * @param path   names of the ancestors of `parent`, outermost first
 */
def showJsDiff(left: JsValue, right: JsValue, parent: String, path: Seq[String]): Unit = {
  val newPath = path :+ parent

  (left, right) match {
    // Primitive types are pretty easy to handle
    case (JsNull, JsNull) => ()
    case (JsBoolean(l), JsBoolean(r)) => logIfNotEqual(l, r, newPath)
    case (JsNumber(l), JsNumber(r)) => logIfNotEqual(l, r, newPath)
    case (JsString(l), JsString(r)) => logIfNotEqual(l, r, newPath)

    case (JsArray(arr1), JsArray(arr2)) =>
      if (arr1.length != arr2.length) {
        println(s"Arrays in '${getPath(newPath)}' have different length. ${arr1.length} != ${arr2.length}")
      } else {
        // Same length: compare position by position, using "(idx)" as the node name.
        arr1.indices.foreach { idx =>
          showJsDiff(arr1(idx), arr2(idx), s"($idx)", newPath)
        }
      }

    case (leftObj: JsObject, rightObj: JsObject) =>
      val leftFields = leftObj.value.keys.toSeq
      val rightFields = rightObj.fields.map { case (name, _) => name }

      val absentInLeft = rightFields.diff(leftFields)
      if (absentInLeft.nonEmpty) {
        println("Next fields are absent in LEFT: ")
        absentInLeft.foreach { fieldName =>
          println(s"\t ${getPath(newPath :+ fieldName)}")
        }
      }

      val absentInRight = leftFields.diff(rightFields)
      if (absentInRight.nonEmpty) {
        println("Next fields are absent in RIGHT: ")
        absentInRight.foreach { fieldName =>
          println(s"\t ${getPath(newPath :+ fieldName)}")
        }
      }

      // Fields present on both sides are compared recursively.
      leftFields.intersect(rightFields).foreach { field =>
        showJsDiff(leftObj.value(field), rightObj.value(field), field, newPath)
      }

    // Anything else is a pair of different JSON kinds (e.g. number vs string).
    case _ =>
      println(s"Cannot compare ${left.getClass.getSimpleName} and ${right.getClass.getSimpleName} " +
        s"in '${getPath(newPath)}'")
  }
}
/**
 * Prints a `'<path>' => <left> != <right>` line when the two values differ;
 * prints nothing when they are equal.
 *
 * @param left  value from the first document
 * @param right value from the second document
 * @param path  location of the values, rendered with `getPath`
 */
def logIfNotEqual[T](left: T, right: T, path: Seq[String]): Unit =
  if (left == right) ()
  else {
    val where = getPath(path)
    println(s"'$where' => $left != $right")
  }
/** Renders a path by joining its segments with a single backslash. */
def getPath(path: Seq[String]): String = {
  val separator = "\\"
  path.mkString(separator)
}
Use diffson - a Scala implementation of RFC-6901 and RFC-6902: https://github.com/gnieh/diffson
json4s has a handy diff function described here: https://github.com/json4s/json4s (search for Merging & Diffing) and API doc here: https://static.javadoc.io/org.json4s/json4s-core_2.9.1/3.0.0/org/json4s/Diff.html
This is a slightly modified version of Artavazd's answer (which is amazing btw thank you so much!). This version outputs the differences into a convenient object instead of only logging them.
import play.api.Logger
import play.api.libs.json.{JsArray, JsBoolean, JsError, JsNull, JsNumber, JsObject, JsString, JsSuccess, JsValue, Json, OFormat, Reads}
/**
 * All differences found between two JSON documents.
 *
 * @param differences the individual differences; empty when none were found
 */
case class JsDifferences(differences: List[JsDifference] = Nil)

/** Holds the implicit play-json format for [[JsDifferences]]. */
object JsDifferences {
  implicit val format: OFormat[JsDifferences] =
    Json.format[JsDifferences]
}
/**
 * One difference between two JSON documents.
 *
 * @param key      name of the node that differs
 * @param path     full path to that node, root first
 * @param oldValue string form of the value in the first document, if present
 * @param newValue string form of the value in the second document, if present
 */
case class JsDifference(key: String, path: Seq[String], oldValue: Option[String], newValue: Option[String])

/** Holds the implicit play-json format for [[JsDifference]]. */
object JsDifference {
  implicit val format: OFormat[JsDifference] =
    Json.format[JsDifference]
}
/** Helpers for computing the differences between two JSON documents. */
object JsonUtils {
  val logger: Logger = Logger(this.getClass)

  /**
   * Recursively compares two JSON values and returns every difference found.
   *
   * Primitives are compared by value, arrays of equal length element by
   * element and objects field by field; a field present on one side only is
   * reported with the missing side set to `None`.
   *
   * Values of different JSON kinds, and arrays of different length, are
   * reported as one difference covering the whole value (and are still logged
   * at debug level) rather than being dropped from the result. The two sides
   * are matched as a pair instead of by runtime class, so `true` versus
   * `false` is reported as a value difference.
   *
   * @param left   value from the first ("old") document
   * @param right  value from the second ("new") document
   * @param parent name of the node being compared (`"*"` for the root)
   * @param path   names of the ancestors of `parent`, outermost first
   * @return the differences found at or below this node
   */
  def findDiff(left: JsValue, right: JsValue, parent: String = "*", path: List[String] = List()): JsDifferences = {
    val newPath = path :+ parent

    // A single difference covering the whole node, rendered as JSON text.
    def wholeValueDiff: JsDifferences = JsDifferences(List(JsDifference(
      key = parent, path = newPath, oldValue = Some(left.toString), newValue = Some(right.toString)
    )))

    (left, right) match {
      case (JsNull, JsNull) => JsDifferences()
      case (JsBoolean(l), JsBoolean(r)) => logIfNotEqual(l, r, newPath)
      case (JsNumber(l), JsNumber(r)) => logIfNotEqual(l, r, newPath)
      case (JsString(l), JsString(r)) => logIfNotEqual(l, r, newPath)

      case (JsArray(arr1), JsArray(arr2)) =>
        if (arr1.length != arr2.length) {
          logger.debug(s"Arrays in '${getPath(newPath)}' have different length. ${arr1.length} != ${arr2.length}")
          wholeValueDiff
        } else {
          JsDifferences(arr1.indices.flatMap(idx => findDiff(arr1(idx), arr2(idx), s"($idx)", newPath).differences).toList)
        }

      case (leftJsObject: JsObject, rightJsObject: JsObject) =>
        val leftFields = leftJsObject.keys.toSeq
        val rightFields = rightJsObject.fields.map { case (name, _) => name }

        // Fields that exist only in the right document: no old value.
        val leftDifferences = rightFields.diff(leftFields).map(fieldName => JsDifference(
          key = fieldName, path = newPath :+ fieldName, oldValue = None, newValue = Some(rightJsObject.value(fieldName).toString)
        ))
        // Fields that exist only in the left document: no new value.
        val rightDifferences = leftFields.diff(rightFields).map(fieldName => JsDifference(
          key = fieldName, path = newPath :+ fieldName, oldValue = Some(leftJsObject.value(fieldName).toString), newValue = None
        ))
        // Fields present on both sides are compared recursively.
        val commonDifferences = leftFields.intersect(rightFields).flatMap { field =>
          findDiff(leftJsObject.value(field), rightJsObject.value(field), field, newPath).differences
        }
        JsDifferences((leftDifferences ++ rightDifferences ++ commonDifferences).toList)

      // Different JSON kinds on the two sides (e.g. number vs string).
      case _ =>
        logger.debug(s"Cannot compare ${left.getClass.getSimpleName} and ${right.getClass.getSimpleName} in '${getPath(newPath)}'")
        wholeValueDiff
    }
  }

  /**
   * Returns a single-element result describing the mismatch when `left` and
   * `right` differ, and an empty result when they are equal. Despite the name
   * nothing is logged.
   *
   * @param path location of the compared values; its last segment is the key
   */
  def logIfNotEqual[T](left: T, right: T, path: Seq[String]): JsDifferences = {
    if (left != right) {
      JsDifferences(List(JsDifference(
        key = path.last, path = path, oldValue = Some(left.toString), newValue = Some(right.toString)
      )))
    } else JsDifferences()
  }

  /** Renders a path by joining its segments with a single backslash. */
  def getPath(path: Seq[String]): String = path.mkString("\\")
}

Parse Json array with Circe

I'm a beginner with Circe and I would like to retrieve information from this JSON:
[
{
"sha":"7fd1a60b01f91b314f59955a4e4d4e80d8edf11d",
"commit":{
"author":{
"name":"The Octocat",
"email":"octocat@nowhere.com",
"date":"2012-03-06T23:06:50Z"
},
"committer":{
"name":"The Octocat",
"email":"octocat@nowhere.com",
"date":"2012-03-06T23:06:50Z"
},
"message":"Merge pull request #6 from Spaceghost/patch-1\n\nNew line at end of file.",
},
"url":"https://api.github.com/repos/octocat/Hello-World/commits/7fd1a60b01f91b314f59955a4e4d4e80d8edf11d",
},
{
"sha":"762941318ee16e59dabbacb1b4049eec22f0d303",
"commit":{
"author":{
"name":"Johnneylee Jack Rollins",
"email":"johnneylee.rollins@gmail.com",
"date":"2011-09-14T04:42:41Z"
},
"committer":{
"name":"Johnneylee Jack Rollins",
"email":"johnneylee.rollins@gmail.com",
"date":"2011-09-14T04:42:41Z"
},
"message":"New line at end of file. --Signed off by Spaceghost",
},
"url":"https://api.github.com/repos/octocat/Hello-World/commits/762941318ee16e59dabbacb1b4049eec22f0d303",
},
]
I don't understand why this code doesn't retrieve the information about 'author':
// Parse the response body; a parse failure is replaced by Json.Null.
val doc= parse(response.json.toString()).getOrElse(Json.Null)
// NOTE(review): after downField("commit") the cursor is asked to move with `.right`
// and the target is decoded as Seq[String], although "author" in the sample is an
// object nested inside "commit" - confirm the intended navigation against the
// circe cursor documentation.
doc.hcursor.downArray.downField("commit").right.as[Seq[String]] match {
case Left(failure) => println("Fail")
case Right(json) => println("Ok")
}
Do you have an idea ?
Thank in advance,
Your JSON contains trailing commas in some places. The JSON specification does not allow them, so the document cannot be parsed until they are removed.
// Sample payload: two commit records, written as valid JSON (no trailing commas).
val json =
"""[
{
"sha":"7fd1a60b01f91b314f59955a4e4d4e80d8edf11d",
"commit":{
"author":{
"name":"The Octocat",
"email":"octocat@nowhere.com",
"date":"2012-03-06T23:06:50Z"
},
"committer":{
"name":"The Octocat",
"email":"octocat@nowhere.com",
"date":"2012-03-06T23:06:50Z"
},
"message":"Merge pull request #6 from Spaceghost/patch-1\n\nNew line at end of file."
},
"url":"https://api.github.com/repos/octocat/Hello-World/commits/7fd1a60b01f91b314f59955a4e4d4e80d8edf11d"
},
{
"sha":"762941318ee16e59dabbacb1b4049eec22f0d303",
"commit":{
"author":{
"name":"Johnneylee Jack Rollins",
"email":"johnneylee.rollins@gmail.com",
"date":"2011-09-14T04:42:41Z"
},
"committer":{
"name":"Johnneylee Jack Rollins",
"email":"johnneylee.rollins@gmail.com",
"date":"2011-09-14T04:42:41Z"
},
"message":"New line at end of file. --Signed off by Spaceghost"
},
"url":"https://api.github.com/repos/octocat/Hello-World/commits/762941318ee16e59dabbacb1b4049eec22f0d303"
}
]"""

// Case classes mirroring the JSON structure; field names match the JSON keys.
/** Author of a commit. */
case class Author(name: String, email: String, date: String)
/** Committer of a commit. */
case class Committer(name: String, email: String, date: String)
/** The "commit" object of a record. */
case class Commit(author: Author, committer: Committer, message: String)
/** One element of the top-level array. */
case class Record(sha: String, commit: Commit, url: String)

// Decode the whole array at once and print each record's author,
// or print the error if decoding fails.
decode[Seq[Record]](json) match {
  case Right(records) => records.foreach(record => println(record.commit.author))
  case Left(error) => println(error)
}
//Author(The Octocat,octocat@nowhere.com,2012-03-06T23:06:50Z)
//Author(Johnneylee Jack Rollins,johnneylee.rollins@gmail.com,2011-09-14T04:42:41Z)
And code like yours works as well:
// Walk the parsed document by hand: first array element -> commit -> author -> name.
val doc = parse(json).getOrElse(Json.Null)
val firstAuthorName = doc.hcursor
  .downArray
  .downField("commit")
  .downField("author")
  .downField("name")
  .as[String]
// Print whichever side came back: the decoding failure or the decoded name.
firstAuthorName.fold(failure => println(failure), name => println(name))
// The Octocat

Update all values from any Json

I've got some json and I have to encrypt all the values. Below is a json, all the values of which should be updated:
// Input document: every value below, at any depth, has to be encrypted.
json = Json.parse("""{
"key1" : 1.5,
"key2" : [
{"key211": 1, "key212": "value212"},
{"key221": 2, "key222": "value222"}
],
"key3" : {
"key31" : true,
"key32" : "value32"
},
"key4" : 17
}""")
After encrypting and updating all the values, it should look like this:
// Expected result: same structure and keys, every value replaced by an encrypted string.
val json = Json.parse("""{
"key1" : "uhKhbofQtL",
"key2" : [
{"key211": "FxnbGGZFMW", "key212": "VsdfdGfg"},
{"key221": "sdffFdd", "key222": "Fsdfsfds"}
],
"key3" : {
"key31" : "Fsdfasdf",
"key32" : "Vsdfsdfsdfs"
},
"key4" : "sfsdfFSdfs"
}""")
How can I do it?
Parse it as a Map, then traverse the map and encrypt:
/**
 * Recursively replaces every leaf of a parsed JSON-like structure with its
 * encrypted form, keeping the structure and the map keys unchanged.
 *
 * Maps are rebuilt with the same keys and encrypted values, lists are rebuilt
 * element by element, and anything else is treated as a leaf and passed to `enc`.
 *
 * @param data a `Map`, a `List` or a leaf value
 * @param enc  turns one leaf value into its encrypted string
 * @return a structure of the same shape whose leaves are the results of `enc`
 */
def encrypt(data: Any, enc: Any => String): Any = data match {
  // Type arguments are erased at runtime, so match on the collection kind only.
  case obj: Map[_, _] => obj.map { case (key, value) => key -> encrypt(value, enc) }
  case items: List[_] => items.map(encrypt(_, enc))
  case leaf => enc(leaf)
}
I've solved this problem and solution is below
// Reads a JSON object into a Map[String, Any]; each value goes through metaValueReader.
implicit val readsMap: Reads[Map[String, Any]] =
  Reads[Map[String, Any]] { js =>
    val delegate = Reads.mapReads[Any](metaValueReader)
    delegate.reads(js)
  }
// Writes a Map[String, Any] back to JSON; each value goes through metaValueWriter.
implicit val writesMap: Writes[Map[String, Any]] =
  Writes[Map[String, Any]] { values =>
    val delegate = Writes.mapWrites[Any](metaValueWriter)
    delegate.writes(values)
  }
/**
 * Reads one JSON value, encrypting every primitive it contains.
 *
 * Objects and arrays are traversed recursively; their entries are stored as the
 * nested `JsResult` of each child rather than as raw values. Booleans, numbers
 * and strings are passed to `encryptValue`; `JsNull` is encrypted as the empty
 * string. Any other value yields a `JsError`.
 */
def metaValueReader(jsValue: JsValue): JsResult[Any] = jsValue match {
  // Containers: recurse into every child.
  case JsObject(fields) => JsSuccess(fields.map { case (name, child) => name -> metaValueReader(child) })
  case JsArray(items) => JsSuccess(items.map(metaValueReader))
  // Primitives: encrypt the wrapped value.
  case JsBoolean(flag) => JsSuccess(flag).map(encryptValue)
  case JsNumber(number) => JsSuccess(number).map(encryptValue)
  case JsString(text) => JsSuccess(text).map(encryptValue)
  case JsNull => JsSuccess("").map(encryptValue)
  case unsupported => JsError(s"$unsupported is not a valid value")
}
/**
 * Converts a value produced by `metaValueReader` back into JSON.
 *
 * A `JsSuccess` is unwrapped first, maps become objects, sequences become
 * arrays and strings become JSON strings. No other kind of value is handled;
 * passing one fails the match.
 */
def metaValueWriter(value: Any): JsValue = value match {
  case JsSuccess(inner, _) => metaValueWriter(inner)
  case fields: Map[String, Any] =>
    JsObject(fields.map { case (name, child) => name -> metaValueWriter(child) })
  case items: Seq[Any] =>
    JsArray(items.map(metaValueWriter))
  case text: String => JsString(text)
}
How can I improve this code?

SPARK : How to create aggregate from RDD[Row] in Scala

How do I create a List/Map inside a RDD/DF so that I can get the aggregate ?
I have a file where each row is a JSON object :
{
itemId :1122334,
language: [
{
name: [
"US", "FR"
],
value: [
"english", "french"
]
},
{
name: [
"IND"
],
value: [
"hindi"
]
}
],
country: [
{
US: [
{
startTime: 2016-06-06T17: 39: 35.000Z,
endTime: 2016-07-28T07: 00: 00.000Z
}
],
CANADA: [
{
startTime: 2016-06-06T17: 39: 35.000Z,
endTime: 2016-07-28T07: 00: 00.000Z
}
],
DENMARK: [
{
startTime: 2016-06-06T17: 39: 35.000Z,
endTime: 2016-07-28T07: 00: 00.000Z
}
],
FRANCE: [
{
startTime: 2016-08-06T17: 39: 35.000Z,
endTime: 2016-07-28T07: 00: 00.000Z
}
]
}
]
},
{
itemId :1122334,
language: [
{
name: [
"US", "FR"
],
value: [
"english", "french"
]
},
{
name: [
"IND"
],
value: [
"hindi"
]
}
],
country: [
{
US: [
{
startTime: 2016-06-06T17: 39: 35.000Z,
endTime: 2016-07-28T07: 00: 00.000Z
}
],
CANADA: [
{
startTime: 2016-07-06T17: 39: 35.000Z,
endTime: 2016-07-28T07: 00: 00.000Z
}
],
DENMARK: [
{
startTime: 2016-06-06T17: 39: 35.000Z,
endTime: 2016-07-28T07: 00: 00.000Z
}
],
FRANCE: [
{
startTime: 2016-08-06T17: 39: 35.000Z,
endTime: 2016-07-28T07: 00: 00.000Z
}
]
}
]
}
I have matching POJO which gets me the values from the JSON.
import com.mapping.data.model.MappingUtils
import com.mapping.data.model.CountryInfo
// Location of the mapping file that is read into `input` below.
val mappingPath = "s3://.../"
// Not referenced by the snippets shown here.
val timeStamp = "2016-06-06T17: 39: 35.000Z"
// Bound that each country's start time is compared against in the attempts further down.
val endTimeStamp = "2016-06-07T17: 39: 35.000Z"
// Country names used as labels in the aggregates.
val COUNTRY_US = "US"
val COUNTRY_CANADA = "CANADA"
val COUNTRY_DENMARK = "DENMARK"
val COUNTRY_FRANCE = "FRANCE"
// The mapping file as lines of text.
val input = sc.textFile(mappingPath)
The input is list of jsons where each line is json which I am mapping to the POJO class CountryInfo using MappingUtils which takes care of JSON parsing and conversion:
// Parse every input line into a CountryInfo, key it by item id and collect the
// resulting pairs into a map.
val MappingsList = input
  .map(line => MappingUtils.getCountryInfoString(line))
  .map(info => (info.getItemId(), info))
  .collectAsMap
MappingsList: scala.collection.Map[String,com.mapping.data.model.CountryInfo]
/**
 * Unwraps a looked-up `CountryInfo`.
 *
 * @param x result of looking an item up in the mappings
 * @return the contained `CountryInfo`
 * @throws NoSuchElementException if `x` is empty, with a message naming the
 *                                problem instead of an opaque match failure
 */
def showCountryInfo(x: Option[CountryInfo]): CountryInfo =
  x.getOrElse(throw new NoSuchElementException("no CountryInfo found for the requested item"))
But I need to create a DF/RDD so that I can get the aggregates of country and language for based on itemId.
In the given example, if the country's start time is not lesser than "2016-06-07T17: 39: 35.000Z" then the value will be zero.
Which format will be good to create the final aggregate json :
1. List ?
|-----itemId-------|----country-------------------|-----language---------------------|
| 1122334 | [US, CANADA,DENMARK] | [english,hindi,french] |
| 1122334 | [US,DENMARK] | [english] |
|------------------|------------------------------|----------------------------------|
2. Map ?
|-----itemId-------|----country---------------------------------|-----language---------------------|
| 1122334 | (US,2) (CANADA,1) (DENMARK,2) (FRANCE, 0) |(english,2) (hindi,1) (french,1) |
|.... |
|.... |
|.... |
|------------------|--------------------------------------------|----------------------------------|
I would like to create a final json which has the aggregate value like :
{
itemId: "1122334",
country: {
"US" : 2,
"CANADA" : 1,
"DENMARK" : 2,
"FRANCE" : 0
},
language: {
"english" : 2,
"french" : 1,
"hindi" : 1
}
}
I tried List :
// Attempt 1: build, per event row, the list of countries and the list of languages.
// NOTE(review): the query has no FROM clause - presumably "select itemId from EventList"; confirm.
val events = sqlContext.sql( "select itemId EventList")
val itemList = events.map(row => {
// NOTE(review): index 1 is read although only one column is selected - confirm the index.
val itemId = row.getAs[String](1);
// NOTE(review): the helper defined earlier is named showCountryInfo, not showTitleInfo - confirm.
val countryInfo = showTitleInfo(MappingsList.get(itemId));
val country = new ListBuffer[String]()
// A country is meant to be added only when its first start time is before endTimeStamp.
// NOTE(review): `x += if (...) y` without an else branch is not a valid expression here;
// these four lines need an explicit conditional append.
country += if (countryInfo.getCountry().getUS().get(0).getStartTime() < endTimeStamp) COUNTRY_US;
country += if (countryInfo.getCountry().getCANADA().get(0).getStartTime() < endTimeStamp) COUNTRY_CANADA;
country += if (countryInfo.getCountry().getDENMARK().get(0).getStartTime() < endTimeStamp) COUNTRY_DENMARK;
country += if (countryInfo.getCountry().getFRANCE().get(0).getStartTime() < endTimeStamp) COUNTRY_FRANCE;
val languageList = new ListBuffer[String]()
// Copies every language value into languageList; `language` itself only receives the result of foreach.
val language = countryInfo.getLanguages().collect.foreach(x => languageList += x.getValue());
Row(itemId, country.toList, languageList.toList)
})
and Map :
// Attempt 2: build, per event row, a country -> 0/1 map and a language -> 1 map.
val itemList = events.map(row => {
val itemId = row.getAs[String](1);
val countryInfo = showTitleInfo(MappingsList.get(itemId));
// NOTE(review): `country` is a val holding what looks like an immutable Map, so `+=` cannot update it - confirm the intended Map type.
val country: Map[String, Int] = Map()
// NOTE(review): all four lines below test getUS(); the CANADA/DENMARK/FRANCE lines presumably
// should test their own country. The single-quoted keys are not valid string literals and
// presumably stand for the COUNTRY_* constants - confirm.
country += if (countryInfo.getCountry().getUS().get(0).getStartTime() < endTimeStamp) ('COUNTRY_US' -> 1) else ('COUNTRY_US' -> 0)
country += if (countryInfo.getCountry().getUS().get(0).getStartTime() < endTimeStamp) ('COUNTRY_CANADA' -> 1) else ('COUNTRY_CANADA' -> 0)
country += if (countryInfo.getCountry().getUS().get(0).getStartTime() < endTimeStamp) ('COUNTRY_DENMARK' -> 1) else ('COUNTRY_DENMARK' -> 0)
country += if (countryInfo.getCountry().getUS().get(0).getStartTime() < endTimeStamp) ('COUNTRY_FRANCE' -> 1) else ('COUNTRY_FRANCE' -> 0)
val language: Map[String, Int] = Map()
// Every language value is recorded with a count of 1.
countryInfo.getLanguages().collect.foreach(x => language += (x.getValue -> 1)) ;
Row(itemId, country, language)
})
But both attempts freeze in Zeppelin. Is there a better way to get the aggregates as JSON? Which is better for constructing the final aggregate: a List or a Map?
It would be helpful if you restated your question in terms of Spark DataFrame/Dataset and Row; I understand that you ultimately want to use JSON but the details of the JSON input/output are a separate concern.
The function you are looking for is a Spark SQL aggregate function (see the group of them on that page). The functions collect_list and collect_set are related, but the function you need is not already implemented.
You can implement what I'll call count_by_value by deriving from org.apache.spark.sql.expressions.UserDefinedAggregateFunction. This will require some in-depth knowledge of how Spark SQL works.
Once count_by_value is implemented, you can use it like this:
// count_by_value is the custom aggregate function described above; it is not
// implemented in this answer. One result row is produced per itemId.
df.groupBy("itemId").agg(count_by_value(df("country")), count_by_value(df("language")))

Play [Scala]: How to flatten a JSON object

Given the following JSON...
{
"metadata": {
"id": "1234",
"type": "file",
"length": 395
}
}
... how do I convert it to
{
"metadata.id": "1234",
"metadata.type": "file",
"metadata.length": 395
}
Tx.
You can do this pretty concisely with Play's JSON transformers. The following is off the top of my head, and I'm sure it could be greatly improved on:
import play.api.libs.json._
// Transformer that removes the "metadata" branch and re-adds each of its
// fields at the top level under the key "metadata.<field name>".
val flattenMeta = {
  val metadataPath = __ \ 'metadata
  metadataPath.read[JsObject].flatMap { metadata =>
    // Start from "prune the metadata branch", then chain one update per field.
    metadata.fields.foldLeft(metadataPath.json.prune) { case (transformer, (name, value)) =>
      val addField = __.json.update(
        Reads.of[JsObject].map(obj => obj + (s"metadata.$name" -> value))
      )
      transformer andThen addField
    }
  }
}
And then:
// Sample input: the document from the question.
val json = Json.parse("""
{
"metadata": {
"id": "1234",
"type": "file",
"length": 395
}
}
""")
And:
scala> json.transform(flattenMeta).foreach(Json.prettyPrint _ andThen println)
{
"metadata.id" : "1234",
"metadata.type" : "file",
"metadata.length" : 395
}
Just change the path if you want to handle metadata fields somewhere else in the tree.
Note that using a transformer may be overkill here—see e.g. Pascal Voitot's input in this thread, where he proposes the following:
// Copy each field of "metadata" into a fresh object under the key "metadata.<name>".
val metadataFields = (json \ "metadata").as[JsObject].fields
metadataFields.foldLeft(Json.obj()) { (flattened, field) =>
  val (name, value) = field
  flattened + (s"metadata.$name" -> value)
}
It's not as composable, and you'd probably not want to use as in real code, but it may be all you need.
This is definitely not trivial, but it is possible by flattening the object recursively. I haven't tested this thoroughly, but it works with your example and with some other basic ones I've come up with using arrays:
/**
 * Flattens a nested JSON object into a single-level object whose keys are the
 * dotted paths to each leaf; array elements are addressed as `key[index]`.
 */
object JsFlattener {
  /** Flattens `js` and merges all resulting single-field objects into one object. */
  def apply(js: JsValue): JsValue = flatten(js).foldLeft(JsObject(Nil))(_ ++ _.as[JsObject])

  /**
   * Produces one single-field object per leaf found in `js`.
   *
   * @param js     value to flatten; a non-object value (for example an element
   *               of an array of primitives) is emitted directly under `prefix`
   *               instead of failing the conversion to `JsObject`
   * @param prefix path accumulated so far; empty at the root
   */
  def flatten(js: JsValue, prefix: String = ""): Seq[JsValue] = js match {
    case obj: JsObject =>
      obj.fieldSet.toSeq.flatMap { case (key, value) =>
        val path = concat(prefix, key)
        value match {
          case JsArray(items) =>
            items.zipWithIndex.flatMap { case (item, i) => flatten(item, s"$path[$i]") }
          case nested: JsObject => flatten(nested, path)
          case JsBoolean(_) | JsNumber(_) | JsString(_) => Seq(Json.obj(path -> value))
          // Anything else is written out as JSON null.
          case _ => Seq(Json.obj(path -> JsNull))
        }
      }
    case leaf => Seq(Json.obj(prefix -> leaf))
  }

  /** Joins `prefix` and `key` with a dot; returns `key` alone when `prefix` is empty. */
  def concat(prefix: String, key: String): String = if (prefix.nonEmpty) s"$prefix.$key" else key
}
JsObject has the fieldSet method that returns a Set[(String, JsValue)], which I mapped, matched against the JsValue subclass, and continued consuming recursively from there.
You can use this example by passing a JsValue to apply:
// Example: flatten the document from the question.
val json = Json.parse("""
{
"metadata": {
"id": "1234",
"type": "file",
"length": 395
}
}
""")
JsFlattener(json)
We'll leave it as an exercise to the reader to make the code more beautiful looking.
Here's my take on this problem, based on @Travis Brown's 2nd solution.
It recursively traverses the json and prefixes each key with its parent's key.
/**
 * Flattens a JSON object into one level, prefixing each key with the dotted
 * path of its parents. Nested objects are flattened recursively; every other
 * value (arrays included) is copied unchanged.
 *
 * @param js     the object to flatten
 * @param prefix dotted path of the parents of `js`; empty at the root
 */
def flatten(js: JsValue, prefix: String = ""): JsObject =
  js.as[JsObject].fields.foldLeft(Json.obj()) { case (acc, (k, v)) =>
    val path = if (prefix.isEmpty) k else s"$prefix.$k"
    v match {
      case nested: JsObject => acc.deepMerge(flatten(nested, path))
      case other => acc + (path -> other)
    }
  }
which turns this:
{
"metadata": {
"id": "1234",
"type": "file",
"length": 395
},
"foo": "bar",
"person": {
"first": "peter",
"last": "smith",
"address": {
"city": "Ottawa",
"country": "Canada"
}
}
}
into this:
{
"metadata.id": "1234",
"metadata.type": "file",
"metadata.length": 395,
"foo": "bar",
"person.first": "peter",
"person.last": "smith",
"person.address.city": "Ottawa",
"person.address.country": "Canada"
}
@Trev has the best solution here, completely generic and recursive, but it's missing a case for array support. I'd like something that works in this scenario:
turn this:
{
"metadata": {
"id": "1234",
"type": "file",
"length": 395
},
"foo": "bar",
"person": {
"first": "peter",
"last": "smith",
"address": {
"city": "Ottawa",
"country": "Canada"
},
"kids": ["Bob", "Sam"]
}
}
into this:
{
"metadata.id": "1234",
"metadata.type": "file",
"metadata.length": 395,
"foo": "bar",
"person.first": "peter",
"person.last": "smith",
"person.address.city": "Ottawa",
"person.address.country": "Canada",
"person.kids[0]": "Bob",
"person.kids[1]": "Sam"
}
I've arrived at this, which appears to work, but seems overly verbose. Any help in making this pretty would be appreciated.
/**
 * Flattens a JSON object into one level with dotted keys; array elements are
 * addressed as `key[index]`.
 *
 * @param js     the object to flatten
 * @param prefix dotted path of the parents of `js`; empty at the root
 */
def flatten(js: JsValue, prefix: String = ""): JsObject = {
  // Full key of a field of `js`.
  def qualified(k: String): String = if (prefix.isEmpty) k else s"$prefix.$k"

  js.as[JsObject].fields.foldLeft(Json.obj()) { case (acc, (k, v)) =>
    val nk = qualified(k)
    v match {
      case obj: JsObject =>
        acc.deepMerge(flatten(obj, nk))
      case array: JsArray =>
        // Merge the per-element objects into one before merging into the result.
        val merged = flattenArray(array, nk).foldLeft(Json.obj())(_ ++ _)
        acc.deepMerge(merged)
      case other =>
        acc + (nk -> other)
    }
  }
}
/**
 * Flattens the elements of an array, addressing element `i` as `k[i]`.
 * Objects and nested arrays are flattened recursively; any other element
 * becomes a single-field object.
 *
 * @param a the array to flatten
 * @param k key under which the array is stored
 * @return the flattened objects, in element order
 */
def flattenArray(a: JsArray, k: String = ""): Seq[JsObject] =
  a.value.zipWithIndex.foldLeft(Seq[JsObject]()) {
    case (acc, (obj: JsObject, i)) => acc :+ flatten(obj, s"$k[$i]")
    case (acc, (nested: JsArray, i)) => acc ++ flattenArray(nested, s"$k[$i]")
    case (acc, (other, i)) => acc :+ Json.obj(s"$k[$i]" -> other)
  }
/**
 * Flattens an arbitrarily nested sequence of `JsObject`s into one sequence,
 * preserving order, and appends it to `b`. Elements that are neither a
 * `JsObject` nor a sequence are not handled and fail the match.
 *
 * @param s possibly nested sequence of objects
 * @param b objects collected so far
 */
def flattenSeq(s: Seq[Any], b: Seq[JsObject] = Seq()): Seq[JsObject] = {
  val collected: Seq[JsObject] = s.flatMap {
    case single: JsObject => Seq(single)
    case nested: Seq[Any] => flattenSeq(nested)
  }
  b ++ collected
}
Thanks m-z, it is very helpful. (I'm not so familiar with Scala.)
I'd like to add a line for "flatten" working with primitive JSON array like "{metadata: ["aaa", "bob"]}".
/**
 * Produces one single-field object per leaf of `js`, keyed by its dotted path.
 * A value that is not an object (for example an element of an array of
 * primitives) is emitted directly under `prefix`.
 *
 * @param js     value to flatten
 * @param prefix path accumulated so far; empty at the root
 */
def flatten(js: JsValue, prefix: String = ""): Seq[JsValue] = js match {
  case obj: JsObject =>
    obj.fieldSet.toSeq.flatMap { case (key, values) =>
      values match {
        case JsBoolean(flag) => Seq(Json.obj(concat(prefix, key) -> flag))
        case JsNumber(number) => Seq(Json.obj(concat(prefix, key) -> number))
        case JsString(text) => Seq(Json.obj(concat(prefix, key) -> text))
        case JsArray(items) =>
          items.zipWithIndex.flatMap { case (item, i) => flatten(item, concat(prefix, key + s"[$i]")) }
        case nested: JsObject => flatten(nested, concat(prefix, key))
        case _ => Seq(Json.obj(concat(prefix, key) -> JsNull))
      }
    }
  // JSON primitive array can't convert to JsObject
  case other => Seq(Json.obj(prefix -> other))
}
Based on previous solutions, have tried to simplify the code a bit
/**
 * Appends `newKey` to `oldKey` with a dot in between; when `oldKey` is empty
 * the result is `newKey` alone.
 */
def getNewKey(oldKey: String, newKey: String): String =
  if (oldKey.isEmpty) newKey
  else s"$oldKey.$newKey"
/**
 * Flattens a JSON value into a single-level object with dotted keys; array
 * elements are addressed as `key[index]`. A value that is not an object is
 * returned as a one-field object stored under `prefix`.
 *
 * @param js     value to flatten
 * @param prefix path accumulated so far; empty at the root
 */
def flatten(js: JsValue, prefix: String = ""): JsObject = js match {
  case obj: JsObject =>
    obj.fields.foldLeft(Json.obj()) { case (acc, (k, value)) =>
      val key = getNewKey(prefix, k)
      val flattened = value match {
        case array: JsArray =>
          // Fold the elements on top of what has been collected so far.
          array.as[Seq[JsValue]].zipWithIndex.foldLeft(acc) { case (inner, (element, i)) =>
            inner.deepMerge(flatten(element, s"$key[$i]"))
          }
        case nested: JsObject => flatten(nested, key)
        case leaf => Json.obj(key -> leaf)
      }
      acc.deepMerge(flattened)
    }
  case other => Json.obj(prefix -> other)
}