Scala: Parsing JSON using the com.fasterxml.jackson library - json

I have written the following program to parse a JSON structure in a streaming fashion.
However, this looks very imperative. This is my latest attempt to write more idiomatic Scala code, but I am not there yet.
I am parsing the following JSON, using the Scala code that follows the JSON snippet. My goal is to shorten the code through the use of more idiomatic scala structures.
Thanks in advance.
{
"type": "ImportantIncidentInfo",
"incidentTimestamp": "2014-05-15T10:09:27.989-05:00",
"numOfMatches": 4,
"myReport": {
"docReports": {
"part1/.": {
"path": [
"unknown"
],
"myAnalysis": {
"matches": [
{
"id": {
"major": 1,
"minor": 0
},
"name": "US SSN",
"position": 13,
"string": " 636-12-4567 "
},
{
"id": {
"major": 3,
"minor": 0
},
"name": "MasterCard Credit Card Number",
"position": 35,
"string": " 5424-1813-6924-3685 "
}
]
},
"cleanedUpData": [
{
"startPosition": 0,
"endPosition": 65,
"frameContent": ""
}
],
"minedMetadata": {
"Content-Encoding": "ISO-8859-1",
"Content-Type": "text/html; charset=iso-8859-1"
},
"deducedMetadata": {
"Content-Type": "text/html; iso-8859-1"
}
},
"part2/.": {
"path": [
"unknown"
],
"myAnalysis": {
"matches": [
{
"id": {
"major": 1,
"minor": 0
},
"name": "SSN",
"position": 3,
"string": " 636-12-4567\r"
},
{
"id": {
"major": 3,
"minor": 0
},
"name": "MasterCard Credit Card Number",
"position": 18,
"string": "\n5424-1813-6924-3685\r"
}
]
},
"cleanedUpData": [
{
"startPosition": 0,
"endPosition": 44,
"frameContent": ""
}
],
"minedMetadata": {
"Content-Encoding": "windows-1252",
"Content-Type": "text/plain; charset=windows-1252"
},
"deducedMetadata": {
"Content-Type": "text/plain; iso-8859-1"
}
}
}
},
"whatSetItOffEntry": {
"action": "Log",
"component": {
"type": "aComponent",
"components": [
{
"type": "PatternComponent",
"patterns": [
1
],
"not": false
}
],
"not": false
},
"ticketInfo": {
"createIncident": true,
"tags": [],
"seeRestrictedIds": [
{
"type": "userGroup",
"name": "SiteMasters",
"description": "Group for SiteMasters",
"masters": [
"04fb02a2bc0fba"
],
"members": [],
"id": "04fade"
}
]
},
"letmeknowInfo": {
"createNotification": true,
"contactNames": [
"someguy#gmail.com"
]
}
},
"seeRestrictedIds": [
"04fade66c0"
],
"status": "New",
"timeStamps": [
"2015-05-15T10:09:27.989-05:00"
],
"count": 1
}
package mypackage
import java.io.BufferedReader
import java.io.FileReader
import java.io.IOException
import java.io.InputStream
import java.util._
import com.fasterxml.jackson.core._
import com.fasterxml.jackson.databind._
import java.util.Properties
import JacksonStreaming._
/** Command-line entry point for the Jackson streaming demo. */
object JacksonStreaming {
  /**
   * Creates a `JacksonStreaming` instance and runs its `getNames()` method,
   * printing the stack trace of any exception that escapes it.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    println("Entered Main")
    try {
      // getNames performs I/O, so it is called with explicit parentheses.
      new JacksonStreaming().getNames()
    } catch {
      case e: Exception => e.printStackTrace()
    }
  }
}
/**
 * Reads the `name` values found below the node addressed by [[path]], once with
 * Jackson's tree model and once with Jackson's streaming parser.
 *
 * Note: this file imports `java.util._`, so unqualified `List`, `Vector` and
 * `Iterator` would resolve to the Java types; the code below uses `Seq` instead.
 */
class JacksonStreaming {
  var jsonMapper: ObjectMapper = new ObjectMapper()
  var jsonFactory: JsonFactory = new JsonFactory()
  var prop: Properties = new Properties()
  var filePath: String = ""

  /** Field names leading to the container node, followed by the leaf field to collect. */
  val path = Array("myReport", "docReports", "part1/.", "myAnalysis", "matches", "name")

  /** Location of the JSON document that is parsed. */
  private val inputFile = "C:/jsonFormattedModified.json"

  /**
   * Parses the input file twice (tree model, then streaming) and prints the
   * names found. An `IOException` is reported via `printStackTrace`.
   *
   * @throws RuntimeException if the path does not exist in the tree, or if a
   *                          leaf field has no scalar value
   */
  def getNames(): Unit = {
    println("Entered getNames")
    try {
      val fileReader = new BufferedReader(new FileReader(inputFile))
      try {
        println("fileReader is: " + fileReader)
        val rootNode = jsonMapper.readTree(fileReader)
        println("Return value of jsonMapper.readTree is: " + rootNode)
        findByPath(rootNode)
      } finally {
        fileReader.close()
      }

      val jsonParser = jsonFactory.createParser(new FileReader(inputFile))
      try {
        println("JsonParser is: " + jsonParser)
        streamNames(jsonParser)
      } finally {
        // Always release the parser, even when parsing fails.
        jsonParser.close()
      }
    } catch {
      case e: IOException => e.printStackTrace()
    }
  }

  /**
   * Streaming search: advances to each container field of [[path]] in order,
   * then collects the leaf values until that container is closed.
   *
   * @return the collected leaf values in document order; empty if the path was not found
   */
  private def streamNames(parser: JsonParser): Seq[String] = {
    val container = path(path.length - 2)
    val leaf = path(path.length - 1)
    // forall short-circuits, so the search stops at the first missing segment.
    val reachedContainer = path.init.forall { segment =>
      val found = advanceToField(parser, segment)
      if (found) println("Found node " + segment)
      found
    }
    if (!reachedContainer) {
      Seq.empty[String]
    } else {
      println("Looking for names ...")
      val names = collectLeafValues(parser, leaf)
      println("Stopping search at end of node " + container)
      names
    }
  }

  /** Consumes tokens until a FIELD_NAME token called `name` is read; false at end of input. */
  @scala.annotation.tailrec
  private def advanceToField(parser: JsonParser, name: String): Boolean =
    parser.nextToken() match {
      case null => false
      case JsonToken.FIELD_NAME if parser.getCurrentName == name => true
      case _ => advanceToField(parser, name)
    }

  /**
   * Collects the values of every `leaf` field inside the value that follows the
   * current FIELD_NAME token, and stops when that value has been read
   * completely. Tracking the nesting depth is what ends the search at the
   * close of the container instead of continuing through the rest of the document.
   */
  private def collectLeafValues(parser: JsonParser, leaf: String): Seq[String] = {
    @scala.annotation.tailrec
    def loop(depth: Int, acc: Seq[String]): Seq[String] =
      parser.nextToken() match {
        case null => acc
        case JsonToken.START_OBJECT | JsonToken.START_ARRAY => loop(depth + 1, acc)
        case JsonToken.END_OBJECT | JsonToken.END_ARRAY =>
          if (depth <= 1) acc else loop(depth - 1, acc)
        case JsonToken.FIELD_NAME if parser.getCurrentName == leaf =>
          parser.nextToken()
          val value = parser.getValueAsString
          if (value == null) {
            throw new RuntimeException("No value exists for field " + leaf)
          }
          println("Found " + leaf + " value: " + value)
          loop(depth, acc :+ value)
        case _ =>
          // depth 0 means the container's value was a scalar: nothing to search.
          if (depth == 0) acc else loop(depth, acc)
      }
    loop(0, Seq.empty[String])
  }

  /**
   * Tree-model search: walks `jn` along every segment of [[path]] except the
   * last one and prints all values of the last segment found below that node.
   *
   * @param jn root node of the parsed document
   * @throws RuntimeException if the addressed node is missing
   */
  def findByPath(jn: JsonNode): Unit = {
    println("Entered findByPath")
    val matchesNamesNode = path.init.foldLeft(jn)((node, segment) => node.path(segment))
    if (matchesNamesNode.isMissingNode) {
      throw new RuntimeException("No node with names found.")
    }
    println("Tree names: " + matchesNamesNode.findValuesAsText(path.last))
  }
}

Scala is an expression-oriented, object-oriented and functional programming language. You can of course write imperative code in it, but for working with JSON I recommend an object-oriented approach: map the JSON onto classes. You can find examples in the GitHub repository of the Jackson Scala module:
https://github.com/FasterXML/jackson-module-scala/
For example, I recommend writing a Scala class for the whole JSON document and further classes for the sub-objects such as myReport or whatSetItOffEntry. The repository contains an example of this kind of solution:
package com.fasterxml.jackson.module.scala
import com.fasterxml.jackson.annotation.{JsonUnwrapped, JsonProperty, JsonIgnore}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.matchers.ShouldMatchers
import org.scalatest.FlatSpec
import com.fasterxml.jackson.databind.ObjectMapper
/** Postal address whose parts are all optional. */
case class Address(
  address1: Option[String],
  city: Option[String],
  state: Option[String]
)
/**
 * Mutable bean without a creator constructor. `location` carries the
 * `@JsonUnwrapped` annotation (the `@` had been garbled to `#`, which does not compile).
 */
class NonCreatorPerson
{
  var name: String = _
  @JsonUnwrapped var location: Address = _
  var alias: Option[String] = _
}
/**
 * Person whose `location` is marked `@JsonIgnore`; the address parts are
 * exposed through the `address1` / `city` / `state` accessors instead.
 *
 * The private setters write into the (immutable) `Address` instance via
 * reflection.
 */
case class Person(name: String, @JsonIgnore location: Address, alias: Option[String])
{
  /** No-argument constructor that fills every field with an empty value. */
  private def this() = this("", Address(None, None, None), None)

  def address1 = location.address1
  private def address1_=(value: Option[String]): Unit = {
    setAddressField("address1", value)
  }

  def city = location.city
  private def city_=(value: Option[String]): Unit = {
    setAddressField("city", value)
  }

  def state = location.state
  private def state_=(value: Option[String]): Unit = {
    setAddressField("state", value)
  }

  /** Overwrites the declared field `name` of `location` reflectively. */
  private def setAddressField(name: String, value: Option[String]): Unit = {
    val f = location.getClass.getDeclaredField(name)
    f.setAccessible(true)
    f.set(location, value)
  }
}
/**
 * Round-trip tests for `@JsonIgnore` / `@JsonUnwrapped` handling: each value is
 * serialised with an `ObjectMapper` that has `DefaultScalaModule` registered,
 * read back, and compared with the original.
 */
@RunWith(classOf[JUnitRunner])
class UnwrappedTest extends BaseSpec {
  "mapper" should "handle ignored fields correctly" in {
    val mapper = new ObjectMapper()
    mapper.registerModule(DefaultScalaModule)
    val p = Person("Snoopy", Address(Some("123 Main St"), Some("Anytown"), Some("WA")), Some("Joe Cool"))
    val json = mapper.writeValueAsString(p)
    // There's some instability in the ordering of keys. Not sure what that's about, but rather than
    // have buggy tests, I'm accepting it for now.
    // json should (
    // be === """{"name":"Snoopy","alias":"Joe Cool","city":"Anytown","address1":"123 Main St","state":"WA"}""" or
    // be === """{"name":"Snoopy","alias":"Joe Cool","state":"WA","address1":"123 Main St","city":"Anytown"}"""
    // )
    val p2 = mapper.readValue(json, classOf[Person])
    p2 shouldEqual p
  }

  it should "handle JsonUnwrapped for non-creators" in {
    val mapper = new ObjectMapper()
    mapper.registerModule(DefaultScalaModule)
    val p = new NonCreatorPerson
    p.name = "Snoopy"
    p.location = Address(Some("123 Main St"), Some("Anytown"), Some("WA"))
    p.alias = Some("Joe Cool")
    val json = mapper.writeValueAsString(p)
    val p2 = mapper.readValue(json, classOf[NonCreatorPerson])
    p2.name shouldBe p.name
    p2.location shouldBe p.location
    p2.alias shouldBe p.alias
  }
}

Related

Put JSON Data in Flutter Stacked Chart

I am trying to put my JSON data into a Flutter stacked chart.
I have already built simple charts from JSON data, such as bar, column, pie and doughnut charts.
I have referred to:
stacked-column-chart(syncfusion_flutter_charts),
Grouped Bar Chart(charts_flutter)
Stack Overflow Que-Ans
below like my API response/JSON String
[{
"name": "ABC",
"subject": [{
"name": "Math",
"marks": "54"
},
{
"name": "Physics",
"marks": "65"
}
]
},
{
"name": "PQR",
"subject": [{
"name": "Chemistry",
"marks": "53"
},
{
"name": "Biology",
"marks": "22"
},
{
"name": "English",
"marks": "7 "
},
{
"name": "Math",
"marks": "12"
}
]
}, {
"name": "JKL",
"subject": [{
"name": "Chemistry",
"marks": "53"
},
{
"name": "Biology",
"marks": "22"
},
{
"name": "English",
"marks": "79 "
},
{
"name": "Math",
"marks": "12"
},
{
"name": "Physics",
"marks": "72"
}
]
}
]
Or I want below type of graph using JSON Data
Note: If my JSON string is wrong, please tell me; you can also create your own JSON data and display the output.
Using charts_flutter. Please customize it for your usecase its a bare minimum implementation to validate that its working for your json.
import 'package:flutter/material.dart';
import 'dart:math';
import 'package:flutter/material.dart';
import 'package:charts_flutter/flutter.dart' as charts;
import 'dart:convert';
/// Stacked bar chart of marks per student, with one series (and one colour)
/// per distinct subject.
class StackedBarChart extends StatelessWidget {
  /// Whether the chart animates when it is drawn.
  final bool animate;

  StackedBarChart({this.animate = false});

  @override
  Widget build(BuildContext context) {
    String jsonString = '[{"name":"ABC","subject":[{"name":"Math","marks":"54"},{"name":"Physics","marks":"65"}]},{"name":"PQR","subject":[{"name":"Chemistry","marks":"53"},{"name":"Biology","marks":"22"},{"name":"English","marks":"7 "},{"name":"Math","marks":"12"}]},{"name":"JKL","subject":[{"name":"Chemistry","marks":"53"},{"name":"Biology","marks":"22"},{"name":"English","marks":"79 "},{"name":"Math","marks":"12"},{"name":"Physics","marks":"72"}]}]';
    final studentMarks = studentMarksFromJson(jsonString);

    // Distinct subjects in first-seen order. `sno` stores each subject's
    // position in this list and is reused below as its palette index.
    final distinctSubjects = <Subject>[];
    for (final student in studentMarks) {
      for (final subject in student.subject) {
        if (!distinctSubjects.any((known) => known.name == subject.name)) {
          subject.sno = distinctSubjects.length;
          distinctSubjects.add(subject);
        }
      }
    }
    print(distinctSubjects.length);

    final palettes =
        charts.MaterialPalette.getOrderedPalettes(distinctSubjects.length + 2);

    // One series per subject; every series has one datum per student so the
    // bars stack consistently (0 marks when the student lacks the subject).
    final chartData = <charts.Series<OrdinalMarks, String>>[];
    for (final subject in distinctSubjects) {
      final seriesData = <OrdinalMarks>[];
      for (final student in studentMarks) {
        final taken = student.subject.where((s) => s.name == subject.name);
        // The sample data contains marks with stray whitespace (e.g. "7 ").
        final marks = taken.isEmpty ? 0 : int.parse(taken.first.marks.trim());
        seriesData.add(OrdinalMarks(subject.name, marks, student.name));
      }
      // Resolve the colour once per series instead of searching per datum.
      final color = palettes[subject.sno ?? 0].shadeDefault;
      chartData.add(charts.Series<OrdinalMarks, String>(
        id: subject.name,
        domainFn: (OrdinalMarks m, _) => m.studentName,
        measureFn: (OrdinalMarks m, _) => m.marks,
        data: seriesData,
        fillColorFn: (_, __) => color,
        colorFn: (_, __) => color,
      ));
    }

    return Scaffold(
      appBar: AppBar(title: Text("Chart")),
      body: charts.BarChart(
        chartData,
        animate: animate,
        behaviors: [charts.SeriesLegend(showMeasures: true)],
        animationDuration: Duration(seconds: 3),
      ),
    );
  }
}
/// One chart datum: the marks a student obtained in a subject.
class OrdinalMarks {
  OrdinalMarks(this.subjectName, this.marks, this.studentName);

  /// Name of the subject the marks belong to.
  final String subjectName;

  /// Marks as an integer.
  final int marks;

  /// Name of the student.
  final String studentName;
}
/// Decodes a JSON array string into a list of [StudentMarks].
List<StudentMarks> studentMarksFromJson(String str) {
  final decoded = json.decode(str);
  return List<StudentMarks>.from(decoded.map((x) => StudentMarks.fromJson(x)));
}
/// Encodes a list of [StudentMarks] as a JSON array string.
String studentMarksToJson(List<StudentMarks> data) {
  final encodable = List<dynamic>.from(data.map((x) => x.toJson()));
  return json.encode(encodable);
}
/// A student together with the list of subjects the student has marks for.
class StudentMarks {
  String name;
  List<Subject> subject;

  StudentMarks({required this.name, required this.subject});

  /// Builds an instance from a decoded JSON object with keys "name" and "subject".
  factory StudentMarks.fromJson(Map<String, dynamic> json) {
    final subjects =
        List<Subject>.from(json["subject"].map((x) => Subject.fromJson(x)));
    return StudentMarks(name: json["name"], subject: subjects);
  }

  /// Converts this instance back into a JSON-encodable map.
  Map<String, dynamic> toJson() {
    return {
      "name": name,
      "subject": List<dynamic>.from(subject.map((x) => x.toJson())),
    };
  }
}
/// A subject entry: its name and the marks exactly as they appear in the JSON.
class Subject {
  String name;
  String marks;

  /// Optional sequence number; neither read by [Subject.fromJson] nor written
  /// by [toJson].
  int? sno;

  Subject({required this.name, required this.marks});

  /// Builds an instance from a decoded JSON object with keys "name" and "marks".
  factory Subject.fromJson(Map<String, dynamic> json) {
    return Subject(name: json["name"], marks: json["marks"]);
  }

  /// Converts this instance back into a JSON-encodable map.
  Map<String, dynamic> toJson() {
    return {"name": name, "marks": marks};
  }
}

JSON Transformation to required format

We are working on a Middleware platform where we are required to respond to consumer with a JSON data in a particular format.
The Data we get from south bound API is a key value pair and this needs to be mapped to an understandable format for the consumer
We tried json-path, ObjectMapper but none of them is giving us the expected result for transforming
Response from backend API
{
"details": [
{
"name": "x.y.z.name","value": "TR-54695"
},
{
"name": "a.b.c.standards","value": "DOCSIS"
},
{
"name": "x.x.x.hversion","value": "10"
},
{
"name": "x.x.x.sversion","value": "9.1.116V"
},
{
"name": "x.x.x.uptime","value": "8000"
},
{
"name": "x.x.x.accessallowed","value": "true"
},
]
}
To be transformed to
{
"myData": {
"myInfo": {
"productClass": "TR-54695",
"supportedStandards": "DOCSIS",
"hardwareVersion": "10",
"softwareVersion": "9.1.116V",
"modemMacAddress": "",
"upTime": "8000",
"modemNetworkAccessAllowed": true
}
}
}
Do not like manual work, so here generated demo using 2 functions.
Mind ticking accept button under voting in case you like some answer.
/**
 * Builds the consumer-facing document from the backend key/value list.
 *
 * @param {{details: Array<{name: string, value: string}>}} src backend response
 * @param {Object<string, string>} mapping backend name -> target field name
 * @returns {{myData: {myInfo: Object}}} target document; the known fields are
 *   pre-created so that their order is stable
 */
function translate(src, mapping) {
  var info = {
    "productClass": "",
    "supportedStandards": "",
    "hardwareVersion": "",
    "softwareVersion": "",
    "modemMacAddress": "",
    "upTime": "",
    "modemNetworkAccessAllowed": undefined
  };
  var booleans = { "false": false, "true": true };
  var hasOwn = Object.prototype.hasOwnProperty;
  var details = src.details;
  for (var i = 0; i < details.length; i++) {
    var sourceName = details[i].name;
    // Skip backend entries with no mapping instead of writing them under the
    // key "undefined".
    if (!hasOwn.call(mapping, sourceName)) {
      continue;
    }
    var raw = details[i].value;
    // An own-property lookup is required here: `booleans[raw] || raw` can never
    // produce `false`, and would also pick up inherited keys such as "constructor".
    info[mapping[sourceName]] = hasOwn.call(booleans, raw) ? booleans[raw] : raw;
  }
  return { "myData": { "myInfo": info } };
}
/**
 * Derives a backend-name -> target-field mapping by matching values between a
 * sample backend response and the corresponding sample target document.
 *
 * Target fields whose value has no counterpart in `src` end up under the key
 * "undefined".
 *
 * @param {{details: Array<{name: string, value: string}>}} src sample backend response
 * @param {{myData: {myInfo: Object}}} dst sample target document
 * @returns {Object<string, string>} mapping from backend name to target field
 */
function generateMapping(src, dst) {
  var nameByValue = {};
  src.details.forEach(function (entry) {
    nameByValue[entry.value] = entry.name;
  });
  var targetFields = dst.myData.myInfo;
  var mapping = {};
  for (var field in targetFields) {
    var sourceName = nameByValue[targetFields[field]];
    mapping[sourceName] = field;
  }
  return mapping;
}
// Demo input: sample backend response (list of name/value pairs).
var src = {
"details": [
{ "name": "x.y.z.name", "value": "TR-54695" },
{ "name": "a.b.c.standards", "value": "DOCSIS" },
{ "name": "x.x.x.hversion", "value": "10" },
{ "name": "x.x.x.sversion", "value": "9.1.116V" },
{ "name": "x.x.x.uptime", "value": "8000" },
{ "name": "x.x.x.accessallowed", "value": "true" },
]
}
// Demo target: the document the consumer expects for the input above.
var dst = {
"myData": {
"myInfo": {
"productClass": "TR-54695",
"supportedStandards": "DOCSIS",
"hardwareVersion": "10",
"softwareVersion": "9.1.116V",
"modemMacAddress": "",
"upTime": "8000",
"modemNetworkAccessAllowed": true
}
}
}
// Derive the name mapping from the two samples by matching their values.
var mapping = generateMapping(src, dst);
// Hand-written equivalent of the generated mapping:
// var mapping = {
// "x.y.z.name": "productClass",
// "a.b.c.standards": "supportedStandards",
// "x.x.x.hversion": "hardwareVersion",
// "x.x.x.sversion": "softwareVersion",
// "undefined": "modemMacAddress",
// "x.x.x.uptime": "upTime",
// "x.x.x.accessallowed": "modemNetworkAccessAllowed"
// }
// Apply the mapping and print both the translated document and the mapping.
var result = translate(src, mapping);
console.log(JSON.stringify(result, null, 2));
console.log(JSON.stringify(mapping, null, 2));
You can use below code and use codesandbox link (check console output ) for exact response and this link for key:value pair.
// Sample backend response: a `details` array of name/value pairs.
let response = {
details: [
{
name: "x.y.z.name",
value: "TR-54695"
},
{
name: "a.b.c.standards",
value: "DOCSIS"
},
{
name: "x.x.x.hversion",
value: "10"
},
{
name: "x.x.x.sversion",
value: "9.1.116V"
},
{
name: "x.x.x.uptime",
value: "8000"
},
{
name: "x.x.x.accessallowed",
value: "true"
}
]
};
// convert function for key value pair
/**
 * Copies every name/value pair into `myData.myInfo`, keeping the backend
 * names as keys.
 *
 * @param {Array<{name: string, value: *}>} responseData backend detail entries
 * @returns {{myData: {myInfo: Object}}} wrapped key/value object
 */
function convertResponse(responseData) {
  const myInfo = {};
  const output = { myData: { myInfo: myInfo } };
  responseData.forEach(function (entry) {
    const key = entry.name;
    myInfo[key] = entry.value;
  });
  return output;
}
// OR convert Function for getting exact same output
/**
 * Converts the backend detail entries into the named target fields.
 * Entries with an unknown name are ignored.
 *
 * `modemNetworkAccessAllowed` is emitted as a real boolean when the backend
 * sends the strings "true" / "false"; any other value is passed through as is.
 *
 * @param {Array<{name: string, value: string}>} responseData backend detail entries
 * @returns {{myData: {myInfo: Object}}} target document
 */
function convertResponse(responseData) {
  // A Map avoids accidental hits on Object.prototype keys such as "constructor".
  const fieldBySourceName = new Map([
    ["x.y.z.name", "productClass"],
    ["a.b.c.standards", "supportedStandards"],
    ["x.x.x.hversion", "hardwareVersion"],
    ["x.x.x.sversion", "softwareVersion"],
    ["x.x.x.uptime", "upTime"],
    ["x.x.x.accessallowed", "modemNetworkAccessAllowed"]
  ]);
  const output = { myData: { myInfo: {} } };
  const outputRef = output.myData.myInfo;
  responseData.forEach(element => {
    const field = fieldBySourceName.get(element.name);
    if (field === undefined) {
      return;
    }
    let value = element.value;
    if (field === "modemNetworkAccessAllowed") {
      if (value === "true") {
        value = true;
      } else if (value === "false") {
        value = false;
      }
    }
    outputRef[field] = value;
  });
  return output;
}
// Convert the sample response's detail entries and print the result.
console.log(convertResponse(response.details));

Explode Deeply Nested JSON returning duplicates in Spark Scala

I have a utility which works fine for parsing simple JSONs, but it produces a cross join when multiple array[struct] columns are present in the JSON.
I have also tried distinct() and dropDuplicates() to remove the duplicates caused by the cross join in my code, but that returns an empty DataFrame.
/**
 * Recursively flattens `df` until `isNested` reports no nested column.
 *
 * The simple columns are selected first; every complex (array) column is then
 * exploded in a separate select of `df` and cross-joined onto the accumulated
 * result, which is flattened again after each join.
 *
 * @param df DataFrame to flatten
 * @return `df` itself when it is not nested, otherwise the flattened DataFrame
 */
def flattenDataFrame(df: DataFrame): DataFrame =
  if (!isNested(df)) {
    df
  } else {
    val (complexPairs, simplePairs) =
      flattenSchema(df.schema).toList.partition { case (_, isComplex) => isComplex }
    val simpleColumns: List[Column] = simplePairs.map(_._1)
    val complexColumns: List[Column] = complexPairs.map(_._1)
    complexColumns.foldLeft(df.select(simpleColumns: _*)) { (joined, complexColumn) =>
      flattenDataFrame(joined.crossJoin(df.select(complexColumn)))
    }
  }
/**
 * Turns a schema into a flat list of select expressions.
 *
 * @param schema schema (or nested struct) to walk
 * @param prefix dotted path of the enclosing struct, or `null` at the top level
 * @return one `(column, isComplex)` pair per leaf field: array fields become an
 *         `explode_outer` expression flagged `true`; struct fields are walked
 *         recursively; every other field is selected as is and flagged `false`.
 *         Aliases use `_` in place of `.`.
 */
private def flattenSchema(schema: StructType, prefix: String = null): Array[(Column, Boolean)] = {
  // The former trailing `.filter(field => field != None)` compared a tuple with
  // `None`, which is always true, so it has been dropped.
  schema.fields.flatMap { field =>
    val columnName = if (prefix == null) field.name else prefix + "." + field.name
    val alias = columnName.replace(".", "_")
    field.dataType match {
      case _: ArrayType =>
        Array[(Column, Boolean)]((explode_outer(col(columnName)).as(alias), true))
      case structType: StructType =>
        flattenSchema(structType, columnName)
      case _ =>
        val metadata = new MetadataBuilder().putString("encoding", "ZSTD").build()
        Array[(Column, Boolean)]((col(columnName).as(alias, metadata), false))
    }
  }
}
/**
 * Tells whether `df` has at least one top-level column of array, map or
 * struct type.
 *
 * NOTE(review): the accompanying `flattenSchema` has no `MapType` case, so a
 * map column stays nested after flattening — confirm that map columns cannot
 * reach the recursive `flattenDataFrame`.
 */
def isNested(df: DataFrame): Boolean =
  df.schema.fields.exists { field =>
    field.dataType match {
      case _: ArrayType | _: MapType | _: StructType => true
      case _ => false
    }
  }
A sample JSON in which I am facing the issue:
[
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
},
{
"id": "0002",
"type": "donut",
"name": "Raised",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
]
Solution without join and more than that, no cross-join which is your problem:
Sorry for the formatting, can't really get it to format well for stack-overflow
/**
 * Recursively flattens `df` without any join: the simple columns and the
 * still-unexploded array columns are selected together, then each array
 * column is replaced in turn by its `explode_outer`, and the result is
 * flattened again until `isNested` is false.
 *
 * @param df DataFrame to flatten
 * @return `df` itself when it is not nested, otherwise the flattened DataFrame
 */
def flattenDataFrame(df: DataFrame): DataFrame =
  if (!isNested(df)) {
    df
  } else {
    val (complexPairs, simplePairs) =
      flattenSchema(df.schema).toList.partition { case (_, isComplex) => isComplex }
    val simpleColumns: List[Column] = simplePairs.map(_._1)
    val complexColumns: List[Column] = complexPairs.map(_._1)

    // (plain name, pass-through column, exploding column) for every array column.
    val complexUnderlyingCols = complexColumns.map { column =>
      val name = column.expr.asInstanceOf[UnresolvedAttribute].name
      val plainName = name.replaceAll("`", "")
      val quotedName = "`" + plainName + "`"
      (plainName, col(name).as(plainName), explode_outer(col(quotedName)).as(plainName))
    }

    val selected = df.select(simpleColumns ++ complexUnderlyingCols.map(_._2): _*)
    val exploded = complexUnderlyingCols.foldLeft(selected) {
      case (current, (name, _, explodingColumn)) =>
        val untouched = current.schema.fieldNames
          .diff(List(name))
          .map(fieldName => col("`" + fieldName.replaceAll("`", "") + "`"))
        current.select(untouched :+ explodingColumn: _*)
    }
    flattenDataFrame(exploded)
  }
/**
 * Turns a schema into a flat list of `(column, isComplex)` pairs: array fields
 * are returned unexploded and flagged `true`, struct fields are walked
 * recursively with `level + 1`, and all other fields are flagged `false`.
 *
 * NOTE(review): several expressions below look damaged (most likely backticks
 * were lost when the code was pasted) — see the inline notes and confirm
 * against the original before relying on nested column names.
 */
private def flattenSchema(schema: StructType, prefix: String = null, level: Int = 0): Array[(Column, Boolean)] = {
// NOTE(review): replace("", "") leaves the string unchanged; presumably this stripped backticks — confirm.
val unquotedPrefix = if (prefix != null) prefix.replace("", "") else null
// Prints the current recursion level on every call.
println(level)
schema.fields.flatMap(field => {
val fieldName = field.name
val columnName = if (level == 0) {
s"$fieldName"
} else {
val fullName = s"$unquotedPrefix.$fieldName"
// Walks the dot-separated parts in their original order; all three branches
// append the same text, so the index comparison currently has no effect.
val x = fullName.split('.').reverse.zipWithIndex.reverse.foldLeft(new StringBuilder("")){ case (builder, (fieldPart, index)) =>
if(index > level) {
builder.append(s".$fieldPart")
} else if (index == level) {
builder.append(s".$fieldPart")
} else {
builder.append(s".$fieldPart")
}
}
// NOTE(review): as written this deletes the character at index 1 (the first
// character after the leading "."), not the leading "." itself — confirm intent.
x.replace(1,2,"").toString()
}
// NOTE(review): same no-op replace as for unquotedPrefix above.
val unquotedColumnName = columnName.replace("", "")
field.dataType match {
case _: ArrayType =>
val cols: Array[(Column, Boolean)] = Array[(Column, Boolean)]((col(columnName), true)) // We pass only the column as we'll generate explode function while expanding the DF
cols
case structType: StructType =>
flattenSchema(structType, columnName, level + 1)
case _ =>
val metadata = new MetadataBuilder().putString("encoding", "ZSTD").build()
Array((col(columnName).as(unquotedColumnName, metadata), false))
}
})
}
/**
 * Returns true when any top-level column of `df` is an array, a map or a
 * struct.
 */
def isNested(df: DataFrame): Boolean = {
  val firstNestedType = df.schema.fields.iterator
    .map(_.dataType)
    .collectFirst { case nested @ (_: ArrayType | _: MapType | _: StructType) => nested }
  firstNestedType.isDefined
}

How to find the difference/mismatch between two JSON file?

I have two json files, one is expected json and the another one is the result of GET API call. I need to compare and find out the mismatch in the file.
Expected Json:
{
"array": [
1,
2,
3
],
"boolean": true,
"null": null,
"number": 123,
"object": {
"a": "b",
"c": "d",
"e": "f"
},
"string": "Hello World"
}
Actual Json response:
{
"array": [
1,
2,
3
],
"boolean": true,
"null": null,
"number": 456,
"object": {
"a": "b",
"c": "d",
"e": "f"
},
"string": "India"
}
Actually there are two mismatch: number received is 456 and string is India.
Is there a way to compare and get these two mismatch as results.
This need to be implemented in gatling/scala.
You can use, for example, play-json library and recursively traverse both JSONs. For next input (a bit more sophisticated than yours input):
LEFT:
{
"array" : [ 1, 2, 4 ],
"boolean" : true,
"null" : null,
"number" : 123,
"object" : {
"a" : "b",
"c" : "d",
"e" : "f"
},
"string" : "Hello World",
"absent-in-right" : true,
"different-types" : 123
}
RIGHT:
{
"array" : [ 1, 2, 3 ],
"boolean" : true,
"null" : null,
"number" : 456,
"object" : {
"a" : "b",
"c" : "d",
"e" : "ff"
},
"string" : "India",
"absent-in-left" : true,
"different-types" : "YES"
}
It produces this output:
Next fields are absent in LEFT:
*\absent-in-left
Next fields are absent in RIGHT:
*\absent-in-right
'*\array\(2)' => 4 != 3
'*\number' => 123 != 456
'*\object\e' => f != ff
'*\string' => Hello World != India
Cannot compare JsNumber and JsString in '*\different-types'
Code:
// Sample documents to compare; both are parsed and cast to JsObject.
val left = Json.parse("""{"array":[1,2,4],"boolean":true,"null":null,"number":123,"object":{"a":"b","c":"d","e":"f"},"string":"Hello World","absent-in-right":true,"different-types":123}""").asInstanceOf[JsObject]
val right = Json.parse("""{"array":[1,2,3],"boolean":true,"null":null,"number":456,"object":{"a":"b","c":"d","e":"ff"},"string":"India","absent-in-left":true,"different-types":"YES"}""").asInstanceOf[JsObject]
// "*" is the label used for the root node; the path starts out empty.
showJsDiff(left, right, "*", Seq.empty[String])
/**
 * Recursively compares two JSON values and prints every difference found.
 *
 * Values are paired by kind with a pattern match rather than by comparing
 * runtime classes, so `true` vs `false` is reported as a value difference even
 * where play-json represents the two booleans by distinct classes.
 *
 * @param left   value from the first document
 * @param right  value from the second document
 * @param parent label of the current node (field name, `(index)` or `*` for the root)
 * @param path   labels of the ancestors of the current node
 */
def showJsDiff(left: JsValue, right: JsValue, parent: String, path: Seq[String]): Unit = {
  val newPath = path :+ parent
  (left, right) match {
    // Primitive values are compared directly.
    case (JsNull, JsNull) => ()
    case (JsBoolean(l), JsBoolean(r)) => logIfNotEqual(l, r, newPath)
    case (JsNumber(l), JsNumber(r)) => logIfNotEqual(l, r, newPath)
    case (JsString(l), JsString(r)) => logIfNotEqual(l, r, newPath)

    // Arrays of equal length are compared element by element.
    case (JsArray(arr1), JsArray(arr2)) =>
      if (arr1.length != arr2.length) {
        println(s"Arrays in '${getPath(newPath)}' have different length. ${arr1.length} != ${arr2.length}")
      } else {
        arr1.indices.foreach { idx =>
          showJsDiff(arr1(idx), arr2(idx), s"($idx)", newPath)
        }
      }

    // Objects: report one-sided fields, then recurse into the shared ones.
    case (leftObject: JsObject, rightObject: JsObject) =>
      val leftFields = leftObject.keys.toSeq
      val rightFields = rightObject.fields.map { case (name, _) => name }

      val absentInLeft = rightFields.diff(leftFields)
      if (absentInLeft.nonEmpty) {
        println("Next fields are absent in LEFT: ")
        absentInLeft.foreach { fieldName =>
          println(s"\t ${getPath(newPath :+ fieldName)}")
        }
      }

      val absentInRight = leftFields.diff(rightFields)
      if (absentInRight.nonEmpty) {
        println("Next fields are absent in RIGHT: ")
        absentInRight.foreach { fieldName =>
          println(s"\t ${getPath(newPath :+ fieldName)}")
        }
      }

      leftFields.intersect(rightFields).foreach { field =>
        showJsDiff(leftObject.value(field), rightObject(field), field, newPath)
      }

    // Different kinds of value cannot be compared.
    case _ =>
      println(s"Cannot compare ${left.getClass.getSimpleName} and ${right.getClass.getSimpleName} " +
        s"in '${getPath(newPath)}'")
  }
}
/**
 * Prints `'<path>' => <left> != <right>` when the two values differ; prints
 * nothing when they are equal. Path labels are joined with a backslash.
 */
def logIfNotEqual[T](left: T, right: T, path: Seq[String]): Unit = {
  val location = path.mkString("\\")
  if (left == right) ()
  else println("'" + location + "' => " + left + " != " + right)
}
/** Joins the path labels with a single backslash; an empty path gives "". */
def getPath(path: Seq[String]): String =
  path.reduceOption((joined, label) => joined + "\\" + label).getOrElse("")
Use diffson - a Scala implementation of RFC-6901 and RFC-6902: https://github.com/gnieh/diffson
json4s has a handy diff function described here: https://github.com/json4s/json4s (search for Merging & Diffing) and API doc here: https://static.javadoc.io/org.json4s/json4s-core_2.9.1/3.0.0/org/json4s/Diff.html
This is a slightly modified version of Artavazd's answer (which is amazing btw thank you so much!). This version outputs the differences into a convenient object instead of only logging them.
import play.api.Logger
import play.api.libs.json.{JsArray, JsBoolean, JsError, JsNull, JsNumber, JsObject, JsString, JsSuccess, JsValue, Json, OFormat, Reads}
/** Collection of all differences found between two JSON documents. */
case class JsDifferences(differences: List[JsDifference] = List())

/** Companion providing the play-json format for [[JsDifferences]]. */
object JsDifferences {
  implicit val format: OFormat[JsDifferences] =
    Json.format[JsDifferences]
}
/**
 * A single difference between two JSON documents.
 *
 * @param key      label of the differing node
 * @param path     labels from the root down to the differing node
 * @param oldValue textual value on the left side, if present
 * @param newValue textual value on the right side, if present
 */
case class JsDifference(key: String, path: Seq[String], oldValue: Option[String], newValue: Option[String])

/** Companion providing the play-json format for [[JsDifference]]. */
object JsDifference {
  implicit val format: OFormat[JsDifference] =
    Json.format[JsDifference]
}
/** Utilities for computing the differences between two JSON values. */
object JsonUtils {
  val logger: Logger = Logger(this.getClass)

  /**
   * Recursively compares two JSON values and returns every difference found.
   *
   * Values are paired by kind with a pattern match rather than by comparing
   * runtime classes, so `true` vs `false` is reported as a difference. Values
   * of different kinds and arrays of different length are reported as one
   * difference holding both sides, instead of being dropped.
   *
   * @param left   value from the first ("old") document
   * @param right  value from the second ("new") document
   * @param parent label of the current node; `*` denotes the root
   * @param path   labels of the ancestors of the current node
   * @return all differences found at or below this node
   */
  def findDiff(left: JsValue, right: JsValue, parent: String = "*", path: List[String] = List()): JsDifferences = {
    val newPath = path :+ parent

    // Reports the current node as a whole (both sides rendered as JSON text).
    def wholeNodeDiffers: JsDifferences = JsDifferences(List(JsDifference(
      key = parent, path = newPath, oldValue = Some(left.toString), newValue = Some(right.toString)
    )))

    (left, right) match {
      case (JsNull, JsNull) => JsDifferences()
      case (JsBoolean(l), JsBoolean(r)) => logIfNotEqual(l, r, newPath)
      case (JsNumber(l), JsNumber(r)) => logIfNotEqual(l, r, newPath)
      case (JsString(l), JsString(r)) => logIfNotEqual(l, r, newPath)

      case (JsArray(arr1), JsArray(arr2)) =>
        if (arr1.length != arr2.length) {
          logger.debug(s"Arrays in '${getPath(newPath)}' have different length. ${arr1.length} != ${arr2.length}")
          wholeNodeDiffers
        } else {
          JsDifferences(arr1.indices.flatMap { idx =>
            findDiff(arr1(idx), arr2(idx), s"($idx)", newPath).differences
          }.toList)
        }

      case (leftJsObject: JsObject, rightJsObject: JsObject) =>
        val leftFields = leftJsObject.keys.toSeq
        val rightFields = rightJsObject.fields.map { case (name, _) => name }

        // Fields that exist only on the right side.
        val leftDifferences = rightFields.diff(leftFields).map(fieldName => JsDifference(
          key = fieldName, path = newPath :+ fieldName, oldValue = None, newValue = Some(rightJsObject(fieldName).toString)
        ))
        // Fields that exist only on the left side.
        val rightDifferences = leftFields.diff(rightFields).map(fieldName => JsDifference(
          key = fieldName, path = newPath :+ fieldName, oldValue = Some(leftJsObject(fieldName).toString), newValue = None
        ))
        // Fields present on both sides are compared recursively.
        val commonDifferences = leftFields.intersect(rightFields).flatMap { field =>
          findDiff(leftJsObject(field), rightJsObject(field), field, newPath).differences
        }
        JsDifferences((leftDifferences ++ rightDifferences ++ commonDifferences).toList)

      case _ =>
        logger.debug(s"Cannot compare ${left.getClass.getSimpleName} and ${right.getClass.getSimpleName} in '${getPath(newPath)}'")
        wholeNodeDiffers
    }
  }

  /**
   * Wraps a pair of unequal values into a single-element [[JsDifferences]];
   * returns an empty result when the values are equal.
   *
   * @param path labels from the root to the compared node; must be non-empty
   */
  def logIfNotEqual[T](left: T, right: T, path: Seq[String]): JsDifferences = {
    if (left != right) {
      JsDifferences(List(JsDifference(
        key = path.last, path = path, oldValue = Some(left.toString), newValue = Some(right.toString)
      )))
    } else JsDifferences()
  }

  /** Joins the path labels with a backslash. */
  def getPath(path: Seq[String]): String = path.mkString("\\")
}

Observable from a RESTful paged collection

On one hand, I have a RESTful HAL HATEOAS collection which looks like this :
{
"page": 1,
"limit": 10,
"pages": 18,
"total": 174,
"_links": {
"self": { "href": "/users?page=1&limit=10" },
"first": { "href": "/users?page=1&limit=10" },
"last": { "href": "/users?page=18&limit=10" },
"next": { "href": "/users?page=2&limit=10" }
},
"_embedded": {
"users": [
{
"name": "bob",
"_links": { "self": { "href": "/users/1" } }
},
...
]
}
}
On the other hand, I have an Angular 2 app.
/**
 * Fetches one page of the collection and emits its embedded users one by one.
 * Errors are routed through `this.handleError`.
 */
public getUsers(uri: string = this.baseURI): Observable<User> {
  const pageOfUsers = this.http.get(uri)
    .map(res => res.json()._embedded.users as User[]);
  // Flatten the stream of arrays into a stream of single users.
  const users = pageOfUsers.flatMap(page => page);
  return users.catch(this.handleError);
} // Get only the 10th first users
What I'm trying to do is have an observable of Users which keeps appending data while _links.next != null
Modified service
/**
 * Fetches one page of the collection, remembers the URI of the next page in
 * `this.nextUri` (or `null` when there is none) and emits the page's embedded
 * users one by one. Errors are routed through `this.handleError`.
 */
public getUsers(uri: string = this.baseURI): Observable<User> {
  return this.http.get(uri)
    .do(res => {
      // The last page has no `next` link, so every level is checked before
      // reading `href`.
      const links = res.json()._links;
      const next = links && links.next;
      this.nextUri = (next && next.href) ? next.href : null;
    })
    .map(res => res.json()._embedded.users as User[])
    .flatMap(d => d) // Transform the flux of arrays in flux of users
    .catch(this.handleError);
}
Recursive function
/**
 * Loads the page at `uri`, stores its users in `this.stockedUsers` and, once
 * the page has completed, continues with `this.nextUri` while one is set.
 */
loadAll(uri: string) {
  // `read` is a member of this class, so it must be called through `this`.
  this.read(uri)
    .subscribe(
      user => {
        this.stockedUsers.push(user);
      },
      error => console.log(error),
      () => {
        if (this.nextUri) {
          this.loadAll(this.nextUri);
        }
      }
    );
}
Does someone know how to achieve this properly ?
I want to keep the advantages of the RxJS flux.
UPDATE/ANSWER
Silly me ! I think I answered myself. Maybe this will help others :
/**
 * Returns an observable that emits the users of every page, starting at `uri`
 * and following the pages via `iteratePages`.
 */
public read(uri: string = this.baseURI): Observable<User> {
  const startPaging = observer => this.iteratePages(observer, uri);
  return Observable.create(startPaging);
}
/**
 * Fetches the page at `uri`, pushes each embedded user to `observer` and then
 * recurses into the page's `next` link. Completes the observer when there is
 * no further page and forwards request failures as an error notification.
 */
private iteratePages(observer: Observer<User>, uri): void {
  if (uri == null) { return observer.complete(); }
  this.http.get(uri).subscribe(
    res => {
      const data = res.json();
      // Tolerate a page without embedded users.
      const users = (data._embedded && data._embedded.users) ? data._embedded.users : [];
      for (const user of users) {
        observer.next(user as User);
      }
      const nextUri = (data._links && data._links.next) ? data._links.next.href : null;
      this.iteratePages(observer, nextUri);
    },
    // Without this handler a failed request would leave the observer hanging.
    error => observer.error(error)
  );
}