I am trying to save a Dataset into an Elasticsearch index every day (scheduled with Oozie), but I sometimes get the error java.lang.NoClassDefFoundError: Could not initialize class org.apache.spark.util.JsonProtocol, and the job fails immediately. I don't know why this error appears.
Code :
/**
  * Reads the semicolon-delimited CSV (with header row) configured under
  * `sources.dictionary.source1.path` and keeps two columns: `id`, renamed to
  * `sourceId`, and `assembly_site_id`.
  */
private def readSource1()(implicit spark: SparkSession): DataFrame = {
  import spark.implicits._

  val sourceName = "dictionary.source1"
  val plantsPath: String = config.getString(s"sources.$sourceName.path")

  val rawCsv = spark.read
    .option("delimiter", ";")
    .option("header", "true")
    .csv(plantsPath)

  rawCsv.select($"id".as("sourceId"), $"assembly_site_id")
}
/**
  * Loads the "source2" DataFrame. When both `startDate` and `endDate` are set,
  * only rows whose `assemblyEndDate` or `tctDate` lies between the two dates are
  * kept; otherwise the DataFrame is returned unfiltered.
  */
private def readSource2()(implicit spark: SparkSession): DataFrame = {
  import spark.implicits._

  val source2: SourceIO = SourceManager(config)("source2")
  val loaded = source2.loadDf()

  val restricted = for {
    sd <- startDate
    ed <- endDate
  } yield loaded.where(
    $"assemblyEndDate".between(Date.valueOf(sd), Date.valueOf(ed)) ||
      $"tctDate".between(Date.valueOf(sd), Date.valueOf(ed))
  )

  restricted.getOrElse(loaded)
}
/**
  * Reads both sources, combines them through `getSource` and `buildSource`, and
  * writes the resulting dataset to the index `source_<createDateString()>/_doc`.
  */
def saveSourceToEs(implicit sparkSession: SparkSession): Unit = {
  val combined = this.getSource(readSource1(), readSource2())
  val source: Dataset[Source] = buildSource(combined)
  val target = s"source_${createDateString()}/_doc"
  source.saveToEs(target)
}
/**
  * Companion and entry point: builds a `SourceIndexer` from the loaded
  * configuration and runs the Elasticsearch export.
  */
object SourceIndexer extends SparkApp with Configurable with Logging {

  val config: Config = ConfigFactory.load()

  /** Creates an indexer bound to `config`; every filter is optional. */
  def apply(
    sourceID: Option[String] = None,
    startDate: Option[LocalDate] = None,
    endDate: Option[LocalDate] = None
  ): SourceIndexer =
    new SourceIndexer(config, sourceID, startDate, endDate)

  /** Parses the command line, runs the export, and always stops the Spark context. */
  def main(args: Array[String]): Unit =
    try {
      val bootConfig = BootConfig.parseSourceIndexer(args)
      val indexer = apply(bootConfig.sourceID, bootConfig.startDate, bootConfig.endDate)
      indexer.saveSourceToEs(spark)
    } finally {
      spark.sparkContext.stop()
    }
}
Thanks for your help.
Related
I'm trying to serialize a Map<String, *> type.
Currently I'm declaring a SerializersModule, and every type that I store in the map is registered there.
// Registers every concrete type that may appear as a map value under the polymorphic base `Any`.
val module = SerializersModule {
polymorphic(Any::class) {
// Int and String are registered through PolymorphicPrimitiveSerializer instead of their plain serializers.
subclass(Int::class, PolymorphicPrimitiveSerializer(Int.serializer()))
subclass(String::class, PolymorphicPrimitiveSerializer(String.serializer()))
// ComplexType is registered without an explicit serializer argument.
subclass(ComplexType::class)
}
}
Can I make the compiler enforce that only types registered in the SerializersModule can be added to the map?
I'm also open to hearing about other strategies for serializing star-projected types.
Full code:
import kotlinx.serialization.*
import kotlinx.serialization.builtins.serializer
import kotlinx.serialization.descriptors.SerialDescriptor
import kotlinx.serialization.json.Json
import kotlinx.serialization.encoding.Decoder
import kotlinx.serialization.encoding.Encoder
import kotlinx.serialization.modules.SerializersModule
import kotlinx.serialization.descriptors.buildClassSerialDescriptor
import kotlinx.serialization.encoding.decodeStructure
import kotlinx.serialization.encoding.encodeStructure
import kotlinx.serialization.modules.*
import java.io.Serializable
/**
 * Serializer that wraps [typeSerializer] so a value is encoded as a structure
 * with a single element named "value", instead of as a bare value.
 *
 * The descriptor reuses the wrapped serializer's serial name.
 */
@OptIn(ExperimentalSerializationApi::class)
class PolymorphicPrimitiveSerializer<T>(val typeSerializer: KSerializer<T>) : KSerializer<T> {

    // Class-kind descriptor with exactly one element ("value") holding the wrapped type.
    override val descriptor: SerialDescriptor =
        buildClassSerialDescriptor(typeSerializer.descriptor.serialName) {
            element("value", typeSerializer.descriptor)
        }

    /** Decodes the one-element structure and returns the wrapped value. */
    override fun deserialize(decoder: Decoder): T =
        decoder.decodeStructure(descriptor) {
            // The returned index is ignored because the structure has only element 0.
            // NOTE(review): presumably the call is needed to advance the decoder
            // before the element is read - confirm against the decoder contract.
            decodeElementIndex(descriptor)
            decodeSerializableElement(descriptor, 0, typeSerializer)
        }

    /** Encodes [value] as element 0 ("value") of the wrapping structure. */
    override fun serialize(encoder: Encoder, value: T) {
        encoder.encodeStructure(descriptor) {
            encodeSerializableElement(descriptor, 0, typeSerializer, value)
        }
    }
}
/** Typed map key: a name paired with the Java class token of the value type. */
data class MetaKey<T>(
    val name: String,
    val typ: Class<T>
) : java.io.Serializable {

    companion object {
        /** Builds a key whose class token is the object (boxed) Java class of [T]. */
        inline fun <reified T : Serializable> named(name: String): MetaKey<T> {
            val classToken = T::class.javaObjectType
            return MetaKey(name, classToken)
        }
    }
}
/** Returns an empty read-only map from [MetaKey] to an arbitrary non-null value. */
fun emptyMetaMap(): Map<MetaKey<*>, Any> = emptyMap()
/**
 * Sample serializable value type holding a single Int.
 *
 * The annotation is fully qualified; this file also imports java.io.Serializable.
 */
@kotlinx.serialization.Serializable
data class ComplexType(val i: Int)
/**
 * Round-trips a heterogeneous meta map through JSON: each entry is encoded as
 * a (key name, polymorphic value) pair of strings, decoded again, and the
 * rebuilt map is compared with the original.
 */
fun main() {
    // Maps the "type" discriminator found in the JSON back to a Java class token.
    val typesDictionary = mapOf(
        "kotlin.Int" to Int::class.javaObjectType,
        "ComplexType" to ComplexType::class.javaObjectType,
        "kotlin.String" to String::class.javaObjectType
    )

    val module = SerializersModule {
        polymorphic(Any::class) {
            subclass(Int::class, PolymorphicPrimitiveSerializer(Int.serializer()))
            subclass(String::class, PolymorphicPrimitiveSerializer(String.serializer()))
            subclass(ComplexType::class)
        }
    }
    val format = Json { serializersModule = module }

    val mm = emptyMetaMap() +
        (MetaKey("complex type", ComplexType::class.javaObjectType) to ComplexType(1)) +
        (MetaKey("Int one", Int::class.javaObjectType) to 1) +
        (MetaKey("string value", String::class.javaObjectType) to "2B||!2B")

    // One two-element list per entry: [encoded key name, encoded polymorphic value].
    val jsoned = mm.entries.map { (key, value) ->
        listOf(
            format.encodeToString(key.name),
            format.encodeToString(PolymorphicSerializer(Any::class), value),
        )
    }

    val mmDecoded = jsoned.fold(emptyMetaMap()) { rebuilt, (encodedKey, encodedValue) ->
        val strKey = format.decodeFromString<String>(encodedKey)
        val decodedVal = format.decodeFromString(PolymorphicSerializer(Any::class), encodedValue)
        // The class token is recovered by cutting the discriminator out of the raw JSON text.
        val strType = encodedValue.substringAfter("\"type\":\"").substringBefore("\"")
        val metaKeyDeserialized = MetaKey(strKey, typesDictionary[strType] ?: throw IllegalArgumentException())
        rebuilt + (metaKeyDeserialized to decodedVal)
    }

    assert(mmDecoded == mm)
}
I want to compare the first JSON string with the other two JSON strings.
First, the top-level keys should match. If they match, then compare the nested keys and values.
// Example 1: of3 uses the top-level key "keyB" while of1 and of2 use "keyA".
val of1 = "{\"keyA\":{\"1\":13,\"0\":202}}"
val of2 = "{\"keyA\":{\"1\":12,\"0\":201}}"
val of3 = "{\"keyB\":{\"1\":12}}"
Should throw Error for key mismatch.
// Example 2: all three documents share the key "keyA", and every nested value
// in of1 is larger than the matching value in of2 and of3.
val of1 = "{\"keyA\":{\"1\":13,\"0\":202}}"
val of2 = "{\"keyA\":{\"1\":12,\"0\":201}}"
val of3 = "{\"keyA\":{\"1\":11,\"0\":200}}"
This should return true, because the keys match and the values under sub-keys 1 and 0 in the first JSON are greater than the corresponding values in JSON 2 and JSON 3. The numbers are Long values.
Please help.
Below is my try.
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper
// Sample input for OffsetComparator: of3 has a different top-level key ("keyB") from of1 and of2.
val of1 = "{\"keyA\":{\"1\":13,\"0\":202}}"
val of2 = "{\"keyA\":{\"1\":12,\"0\":201}}"
val of3 = "{\"keyB\":{\"1\":12}}"
/**
  * Compares the nested numeric values of `json1` against `json2` and `json3`.
  *
  * Each document must have the shape `{"key": {"subKey": number, ...}, ...}`.
  *
  * @param json1 document whose values are expected to be the largest
  * @param json2 first document to compare against
  * @param json3 second document to compare against
  * @return true when every nested value of `json1` is strictly greater than the
  *         value at the same key and sub-key in both other documents
  * @throws Exception with message "partitions mismatch" when the top-level keys,
  *                   or the sub-keys under any top-level key, differ between documents
  * @throws IllegalArgumentException when a nested value is not numeric
  */
def OffsetComparator(json1: String, json2: String, json3: String): Boolean = {
  val mapper = new ObjectMapper() with ScalaObjectMapper
  mapper.registerModule(DefaultScalaModule)

  // Generic value types are erased, so Jackson returns java.lang.Integer for small
  // numbers and a direct cast to Long fails. Values are read as Number and widened.
  def parseOffsets(json: String): Map[String, Map[String, Long]] =
    mapper.readValue[Map[String, Map[String, Any]]](json).map { case (key, nested) =>
      key -> nested.map {
        case (subKey, number: Number) => subKey -> number.longValue()
        case (subKey, other) =>
          throw new IllegalArgumentException(s"Non-numeric value at $key/$subKey: $other")
      }
    }

  val reference = parseOffsets(json1)
  val others = Seq(parseOffsets(json2), parseOffsets(json3))

  // Keys are checked first so the value comparison below never hits a missing key.
  val keysMatch = others.forall { other =>
    other.keySet == reference.keySet &&
      reference.forall { case (key, nested) => other(key).keySet == nested.keySet }
  }
  if (!keysMatch) throw new Exception("partitions mismatch")

  reference.forall { case (key, nested) =>
    nested.forall { case (subKey, value) =>
      others.forall(other => value > other(key)(subKey))
    }
  }
}
println(OffsetComparator(of1, of2,of3))
}
You can try https://github.com/gnieh/diffson. It is available for circe, spray-json and play-json.
Your example with Circe:
import diffson._
import diffson.lcs._
import diffson.jsonpatch.lcsdiff._
import io.circe._
import diffson.circe._
import diffson.jsonpatch._
import io.circe.parser._
// Codecs for JsonPatch[Json]; they are summoned here but not used further in this snippet.
val decoder = Decoder[JsonPatch[Json]]
val encoder = Encoder[JsonPatch[Json]]
// NOTE(review): presumably the implicit LCS implementation that diff requires - confirm against diffson docs.
implicit val lcs = new Patience[Json]
// Parse both input strings (of1 and of2 must already be in scope).
val json1 = parse(of1)
val json2 = parse(of2)
// Diff the two documents only if both parsed successfully.
val patch =
for {
json1 <- json1
json2 <- json2
} yield diff(json1, json2)
print(patch)
That gives:
Right(JsonPatch(List(Replace(Chain(Left(keyA), Left(0)),201,None), Replace(Chain(Left(keyA), Left(1)),12,None))))
Take a look at https://index.scala-lang.org/gnieh/diffson/diffson-circe/4.0.3?target=_2.13 to see how it works.
For Circe, include the dependency:
"org.gnieh" %% f"diffson-circe" % "4.0.3"
I have a Json serializer with Play Json for Id
/**
  * Play JSON format for `Id[T]`: an id is written as a JSON string holding
  * `id.underlying.toString` and read back from a JSON string.
  *
  * Non-string input yields a `JsError`; no exception is thrown.
  */
def idFormat[T]: Format[Id[T]] =
  Format(
    // validate (unlike `as`) reports a type mismatch as JsError.
    Reads[Id[T]](jv => jv.validate[String].map(raw => Id[T](raw))),
    Writes[Id[T]](id => JsString(id.underlying.toString))
  )
// Implicit entry point: exposes idFormat[A] as the implicit Format for any Id[A].
implicit def idFormatter[A]: Format[Id[A]] = idFormat[A]
Now I would like to support serializing Option[Id[_]] as well, but all my attempts were in vain. How would I write
implicit def optionIdFormatter[Option[A]]: ....
import play.api.libs.json._
/** Wrapper around a single value of type `T`. */
case class Id[T](t: T)

/**
  * Derives a format for `Id[T]` from the format of `T`: the id is read and
  * written exactly like the wrapped value.
  */
implicit def idFormat[T](implicit tFormat: Format[T]): Format[Id[T]] = {
  val idReads: Reads[Id[T]] = Reads(jsValue => tFormat.reads(jsValue).map(t => Id(t)))
  val idWrites: Writes[Id[T]] = Writes(id => tFormat.writes(id.t))
  Format(idReads, idWrites)
}
/**
  * Derives a format for `Option[T]` from the format of `T`.
  *
  * `None` is written as JSON null and `Some(t)` as `t` itself. JSON null is
  * read back as `None`, so a value written by this format can always be read
  * again; any other input is delegated to the format of `T`.
  */
implicit def optionFormat[T](implicit tFormat: Format[T]): Format[Option[T]] = Format(
  Reads[Option[T]] {
    // Mirror of writes: null is the encoding of None.
    case JsNull => JsSuccess(None)
    case jsValue => tFormat.reads(jsValue).map(t => Some(t))
  },
  Writes[Option[T]] { o =>
    o.map(t => tFormat.writes(t)).getOrElse(JsNull)
  }
)
// A defined optional id serializes to the wrapped value itself.
val intIdOpt = Some(Id(5))
val intIdOptJson = Json.toJson(intIdOpt)
val intIdOptJsonString = intIdOptJson.toString()
// intIdOptJsonString: String = 5
// The same holds when the optional id is a map value.
val usingOptionIntId1 = Map("id" -> Some(Id(5)))
val usingOptionIntId1JsonString = Json.toJson(usingOptionIntId1).toString()
// usingOptionIntId1JsonString: String = {"id":5}
// An empty optional id serializes to null.
val usingOptionIntId2 = Map("id" -> Option.empty[Id[Int]])
val usingOptionIntId2JsonString = Json.toJson(usingOptionIntId2).toString()
// usingOptionIntId2JsonString: String = {"id":null}
I have a scala application and have a case class like -
/**
  * Record with an optional system id and three timestamp-keyed maps.
  *
  * The third map is named `z`: a case class cannot declare `y` twice, and the
  * surrounding text refers to the properties as x, y, z.
  */
case class SR(
  systemId: Option[String] = None,
  x: Map[Timestamp, CaseClass1] = Map.empty,
  y: Map[Timestamp, CaseClass2] = Map.empty,
  z: Map[Timestamp, CaseClass3] = Map.empty
)
Now I have to provide implicit JSON read and write formats for the properties x, y, z of the SR case class, like this:
/**
  * Format for a timestamp-keyed map of `CaseClass1`.
  *
  * The map is written as a JSON array of its values (keys are not written).
  * On read, the array is parsed and each element is keyed by its `dataDate`.
  * An element without a `dataDate` makes the read fail with a `JsError`
  * instead of throwing from `.get`.
  */
implicit val mapCMPFormat: Format[Map[Timestamp, CaseClass1]] = new Format[Map[Timestamp, CaseClass1]] {
  def writes(obj: Map[Timestamp, CaseClass1]): JsValue =
    JsArray(obj.values.toSeq.map(Json.toJson(_)))

  def reads(jv: JsValue): JsResult[Map[Timestamp, CaseClass1]] =
    jv.validate[scala.collection.Seq[CaseClass1]] match {
      case JsSuccess(objs, path) =>
        // NOTE(review): assumes dataDate is an Option[Timestamp] - confirm against CaseClass1.
        val keyed = objs.flatMap(obj => obj.dataDate.map(date => date -> obj))
        if (keyed.size == objs.size) JsSuccess(keyed.toMap, path)
        else JsError(path, "error.dataDate.missing")
      case err: JsError => err
    }
}
And so on, similarly for y and z. In the future I will be adding many more properties like x, y, z to the SR case class, and each will then need its own formatter.
So, can I write a generic formatter that will take care of all these types?
To my knowledge, a simple way to achieve this does not exist. However, creating a "default" reader for each object should not be hard; something like:
/** Payload for adding a vehicle color: a display name and an RGB string. */
case class VehicleColorForAdd(
  name: String,
  rgb: String
)

object VehicleColorForAdd {
  // Lives in the companion object so it is found through implicit scope without an import.
  // The macro is Json.format (singular); Json.formats does not exist.
  implicit val jsonFormat: Format[VehicleColorForAdd] = Json.format[VehicleColorForAdd]
}
This way you have access to the implicit by simply using the object, so you could have other objects that contains this object with no problem:
/** Model that embeds a `VehicleColorForAdd`. */
case class BiggerModel(vehicleColorForAdd: VehicleColorForAdd)

object BiggerModel {
  /** Macro-derived format, declared in the companion of the model. */
  implicit val jsonFormat: Format[BiggerModel] =
    Json.format[BiggerModel]
}
Sadly, you need to do this for each class type, but you can "extend" the Play converters with your own. For example, these are some of my default readers:
package common.json
import core.order.Order
import org.joda.time.{ DateTime, LocalDateTime }
import org.joda.time.format.DateTimeFormat
import core.promotion.{ DailySchedule, Period }
import play.api.libs.functional.syntax._
import play.api.libs.json.Reads._
import play.api.libs.json._
import play.api.libs.json.{ JsError, JsPath, JsSuccess, Reads }
import scala.language.implicitConversions
/**
* General JSon readers and transformations.
*/
/**
  * General JSON readers and validation helpers.
  */
object JsonReaders {

  /** Pattern used to read and write both `LocalDateTime` and `DateTime` values. */
  val dateTimeFormat = "yyyy-MM-dd HH:mm:ss"

  // Built once instead of on every parse; the pattern never changes.
  private val dateTimeFormatter = DateTimeFormat.forPattern(dateTimeFormat)

  /** Adds trimmed / upper-cased / nullable string readers to a `JsPath`. */
  class JsPathHelper(val path: JsPath) {
    def readTrimmedString(implicit r: Reads[String]): Reads[String] =
      Reads.at[String](path)(r).map(_.trim)

    def readUpperString(implicit r: Reads[String]): Reads[String] =
      Reads.at[String](path)(r).map(_.toUpperCase)

    def readNullableTrimmedString(implicit r: Reads[String]): Reads[Option[String]] =
      Reads.nullable[String](path)(r).map(_.map(_.trim))
  }

  // Reads a JSON string and parses it with `parse`. An unparsable string is
  // reported as a JsError; the parser's exception does not escape.
  private def parsedString[A](parse: String => A): Reads[A] = Reads[A] { js =>
    js.validate[String].flatMap { raw =>
      scala.util.Try(parse(raw)) match {
        case scala.util.Success(parsed) => JsSuccess(parsed)
        case scala.util.Failure(_)      => JsError("error.expected.jodadate.format")
      }
    }
  }

  implicit val localDateTimeReader: Reads[LocalDateTime] =
    parsedString[LocalDateTime](raw => LocalDateTime.parse(raw, dateTimeFormatter))

  val localDateTimeWriter: Writes[LocalDateTime] = new Writes[LocalDateTime] {
    def writes(d: LocalDateTime): JsValue = JsString(d.toString(dateTimeFormat))
  }

  implicit val localDateTimeFormat: Format[LocalDateTime] =
    Format(localDateTimeReader, localDateTimeWriter)

  implicit val dateTimeReader: Reads[DateTime] =
    parsedString[DateTime](raw => DateTime.parse(raw, dateTimeFormatter))

  implicit def toJsPathHelper(path: JsPath): JsPathHelper = new JsPathHelper(path)

  val defaultStringMax: Reads[String] = maxLength[String](255)
  val defaultStringMinMax: Reads[String] = minLength[String](1) andKeep defaultStringMax

  val rgbRegex: Reads[String] =
    pattern("""^#([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})$""".r, "error.invalidRGBPattern")

  // Digits and ASCII letters only. The previous class contained `\a-z`, a range
  // starting at the bell character (U+0007) that also accepted most punctuation.
  val plateRegex: Reads[String] = pattern("""^[\da-zA-Z]*$""".r, "error.invalidPlatePattern")

  // NOTE(review): onlyWordsRegex is not defined in this object as shown; it must
  // come from elsewhere (and be initialised before this val) - confirm.
  val minOnlyWordsRegex: Reads[String] = minLength[String](2) keepAnd onlyWordsRegex

  val positiveInt: Reads[Int] = min[Int](1)
  val zeroPositiveInt: Reads[Int] = min[Int](0)
  val zeroPositiveBigDecimal: Reads[BigDecimal] = min[BigDecimal](0)
  val positiveBigDecimal: Reads[BigDecimal] = min[BigDecimal](1)

  /** Rejects a `Period` whose start lies after its end. */
  def validLocalDatePeriod()(implicit reads: Reads[Period]): Reads[Period] =
    Reads[Period](js => reads.reads(js).flatMap { o =>
      if (o.startPeriod isAfter o.endPeriod)
        JsError("error.startPeriodAfterEndPeriod")
      else
        JsSuccess(o)
    })

  /** Rejects a `DailySchedule` whose daily start lies after its daily end. */
  def validLocalTimePeriod()(implicit reads: Reads[DailySchedule]): Reads[DailySchedule] =
    Reads[DailySchedule](js => reads.reads(js).flatMap { o =>
      if (o.dailyStart isAfter o.dailyEnd)
        JsError("error.dailyStartAfterDailyEnd")
      else
        JsSuccess(o)
    })
}
Then, you only need to import this object to have access to all this implicit converters:
package common.forms
import common.json.JsonReaders._
import play.api.libs.json._
/**
* Form to add a model with only one string field.
*/
object SimpleCatalogAdd {

  /** Form payload: the single `name` field. */
  case class Data(name: String)

  /** Reads `name` as a trimmed string validated by `defaultStringMinMax`. */
  implicit val dataReads: Reads[Data] = {
    val trimmedName: Reads[String] = (__ \ "name").readTrimmedString(defaultStringMinMax)
    trimmedName.map(name => Data(name))
  }
}
I am very new to programming in Scala. I am writing a test program to get the maximum value from JSON data. I have the following code:
import scala.io.Source
import scala.util.parsing.json._
/** Reads `jsonData.txt`, splits its content on commas and prints each piece. */
object jsonParsing {
  //Id int `json:"id"`
  //Price int `json:"price"`
  def main(args: Array[String]): Unit = {
    val file_name = "jsonData.txt"
    val source = scala.io.Source.fromFile(file_name)
    // Close the file handle even if reading fails.
    val json_string =
      try source.getLines.mkString
      finally source.close()
    val json_arr = json_string.split(",")
    json_arr.foreach(println)
  }
}
The json_arr.foreach {println} prints following data:
[{ "id":1
"price":4629}
{ "id":2
"price":7126}
{ "id":3
"price":8862}
{ "id":4
"price":8999}
{ "id":5
"price":1095}]
I am stuck on how to find the maximum price in such JSON data. That is, in this case the output should be '8999'.
you can try something like this below:
package com.x.x.integration.commons
import collection.immutable.IndexedSeq
import com.google.gson.Gson
import com.google.gson.JsonObject
import com.google.gson.JsonParser
/** Top-level JSON object: the array of entries lives under the key `json_string`. */
case class wrapperObject(json_string: Array[MyJsonObject])

/** One entry of the array: an id and its price. */
case class MyJsonObject(id: Int, price: Int)
/**
  * Reads `jsonData.txt`, maps it onto `wrapperObject` with Gson and prints the
  * highest `price` found (0 when there is no entry with a positive price).
  */
object Demo {
  val gson = new Gson()

  def main(args: Array[String]): Unit = {
    val source = scala.io.Source.fromFile("jsonData.txt")
    // Close the file handle even if reading fails.
    val json_string =
      try source.getLines.mkString
      finally source.close()
    //val json_string= """{"json_string":[{"id":1,"price":4629},{"id":2,"price":7126},{"id":3,"price":8862},{"id":4,"price":8999},{"id":5,"price":1095}]}"""
    val jsonStringAsObject = new JsonParser().parse(json_string).getAsJsonObject
    val objectThatYouCanPlayWith: wrapperObject = gson.fromJson(jsonStringAsObject, classOf[wrapperObject])
    // Starting the fold at 0 keeps the original result for empty input.
    val maxPrice: Int =
      objectThatYouCanPlayWith.json_string.foldLeft(0)((best, entry) => math.max(best, entry.price))
    println(maxPrice)
  }
}
check if it helps you
I also recommend using Json4s or Play JSON.
But you could also do it without any such library.
// Sample document: a JSON array of {id, price} objects.
val json = """[{"id":1,"price":100},{"id":2, "price": 200}]"""
// Captures the digits that follow a "price" key, allowing whitespace around the colon.
val priceRegex = """"price"\s*:\s*(\d+)""".r
// Convert every captured price to Int and keep the largest one.
val maxPrice = priceRegex
  .findAllMatchIn(json)
  .map(priceMatch => priceMatch.group(1).toInt)
  .max
println(maxPrice) // print 200
Although Play JSON is handy, you could use Regex as well.
import scala.io.Source
import scala.util.matching.Regex._
// Compiled once instead of once per piece.
val pricePattern = "\"price\":(\\d+)".r

// Read the whole file, close the handle even if reading fails, then split on commas.
val jsonSource = Source.fromFile("jsonData.txt")
val jsonString =
  try jsonSource.getLines.mkString.split(",")
  finally jsonSource.close()

// Kept as a var so code that reassigns maxPrice afterwards keeps compiling.
var maxPrice = 0
jsonString.foreach { each =>
  pricePattern.findFirstMatchIn(each).foreach { priceMatch =>
    maxPrice = Math.max(maxPrice, priceMatch.group(1).toInt)
  }
}