Create a Postgresql table from Avro Schema in Nifi - json

Using InferAvroSchema I got an Avro schema of my file. I want to create a table in PostgreSQL using this Avro schema. Which processor do I have to use?
I use : GetFile->InferAvroSchema-> I want to create a table from this schema -> Put databaseRecord.
The avro schema :
{
"type" : "record",
"name" : "warranty",
"doc" : "Schema generated by Kite",
"fields" : [ {
"name" : "id",
"type" : "long",
"doc" : "Type inferred from '1'"
}, {
"name" : "train_id",
"type" : "long",
"doc" : "Type inferred from '21691'"
}, {
"name" : "siemens_nr",
"type" : "string",
"doc" : "Type inferred from 'Loco-001'"
}, {
"name" : "uic_nr",
"type" : "long",
"doc" : "Type inferred from '193901'"
}, {
"name" : "Configuration",
"type" : "string",
"doc" : "Type inferred from 'ZP28'"
}, {
"name" : "Warranty_Status",
"type" : "string",
"doc" : "Type inferred from 'Out_of_Warranty'"
}, {
"name" : "Warranty_Data_Type",
"type" : "string",
"doc" : "Type inferred from 'Real_based_on_preliminary_acceptance_date'"
}, {
"name" : "of_progression",
"type" : "long",
"doc" : "Type inferred from '100'"
}, {
"name" : "Delivery_Date",
"type" : "string",
"doc" : "Type inferred from '18/12/2009'"
}, {
"name" : "Warranty_on_Delivery_Date",
"type" : "string",
"doc" : "Type inferred from '18/12/2013'"
}, {
"name" : "Customer_Status",
"type" : "string",
"doc" : "Type inferred from 'homologation'"
}, {
"name" : "Commissioning_Date",
"type" : "string",
"doc" : "Type inferred from '6/10/2010'"
}, {
"name" : "Preliminary_acceptance_date",
"type" : "string",
"doc" : "Type inferred from '6/01/2011'"
}, {
"name" : "Warranty_Start_Date",
"type" : "string",
"doc" : "Type inferred from '6/01/2011'"
}, {
"name" : "Warranty_End_Date",
"type" : "string",
"doc" : "Type inferred from '6/01/2013'"
}, {
"name" : "Effective_End_Warranty_Date",
"type" : [ "null", "string" ],
"doc" : "Type inferred from 'null'",
"default" : null
}, {
"name" : "Level_2_in_function",
"type" : "string",
"doc" : "Type inferred from '17/07/2015'"
}, {
"name" : "Baseline",
"type" : "string",
"doc" : "Type inferred from '2.10.23.4'"
}, {
"name" : "RELN_revision",
"type" : "string",
"doc" : "Type inferred from '0434-26.3'"
}, {
"name" : "TC_report",
"type" : "string",
"doc" : "Type inferred from 'A480140'"
}, {
"name" : "Last_version_Date",
"type" : "string",
"doc" : "Type inferred from 'A-23/09/2015'"
}, {
"name" : "ETCS_ID_NID_Engine",
"type" : [ "null", "long" ],
"doc" : "Type inferred from '13001'",
"default" : null
}, {
"name" : "Item_Type",
"type" : "string",
"doc" : "Type inferred from 'Item'"
}, {
"name" : "Path",
"type" : "string",
"doc" : "Type inferred from 'sites/TrWMTISnerc_Community/Lists/X4Trains'"
} ]
}
and my CREATE TABLE statement is:
Create table warranty(
id float,
train_id float,
siemens_nr varchar(255),
uic_nr float,
configuration varchar(255),
warranty_status varchar(255),
warranty_data_type varchar(255),
of_progression float,
delivery_date varchar(255),
warranty_on_delivery_date varchar(255),
customer_status varchar(255),
commissioning_date varchar(255),
preliminary_acceptance_date varchar(255),
warranty_start_date varchar(255),
warranty_end_date varchar(255),
effective_end_warranty_date varchar(255),
level_2_in_function varchar(255),
baseline varchar(255),
reln_revision varchar(255),
tc_report varchar(255),
last_version_Date varchar(255),
etcs_id_nid_engine float,
item_type varchar(255),
path varchar(255)
)

I can suggest the ExecuteGroovyScript processor in NiFi v1.5+:
define new property SQL.mydb - you will be prompted to link its value to a database (DBCPConnectionPool)
choose the database where you want to create a table
and use this script (assume avro schema is in the flow file content)
import groovy.json.JsonSlurper

// Take the next flow file from the incoming queue; nothing to do when it's empty.
def flowFile = session.get()
if (!flowFile) return

// The flow file content is expected to be the Avro schema as JSON text.
def avro = flowFile.read().withReader("UTF-8") { rdr -> new JsonSlurper().parse(rdr) }

// Translate Avro field types (plain and nullable-union forms) into PostgreSQL column types.
// JsonSlurper produces Lists for unions, which hash-match the list-literal keys below.
def sqlTypeFor = [
"string" : "varchar(255)",
"long" : "numeric(10)",
[ "null", "string" ]: "varchar(255)",
[ "null", "long" ] : "numeric(10)",
]

// Refuse schemas without a usable record name (it becomes the table name).
assert avro.name && avro.name=~/^\w.*/

// Assemble the DDL: one padded column definition per Avro field.
def columnDefs = avro.fields.collect { fld -> "\n ${fld.name.padRight(39)} ${sqlTypeFor[fld.type]}" }
def ddl = "create table ${avro.name} (" + columnDefs.join(',') + "\n)"

// SQL.mydb is the groovy.sql.Sql object bound to the DBCPConnectionPool property.
// See http://docs.groovy-lang.org/2.4.10/html/api/groovy/sql/Sql.html
SQL.mydb.execute(ddl as String) // important: cast the GString to a plain String

// Route the flow file to the success relationship.
REL_SUCCESS << flowFile

Related

PROPERTY_REMOVED_FROM_CLOSED_CONTENT_MODEL for optional field

Use draft-7 json schema CLOSED_CONTENT_MODEL and BACKWARD compatibility and confluent schema registry 7.2.1-post.
deps:
implementation 'com.github.victools:jsonschema-generator:4.26.0'
implementation 'io.confluent:kafka-schema-registry:7.2.1'
schema example
{
"type" : "object",
"properties" : {
"name" : {
"type" : "string",
"description" : "String"
},
"timeDescription" : {
"type" : "string",
"description" : "String"
},
},
"required" : [ "name"],
"additionalProperties" : false
}
I am trying to find incompatibilities between schema via
{{base_url}}/compatibility/subjects/subject/versions/latest?verbose=true
(timeDescription is absent but not required)
{
"type" : "object",
"properties" : {
"name" : {
"type" : "string",
"description" : "String"
},
},
"required" : [ "name"],
"additionalProperties" : false
}
and see
```json
{
"is_compatible": false,
"messages": [
"Found incompatible change: Difference{jsonPath='#/properties/timeDescription', type=PROPERTY_REMOVED_FROM_CLOSED_CONTENT_MODEL}"
]
}
How to disable incompatibilities for case when optional field exists in one schema and absent in another?
Thanks for your answers.
It is not possible to enable or disable individual compatibility checks. You might need to select a different schema type.

Json schema validator, how to validate if key is not static

I am trying to validate this specific schema:
{
"messages": [
{
"name": "test msg",
"id": "0x100",
"signals": {
"0": {"name": "Engine RPM", "bit_length": 16},
"16": {"name": "Gear", "bit_length": 3},
"19": {"name": "Battery Voltage", "bit_length": 5}
}
}
]
}
I am using Python's `from jsonschema import Draft4Validator` to validate this schema... however I am not sure how to continue.
This is my current schema validation so far:
{
"$schema" : "https://json-schema.org/schema#",
"type" : "object",
"properties" :
{
"messages" :
{
"type" : "array",
"items" :
{
"properties" :
{
"name" :
{
"type" : "string"
},
"id" :
{
"type" : "string"
},
"signals" :
{
"type" : "object"
},
"properties" :
{
}
},
"required": ["name", "id", "signals"]
}
}
}
}
The problem I am facing is that I am not sure how to deal with the objects under the "signals" key, as their keys are strings that are NOT consistent ("0", "16", "19"). How could I go about validating this by ensuring the key type is always a string, disregarding whether or not the string is consistent?
Thanks to all of those who reply in advance.
I was able to accomplish this by doing the following:
{
"$schema" : "https://json-schema.org/schema#",
"type" : "object",
"properties" :
{
"messages" :
{
"type" : "array",
"items" :
{
"properties" :
{
"name" :
{
"type" : "string"
},
"id" :
{
"type" : "string"
},
"signals" :
{
"type" : "object"
},
"properties" :
{
"start_bit" :
{
"type" : "object",
"properties" :
{
"name" :
{
"type" : "string"
},
"bit_length" :
{
"type" : "string"
},
"factor" :
{
"type" : "string"
},
"offset" :
{
"type" : "string"
}
},
"required" : ["name", "bit_length", "factor", "offset"]
}
}
},
"required": ["name", "id", "signals"]
}
}
}
}
To "avoid" having to keep the string consistent, in the validator file I can put any string (obviously it makes more sense to name the string after what it represents, in my case "start_bit") and then NOT have it be required.

Creating a Proper avro schema for timestamp record

I would like to know what the proper avro schema would be for some json to avro conversion that is in this format:
{"entryDate": "2018-01-26T12:00:40.930"}
My schema:
{
"type" : "record",
"name" : "schema",
"fields" : [{
"name" : "entryDate",
"type" : ["null", {
"type" : "long",
"logicalType" : "timestamp-micros"
}],
"default" : null
}]
}
I keep getting
`'Cannot convert field entryDate: Cannot resolve union:
"2018-01-26T12:00:40.930"
not in
["null",{"type":"long","logicalType":"timestamp-millis"}]'`
It was a silly mistake...obviously I was storing the timestamp value as a string so the avro schema needed a string instead of long for type.
ie.
{
"type" : "record",
"name" : "schema",
"fields" : [{
"name" : "entryDate",
"type" : ["null", {
"type" : `**"long"**`,
"logicalType" : "timestamp-micros"
}],
"default" : null
}]
}
should be
{
"type" : "record",
"name" : "schema",
"fields" : [{
"name" : "entryDate",
"type" : ["null", {
"type" : `**"string"**`,
"logicalType" : "timestamp-micros"
}],
"default" : null
}]
}
doh!

Mongo query to create a subform from json entry

I want to only access a part of the document. Only Variables in the entry given below in the following format.
Document1:{
"META" : {
"CATEGORY" : "Boxes",
"CREATEDBY" : "Garima",
"PRIVACY" : "PUBLIC",
"KEYWORDS" : [
"day","night"
],
"TEMPLATE_NAME" : "Name",
"IS_ACTIVE" : true
},
"**Variables**" : **[
{
"INDEX" : 0,
"DATATYPE" : "string",
"NAME" : "varient text type",
},
{
"INDEX" : 1,
"DATATYPE" : "number",
"NAME" : "varient number type",
},
{
"INDEX" : 2,
"DATATYPE" : "price",
"NAME" : "varient price type",
},
{
"INDEX" : 3,
"DATATYPE" : "date",
"NAME" : "varient date type",
},
{
"INDEX" : 4,
"DATATYPE" : "text",
"NAME" : "varient textarea type",
},
{
"INDEX" : 5,
"DATATYPE" : "string",
"NAME" : "varient blank radio type",
},
{
"INDEX" : 6,
"DATATYPE" : "string",
"NAME" : "varient single radio type",
},**
Output Required
**Variable Names [varient text type,varient number type,varient price type,varient date type,varient textarea type,varient blank radio type,varient single radio type]**
I have used db.collection.find({Variables}) but it doesn't show the result, as it is an array.
Later I want these names to create a form in meteor using autoform
JS File:
var variant=CollectionName.find( { "VARIENTS.NAME": 1, _id : 0 } );
HTML File:
{{#each variant}}
<li>
{{#each VARIENTS}}
{{this.NAME}}
{{/each}}
</li>
{{/each}}
This will display the variant names only.
Your desired output and sample data do not match.
Is this what you are looking for?
db.collectionName.find({},{"Variables.INDEX":1,"Variables.DATATYPE":1,"Variables.NAME":1, "_id":0})
The above query will give the following output with the sample data provided in the question.
{
"Variables" : [
{
"INDEX" : 0,
"DATATYPE" : "string",
"NAME" : "varient text type"
},
{
"INDEX" : 1,
"DATATYPE" : "number",
"NAME" : "varient number type"
},
{
"INDEX" : 2,
"DATATYPE" : "price",
"NAME" : "varient price type"
},
{
"INDEX" : 3,
"DATATYPE" : "date",
"NAME" : "varient date type"
},
{
"INDEX" : 4,
"DATATYPE" : "text",
"NAME" : "varient textarea type"
},
{
"INDEX" : 5,
"DATATYPE" : "string",
"NAME" : "varient blank radio type"
},
{
"INDEX" : 6,
"DATATYPE" : "string",
"NAME" : "varient single radio type"
}
]
}

Json Schema example for oneOf objects

I am trying to figure out how oneOf works by building a schema which validates two different object types. For example a person (firstname, lastname, sport) and vehicles (type, cost).
Here are some sample objects:
{"firstName":"John", "lastName":"Doe", "sport": "football"}
{"vehicle":"car", "price":20000}
The question is what have I done wrongly and how can I fix it. Here is the schema:
{
"description": "schema validating people and vehicles",
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"required": [ "oneOf" ],
"properties": { "oneOf": [
{
"firstName": {"type": "string"},
"lastName": {"type": "string"},
"sport": {"type": "string"}
},
{
"vehicle": {"type": "string"},
"price":{"type": "integer"}
}
]
}
}
When I try to validate it in this parser:
https://json-schema-validator.herokuapp.com/
I get the following error:
[ {
"level" : "fatal",
"message" : "invalid JSON Schema, cannot continue\nSyntax errors:\n[ {\n \"level\" : \"error\",\n \"schema\" : {\n \"loadingURI\" : \"#\",\n \"pointer\" : \"/properties/oneOf\"\n },\n \"domain\" : \"syntax\",\n \"message\" : \"JSON value is of type array, not a JSON Schema (expected an object)\",\n \"found\" : \"array\"\n} ]",
"info" : "other messages follow (if any)"
}, {
"level" : "error",
"schema" : {
"loadingURI" : "#",
"pointer" : "/properties/oneOf"
},
"domain" : "syntax",
"message" : "JSON value is of type array, not a JSON Schema (expected an object)",
"found" : "array"
} ]
Try this:
{
"description" : "schema validating people and vehicles",
"type" : "object",
"oneOf" : [
{
"type" : "object",
"properties" : {
"firstName" : {
"type" : "string"
},
"lastName" : {
"type" : "string"
},
"sport" : {
"type" : "string"
}
}
},
{
"type" : "object",
"properties" : {
"vehicle" : {
"type" : "string"
},
"price" : {
"type" : "integer"
}
},
"additionalProperties":false
}
]
}
oneOf needs to be used inside a schema to work.
Inside properties, it is treated as just another property named "oneOf", without the effect you want.