How to read Json file and convert it to dataframe - json

I was trying to read a JSON file and convert it to a DataFrame, but I am having difficulties here as I don't have much knowledge of this.
from pandas.io.json import json_normalize
import pandas as pd
import json
Path = "NavigatorInstances.json"
with open(Path, 'r') as myfile:
data= myfile.read()
data = json.loads(data)
df = pd.DataFrame.from_dict(json_normalize(data))
I am getting the following error: json.decoder.JSONDecodeError: Expecting value: line 2 column 13 (char 15)
My JSON Sample data looks like below
{
"_id" : ObjectId("5ecfe5a0f9fcb510c8ec51e5"),
"RNI_Corps" : {
"MetaData" : {
"TaxonomyName" : "RN.Corps",
"InstanceName" : "Ratings Navigator Instance Corps",
"ThisMongoObjectId" : "5ecfe5a0f9fcb510c8ec51e5",
"ThisObjectShortId" : "37187",
"ReplacedMongoObjectId" : "",
"ThreadIDs" : "5ecfe5a0f9fcb510c8ec51e5",
"CurrentWF_Activity" : "Drafted",
"CurrentWF_ActivityDate" : "2020-05-28 16:23:53",
"VersionID" : {
"Tool" : "RN",
"Group" : "Corps",
"Sector" : "GenCos",
"Version" : "2.8.1.1"
},
"Cart" : {
"Id" : null,
"Status" : null,
"StatusDate" : null,
"Locked" : null
},
"Effective" : {
"Date" : null,
"Reason" : null,
"Source" : {
"SystemName" : null,
"SystemId" : null,
"EventType" : null
}
},
"Criteria" : {
"Id" : "10123001",
"Name" : "Exposure Draft: Sector Navigators",
"Date" : "2020-05-20 00:00:00"
},
"InstanceFileInfo" : {
"MongoObjectId" : "5ecfe5a0f9fcb510c8ec51df",
"MD5CheckSum" : "2d28eabe1a046f76e17a58cca6c386f1",
"Name" : "RN_2_8_1_1_96781051_2020_05_28_1.xlsm",
"SavedDate" : "2020-05-28 16:24:00"
},
"EntityInfo" : {
"AgentID" : NumberInt(1507132),
"AgentName" : "AES Mexico Generation Holdings, S. de R.L. de C.V.",
"NicknameID" : null,
"Nickname" : null,
"IssuerID" : NumberInt(96781051),
"IssuerName" : "AES Mexico Generation Holdings, S. de R.L. de C.V.",
"Region" : "Emerging Markets - Americas",
"CountryName" : "Mexico",
"Sovereign_Agent_ID" : null,
"Sector" : "GenCos"
},
Please help me to understand how I can convert the JSON data into a readable format such as pandas dataframe

Your sample is not valid standard JSON (or is it only a sample?).
ObjectId(...) is neither a string nor a number, so the JSON parser fails on it; the same applies to NumberInt(...).
I think you can try https://www.json.cn to verify your JSON file first. This error comes from parsing the JSON, not from the DataFrame.

Related

JSON file to CSV file conversion using jq

I am trying to convert my json file to a csv file using jq. Below is the sample input events.json file.
{
"took" : 111,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "alerts",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"alertID" : "639387c3-0fbe-4c2b-9387-c30fbe7c2bc6",
"alertCategory" : "Server Alert",
"description" : "Successfully started.",
"logId" : null
}
},
{
"_index" : "alerts",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"alertID" : "2",
"alertCategory" : "Server Alert",
"description" : "Successfully stoped.",
"logId" : null
}
}
]
}
}
My rows in csv should have the data inside each _source tag. So my columns would be alertId , alertCategory , description and logId with its respective data.
I tried the below command :
jq --raw-output '.hits[] | [."alertId",."alertCategory",."description",."logId"] | @csv' < /root/events.json
and it's not working.
Can anyone help me with this?
Your path expression is not right: you have a hits array inside an object named hits, and the fields you are trying to put in the CSV are present under the _source object.
So your expression should be the one below. Use it along with the -r flag to get raw output.
.hits.hits[]._source | [ .alertID, .alertCategory, .description, .logId ] | @csv
If your fields are null, the string representation of your null field value results in just "". If you want an explicit "null" string representation, use the alternative operator (//) along with the field you expect to be null, e.g. instead of .logId, you can do (.logId // "null")
To add the column names as a header in the output CSV, you could use @csv or the join(",") function together with the raw output flag -r
[ "alertId" , "alertCategory" , "description", "logId" ],
( .hits.hits[]._source | [ .alertID, .alertCategory, .description, .logId // "null" ]) | @csv
or
[ "alertId" , "alertCategory" , "description", "logId" ],
( .hits.hits[]._source | [ .alertID, .alertCategory, .description, .logId // "null" ]) | join(",")

Sending array json body with arrays inside of it (Alamofire, Swift, iOS)

I want to send a JSON body to my middleware. At first it worked well (note that "tema" & "emailGroup" weren't arrays then), but after some changes on my backend, I have to send this type of JSON:
[{
"ID": "",
"Name": "Artikel BU CE - Visit HoB Topic",
"ChannelType": 0,
"PublishDate": "2018-09-21T01:00:00Z",
"Headline": null,
"Content": null,
"EmailSubject": null,
"EmailUrl": null,
"Mention": null,
"PostLink": null,
"ChannelActivityMobileId": null,
"HashTag": null,
"Tema": [
{
"Value": 6
}
],
"EmailGroup": [
{
"ID": "2c53ea1f-6ebe-e811-a977-000d3aa00fc2",
"Name": "TV Broadcast",
"List_EmailListModels": null
}
],
"ApprovalStatus": 0,
"ApprovalNote": null,
"EmployeeId": null,
"EmployeeLevel": 0
}]
here's my code
let parameters = [["ID" : "", SerializationKeys.channelMobileId : channel.mobileId, SerializationKeys.name : activity.activityName, "ApprovalStatus" : channel.channelStatus, SerializationKeys.channelType : channel.channelType, SerializationKeys.publish_date : channel.publishDate, SerializationKeys.content : channel.content, SerializationKeys.emailSubject : channel.emailSubject, SerializationKeys.emailURL : channel.emailURL, SerializationKeys.hashtag : channel.hastag, SerializationKeys.mention : channel.mention, SerializationKeys.note : channel.note, SerializationKeys.postLink : channel.postLink, SerializationKeys.tema : [tema]] as [[String : Any]]
where tema is
var tema = [String : Int]()
I got an error that says
[Any] is not convertible to '[[String : Any]]'; did you mean to use as! to force downcast?
and after I changed "as" to "as!", it says
Expected ";" separator
where the semicolon should be put near "SerializationKeys.tema : [tema]". Please kindly help me. Thanks
EDIT
I think you mistakenly added ] near
, SerializationKeys.postLink : channel.postLink],
You may want this
let parameters:[[String:Any]] = [["ID" : "",
SerializationKeys.channelMobileId : channel.mobileId,
SerializationKeys.name : activity.activityName,
"ApprovalStatus" : channel.channelStatus,
SerializationKeys.channelType : channel.channelType,
SerializationKeys.publish_date : channel.publishDate,
SerializationKeys.content : channel.content,
SerializationKeys.emailSubject : channel.emailSubject,
SerializationKeys.emailURL : channel.emailURL,
SerializationKeys.hashtag : channel.hastag,
SerializationKeys.mention : channel.mention,
SerializationKeys.note : channel.note,
SerializationKeys.postLink : channel.postLink,
SerializationKeys.tema : [tema]
]]
Note: I highly recommend using Codable with struct models for your case.

Ideas how to store code snippets/objects (not json) in mongodb?

So let's say I am converting CSV to JSON in Node.js, and mapping between the fields found in the customer's input file and the object that my platform requires, such as:
var objInvoice = {
"id" : guid.create().value,
"financingType" : "scf",
"batchId" : batchId,
"recordTypeCode" : inv.kayittipi,
"buyerReference" : result[0].tedarikcireferans,
"supplierReference" : inv.tedarikcireferans,
"invoiceDate" : moment(inv.faturatarih +"-140000" , "YYMMDD-HHmmss").toISOString(),
"certifiedInvoiceAmount" : inv.odenecekfaturatutar,
"currencyCode" : inv.parabirimi,
"maturityDate" : moment(inv.orjvadetarihi +"-140000" , "YYMMDD-HHmmss").toISOString(),
"correctedMaturityDate" : moment(inv.orjvadetarihi +"-140000" , "YYMMDD-HHmmss").toISOString(),
"originalInvoiceAmount" : inv.orjfaturatutar,
"invoiceNo" : inv.faturano,
"invoiceSerialNo" : inv.faturaserino,
"noterizedDocNo" : inv.belgenoteryevmiyeno,
"hashCode" : inv.hashCode,
"forecastDate" : inv.fiilivadetarihi,
"supplierTaxId" : inv.tedarikcivkn,
"supplierName" : inv.tedarikciadi,
"invoiceType" : inv.faturatipi,
"buyerAccountNumber" : inv.aliciiban,
"buyerTaxId" : inv.alicivkn,
"fiReference" : inv. bankakodu,
"bankBranchCode" : inv.bankasubeno,
"invoiceUploadStatus" : inv.dosyakayitstatu,
"gwCode" : "111110000000",
"gwCodeExplanation" : "",
"invoiceDiscountDate" : null,
"additionalInformation1" : inv.aciklama1,
"additionalInformation2" : inv.aciklama2,
"additionalInformation3" : inv.aciklama3,
"lastModificationDate" : null,
"newInvoiceId" : "00000000-0000-0000-0000-000000000000",
"oldInvoiceId" : "00000000-0000-0000-0000-000000000000",
"supplierIban" : "",
"supplierBankName" : "",
"customUniqueId" : "",
"deductionReason" : "",
"paymentObligationNo" : null,
"deductionAmount" : "",
"sellAmount" : "",
"invoiceAmountWithoutVat" : "",
"uploadDate" : now.toISOString(),
}
But customer2 may have differently positioned fields, or even different headers in the file, such as:
var objInvoice = {
"id" : guid.create().value,
"financingType" : "scf",
"batchId" : batchId,
"recordTypeCode" : inv.element1,
"buyerReference" : result[0].element2,
"supplierReference" : inv.element2,
"invoiceDate" : moment(inv.element3 +"-140000" , "YYMMDD-HHmmss").toISOString(),
"certifiedInvoiceAmount" : inv.element4,
"currencyCode" : inv.element5,
"maturityDate" : moment(inv.element6 +"-140000" , "YYMMDD-HHmmss").toISOString(),
"correctedMaturityDate" : moment(inv.element6 +"-140000" , "YYMMDD-HHmmss").toISOString(),
"originalInvoiceAmount" : inv.element7,
"invoiceNo" : inv.element8,
"invoiceSerialNo" : inv.element9,
"noterizedDocNo" : inv.element10,
"hashCode" : inv.element11,
"forecastDate" : inv.element12,
"supplierTaxId" : inv.element13,
"supplierName" : inv.element14,
"invoiceType" : inv.element15,
"buyerAccountNumber" : inv.element16,
"buyerTaxId" : inv.element17,
"fiReference" : inv. element18,
"bankBranchCode" : inv.element19,
"invoiceUploadStatus" : inv.element20,
"gwCode" : "111110000000",
"gwCodeExplanation" : "",
"invoiceDiscountDate" : null,
"additionalInformation1" : inv.element21,
"additionalInformation2" : inv.element22,
"additionalInformation3" : inv.element23,
"lastModificationDate" : null,
"newInvoiceId" : "00000000-0000-0000-0000-000000000000",
"oldInvoiceId" : "00000000-0000-0000-0000-000000000000",
"supplierIban" : "",
"supplierBankName" : "",
"customUniqueId" : "",
"deductionReason" : "",
"paymentObligationNo" : null,
"deductionAmount" : "",
"sellAmount" : "",
"invoiceAmountWithoutVat" : "",
"uploadDate" : now.toISOString(),
}
So I was wondering whether it's possible to store each 'mapping' per customer in MongoDB, and pull the correct objInvoice via a customer reference?
The issue is that objInvoice cannot be stored as a JSON document, since it contains variables such as inv.element1.
I've also tried to store it as one big string (I tried with {} and without {}). I was able to store it fine, but when I pulled it into the code and parsed it back, the variables didn't get evaluated as expected.
Any ideas, fellow good programmers?
Or is the only way to store those in separate JS files per customer?
Unfortunately, your data is not even valid Javascript Objects or JSON.
An idea is to convert values, like moment(inv.orjvadetarihi +"-140000" , "YYMMDD-HHmmss").toISOString() which are not valid, into strings (escaping correctly). As a result you will be able to JSON.stringify and JSON.parse. The strings can then be evaluated in your code.
Preprocessing your data to escape non-string values into strings certainly works. You may use regexes.

R JSON File to Dataframe using tidyjson

I exported a JSON file from MongoDB in the format below. I'm trying to create a dataframe from it, but I can't seem to get tidyjson to read it, as it throws this error.
Error: lexical error: invalid char in json text.
{ "_id" : ObjectId("586e684427a06a4a658fa
(right here) ------^
I used read_json("file.json")
The file is below
{
"_id" : ObjectId("586e684427a06a4a658fa28e"),
"expires_in" : ISODate("2016-11-19T22:16:57.418+0000"),
"job_type" : "Satellite Sales & Service",
"inbound_id" : ObjectId("586e68440c83945fb2658754"),
"created_at" : ISODate("2017-01-05T15:37:40.850+0000"),
"action_states" : [
{
"_id" : ObjectId("586e684627a06a4a658fa293"),
"transition_duration" : NumberInt(0),
"name" : Symbol("created"),
"actor" : "home_owner",
"created_at" : ISODate("2017-01-05T15:37:42.297+0000")
},
{
"_id" : ObjectId("586e68ad0c83945fb2658825"),
"transition_duration" : NumberInt(1),
"name" : Symbol("accepted"),
"reason" : null,
"actor" : "contractor",
"created_at" : ISODate("2017-01-05T15:39:25.924+0000")
}
]
}
{
"_id" : ObjectId("586e675d27a06a4a658fa264"),
"expires_in" : ISODate("2016-11-19T22:16:57.418+0000"),
"job_type" : "Satellite Sales & Service",
"inbound_id" : ObjectId("586e675d0c83945fa2f6e190"),
"created_at" : ISODate("2017-01-05T15:33:49.934+0000"),
"action_states" : [
{
"_id" : ObjectId("586e675f27a06a4a658fa267"),
"transition_duration" : NumberInt(0),
"name" : Symbol("created"),
"actor" : "home_owner",
"created_at" : ISODate("2017-01-05T15:33:51.097+0000")
},
{
"_id" : ObjectId("586e694c0c83945faae36559"),
"transition_duration" : NumberInt(8),
"name" : Symbol("accepted"),
"reason" : null,
"actor" : "contractor",
"created_at" : ISODate("2017-01-05T15:42:04.116+0000")
}
]
}
This error occurs because MongoDB produces an "extended" JSON format. You should try to export your data in 'strict' JSON mode using, for example, mongoexport (https://docs.mongodb.com/manual/reference/mongodb-extended-json/).

Spark - Scala : Read json file as dataframe doesn't work when json data is spread across multiple lines?

Json Data:
{ "blogID" : "FJY26J1333", "date" : "2012-04-01",
"name" : "vpxnksu", "comment" : "good stuff"}
{"blogID" : "VSAUMDFGSD", "date" : "2012-04-12", "name" : "yhftrcx", "comment" : "another comment"}
Code:
val dataFrame=sqlContext.read.json("sample.json")
dataFrame.show()
Output:
_corrupt_record blogID comment date name
{ "blogID" : "FJY... null null null null
"name" : "vpxnksu... null null null null
null VSAUMDFGSD another comment 2012-04-12 yhftrcx
How can I read it as two records?
Make sure it's one JSON object per line in the source file, like this:
{ "blogID" : "FJY26J1333", "date" : "2012-04-01", "name" : "vpxnksu", "comment" : "good stuff"}
{ "blogID" : "VSAUMDFGSD", "date" : "2012-04-12", "name" : "yhftrcx", "comment" : "another comment"}