Combine multiple JSON files, and parse into CSV - json

I have about 100 JSON files, all titled with different dates and I need to merge them into one CSV file that has headers "date", "real_name", "text".
There are no dates listed in the JSON itself, and the real_name is nested. I haven't worked with JSON in a while and am a little lost.
The basic structure of the JSON looks more or less like this:
Filename: 2021-01-18.json
[
{
"client_msg_id": "xxxx",
"type": "message",
"text": "THIS IS THE TEXT I WANT TO PULL",
"user": "XXX",
"user_profile": {
"first_name": "XXX",
"real_name": "THIS IS THE NAME I WANT TO PULL",
"display_name": "XXX",
"is_restricted": false,
"is_ultra_restricted": false
},
"blocks": [
{
"type": "rich_text",
"block_id": "yf=A9",
}
]
}
]
So far I have
import glob
read_files = glob.glob("*.json")
output_list = []
all_items = []
for f in read_files:
with open(f, "rb") as infile:
output_list.append(json.load(infile))
data = {}
for obj in output_list[]
data['date'] = f
data['text'] = 'text'
data['real_name'] = 'real_name'
all_items.append(data)

Once you've read the JSON object, just index into the dictionaries for the data. You might need obj[0]['text'], etc., if your JSON data is really in a list in each file, but that seems odd and I'm assuming your data was pasted from output_list after you'd collected the data. So assuming your file content is exactly like below:
{
"client_msg_id": "xxxx",
"type": "message",
"text": "THIS IS THE TEXT I WANT TO PULL",
"user": "XXX",
"user_profile": {
"first_name": "XXX",
"real_name": "THIS IS THE NAME I WANT TO PULL",
"display_name": "XXX",
"is_restricted": false,
"is_ultra_restricted": false
},
"blocks": [
{
"type": "rich_text",
"block_id": "yf=A9",
}
]
}
test.py:
import json
import glob
from pathlib import Path
read_files = glob.glob("*.json")
output_list = []
all_items = []
for f in read_files:
with open(f, "rb") as infile:
output_list.append(json.load(infile))
data = {}
for obj in output_list:
data['date'] = Path(f).stem
data['text'] = obj['text']
data['real_name'] = obj['user_profile']['real_name']
all_items.append(data)
print(all_items)
Output:
[{'date': '2021-01-18', 'text': 'THIS IS THE TEXT I WANT TO PULL', 'real_name': 'THIS IS THE NAME I WANT TO PULL'}]

Related

How to check JSON data in Python

Data sample:
import pandas as pd
patients_df = pd.read_json('C:/MyWorks/Python/Anal/data_sample.json', orient="records", lines=True)
patients_df.head()
//in python
//my json data sample
"data1": {
"id": "myid",
"seatbid": [
{
"bid": [
{
"id": "myid",
"impid": "1",
"price": 0.46328014,
"adm": "adminfo",
"adomain": [
"domain.com"
],
"iurl": "url.com",
"cid": "111",
"crid": "1111",
"cat": [
"CAT-0101"
],
"w": 00,
"h": 00
}
],
"seat": "27"
}
],
"cur": "USD"
},
What I want to do is to check if there is a "cat" value in my very large JSON data.
The "cat" value may/may not exist, but I'm trying to use Python Pandas to check it.
for seatbid in patients_df["win_res"]:
for bid in seatbid["seatbid"]:
I tried to access JSON data while writing a loop like that, but it's not being accessed properly.
I simply want to check if "cat" exist or not.
You can use python's json library as follows:
import json
patient_data = json.loads(patientJson)
if "cat" in student:
print("Key exist in JSON data")
else
print("Key doesn't exist in JSON data")

Save result to a new csv file instead of println Groovy

How can I save result of groovy script to a new file? C:/temp/all1.csv. I want to parse json file to csv, script is working fine but I don't know how can I save result in a new file. Please help.
import groovy.json.*
import java.io.File
def json ='''
{
"expand": "schema,names",
"startAt": 0,
"maxResults": 50,
"total": 21,
"issues": [
{
"expand": "operations,versionedRepresentations",
"id": "217580",
"self": "issue/217580",
"key": "ART-4070",
"fields": {"summary": "#[ART] Pre.3 Verification \\"S\\""}
},
{
"expand": "operations,versionedRepresentations",
"id": "217579",
"self": "issue/217579",
"key": "ART-4069",
"fields": {"summary": "Verification \\"C\\""}
},
{
"expand": "operations,versionedRepresentations",
"id": "217577",
"self": "issue/217577",
"key": "ART-4068",
"fields": {"summary": "#[ART] Enum type"}
}
]
}
'''
File csvFile = new File( 'C:/temp/all1.csv')
def jsonSlurper = new JsonSlurper()
def config = [ // header -> extractor
"key": { it.key },
"summary": { it.fields.summary }
]
def encode(e) { // help with nulls; quote the separator
(e ?: "").replaceAll(";", "\\;")
}
def csvLine(items) { // write items as "CSV"
println(items.collect{ encode it }.join(";"))
}
def obj = new JsonSlurper().parseText(json)
csvLine(config.keySet())
obj.issues.each{ issue ->
csvLine(config.values().collect{ f -> f issue })
}
result:
key;summary
ART-4070;#[ART] Pre.3 Verification "S"
ART-4069;Verification "C"
ART-4068;#[ART] Enum type
To go with the current code, you could use csvFile.append(...) instead of println inside your
csvLine function and depending on your amount of real data, this might
be a good compromise between performance and resource.
Or you can write the whole CSV at once. E.g.
// prepare whole table
def data = [config.keySet()]
data.addAll(
obj.issues.collect{ issue ->
config.values().collect{ f -> f issue }
}
)
// write table as csv
def csvFile = "/tmp/out.csv" as File
csvFile.text = data.collect{
it.collect{ encode it }.join(";")9
}.join("\n")

How to get the the node object or array from JSON based on excel sheet data using groovy?

Lets say I have the following JSON :-
{
"book": [
{
"id": "01",
"language": "Java",
"edition": "third",
"author": "Herbert Schildt"
},
{
"id": "07",
"language": "C++",
"edition": "second",
"author": "E.Balagurusamy"
}
]
}
And, I am passing the value of author from excel sheet to check if that author is present or not. If that author is present inside JSON, then that that particular array node only and remove other from the JSON.
For Example:- I am passing "author" value as "Herbert Schildt" from excel sheet. Now this value is present inside JSON, So, I need this particular array node to be printed and rest all should be removed. Like this:-
{
"book": [
{
"id": "01",
"language": "Java",
"edition": "third",
"author": "Herbert Schildt"
}
]
}
Can it be done using groovy? I have tried with HashMap but couldn't get through.
It's quite easy using groovy:
def text = '''{
"book": [
{
"id": "01",
"language": "Java",
"edition": "third",
"author": "Herbert Schildt"
},
{
"id": "07",
"language": "C++",
"edition": "second",
"author": "E.Balagurusamy"
}
]
}
'''
def result = groovy.json.JsonOutput.toJson(
[book: new groovy.json.JsonSlurper().parseText(text).book.findAll{it.author == "Herbert Schildt"}]
)
println result
You may try this ways json search
var json = '{"book":[{"id":"01","language":"Java","edition":"third","author":"Herbert Schildt"},{"id":"07","language":"C++","edition":"second","author":"E.Balagurusamy"}]}';
var parsed = JSON.parse(json);
var result = {};
result.book = [];
var author = "Herbert Schildt";
parsed.book.map((i, j) => {
if(i.author == author) {
result.book.push(i);
}
});
console.log(result)

How to Get JSON values Python

Learning Days
Code to the get the data in JSON Format
#...
cursor.execute("SELECT * FROM user")
response = {
"version": "5.2",
"user_type": "online",
"user": list(cursor),
}
response = json.dumps(response, sort_keys=False, indent=4, separators=(',', ': '))
print(response)
# ...
This produces output as
{
"version": "5.2",
"user_type": "online",
"user":
[
{
"name": "John",
"id": 50
},
{
"name": "Mark",
"id": 57
}
]
}
print(response["user"]) - TypeError: string indices must be integers
How do i access the values in JSON
json.dumps return a string, need a small conversion something like this, not sure is this the exact method to do
Solution:
response = JSONEncoder().encode(response )
response = JSONDecoder().decode(response )
response = json.loads(response )
print(response['user'[0]['id'])

build a dynamic JSON object from the MONGODB , create a .json file and save the object to it

Lets say we have our MongoDB and we want to backup the data into a .json file example for an output file : database.json and inside :
{
"collections": [
{"name": "admin"},
{"name": "class"},
{"name": "lesson"},
{"name": "message"},
{"name": "room"},
{"name": "student"},
{"name": "subject"},
{"name": "teacher"}
],
"subjects": [
{
"name": "Null",
"color": "#FFFFFF"
},
{
"name": "Design Art",
"color": "#82B9D6"
},
{
"name": "Plastic Art",
"color": "#a3db05"
},
{
"name": "Media And Production",
"color": "#522a64"
}, //...there is a continue to this file ....
}
each collection should be added to the collections and for each collection there should be an array of all the info inside it (like above)
I'm using python 3.4 with the pymongo driver.
What is the best way to get all the info from the DB , create the JSON object and insert it to a new .json file
I found this way of doing it :
import json
from pymongo import MongoClient
from pprint import pprint
def main():
with open('../config/database.json') as database_config:
config = json.load(database_config)
client = MongoClient(config["mongodb"])
db = client[config["database"]]
data = dict()
data["collections"] = db.collection_names()
for collection_name in db.collection_names():
data[collection_name] = get_collection(db, collection_name)
pprint(data)
insert_data_to_file(data)
def get_collection(db, collection_name):
collection_list = []
collection = db[collection_name]
cursor = collection.find({})
for document in cursor:
_id = document.pop('_id')
document['_id'] = str(_id)
collection_list.append(document)
return collection_list
def insert_data_to_file(data):
with open('database.json', 'x') as database:
json.dump(data, database, sort_keys=True)
main()