Convert multiple csv files to json using python - json

I am trying to convert the CSV files in a folder into a single JSON file. The code below does the job, but the issue is that the JSON file has the first CSV written several times. Below is the code I tried. I suspect I am going wrong where I assign the data variable. Please help me fix it.
# Merge the rows of every CSV file found in dir_path into a JSON output file,
# keyed by each row's 'ID' column.
# NOTE(review): the indentation of this listing was lost, so the nesting of the
# loops and with-blocks below cannot be confirmed from here.
import csv, json, os
dir_path = 'C:/Users/USER/Desktop/output_files'
# Keep only the directory entries whose name ends in '.csv' (bare names, no directory part).
inputfiles = [file for file in os.listdir(dir_path) if file.endswith('.csv')]
outputfile = "data_backup1.json"
for file in inputfiles:
# os.listdir returns bare names, so the directory is joined back on before opening.
filepath = os.path.join(dir_path, file)
data = {}
with open(filepath, "r") as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
# Rows sharing the same 'ID' overwrite each other in data.
id = row['ID']
data[id] = row
# Mode "a" appends: every execution of this write adds another JSON document
# to the end of outputfile instead of replacing its content.
with open(outputfile, "a") as jsonfile:
jsonfile.write(json.dumps(data, indent=4))
Expected output: Json file needs to have each csv written only once into it.

If your .csv files and all of their rows have different ['ID'] values, the dictionary keys you assign will be unique. In that case, your dictionary grows by one entry per .csv row that the reader returns.
You have to change the indentation of the jsonfile.write() function as shown below to produce just one .json file. To sort your entries you could add sort_keys=True in this function.
# Collect the rows of every CSV file into ONE dictionary and write it out ONCE.
# `data` is created before the loop so that it keeps growing across files
# instead of being reset for each CSV.
data = {}
for file in inputfiles:
    filepath = os.path.join(dir_path, file)
    # newline="" is what the csv module expects for files it reads.
    with open(filepath, "r", newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            # Key each row by its 'ID' column; a repeated ID overwrites the
            # earlier row, so every ID appears only once in the output.
            data[row['ID']] = row

# The write sits outside the loop, so exactly one JSON document is produced.
# Mode "w" (not "a") replaces any previous output; appending would stack
# several JSON documents in one file, which is not valid JSON.
with open(outputfile, "w") as jsonfile:
    jsonfile.write(json.dumps(data, indent=4, sort_keys=True))

Related

Using a CSV file with file path info (file's original location ending with .jpg, destination ending with .jpg) to copy files

I have a csv file with location information of images in 1st column
and
a destination location information of images in 2nd column
I was able to move one image to location with code
# Copy a single image file from one path to another.
import shutil
# Despite the "_folder" names, both values are full file paths ending in .jpg.
source_folder = r"C:/Users/JJ/Desktop/test/images/AoF06978.jpg"
destination_folder = r"C:/Users/JJ/Desktop/new_db/test_fire/AoF06978.jpg"
# shutil.copyfile copies the file content from the first path to the second.
shutil.copyfile(source_folder, destination_folder)
However, when I tried to move all the files using the CSV file, I had no clue how to do it:
# Copy files listed in a CSV file: column 0 of a row is used as the source
# path and column 1 as the destination path.
# NOTE(review): the indentation of this listing was lost; presumably the copy
# is meant to run once per row — confirm against the original post.
import csv, shutil
filename = 'C:/Users/JJ/Desktop/test.csv'
with open(filename, 'r') as csvfile:
datareader = csv.reader(csvfile)
for row in datareader:
# a = source path, b = destination path
a= row[0]
b =row[1]
shutil.copyfile(a, b)
and I got an error: iterator should return strings, not bytes
Assuming that your .csv file has a header, you can skip that first row, then read the two columns (origin and destination) from each remaining row, and finally use shutil.copy to copy the images:
import csv
import shutil

# Copy every file listed in the CSV: first column = origin path,
# second column = destination path.
# newline="" is what the csv module expects for files it reads.
with open("C:/Users/JJ/Desktop/test.csv", "r", newline="") as f:
    reader = csv.reader(f)
    # Skip the header row; the None default keeps an empty file from raising.
    next(reader, None)
    for row in reader:
        # Blank lines come back as empty lists and rows may be incomplete;
        # skip them instead of crashing on the unpacking below.
        if len(row) < 2:
            continue
        # Only the first two columns are used, so extra columns are harmless.
        origin, destination = row[:2]
        shutil.copy(origin, destination)

How do I load given CSV data file from a given path? (Python)

My professor has instructed me to
Load 3d_classification_data_v0.csv data file, from path: '../mlrefined_datasets/superlearn_datasets/'
use csv or pandas package for reading csv file.
import csv

# Read a CSV file into `header` (the first row) and `rows` (all other rows).
# The with-block closes the file even if reading fails; newline="" is what the
# csv module expects for files it reads.
with open("csvfile.csv", newline="") as file:
    csvreader = csv.reader(file)
    # The [] default keeps an empty file from raising StopIteration.
    header = next(csvreader, [])
    # Everything after the header, one list of strings per row.
    rows = list(csvreader)

csv file isn't saved in different directory in python

My code reads a bunch of JSON files from a directory, extracts the "frequency" and "attenuation" data from those files, and writes it to a CSV file. Now I want to save that CSV file in a different directory. The code executes without any error, but the file is saved in the current directory. Can anyone help me resolve this issue?
# Scan every JSON file in json_dir_name and write the 'start-freq' and
# 'attenuation' values of entries matching frequency_to_check to a CSV file.
# NOTE(review): the indentation of this listing was lost, so the exact nesting
# of the loops and with-blocks cannot be confirmed from here.
import csv
import glob
import json
import os
site = 'alpha'
frequency_to_check = '196050.000'
json_dir_name = 'V:/temp/test/'
json_pattern = os.path.join(json_dir_name, '*.json')
total_files = glob.glob(json_pattern)
# atten and timestamp are not used anywhere else in this listing.
atten = []
timestamp = []
save_path = 'V:/python/result/'
# Create the target directory if it does not exist yet.
if not os.path.isdir(save_path):
os.makedirs(save_path)
filename = f'{site}-{frequency_to_check}.csv'
# NOTE: filename has no directory component, so this open() creates the file
# relative to the current working directory, not inside save_path.
with open(filename, 'w', newline='') as csv_file:
for file in total_files:
with open(file) as json_file:
output_json = json.load(json_file)
for key in output_json:
# Compared as strings: frequency_to_check is a str literal.
if key['start-freq'] == frequency_to_check:
csv.writer(csv_file).writerow([key['start-freq'], key['attenuation']])
# save_file is computed here but never used afterwards in this listing.
save_file = os.path.join(save_path, filename)
csv_file.close()
print(f'Total files processed {len(total_files)}')
The issue as far as I can deduce is here :
csv.writer(csv_file).writerow([key['start-freq'], key['attenuation']])
csv_file is the file object you already opened, and every time this line is executed you are just writing a row to that open file. After that, you merely build a new path:
save_file = os.path.join(save_path, filename)
which is never actually used, because the file is closed right afterwards.
To fix this, I would suggest building the full path from save_path first and opening the CSV file at that path:
import csv
import glob
import json
import os

# Scan every JSON file in json_dir_name and write the 'start-freq' and
# 'attenuation' values of entries matching frequency_to_check to a CSV file
# stored inside save_path.
site = 'alpha'
frequency_to_check = '196050.000'
json_dir_name = 'V:/temp/test/'
json_pattern = os.path.join(json_dir_name, '*.json')
total_files = glob.glob(json_pattern)
atten = []
timestamp = []
save_path = 'V:/python/result/'
# exist_ok=True replaces the separate isdir() check and avoids a race between
# the check and the creation.
os.makedirs(save_path, exist_ok=True)
filename = f'{site}-{frequency_to_check}.csv'
# Build the full destination path BEFORE opening, so the file is created in
# save_path rather than in the current working directory.
save_file = os.path.join(save_path, filename)
with open(save_file, 'w', newline='') as csv_file:
    # One writer for the whole file instead of a new one for every row.
    writer = csv.writer(csv_file)
    for file in total_files:
        with open(file) as json_file:
            output_json = json.load(json_file)
        for key in output_json:
            # Compared as strings: frequency_to_check is a str literal.
            if key['start-freq'] == frequency_to_check:
                writer.writerow([key['start-freq'], key['attenuation']])
# No explicit close(): the with-block has already closed csv_file.
print(f'Total files processed {len(total_files)}')
I guess this should work.

Convert .csv into .json using python

I am trying to convert a CSV file into a JSON file using Python 3. I keep getting a FileNotFoundError even though the CSV file exists in the directory. Please help me fix the issue. Below is the code I tried. I would also be grateful if anyone could suggest how to export a MongoDB database to a JSON file using Python 3.
# Read every CSV file found in dir_path and store its rows in `data`, keyed by
# each row's 'ID' column.
# NOTE(review): the indentation of this listing was lost, and `data` is not
# defined anywhere in the visible code.
import csv, json, os
#get all csv files from the directory
dir_path = r'C:\Users\USER\Desktop\output_files'
inputfile = [file for file in os.listdir(dir_path) if file.endswith('.csv')]
print(inputfile)
for file in inputfile:
# NOTE: file is a bare name from os.listdir, so this open() resolves it against
# the current working directory rather than dir_path.
with open(file, "r") as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
id = row['ID']
data[id] = row
Writing the files out using this code...
# Append `data` to outputfile as indented JSON.
# NOTE(review): neither outputfile nor data is defined in the visible listing.
with open(outputfile, "a") as jsonfile:
jsonfile.write(json.dumps(data, indent=4))
Produces the following:
['adult_diapers.csv', 'groceries.csv', 'health_supplements.csv', 'mobility_aids.csv']
Here's my error in more detail:
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-17-1aac06308031> in <module>
6 print(inputfile)
7 for file in inputfile:
----> 8 with open(file, "r") as csvfile:
9 reader = csv.DictReader(csvfile)
10 for row in reader:
FileNotFoundError: [Errno 2] No such file or directory: 'adult_diapers.csv'
Is the full path specified? Looks like it's just the filename and not the full path to the file. Add dir_path and use os.path.join() to concatenate the path and the filename as follows:
with open(os.path.join(dir_path, file), "r") as csvfile:
reader = csv.DictReader(csvfile)
And your final code becomes:
import csv
import json
import os

# get all csv files from the directory
dir_path = r'C:\Users\USER\Desktop\output_files'
inputfile = [file for file in os.listdir(dir_path) if file.endswith('.csv')]
print(inputfile)

# Both names are used below but were never defined in the original listing,
# which made the script fail with a NameError. Adjust the output name as needed.
outputfile = "data_backup1.json"
data = {}

for file in inputfile:
    # os.listdir returns bare file names, so join the directory back on;
    # otherwise open() looks in the current working directory.
    with open(os.path.join(dir_path, file), "r", newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            # Key each row by its 'ID' column; a repeated ID overwrites the
            # earlier row.
            data[row['ID']] = row

# Write once, after every CSV has been read. Mode "w" replaces any previous
# output; appending would stack several JSON documents in one file, which is
# not valid JSON.
with open(outputfile, "w") as jsonfile:
    jsonfile.write(json.dumps(data, indent=4))

Python 3: Opening multiple .csv files

I want to open multiple csv files (with same data types/columns), save the data into one variable do some stuff to data and save it into one csv file. While I can easily open one file, I can't seem to find a way to open multiple files. Here is my code:
# Count the values found in the third column of several CSV files and write
# the counts to Totals.csv.
# NOTE(review): the indentation of this listing was lost, so the nesting of the
# statements below cannot be confirmed from here.
import numpy as np
import csv
from collections import Counter
files = ['11.csv', '12.csv', '13.csv', '14.csv', '15.csv']
# NOTE: files is a list here, but open() takes a single path.
with open(files) as csvfile:
info = csv.reader(csvfile, delimiter=',')
info_types = []
records = 0
for row in info:
# Third column of the row.
records = row[2]
# NOTE: call_types is never defined in this listing (info_types is defined
# above but not used).
call_types.append(records)
# most_common() returns (value, count) pairs, most frequent first.
stats = Counter(call_types).most_common()
print(stats)
results = stats
# NOTE: resultFile is not closed anywhere in this listing.
resultFile = open("Totals.csv",'w')
wr = csv.writer(resultFile, dialect='excel')
for output in results:
wr.writerow(output)
To make it work, and to make it less bug-prone and more efficient at the same time, try the following.
# required imports
import csv

files = ['11.csv', '12.csv', '13.csv', '14.csv', '15.csv']

# Stream every input file into a single output file, one row at a time.
# newline="" is what the csv module expects for the files it reads and writes;
# without it, extra blank lines can appear in the output on Windows.
with open("outfile", "wt", newline="") as fw:
    writer = csv.writer(fw)
    for file in files:
        with open(file, newline="") as csvfile:
            info = csv.reader(csvfile, delimiter=',')
            for row in info:
                # Process the row here, but don't store it in any list if you
                # don't have to (that would defeat the purpose of streaming).
                # Placeholder: replace this with your own per-row processing.
                processed_row = row
                writer.writerow(processed_row)
I would do this within a loop. Since you are already appending the data as you are reading from the file.
# Open the input files one at a time; each file is closed again before the
# next one is opened.
for f in files:
    with open(f) as csvfile:
        ...