Python 3: Opening multiple .csv files - csv

I want to open multiple csv files (with same data types/columns), save the data into one variable do some stuff to data and save it into one csv file. While I can easily open one file, I can't seem to find a way to open multiple files. Here is my code:
import numpy as np
import csv
from collections import Counter
# Names of the input files whose rows should be combined.
files = ['11.csv', '12.csv', '13.csv', '14.csv', '15.csv']
# NOTE(review): open() accepts a single path, not a list, so this call raises
# TypeError -- this is the failure the question is about.
with open(files) as csvfile:
info = csv.reader(csvfile, delimiter=',')
info_types = []
records = 0
for row in info:
# The third column of each row is the value being tallied.
records = row[2]
# NOTE(review): call_types is not defined anywhere in this snippet (only
# info_types is) -- confirm which name was intended.
call_types.append(records)
# Count how often each collected value occurs, most frequent first.
stats = Counter(call_types).most_common()
print(stats)
results = stats
# NOTE(review): resultFile is never closed in the lines shown.
resultFile = open("Totals.csv",'w')
wr = csv.writer(resultFile, dialect='excel')
# Each entry of results is written as one output row.
for output in results:
wr.writerow(output)

To make it work, and to make it less bug-prone and more efficient at the same time, try the following.
# required imports
import csv

files = ['11.csv', '12.csv', '13.csv', '14.csv', '15.csv']

# Open the output once and stream every input file into it.  The csv module
# expects its files to be opened with newline='' so that it controls the line
# endings itself (otherwise blank lines can appear between rows on Windows).
with open("outfile", "wt", newline="") as fw:
    writer = csv.writer(fw)
    for file in files:
        with open(file, newline="") as csvfile:
            info = csv.reader(csvfile, delimiter=',')
            for row in info:
                # process row but don't store it
                # in any list if you
                # don't have to (that will defeat the purpose)
                # say you get processed_row
                processed_row = row  # placeholder: replace with the real processing
                writer.writerow(processed_row)

I would do this within a loop, since you are already appending the data as you read from each file.
# Open the files one at a time; open() takes a single path, not a list.
for f in files:
    # newline='' is what the csv module expects of files handed to csv.reader.
    with open(f, newline='') as csvfile:
        # The existing csv.reader loop goes here, indented under the `with`.
        ...

Related

How do I load given CSV data file from a given path? (Python)

My professor has instructed me to
Load 3d_classification_data_v0.csv data file, from path: '../mlrefined_datasets/superlearn_datasets/'
use csv or pandas package for reading csv file.
import csv
# Read the whole CSV: the first line becomes `header`, the rest `rows`.
# Plain ASCII quotes are required around the file name (typographic quotes are
# a SyntaxError), and `with` closes the file once reading is done.
# newline='' is what the csv module expects of files handed to csv.reader.
with open("csvfile.csv", newline="") as file:
    csvreader = csv.reader(file)
    # An empty file has no header line; fall back to [] instead of raising.
    header = next(csvreader, [])
    rows = list(csvreader)

Single CSV output with data in different columns

I have a number of CSV files with data in the first three columns only. I want to copy data from each CSV file and paste it into one single CSV file in column order. For example data from the first CSV file goes into columns 1,2 and 3 in the output file. Similarly, data from the 2nd CSV goes to columns 4,5, and 6 of the same output CSV file and so on. Any help would be highly appreciated. Thanks.
I have tried the following code but it gets me the output in same columns only.
import glob
import pandas as pd
import time
import numpy as np
# Time the whole run.
start = time.time()
# Output file that every input is written into.
Filename='Combined_Data.csv'
extension = 'csv'
# Every *.csv in the current directory is treated as an input.
# NOTE(review): this pattern would also match Combined_Data.csv if it already
# exists in the current directory -- confirm that is not the case.
all_filenames = [i for i in glob.glob('*.{}'.format(extension))]
for i in range(len(all_filenames)):
# The first 23 lines of each input file are skipped before parsing.
data= pd.read_csv(all_filenames[i],skiprows=23)
# Rename the three columns of interest and keep only those.
data= data.rename({'G1': 'CH1', 'G2': 'CH2','Dis': 'CH3'},axis=1)
data= data[['CH1','CH2','CH3']]
# Values that cannot be parsed as numbers become NaN (errors='coerce').
data= data.apply(pd.to_numeric, errors='coerce')
print(all_filenames[i])
# mode='a' appends each file's rows to the end of the output, so all files land
# in the same three columns; only the first file writes a header line.
if i == 0:
data.to_csv(Filename,sep=',',index=False,header=True,mode='a')
else:
data.to_csv(Filename,sep=',',index=False,header=False,mode='a')
end = time.time()
print((end - start),'Seconds(Execution Time)')
If you don't need to write your own code for this, I'd recommend GoCSV's zip command; it can also handle the CSVs having different numbers of rows.
I have three CSV files:
file1.csv
Dig1,Dig2,Dig3
1,2,3
4,5,6
7,8,9
file2.csv
Letter1,Letter2,Letter3
a,b,c
d,e,f
and file3.csv
RomNum1,RomNum2,RomNum3
I,II,III
When I run gocsv zip file2.csv file1.csv file3.csv I get:
Letter1,Letter2,Letter3,Dig1,Dig2,Dig3,RomNum1,RomNum2,RomNum3
a,b,c,1,2,3,I,II,III
d,e,f,4,5,6,,,
,,,7,8,9,,,
GoCSV is pre-built for a number of different OS'es.
Here's how to do it with Python's CSV module, using these files:
file1.csv
Dig1,Dig2,Dig3
1,2,3
4,5,6
7,8,9
file2.csv
Letter1,Letter2,Letter3
a,b,c
d,e,f
and file3.csv
RomNum1,RomNum2,RomNum3
I,II,III
The more-memory-intensive option
This accumulates the final CSV one file at a time, expanding a list that represents the final CSV with each new input CSV.
#!/usr/bin/env python3
import csv
import sys

csv_files = [
    'file2.csv',
    'file1.csv',
    'file3.csv',
]


def zip_tables(tables):
    """Join several tables side by side and return the combined rows.

    ``tables`` is an iterable of tables, each a list of rows (lists of cells).
    The first table fills the leftmost columns, the next table the columns to
    its right, and so on.  Tables may have different numbers of rows; missing
    cells are filled with ``''`` so every output row has the same length.
    The input rows are not modified.
    """
    combined = []
    width = 0  # number of columns accumulated so far
    for rows in tables:
        table_width = max((len(row) for row in rows), default=0)
        # Make sure the result has as many rows as this table
        while len(combined) < len(rows):
            combined.append([])
        for i, out_row in enumerate(combined):
            # Pad on the left first: a row that an earlier, shorter table did
            # not reach must still start this table's cells at column `width`,
            # otherwise the new cells slide into the wrong columns.
            out_row.extend([''] * (width - len(out_row)))
            if i < len(rows):
                out_row.extend(rows[i])
        width += table_width
    # Finally, pad all rows on the right
    for out_row in combined:
        out_row.extend([''] * (width - len(out_row)))
    return combined


def read_rows(path):
    """Return all rows of the CSV file at ``path`` as a list of lists."""
    # newline='' is what the csv module expects of files handed to csv.reader
    with open(path, newline='') as f:
        return list(csv.reader(f))


def main():
    """Zip ``csv_files`` column-wise and write the result to stdout."""
    combined = zip_tables(read_rows(path) for path in csv_files)
    writer = csv.writer(sys.stdout)
    writer.writerows(combined)


if __name__ == '__main__':
    main()
The streaming option
This reads-and-writes a line/row at a time.
(this is basically a Python port of the Go code from GoCSV's zip, from above)
import csv
import sys
from contextlib import ExitStack

fnames = [
    'file2.csv',
    'file1.csv',
    'file3.csv',
]


def zip_rows(readers):
    """Yield side-by-side combined rows from several row iterators.

    ``readers`` is a sequence of iterables of rows (e.g. ``csv.reader``
    objects).  The first row of each is its header and defines how many
    columns that input owns in the output.  One combined row is yielded per
    step, so only one row per input is held in memory at a time.  Inputs may
    have different numbers of rows; an input that has run out contributes
    empty cells.  A data row shorter than its header is padded with ``''`` and
    a longer one is cut to the header width, so columns never shift.
    """
    readers = [iter(reader) for reader in readers]
    num_files = len(readers)

    # Collect "header" lines; each header defines the number
    # of columns for its file (an empty input contributes no columns)
    headers = [next(reader, []) for reader in readers]
    offsets = [0]
    for header in headers:
        offsets.append(offsets[-1] + len(header))
    num_cols = offsets[-1]

    # With all headers counted, every row must have num_cols columns
    yield [cell for header in headers for cell in header]

    # Expect that not all inputs have the same number of rows; some will
    # "finish" ahead of others
    file_is_complete = [False] * num_files
    num_complete = 0

    # Loop a row at a time...
    while True:
        shell_row = [''] * num_cols
        # ... for each input
        for i, reader in enumerate(readers):
            if file_is_complete[i]:
                continue
            try:
                row = next(reader)
            except StopIteration:
                file_is_complete[i] = True
                num_complete += 1
                continue
            start = offsets[i]
            end = offsets[i + 1]
            width = end - start
            # Put this row in its place in the main row, fitted to the header
            # width so the slice assignment cannot resize shell_row
            shell_row[start:end] = (list(row) + [''] * width)[:width]
        if num_complete == num_files:
            break
        # Done iterating inputs (for this row), hand it out
        yield shell_row


def main():
    """Zip ``fnames`` column-wise and stream the result to stdout."""
    # ExitStack closes every opened input, even if reading fails part-way
    with ExitStack() as stack:
        readers = [
            csv.reader(stack.enter_context(open(fname, newline='')))
            for fname in fnames
        ]
        writer = csv.writer(sys.stdout)
        writer.writerows(zip_rows(readers))


if __name__ == '__main__':
    main()
For either, I get:
Letter1,Letter2,Letter3,Dig1,Dig2,Dig3,RomNum1,RomNum2,RomNum3
a,b,c,1,2,3,I,II,III
d,e,f,4,5,6,,,
,,,7,8,9,,,

csv file isn't saved in different directory in python

My code reads a bunch of json files from a directory and extract "frequency" and "attenuation" data from those files and write to a csv file. Now I want to save that csv file in a different directory. The code executes without any error but saves in the current directory. Can anyone help to resolve this issue?
import csv
import glob
import json
import os
# Values used to build the output file name and to select JSON entries.
site = 'alpha'
frequency_to_check = '196050.000'
# Collect every *.json file in the input directory.
json_dir_name = 'V:/temp/test/'
json_pattern = os.path.join(json_dir_name, '*.json')
total_files = glob.glob(json_pattern)
# NOTE(review): atten and timestamp are not used in the lines shown.
atten = []
timestamp = []
# Directory the CSV is meant to end up in; created if it does not exist yet.
save_path = 'V:/python/result/'
if not os.path.isdir(save_path):
os.makedirs(save_path)
filename = f'{site}-{frequency_to_check}.csv'
# NOTE(review): the file is opened by its bare name, i.e. relative to the
# current working directory; save_path is not part of this path.
with open(filename, 'w', newline='') as csv_file:
for file in total_files:
with open(file) as json_file:
output_json = json.load(json_file)
# Write one row for every entry whose 'start-freq' matches.
for key in output_json:
if key['start-freq'] == frequency_to_check:
csv.writer(csv_file).writerow([key['start-freq'], key['attenuation']])
# NOTE(review): save_file is computed here but never used afterwards.
save_file = os.path.join(save_path, filename)
csv_file.close()
print(f'Total files processed {len(total_files)}')
The issue as far as I can deduce is here :
csv.writer(csv_file).writerow([key['start-freq'], key['attenuation']])
csv_file is the file object you already opened under the bare filename, and every time this line is executed you are just writing rows to that already-open file. After that you are just creating a new path:
save_file = os.path.join(save_path, filename)
which is never really used, as you close the file right after.
To fix this, I would suggest that you build save_file first and open that path as the CSV file:
import csv
import glob
import json
import os
# Values used to build the output file name and to select JSON entries.
site = 'alpha'
frequency_to_check = '196050.000'

# Collect every *.json file in the input directory.
json_dir_name = 'V:/temp/test/'
json_pattern = os.path.join(json_dir_name, '*.json')
total_files = glob.glob(json_pattern)

# Carried over from the original script; not used below.
atten = []
timestamp = []

save_path = 'V:/python/result/'
# exist_ok=True replaces the separate isdir() check, so there is no window
# between checking for the directory and creating it.
os.makedirs(save_path, exist_ok=True)

filename = f'{site}-{frequency_to_check}.csv'
# Build the full output path BEFORE opening the file, so the CSV is created
# inside save_path instead of the current working directory.
save_file = os.path.join(save_path, filename)

with open(save_file, 'w', newline='') as csv_file:
    # One writer for the whole file instead of a new one per row.
    writer = csv.writer(csv_file)
    for file in total_files:
        with open(file) as json_file:
            output_json = json.load(json_file)
        # Write one row for every entry whose 'start-freq' matches.
        for key in output_json:
            if key['start-freq'] == frequency_to_check:
                writer.writerow([key['start-freq'], key['attenuation']])
# No explicit close() needed: the with-statement has already closed csv_file.

print(f'Total files processed {len(total_files)}')
I guess this should work.

Convert multiple csv files to json using python

I am trying to convert the csv files in a folder to a single json file. The code below does the job, but the issue is that the json file has the first csv written several times. Below is the code I tried. I guess I am going wrong with assigning the data variable. Please help me fix it.
import csv, json, os
# Folder holding the input files; only names ending in .csv are picked up.
dir_path = 'C:/Users/USER/Desktop/output_files'
inputfiles = [file for file in os.listdir(dir_path) if file.endswith('.csv')]
outputfile = "data_backup1.json"
for file in inputfiles:
filepath = os.path.join(dir_path, file)
data = {}
with open(filepath, "r") as csvfile:
reader = csv.DictReader(csvfile)
# Each row is stored under its 'ID' value; a repeated ID overwrites the
# earlier row.
for row in reader:
id = row['ID']
data[id] = row
# NOTE(review): indentation is not visible here. "a" appends, so every time
# this write runs, the current contents of `data` are added to the file again;
# how often it runs depends on which loop it sits in -- confirm.
with open(outputfile, "a") as jsonfile:
jsonfile.write(json.dumps(data, indent=4))
Expected output: Json file needs to have each csv written only once into it.
If your .csv files and all of their rows have different ['ID'] values, your assigned dictionary keys will be unique. In that case, your dictionary grows by one entry per .csv row read.
You have to change the indentation of the jsonfile.write() call as shown below to produce just one .json file. To sort your entries, you could add sort_keys=True to this call.
# Collect the rows of EVERY csv file into one dictionary.  It is created once,
# before the loop, so rows from earlier files are not thrown away when the
# next file is read.
data = {}
for file in inputfiles:
    filepath = os.path.join(dir_path, file)
    # newline="" is what the csv module expects of files it reads.
    with open(filepath, "r", newline="") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            # Rows are keyed by their 'ID' column; a repeated ID overwrites
            # the earlier row.
            data[row['ID']] = row

# Write exactly once, after all files have been read.  Mode "w" (not "a"):
# appending a second JSON document to an existing file would make the file
# invalid JSON when the script is run again.
with open(outputfile, "w") as jsonfile:
    jsonfile.write(json.dumps(data, indent=4, sort_keys=True))

Download files from FTP or HTTP using filenames in CSV - Python 3

I have a csv file of products for an ecommerce site I'm working on, as well as FTP access to the corresponding images for each product (~15K products).
I would like to use Python to pull only the images listed in the csv from either the FTP or HTTP and save them locally.
import urllib.request
import urllib.parse
import re
# Download one hard-coded image and save it under the last component of the
# URL path.
url='http://www.fakesite.com/pimages/filename.jpg'
split = urllib.parse.urlsplit(url)
filename = split.path.split("/")[-1]
urllib.request.urlretrieve(url, filename)
print(filename)
# NOTE(review): the downloaded file is opened for reading and closed again
# without being read.
saveFile = open(filename,'r')
saveFile.close()
import csv
# Print the value at column index 14 of every row of test.csv.
with open('test.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=",")
# NOTE(review): images is created but nothing is appended to it in this snippet.
images = []
for row in readCSV:
image = row[14]
print(image)
The code I have currently can pull the filename from the URL and save the file as that filename. It can also pull the filename of the image from the CSV file. (filename and image are the exact same) What I need it to do, is input the filename, from the CSV into the end of the URL, and then save that file as the filename.
I have graduated to this:
import urllib.request
import urllib.parse
import re
import os
import csv
# Collect the value at column index 14 of every row of test.csv.
with open('test.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=",")
images = []
for row in readCSV:
image = row[14]
images.append(image)
# NOTE(review): indentation is not visible here. If the lines from here on sit
# outside the for loop they run only once, with `image` holding the value from
# the last row -- which would match the behaviour described below.
x ='http://www.fakesite.com/pimages/'
# NOTE(review): os.path.join uses the operating system's path separator, which
# is not necessarily '/' -- confirm this is intended for building a URL.
url = os.path.join (x,image)
split = urllib.parse.urlsplit(url)
filename = split.path.split("/")[-1]
urllib.request.urlretrieve(url,filename)
# NOTE(review): the downloaded file is opened for reading and closed again
# without being read.
saveFile = open(filename,'r')
saveFile.close()
Now this is great. It works perfectly. It is pulling the correct filename out of the CSV file, adding it on to the end of the URL, downloading the file, and saving it as the filename.
However, I can't seem to figure out how to make this work for more than one line of the CSV file. As of now, it takes that last line, and pulls the relevant information. Ideally, I would use the CSV file with all of the products on it, and it would go through and download every single one, not just the last image.
You're doing strange things ...
import urllib.request
import csv
# the images list should be outside the with block
images = []
IMAGE_COLUMN = 14
# newline='' is what the csv module expects of files handed to csv.reader
with open('test.csv', newline='') as csvfile:
    # read csv
    readCSV = csv.reader(csvfile, delimiter=",")
    for row in readCSV:
        # I guess 14 is the column-index of the image-name like 'image.jpg'
        # I've put it in some constant
        # blank or short rows have no such column (and an empty cell names no
        # file); skip them instead of crashing with IndexError
        if len(row) <= IMAGE_COLUMN or not row[IMAGE_COLUMN]:
            continue
        # now append all the image-names into the list
        images.append(row[IMAGE_COLUMN])
        # no need for the following
        # image = row[14]
        # images.append(image)
# make sure, root_url ends with a slash
# x was some strange name for an url
root_url = 'http://www.fakesite.com/pimages/'
# iterate through the list
for image in images:
    # you don't need os.path.join, because that's operating system dependent.
    # you don't need to urlsplit, because you have created the url yourself.
    # you don't need to split the filename as it is the image name
    # with the following line, the root_url must end with a slash
    url = root_url + image
    # urlretrieve saves the file as whatever image is into the current directory
    urllib.request.urlretrieve(url, image)
or in short, that's all you need:
import urllib.request
import csv
IMAGE_COLUMN = 14  # column index of the image file name in test.csv
ROOT_URL = 'http://www.fakesite.com/pimages/'  # must end with a slash

# Collect every image name first, then download them one by one.
images = []
with open('test.csv', newline='') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=",")
    for row in readCSV:
        # Skip blank/short rows and empty cells instead of raising IndexError.
        if len(row) > IMAGE_COLUMN and row[IMAGE_COLUMN]:
            images.append(row[IMAGE_COLUMN])

for image in images:
    url = ROOT_URL + image
    # Saved into the current directory under the image's own name.
    urllib.request.urlretrieve(url, image)