blank file while copying a file in python - function

I have a function takes a file as input and prints certain statistics and also copies the file into a file name provided by the user. Here is my current code:
def copy_file(option):
infile_name = input("Please enter the name of the file to copy: ")
infile = open(infile_name, 'r')
outfile_name = input("Please enter the name of the new copy: ")
outfile = open(outfile_name, 'w')
slist = infile.readlines()
if option == 'statistics':
for line in infile:
outfile.write(line)
infile.close()
outfile.close()
result = []
blank_count = slist.count('\n')
for item in slist:
result.append(len(item))
print('\n{0:<5d} lines in the list\n{1:>5d} empty lines\n{2:>7.1f} average character per line\n{3:>7.1f} average character per non-empty line'.format(
len(slist), blank_count, sum(result)/len(slist), (sum(result)-blank_count)/(len(slist)-blank_count)))
copy_file('statistics')
It prints the statistics of the file correctly, however the copy it makes of the file is empty. If I remove the readline() part and the statistics part, the function seems to make a copy of the file correctly. How can I correct my code so that it does both. It's a minor problem but I can't seem to get it.

The reason the file is blank is that
slist = infile.readlines()
is reading the entire contents of the file, so when it gets to
for line in infile:
there is nothing left to read and it just closes the newly truncated (mode w) file leaving you with a blank file.
I think the answer here is to change your for line in infile: to for line in slist:
def copy_file(option):
infile_name= input("Please enter the name of the file to copy: ")
infile = open(infile_name, 'r')
outfile_name = input("Please enter the name of the new copy: ")
outfile = open(outfile_name, 'w')
slist = infile.readlines()
if option == 'statistics':
for line in slist:
outfile.write(line)
infile.close()
outfile.close()
result = []
blank_count = slist.count('\n')
for item in slist:
result.append(len(item))
print('\n{0:<5d} lines in the list\n{1:>5d} empty lines\n{2:>7.1f} average character per line\n{3:>7.1f} average character per non-empty line'.format(
len(slist), blank_count, sum(result)/len(slist), (sum(result)-blank_count)/(len(slist)-blank_count)))
copy_file('statistics')
Having said all that, consider if it's worth using your own copy routine rather than shutil.copy - Always better to delegate the task to your OS as it will be quicker and probably safer (thanks to NightShadeQueen for the reminder)!

Related

Python csv data logging doesn't work in while loop

I have been trying to log the data received from the Arduino through USB port and the strange thing is that the code works on my mac just fine but on windows it won't write it. At the start I expected the initial writing "DATA" but it didn't even write that. And when I commented out the entire loop it worked (It says "DATA" in the csv file).
import serial
count = 1
port = serial.Serial('COM4', baudrate=9600, bytesize=8)
log = open("data_log.csv", "w")
log.write("DATA")
log.write("\n")
while 1:
value = str(port.read(8), 'utf-8')
value = value.replace('\r', '').replace('\n', '')
if value.strip():
log.write(str(count))
log.write(',')
log.write(value)
log.write('\n')
print(count)
count += 1
print(value)
\n = CR (Carriage Return) // Used as a new line character in Unix
\r = LF (Line Feed) // Used as a new line character in Mac OS
\n\r = CR + LF // Used as a new line character in Windows
I think it's not working in windows because you need to look for a CR LF.
Might try using Environment.NewLine as it will act as any of the above depending on the operating system.

Splitting a csv file into multiple files

I have a csv file of 150500 rows and I want to split it into multiple files containing 500 rows (entries)
I'm using Jupyter and I know how to open and read the file. However, I don't know how to specify an output_path to record the newly created files from splitting the big one.
I have found this code online but once again since I don't know what is my output_path I don't know how to use it. Moreover, for this block of code I don't understand how we specify the input file.
import os
def split(filehandler, delimiter=',', row_limit=1000,
output_name_template='output_%s.csv', output_path='.', keep_headers=True):
import csv
reader = csv.reader(filehandler, delimiter=delimiter)
current_piece = 1
current_out_path = os.path.join(
output_path,
output_name_template % current_piece
)
current_out_writer = csv.writer(open(current_out_path, 'w'), delimiter=delimiter)
current_limit = row_limit
if keep_headers:
headers = reader.next()
current_out_writer.writerow(headers)
for i, row in enumerate(reader):
if i + 1 > current_limit:
current_piece += 1
current_limit = row_limit * current_piece
current_out_path = os.path.join(
output_path,
output_name_template % current_piece
)
current_out_writer = csv.writer(open(current_out_path, 'w'), delimiter=delimiter)
if keep_headers:
current_out_writer.writerow(headers)
current_out_writer.writerow(row)
My file name is DataSet2.csv and it's in the same file in jupyter as my ipynb notebook is running.
number_of_small_files = 301
lines_per_small_file = 500
largeFile = open('large.csv', 'r')
header = largeFile.readline()
for i in range(number_of_small_files):
smallFile = open(str(i) + '_small.csv', 'w')
smallFile.write(header) # This line copies the header to all small files
for x in range(lines_per_small_file):
line = largeFile.readline()
smallFile.write(line)
smallFile.close()
largeFile.close()
This will create many small files in the same directory. About 301 of them. They will be named from 0_small.csv to 300_small.csv.
Using standard unix utilities:
cat DataSet2.csv | tail -n +2 | split -l 500 --additional-suffix=.csv output_
This pipeline takes the original file, strips off the first line with 'tail -n +2', and then splits the rest into 500 line chunks that are put into files with names that start with 'output_' and end with '.csv'

Automating a process for multiple CSV file

I've been looking around and couldn't find the answer so here it is.
I'm trying to look into a way for automating of changing the content of a CSV file into something else for machine learning purposes. I have the content of a single line like this:
0, 0, 0, -2.3145, 5.567...... 65, 65, 125, 70.
(516 columns)
And trying to change it to this:
0,
0,
-2.3145,
5.567
....
65,
65,
125,
70.
(516 rows)
So basically transposing the data from horizontal to vertical (single row to single column).
It's easily done using Excel but problem is I have 4000+ of the CSV file so it takes a lot of time.
On top of that, I have to get the first 512 rows and store it into a CSV of another folder adding the last 4 rows into another CSV of another folder while both files have the same name.
Eg:
features(folder)
1.CSV
2.CSV
.....
4000+.CSV
labels(folder)
1.CSV
2.CSV
.....
4000+.CSV
Any suggestions on how I can speed things up? Tried writing my own program but I'm stumped on changing it from row to column. I've only managed to split the single CSV file to it's 4000+ pieces.
EDIT:
I've tested by putting the csv rows into an array and then storing the array into the csv where the code looks like this:
with open('FFTMIM16_512L1H1S0D0_1194.csv', 'r') as f:
reader = csv.reader(f)
your_list = list(reader)
print(your_list[0:512])
print(your_list[512:516])
print(your_list)
with open('test.csv', 'w', newline = '') as fa:
writer = csv.writer(fa)
writer.writerows(your_list[0:511])
with open('test1.csv', 'w', newline = '') as fb:
writer = csv.writer(fb)
writer.writerows(your_list[512:516])
It works but I just need to run it in a loop. A problem that I don't understand is that if I save the values from 0 to 512 on test.csv, it will show 512 counts of rows but when I store from 513 to 516 to test1.csv, it only shows three instead of four rows that I need. Changing fb content from 512 to 516 will work which doesn't make sense to me because the value of 512 in test.csv is 0 while test1.csv is 69. Why is that? From what I can understand is the index of the array, it starts from 0 to the place of number I need. Or is it not the case in python?
EDIT 2:
My new code is as follows:
import csv
import os
import glob
#import itertools
directory = input("INPUT FOLDER: ")
output1 = input("FEATURES FODLER: ")
output2 = input("LABELS FOLDER: ")
in_files = os.path.join(directory, '*.csv')
for in_file in glob.glob(in_files):
with open(in_file) as input_file:
reader = csv.reader(input_file)
your_list = (reader)
filename = os.path.splitext(os.path.basename(in_file))[0] + '.csv'
with open(os.path.join(output1, filename), 'w', newline='') as output_file1:
writer = csv.writer(output_file1)
writer.writerow(your_list[0:512])
with open(os.path.join(output2, filename), 'w', newline='' ) as output_file2:
writer = csv.writer(output_file2)
writer.writerow(your_list[512:516])
It shows the output as I wanted but now it stores apostrophes and braces eg. ['0.0'], ['2.321223'] as well. How do I remove these?
I don't understand why you can't do it programatically if you have your 4000+ pieces, just write every piece in a new line?
In my opinion the easiest way, but not automatically, would be some editor like Notepad ++.
Here you can Replace "," by "\r\n" or if you want to keep the "," you replace it with ",\r\n".
If you want it automated i don't see a not programmatical way.
By the way... if you use python with numpy/scipy you can just use the .transpose() function
*Edit to your comment:
what do you mean with "split from the first to the 512"? If you want parts with the size 512 it would be something like:
new_array = []
temp_array = []
k = 0
for num in your_array:
temp_array.append(num)
k += 1
if k % 512 == 0:
new_array.append(temp_array)
k = 0
temp_array = []
#to append the last block which might not be 512 sized
if len(temp_array) > 0:
new_array.append(temp_array)
# Save Arrays
for i in len(new_array):
saveToCsv(array = new_array[i], name="csv_"+str(i))
Your new_array would now be an array filled with 512 sized arrays.
Might be mistakes here, i did not test the code. To save you only need a function saveToCsf(array, name) which saves an array into a file.

Julia - Rewriting a CSV

Complete Julia newbie here.
I'd like to do some processing on a CSV. Something along the lines of:
using CSV
in_file = CSV.Source('/dir/in.csv')
out_file = CSV.Sink('/dir/out.csv')
for line in CSV.eachline(in_file)
replace!(line, "None", "")
CSV.writeline(out_file, line)
end
This is in pseudocode, those aren't existing functions.
Idiomatically, should I iterate on 1:CSV.countlines(in_file)? Do a while and check something?
If all you want to do is replace a string in the line, you do not need any CSV parsing utilities. All you do is read the file line by line, replace, and write. So:
infile = "/path/to/input.csv"
outfile = "/path/to/output.csv"
out = open(outfile, "w+")
for line in readlines(infile)
newline = replace(line, "a", "b")
write(out, newline)
end
close(out)
This will replicate the pseudocode you have in your question.
If you need to parse and read the csv field by field, use the readcsv function in base.
data=readcsv(infile)
typeof(data) #Array{Any,2}
This will return the data in the file as a 2 dimensional array. You can process this data any way you want, and write it back using the writecsv function.
for i in 1:size(data,1) #iterate by rows
data[i, 1] = "This is " * data[i, 1] # Add text to first column
end
writecsv(outfile, data)
Documentation for these functions:
http://docs.julialang.org/en/release-0.5/stdlib/io-network/?highlight=readcsv#Base.readcsv
http://docs.julialang.org/en/release-0.5/stdlib/io-network/?highlight=readcsv#Base.writecsv

Filtering rows in a csv file with Python

I have a script that opens and modifies a text file. The text file which contains personnel info and a lunch account balance. My script takes the text file removes the quotes and only writes rows that contain the values D, F or R in column 8. It writes this filtered data to two files, a csv import file called lunchimport.csv for a separate program and a csv temp file called to be used for further filtering. The second stage of the script uses the csv temp file to generate two additional csv files. One file, negativebal.csv, contains only rows with a negative value in column 14. The other file, lowbal.cav, contains rows with a value between 0 and 5 in column 14. My issue is that I cant get the script to filter "between" values properly. When using the code below to just write rows with values in column14 between 0 and 5 nothing will filter out. If I use values between 0 and 1.99 it works. Anything greater than 1.99 and the code doesnt filter anything:
if row[13] > "0" and row[13] < "1.99":
lowwriter.writerow([row[0], row[13]])
I have pasted my entire code below. I do use alot of temp files to accomplish my tasks. There probably is a better way but im just interested in getting my filters to work properly.
import os
import csv
infile = open("\\\\comalexsrv\\export\\update.txt", "r")
outfile1 = open("casttemp1.csv", "w")
infile2 = open("casttemp1.csv", "r")
outfile2 = open("casttemp2.csv", "w")
infile3 = open("casttemp2.csv", "r")
outfile3 = open("casttemp3.csv", "w")
infile4 = open("casttemp3.csv", "r")
inowcsv = open("F:\zbennett\Lunch_Imports\lunchimport.csv", "w")
negcastcsv = open("\\\\tcdc\\inow_transfer$\\negativebal.csv", "w")
lowcastcsv = open("\\\\tcdc\\inow_transfer$\\lowbal.csv", "w")
# Remove quotes in update.txt, write to outfile1(casttemp1.csv)
string = infile.read()
outfile1.write(string.replace("\"", ''))
# Open infile2(casttemp1.csv), write rows with D,F,R in column 8 to outfile2(casttemp2.csv)
# Open infile2(casttemp1.csv), write rows with D,F,R in column 8 to inowcsv(F:\zbennett\Lunch_Imports\lunchimport.csv)
# Open infile2(casttemp1.csv), write rows with D,R in column 8 to outfile3(casttemp3.csv)
tempwriter = csv.writer(outfile2, delimiter=',', lineterminator= '\n')
importwriter = csv.writer(inowcsv, delimiter=',', lineterminator= '\n')
lowtemp = csv.writer(outfile3, delimiter=',', lineterminator= '\n')
for row in csv.reader(infile2, delimiter=','):
if row[7] == "D":
tempwriter.writerow(row)
importwriter.writerow(row)
lowtemp.writerow(row)
if row[7] == "F":
tempwriter.writerow(row)
importwriter.writerow(row)
if row[7] == "R":
tempwriter.writerow(row)
importwriter.writerow(row)
lowtemp.writerow(row)
# Open infile3(casttemp2.csv), write columns 1,14 for rows with less than 0 in column 14 to negcastcsv(\\tcdc\inow_transfer$\negativebal.csv)
negwriter = csv.writer(negcastcsv, delimiter=',', lineterminator= '\n')
for row in csv.reader(infile3, delimiter=','):
if row[13] < "0":
negwriter.writerow([row[0], row[13]])
# Open infile4(casttemp3.csv), write columns 1,14 for rows with column 14 greater than 0 and less than 1.75 to lowcastcsv(\\tcdc\inow_transfer$\lowbal.csv)
lowwriter = csv.writer(lowcastcsv, delimiter=',', lineterminator= '\n')
for row in csv.reader(infile4, delimiter=','):
if row[13] > "0" and row[13] < "1.99":
lowwriter.writerow([row[0], row[13]])
infile.close()
outfile1.close()
infile2.close()
outfile2.close()
inowcsv.close()
outfile3.close()
infile3.close()
infile4.close()
negcastcsv.close()
lowcastcsv.close()
# Delete casttemp1.csv file
os.remove("casttemp1.csv")
os.remove("casttemp2.csv")
os.remove("casttemp3.csv")
Comparison is happening using strings, when you probably want numeric comparison:
if 0. < float(row[13]) < 1.99:
lowwriter.writerow([row[0], row[13]])