Read csv files and calculate the total sum

Read csv files and calculate the total sum - csv

import glob
import csv
def read_with_csv(filename):
data = []
for x in csv.reader(open(filename, 'r')):
data.append(x)
return data
def calculate_sum(data_sample):
total = 0
for row in data_sample[1:]:
price = float(row[9])
total += price
return total
files = [file for file in glob.glob("*.csv") if 'Invoice-Item-Adjustment_' in file]
data_from_csv = read_with_csv(files)
the_sum = data_from_csv(calculate_sum)
print(the_sum)
I have multiple files in directory and read to list and loop through the amount in the column and telly up the monthly total sum. I have erros and not sure how to deal with.
Traceback (most recent call last):
File "listing.py", line 18, in <module>
data_from_csv = read_with_csv(files)
File "listing.py", line 6, in read_with_csv
for x in csv.reader(open(filename, 'r')):
TypeError: invalid file: ['JEExport_20141201-20141218_Invoice-Item-Adjustment_20
150208164027.csv', 'JEExport_20150116-20150128_Invoice-Item-Adjustment_201502021
70516.csv'
]

List comprehensions in python would return a collection - not the file itself.
Hence
files = [file for file in glob.glob("*.csv") if 'Invoice-Item-Adjustment_' in file]
returns a list of files whose names contain Invoice-Item-Adjustment. You would have to iterate through the files one by one and then process it.

Your function read_with_csv() takes a filename as an argument NOT a list of files. So instead, the following might work depending on what you want to do exactly
data_from_csv = [read_with_csv(file) for file in files]

Related

google spreadsheet CellNotFound Exception using python issue

I am building Discord bot which ask user for their name and Points. My spreadsheet has two columns with first raw as header (Name, Points). my code take the name of the player and search the spreadsheet if found update the point. This works perfect when the player name already found but i get cellnotfound error when the name is not in the google sheet.
i have already tried solution in other forum but non are working like
if len(cells) > 0 and except gspread.exceptions.CellNotFound: and except gspread.CellNotFound:
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from pprint import pprint
from googleapiclient import discovery
# SET UP GSHEETS DESTINATION
scope =["https://spreadsheets.google.com/feeds",'https://www.googleapis.com/auth/spreadsheets',"https://www.googleapis.com/auth/drive.file","https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name("generated.json", scope)
client1 = gspread.authorize(creds)
content = "22"
if content.isdigit():
#open spreadsheet
sh = client1.open("test")
worksheet = sh.sheet1
nameof= 'player4'
#find data with playername
cells = worksheet.find(nameof)
if cells != []:
#capture player name data column and row number
#print("found at R%s C%s" %(cells.row, cells.col))
name_row_number = ("%s"%(cells.row))
name_cell_number = ("%s"%(cells.col))
old_points_cell_number = int(name_cell_number)+1
#print(old_points_cell_number)
oldscore = worksheet.cell(name_row_number, old_points_cell_number).value
#print(oldscore)
worksheet.update_cell(name_row_number, old_points_cell_number, content)
else:
print("name not found")
[Screenshot of my google sheet][1]
[1]: https://i.stack.imgur.com/iw6O5.png
Below is the error message i get
Traceback (most recent call last):
File "C:\Python39\lib\site-packages\gspread\models.py", line 1799, in find
return self._finder(finditem, query, in_row, in_column)
File "C:\Python39\lib\site-packages\gspread\models.py", line 1761, in _finder
return func(match, cells)
File "C:\Python39\lib\site-packages\gspread\utils.py", line 97, in finditem
return next((item for item in seq if func(item)))
StopIteration
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\abc\Desktop\Discord_Python\ReadExcel.py", line 38, in <module>
cells = worksheet.find(nameof)
File "C:\Python39\lib\site-packages\gspread\models.py", line 1801, in find
raise CellNotFound(query)
gspread.exceptions.CellNotFound: player4

Python 3: Opening multiple .csv files

I want to open multiple csv files (with same data types/columns), save the data into one variable do some stuff to data and save it into one csv file. While I can easily open one file, I can't seem to find a way to open multiple files. Here is my code:
import numpy as np
import csv
from collections import Counter
files = ['11.csv', '12.csv', '13.csv', '14.csv', '15.csv']
with open(files) as csvfile:
info = csv.reader(csvfile, delimiter=',')
info_types = []
records = 0
for row in info:
records = row[2]
call_types.append(records)
stats = Counter(call_types).most_common()
print(stats)
results = stats
resultFile = open("Totals.csv",'w')
wr = csv.writer(resultFile, dialect='excel')
for output in results:
wr.writerow(output)

To make it work, simultaneouly less bug prone and efficient try the following.
# required imports
files = ['11.csv', '12.csv', '13.csv', '14.csv', '15.csv']
with open("outfile","wt") as fw:
writer = csv.writer(fw)
for file in files:
with open(file) as csvfile:
info = csv.reader(csvfile, delimiter=',')
info_types = []
records = 0
for row in info:
# process row but don't store it
# in any list if you
# don't have to(that will defeat the purpose)
# say you get processed_row
writer.writerow(processed_row)

I would do this within a loop. Since you are already appending the data as you are reading from the file.
for f in files:
with open(f) as csvfile:
...

How to get the actual value of a cell with openpyxl?

I'm a beginner with Python and I need help. I'm using Python 2.7 and I'm trying to retrieve the cell values of an excel file and store it into a csv file. My code is the following:
import os, openpyxl, csv
aggname = "deu"
wb_source = openpyxl.load_workbook(filename, data_only = True)
app_file = open(filename,'a')
dest_file = csv.writer(app_file, delimiter=',', lineterminator='\n')
calib_sheet = wb_source.get_sheet_by_name('Calibration')
data = calib_sheet['B78:C88']
data = list(data)
print(data)
for i in range(len(data)):
dest_file.writerow(data[i])
app_file.close()
In my csv file, I get this, instead of the actual value (for example in my case: SFCG, 99103).
<Cell Calibration.B78>,<Cell Calibration.C78>
<Cell Calibration.B79>,<Cell Calibration.C79>
<Cell Calibration.B80>,<Cell Calibration.C80>
<Cell Calibration.B81>,<Cell Calibration.C81>
<Cell Calibration.B82>,<Cell Calibration.C82>
<Cell Calibration.B83>,<Cell Calibration.C83>
<Cell Calibration.B84>,<Cell Calibration.C84>
<Cell Calibration.B85>,<Cell Calibration.C85>
<Cell Calibration.B86>,<Cell Calibration.C86>
<Cell Calibration.B87>,<Cell Calibration.C87>
<Cell Calibration.B88>,<Cell Calibration.C88>
I tried to set the data_only = True, when opening the excel file as suggested in answers to similar questions but it doesn't solve my problem.
---------------EDIT-------------
Taking into account the first two answers I got (thank you!), I tried several things:
for i in range(len(data)):
dest_file.writerows(data[i].value)
I get this error message :
for i in range(len(data)):
dest_file.writerows(data[i].values)
Traceback (most recent call last):
File "<ipython-input-78-27828c989b39>", line 2, in <module>
dest_file.writerows(data[i].values)
AttributeError: 'tuple' object has no attribute 'values'
Then I tried this instead:
for i in range(len(data)):
for j in range(2):
dest_file.writerow(data[i][j].value)
and then I have the following error message:
for i in range(len(data)):
for j in range(2):
dest_file.writerow(data[i][j].value)
Traceback (most recent call last):
File "<ipython-input-80-c571abd7c3ec>", line 3, in <module>
dest_file.writerow(data[i][j].value)
Error: sequence expected
So then, I tried this:
import os, openpyxl, csv
wb_source = openpyxl.load_workbook(filename, data_only=True)
app_file = open(filename,'a')
dest_file = csv.writer(app_file, delimiter=',', lineterminator='\n')
calib_sheet = wb_source.get_sheet_by_name('Calibration')
list(calib_sheet.iter_rows('B78:C88'))
for row in calib_sheet.iter_rows('B78:C88'):
for cell in row:
dest_file.writerow(cell.value)
Only to get this error message:
Traceback (most recent call last):
File "<ipython-input-81-5bed62b45985>", line 12, in <module>
dest_file.writerow(cell.value)
Error: sequence expected
For the "sequence expected" error I suppose python expects a list rather than a single cell, so I did this:
import os, openpyxl, csv
wb_source = openpyxl.load_workbook(filename, data_only=True)
app_file = open(filename,'a')
dest_file = csv.writer(app_file, delimiter=',', lineterminator='\n')
calib_sheet = wb_source.get_sheet_by_name('Calibration')
list(calib_sheet.iter_rows('B78:C88'))
for row in calib_sheet.iter_rows('B78:C88'):
dest_file.writerow(row)
There is no error message but I only get the reference of the cell in csv file and changing it to dest_file.writerow(row.value) brings me back to the tuple error.
I obviously still need your help!

You've forgot to get the cell's value! See the documentation

I found a way around it using numpy, which allows me to store my values as a list of lists rather than a list of tuples.
import os, openpyxl, csv
import numpy as np
wb_source = openpyxl.load_workbook(filename, data_only=True)
app_file = open(filename,'a')
dest_file = csv.writer(app_file, delimiter=',', lineterminator='\n')
calib_sheet = wb_source.get_sheet_by_name('Calibration')
store = list(calib_sheet.iter_rows('B78:C88'))
print store
truc = np.array(store)
print truc
for i in range(11):
for j in range(1):
dest_file.writerow([truc[i][j].value, truc[i][j+1].value])
app_file.close()
I actually have a sequence as my argument in "writerow()" and with the list object I can also use the double index and the value method to retrieve the value of my cell.

Try using data.values instead of just data when you are printing it.
Hope it helps !!
**
***An example :
import openpyxl
import re
import os
wc=openpyxl.load_workbook('<path of the file>') wcsheet=wc.get_sheet_by_name('test')
store=[]
for data in wcsheet.columns[0]:
store=data
print(store.value)***
=======================
=================================================
**
Live Life Buddha Size

Converting JSON files to .csv

I've found some data that someone is downloading into a JSON file (I think! - I'm a newb!). The file contains data on nearly 600 football players.
Here you can find the file
In the past, I have downloaded the json file and then used this code:
import csv
import json
json_data = open("file.json")
data = json.load(json_data)
f = csv.writer(open("fix_hists.csv","wb+"))
arr = []
for i in data:
fh = data[i]["fixture_history"]
array = fh["all"]
for j in array:
try:
j.insert(0,str(data[i]["first_name"]))
except:
j.insert(0,'error')
try:
j.insert(1,data[i]["web_name"])
except:
j.insert(1,'error')
try:
f.writerow(j)
except:
f.writerow(['error','error'])
json_data.close()
Sadly, when I do this now in command prompt, i get the following error:
Traceback (most recent call last):
File"fix_hist.py", line 12 (module)
fh = data[i]["fixture_history"]
TypeError: list indices must be integers, not str
Can this be fixed or is there another way I can grab some of the data and convert it to .csv? Specifically the 'Fixture History'? and then 'First'Name', 'type_name' etc.
Thanks in advance for any help :)

Try this tool: http://www.convertcsv.com/json-to-csv.htm
You will need to configure a few things, but should be easy enough.

Cannot process a csv file into a pandas with tickers, and fetch stock-info from the DataFrame

I am trying to import a list of stock-tickers (the line that is #symbols_list...read_csv..), and fetch stock-info on that date into a pandas.
import datetime
import pandas as pd
from pandas import DataFrame
from pandas.io.data import DataReader
#symbols_list = [pd.read_csv('Stock List.csv', index_col=0)]
symbols_list = ['AAPL', 'TSLA', 'YHOO','GOOG', 'MSFT','ALTR','WDC','KLAC']
symbols=[]
start = datetime.datetime(2014, 2, 9)
#end = datetime.datetime(2014, 12, 30)
for ticker in symbols_list:
r = DataReader(ticker, "yahoo",
start = start)
#start=start, end)
# add a symbol column
r['Symbol'] = ticker
symbols.append(r)
# concatenate all the dfs
df = pd.concat(symbols)
#define cell with the columns that i need
cell= df[['Symbol','Open','High','Low','Adj Close','Volume']]
#changing sort of Symbol (ascending) and Date(descending) setting Symbol as first column and changing date format
cell.reset_index().sort(['Symbol', 'Date'], ascending=[1,0]).set_index('Symbol').to_csv('stock.csv', date_format='%d/%m/%Y')
The input file Stock list.csv
has the following content with these entries on each their separate row:
Index
MMM
ABT
ABBV
ACE
ACN
ACT
ADBE
ADT
AES
AET
AFL
AMG
and many more tickers of interest.
When run with the manually coded list
symbols_list = ['AAPL', 'TSLA', 'YHOO','GOOG', 'MSFT','ALTR','WDC','KLAC']
It all works fine and processes the input and stores it to a file,
But whenever I run the code with the read_csv from file, I get the following error:
runfile('Z:/python/CrystallBall/SpyderProject/getstocks3.py', wdir='Z:/python/CrystallBall/SpyderProject') Reloaded modules: pandas.io.data, pandas.tseries.common Traceback (most recent call last):
File "<ipython-input-32-67cbdd367f48>", line 1, in <module>
runfile('Z:/python/CrystallBall/SpyderProject/getstocks3.py', wdir='Z:/python/CrystallBall/SpyderProject')
File "C:\Program Files (x86)\WinPython-32bit-3.4.2.4\python-3.4.2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 601, in runfile
execfile(filename, namespace)
File "C:\Program Files (x86)\WinPython-32bit-3.4.2.4\python-3.4.2\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 80, in execfile
exec(compile(open(filename, 'rb').read(), filename, 'exec'), namespace)
File "Z:/python/CrystallBall/SpyderProject/getstocks3.py", line 35, in <module>
cell.reset_index().sort(['Symbol', 'Date'], ascending=[1,0]).set_index('Symbol').to_csv('stock.csv', date_format='%d/%m/%Y')
File "C:\Users\Morten\AppData\Roaming\Python\Python34\site-packages\pandas\core\generic.py", line 1947, in __getattr__
(type(self).__name__, name))
AttributeError: 'Panel' object has no attribute 'reset_index'
Why can I only process the symbol_list manually laid out, and not the imported tickers from file?
Any takers? Any help greatly appreciated!

Your code has numerous issues which the following code has fixed and works:
In [4]:
import datetime
import pandas as pd
from pandas import DataFrame
from pandas.io.data import DataReader
temp='''Index
MMM
ABT
ABBV
ACE
ACN
ACT
ADBE
ADT
AES
AET
AFL
AMG'''
df = pd.read_csv(io.StringIO(temp), index_col=[0])
symbols=[]
start = datetime.datetime(2014, 2, 9)
for ticker in df.index:
r = DataReader(ticker, "yahoo",
start = start)
#start=start, end)
# add a symbol column
r['Symbol'] = ticker
symbols.append(r)
# concatenate all the dfs
df = pd.concat(symbols)
#define cell with the columns that i need
cell= df[['Symbol','Open','High','Low','Adj Close','Volume']]
#changing sort of Symbol (ascending) and Date(descending) setting Symbol as first column and changing date format
cell.reset_index().sort(['Symbol', 'Date'], ascending=[1,0]).set_index('Symbol').to_csv('stock.csv', date_format='%d/%m/%Y')
cell
Out[4]:
Symbol Open High Low Adj Close Volume
Date
2014-02-10 MMM 129.65 130.41 129.02 126.63 3317400
2014-02-11 MMM 129.70 131.49 129.65 127.88 2604000
... ... ... ... ... ... ...
2015-02-06 AMG 214.35 215.82 212.64 214.45 424400
[3012 rows x 6 columns]
So firstly this: symbols_list = [pd.read_csv('Stock List.csv', index_col=0)]
This will create a list with a single entry which will be a df with no columns and just an index of your ticker values.
This: for ticker in symbols_list:
won't work because the iterable object that is returned from the df is the column and not each entry, in your case you need to iterate over the index which is what my code does.
I'm not sure what you wanted to achieve, it isn't necessary to specify that index_col=0 if there is only one column, you can either create a df with just a single column, or if you pass squeeze=True this will create a Series which just has a single column.

We Keep Coding

html mysql json google-apps-script actionscript-3 ms-access google-chrome google-maps reporting-services sql-server-2008

Read csv files and calculate the total sum - csv

Your function read_with_csv() takes a filename as an argument NOT a list of files. So instead, the following might work depending on what you want to do exactly data_from_csv = [read_with_csv(file) for file in files]

Related

google spreadsheet CellNotFound Exception using python issue

Python 3: Opening multiple .csv files

How to get the actual value of a cell with openpyxl?

Converting JSON files to .csv

Cannot process a csv file into a pandas with tickers, and fetch stock-info from the DataFrame

Categories

Resources