Download files from FTP or HTTP using filenames in CSV - Python 3 - csv

I have a csv file of products for an ecommerce site I'm working on, as well as FTP access to the corresponding images for each product (~15K products).
I would like to use Python to pull only the images listed in the csv from either the FTP or HTTP and save them locally.
import urllib.request
import urllib.parse
import re
url='http://www.fakesite.com/pimages/filename.jpg'
split = urllib.parse.urlsplit(url)
filename = split.path.split("/")[-1]
urllib.request.urlretrieve(url, filename)
print(filename)
saveFile = open(filename,'r')
saveFile.close()
import csv
with open('test.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=",")
images = []
for row in readCSV:
image = row[14]
print(image)
The code I have currently can pull the filename from the URL and save the file as that filename. It can also pull the filename of the image from the CSV file. (filename and image are the exact same) What I need it to do, is input the filename, from the CSV into the end of the URL, and then save that file as the filename.
I have graduated to this:
import urllib.request
import urllib.parse
import re
import os
import csv
with open('test.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=",")
images = []
for row in readCSV:
image = row[14]
images.append(image)
x ='http://www.fakesite.com/pimages/'
url = os.path.join (x,image)
split = urllib.parse.urlsplit(url)
filename = split.path.split("/")[-1]
urllib.request.urlretrieve(url,filename)
saveFile = open(filename,'r')
saveFile.close()
Now this is great. It works perfectly. It is pulling the correct filename out of the CSV file, adding it on to the end of the URL, downloading the file, and saving it as the filename.
However, I can't seem to figure out how to make this work for more than one line of the CSV file. As of now, it takes that last line, and pulls the relevant information. Ideally, I would use the CSV file with all of the products on it, and it would go through and download every single one, not just the last image.

You're doing strange things ...
import urllib.request
import csv
# the images list should be outside the with block
images = []
IMAGE_COLUMN = 14
with open('test.csv') as csvfile:
# read csv
readCSV = csv.reader(csvfile, delimiter=",")
for row in readCSV:
# I guess 14 is the column-index of the image-name like 'image.jpg'
# I've put it in some constant
# now append all the image-names into the list
images.append(row[IMAGE_COLUMN])
# no need for the following
# image = row[14]
# images.append(image)
# make sure, root_url ends with a slash
# x was some strange name for an url
root_url = 'http://www.fakesite.com/pimages/'
# iterate through the list
for image in images:
# you don't need os.path.join, because that's operating system dependent.
# you don't need to urlsplit, because you have created the url yourself.
# you don't need to split the filename as it is the image name
# with the following line, the root_url must end with a slash
url = root_url + image
# urlretrieve saves the file as whatever image is into the current directory
urllib.request.urlretrieve(url, image)
or in short, that's all you need:
import urllib.request
import csv
IMAGE_COLUMN = 14
ROOT_URL = 'http://www.fakesite.com/pimages/'
images = []
with open('test.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=",")
for row in readCSV:
images.append(row[IMAGE_COLUMN])
for image in images:
url = ROOT_URL + image
urllib.request.urlretrieve(url, image)

Related

Using a CSV file with file path info (file's orginal location end with .jpg, destination end with .jpg) to copy files

I have a csv file with location information of images in 1st column
and
a destination location information of images in 2nd column
I was able to move one image to location with code
import shutil
source_folder = r"C:/Users/JJ/Desktop/test/images/AoF06978.jpg"
destination_folder = r"C:/Users/JJ/Desktop/new_db/test_fire/AoF06978.jpg"
shutil.copyfile(source_folder, destination_folder)
However, when I tried to move all files using CSV file, I have no clue;
import csv, shutil
filename = 'C:/Users/JJ/Desktop/test.csv'
with open(filename, 'r') as csvfile:
datareader = csv.reader(csvfile)
for row in datareader:
a= row[0]
b =row[1]
shutil.copyfile(a, b)
and I got an error: iterator should return strings, not bytes
Assuming that your .csv file has a header, you can skip this one, then read the two columns (origin and destination) as lists and finally use shutil.copyfile to copy/move the images :
import csv
import shutil
with open("C:/Users/JJ/Desktop/test.csv", "r") as f:
reader = csv.reader(f)
for index, row in enumerate(reader):
if index == 0:
pass
else:
origin, destination = row
shutil.copy(origin, destination)

How to read a csv file from S3 bucket using AWS lambda and write it as new CSV to another S3 bucket? Python boto3

Ok so I am a beginner to AWS in general. I am writing a lambda function to trigger based on file upload event in S3, remove some coulmns and write it to a new bucket. Been banging my head for the past two datas and I am getting different error each time. Can someone modify my code/fix it? outputlv will be my target bucket.. Currently I am getting '/outputlv/output.csv' path does not exist in the with open('/outputlv/output.csv', 'w') as output_file line. Thanks.
import json
import urllib.parse
import boto3
import csv
s3 = boto3.client('s3')
def lambda_handler(event, context):
bucket = event['Records'][0]['s3']['bucket']['name']
key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'], encoding='utf-8')
file_name = s3.get_object(Bucket=bucket, Key=key)
csv_reader = csv.reader(file_name)
with open('/outputlv/output.csv', 'w') as output_file:
wtr = csv.writer(output_file)
for i in csv_reader:
wtr.writerow(i[0], i[2], i[3])
target_bucket = 'outputlv'
final_file = 'outputlv/output.csv'
s3.put_object(Bucket=target_bucket, Key=final_file)
Why don't you get the content, is it required to work with local files at all ?
response = s3.get_object(Bucket=bucket, Key=key)
# Get file content
content = response['Body'].read()
# Pass file content to csv reader
csv_reader = csv.reader(content)

Python 3: Opening multiple .csv files

I want to open multiple csv files (with same data types/columns), save the data into one variable do some stuff to data and save it into one csv file. While I can easily open one file, I can't seem to find a way to open multiple files. Here is my code:
import numpy as np
import csv
from collections import Counter
files = ['11.csv', '12.csv', '13.csv', '14.csv', '15.csv']
with open(files) as csvfile:
info = csv.reader(csvfile, delimiter=',')
info_types = []
records = 0
for row in info:
records = row[2]
call_types.append(records)
stats = Counter(call_types).most_common()
print(stats)
results = stats
resultFile = open("Totals.csv",'w')
wr = csv.writer(resultFile, dialect='excel')
for output in results:
wr.writerow(output)
To make it work, simultaneouly less bug prone and efficient try the following.
# required imports
files = ['11.csv', '12.csv', '13.csv', '14.csv', '15.csv']
with open("outfile","wt") as fw:
writer = csv.writer(fw)
for file in files:
with open(file) as csvfile:
info = csv.reader(csvfile, delimiter=',')
info_types = []
records = 0
for row in info:
# process row but don't store it
# in any list if you
# don't have to(that will defeat the purpose)
# say you get processed_row
writer.writerow(processed_row)
I would do this within a loop. Since you are already appending the data as you are reading from the file.
for f in files:
with open(f) as csvfile:
...

ipython notebook .png figures after nbconvert not loaded by latest chrome/firefox

Running $ipython3 notebook --pylab=inline locally, I saved a simple notebook with a small png figure using pylab and python 3.3.
Contents of notebook cell:
from pylab import *
x = linspace(0, 5, 10)
y = x ** 2
figure()
plot(x, y, 'r')
xlabel('x')
ylabel('y')
title('title')
show()
running the cell resulted in an inline png figure being displayed.
The saved file (my_notebook.ipynb) has a .png saved as a data uri:
{ ..., "png":"iVBO...ZUmwK\n...", ... }
after executing command:
ipython3 nbconvert --to html my_notebook.html
my_notebook.html is generated with the figure as a data uri like this:
<img src="data:image/png;base64,b'iVBO...ZUmwk\n..." >
In latest chrome or firefox the image data uri does not load/display when opening file:///.../my_notebook.html locally and chrome console reports 'failed to load resource' for the img tag.
I have had the same results with images loaded and then displayed with imshow().
The figures appear fine in the notebook. It is after nbconvert to html that they do not display (at all).
(notice the escaped newline in the image data uri - I tried replacing all escaped newlines in the data string with actual newlines with no change in results)
How can I get png figures to display in an nbconverted-html-version of an ipython notebook opened locally ("file:///.../my_notebook.html") in browser?
(I would rather not have to save each figure and hand modify the converted html to reference the saved figure on disk.)
EDIT:
versions:
python 3.3.1
ipython==1.0.0
matplotlib==1.2.1
Pillow==2.1.0 (PIL)
Install BeautifulSoup4 first:
pip install BeautifulSoup4
Then use following function to freeze your generated html file. The images will be placed in the images folder under the same directory as the html file.
import os
import re
import base64
from bs4 import BeautifulSoup as BS
from uuid import uuid4
def dump(path, data):
root = os.path.dirname(path)
if not os.path.exists(root):
os.makedirs(root)
with open(path, 'wb') as f:
f.write(data)
# for windows
return path.replace('\\', '/')
def freeze_html(path):
'''pass in absolute path of your html'''
root = os.path.dirname(path)
with open(path, 'rb') as f:
soup = BS(f.read())
for img in soup.find_all('img'):
m = re.search(r"data:image/png;base64,b'(.*)'", img['src'])
if m:
iname = uuid4()
ipath = os.path.join(root, 'images', '%s.png' % iname)
# remove '\n'
s = m.group(1).replace(r'\n', '')
img['src'] = os.path.relpath(
dump(ipath, base64.b64decode(s.encode('ascii'))),
root
)
with open(path, 'wb') as f:
f.write(soup.encode('utf-8'))
If you do not need to further convert it to tex or pdf, you can just write string (\n removed) back to img['src'](with data:image/png;base64, prefix):
import re
from bs4 import BeautifulSoup as BS
def freeze_html(path):
'''pass in absolute path of your html'''
with open(path, 'rb') as f:
soup = BS(f.read())
for img in soup.find_all('img'):
m = re.search(r"data:image/png;base64,b'(.*)'", img['src'])
if m:
# remove '\n'
s = m.group(1).replace(r'\n', '')
img['src'] = 'data:image/png;base64,' + s
with open(path, 'wb') as f:
f.write(soup.encode('utf-8'))
I prefer to save png to separate file because it's more friendly to xelatex.

Python Import csv file as dataframe with File chooser

I would like to import a csv file into python with FileChooser and display it as dataframe. Here is the code and it didn't work. Thanks for your kind help.
def get_open_filename(self):
filename = None
chooser = gtk.FileChooserDialog("Open File...", self.window,
gtk.FILE_CHOOSER_ACTION_OPEN,
(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
gtk.STOCK_OPEN, gtk.RESPONSE_OK))
response = chooser.run()
if response == gtk.RESPONSE_OK:
with open(chooser.get_filename(), 'rb') as csvfile:
don = DataFrame.from_csvfile(csvfile) ## I am confused here !!!
print don
chooser.destroy()
return filename
I believe from_csv file takes a filename not a file, using these docs
Try replacing
with open(chooser.get_filename(), 'rb') as csvfile:
don = DataFrame.from_csvfile(csvfile) ## I am confused here !!!
print don
with
don = DataFrame.from_csvfile(chooser.get_filename())
print don