Flask form does not let me upload TSV files - html

I have the following flask app where I want to be able to upload a TXT or TSV file to a form. The problem is, when I try to upload a TXT file, it works, but when I try to upload a TSV file, I get the following error:
File "/Users/cdastmalchi/Desktop/author_script/main.py", line 89, in process_file
if not places_exist(os.path.join(app.config['UPLOAD_FOLDER'], filename)):
File "/Users/cdastmalchi/Desktop/author_script/main.py", line 27, in places_exist
infile = open(filename, 'rU')
IOError: [Errno 2] No such file or directory: './Authors_Template.tsv'
Authors_Template.tsv is a template file that gets downloaded from the form and goes into the Downloads, and then I want users to be able to edit this template and then re-upload it. When I make the template Authors_Template.txt instead and then Download and re-upload it, it works. How can I solve this problem? I've even tried narrowing down the ALLOWED_EXTENSIONS list to just TSV and I still get the same issue.
app.py
from werkzeug.utils import secure_filename
import flask, string, random
import json
import subprocess
import os
import re
import time
UPLOAD_FOLDER = '.'
ALLOWED_EXTENSIONS = set(['txt','tsv'])
app = flask.Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.secret_key = ''.join(random.choice(string.ascii_letters) for _ in range(20)) #needed to use flask.session
def allowed_file(filename):
return '.' in filename and \
filename.rsplit('.', 1)[1] in ALLOWED_EXTENSIONS
def places_exist(filename):
infile = open(filename, 'rU')
placeDict = {}
addresses_temp = []
addresses = []
places_temp =[]
places = []
places_exist = True
for i in infile:
item = i.rstrip("\n").split("\t")
places_temp.append(item[0])
addresses_temp.append(item[1])
p_index = (places_temp.index('Place')) + 1
a_index = (addresses_temp.index('Address')) + 1
places = places_temp[p_index:]
addresses = addresses_temp[a_index:]
infile.close()
infile = open(filename, 'rU')
return places_exist
#app.route('/', methods=['GET'])
def home():
return flask.render_template('index.html')
#app.route('/process_file', methods=['POST'])
def process_file():
#here, you can run all the checks as before, but instead of flash, you can return jsonified results to read in the front-end
if 'file' not in flask.request.files or not flask.request.files['file'].filename:
return flask.jsonify({'result':'False', 'message':'no files selected'})
return flask.redirect(url_for('home'))
file = flask.request.files['file']
filename = secure_filename(file.filename)
if not allowed_file(file.filename):
return flask.jsonify({'result':'False', 'message':'Must be TXT file!'})
return flask.redirect(url_for('home'))
if not places_exist(os.path.join(app.config['UPLOAD_FOLDER'], filename)):
return flask.jsonify({'result':'False', 'message':'There is an affiliation missing from your Place list. Please re-try.'})
return flask.redirect(url_for('home'))
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
flask.session['filename'] = filename
return flask.jsonify({'result':'True'})
UPDATE:
def process_file():
#here, you can run all the checks as before, but instead of flash, you can return jsonified results to read in the front-end
if 'file' not in flask.request.files or not flask.request.files['file'].filename:
return flask.jsonify({'result':'False', 'message':'no files selected'})
return flask.redirect(url_for('home'))
file = flask.request.files['file']
filename = secure_filename(file.filename)
if not allowed_file(file.filename):
return flask.jsonify({'result':'False', 'message':'Must be TXT file!'})
return flask.redirect(url_for('home'))
# Save the file in the temp folder
file.save(os.path.join(app.config['TEMP_FOLDER'], filename))
# Process the file
if not places_exist(os.path.join(app.config['TEMP_FOLDER'], filename)):
return flask.jsonify({'result':'False', 'message':'There is an affiliation missing from your Place list. Please re-try.'})
return flask.redirect(url_for('home'))
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
flask.session['filename'] = filename
return flask.jsonify({'result':'True'})

You are trying to read a file before its writing in your directory. First you need to save the file in your application upload directory then read it.
def process_file():
# here, you can run all the checks as before, but instead of flash, you can return jsonified results to read in the front-end
if 'file' not in flask.request.files or not flask.request.files['file'].filename:
return flask.jsonify({'result':'False', 'message':'no files selected'})
return flask.redirect(url_for('home'))
file = flask.request.files['file']
filename = secure_filename(file.filename)
if not allowed_file(file.filename):
return flask.jsonify({'result':'False', 'message':'Must be TXT file!'})
return flask.redirect(url_for('home'))
# Save the file in the correct Location
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
# Process your file already saved
if not places_exist(os.path.join(app.config['UPLOAD_FOLDER'], filename)):
return flask.jsonify({'result':'False', 'message':'There is an affiliation missing from your Place list. Please re-try.'})
return flask.redirect(url_for('home'))
flask.session['filename'] = filename
return flask.jsonify({'result':'True'})
EDIT: You have to be careful if you need to check the file before saving, if you save right away you will override your old file, a good approach will be to save the file in a temp location, check that file and then save in the final directory, and obviously delete the file in the tmp folder.
PS: Also you have 2 return, if you need to respond as a HTML or JSON you have to check the headers of the request.

Related

How to load models into Spark-nlp in Foundry code authoring

I'm trying to load a model from Spark-nlp model hub into a Universal Sentence Encoder Model as shown in the snippet below:
stages = []
documentAssembler = [
DocumentAssembler()
.setInputCol("description")
.setOutputCol("document")
]
stages += documentAssembler
// Where file_path is the location of extracted files of the model
use = [
UniversalSentenceEncoder.load(f"{file_path}")
.setInputCols(["document"])
.setOutputCol("sentence_embeddings")
]
stages += use
The model is downloadable from [Spark-nlp Model Hub - Universal Sentence Encoder] (https://nlp.johnsnowlabs.com/2020/04/17/tfhub_use.html/) The function I use to extract the downloaded model zip file is below:
def get_unzipped_path(dataset_with_model_files):
'''
Return a folder location for a temporary folder after reading file locations
encapsulated within a dataframe
Reference: https://www.palantir.com/docs/foundry/transforms-python/unstructured-files/
'''
input_filesystem = dataset_with_model_files.filesystem()
full_folder_path = input_filesystem.hadoop_path
files = [file_status.path for file_status in input_filesystem.ls('*.zip')]
file_name = files[0]
with dataset_with_model_files.filesystem().open(file_name, mode='rb') as f:
with tempfile.NamedTemporaryFile() as tmp:
shutil.copyfileobj(f, tmp)
tmp.flush()
# file_stem = Path(os.path.basename(file_name)).stem # Remove .zip
unpacked_path = os.path.join(full_folder_path, file_name)
with zipfile.ZipFile(tmp) as zip_ref:
zip_ref.extractall(unpacked_path)
return unpacked_path
The general problem is that although they exist in the temporary folder, they cannot be found, a sample of an error is provided below:
java.io.FileNotFoundException: File file:/data/ssd/01/palantir/foundry~node-manager/data/local-dir/usercache/palantir/appcache/application_1675144414001_39656/container_e103_1675144414001_39656_01_000002/tfhub_use_en_2.4.0_2.4_1587136330099/metadata/part-00000 does not exist
I have tried the solution of code I've provided, i.e. extracting to a temporary folder then feeding that path to the load function
using spark.sql(f"ADD ARCHIVE {zipped_file}") to add the archive which I believe is unpacked and available by SparkFiles.get(f"{file_name}") as shown below:
def get_archive_file_loc(dataset_with_model_files, spark, glob):
'''
Return a file name after submitting archive to the Spark Context
Reference: https://www.palantir.com/docs/foundry/transforms-python/unstructured-files/
'''
input_filesystem = dataset_with_model_files.filesystem()
full_folder_path = os.getcwd()
all_file_names = [
f"{f.path}" for f in input_filesystem.ls(glob=glob)
]
file_name = all_file_names[0]
with dataset_with_model_files.filesystem().open(file_name, mode='rb') as f:
zipped_file = os.path.join(full_folder_path, file_name)
with open(zipped_file, 'wb') as zf:
shutil.copyfileobj(f, zf)
# Check if file has been added to spark context as duplicate
# attempts to add can crash the executors
# if file_name not in os.listdir(SparkFiles.getRootDirectory()):
try:
spark.sql(f"ADD ARCHIVE {zipped_file}") # noqa
except Exception as e:
pass
return file_name
Has anyone been successful or has an idea of how to load the model in Spark-nlp?

csv file isn't saved in different directory in python

My code reads a bunch of json files from a directory and extract "frequency" and "attenuation" data from those files and write to a csv file. Now I want to save that csv file in a different directory. The code executes without any error but saves in the current directory. Can anyone help to resolve this issue?
import csv
import glob
import json
import os
site = 'alpha'
frequency_to_check = '196050.000'
json_dir_name = 'V:/temp/test/'
json_pattern = os.path.join(json_dir_name, '*.json')
total_files = glob.glob(json_pattern)
atten = []
timestamp = []
save_path = 'V:/python/result/'
if not os.path.isdir(save_path):
os.makedirs(save_path)
filename = f'{site}-{frequency_to_check}.csv'
with open(filename, 'w', newline='') as csv_file:
for file in total_files:
with open(file) as json_file:
output_json = json.load(json_file)
for key in output_json:
if key['start-freq'] == frequency_to_check:
csv.writer(csv_file).writerow([key['start-freq'], key['attenuation']])
save_file = os.path.join(save_path, filename)
csv_file.close()
print(f'Total files processed {len(total_files)}')
The issue as far as I can deduce is here :
csv.writer(csv_file).writerow([key['start-freq'], key['attenuation']])
csv_file is your object that is loaded into memory , and everytime this line is executed you are just writing the rows in the already open file. After that you are just creating a new path :
save_file = os.path.join(save_path, filename)
which is never really used as you close the file too.
To fix this I would suggest that you put save_path as csv file :
import csv
import glob
import json
import os
site = 'alpha'
frequency_to_check = '196050.000'
json_dir_name = 'V:/temp/test/'
json_pattern = os.path.join(json_dir_name, '*.json')
total_files = glob.glob(json_pattern)
atten = []
timestamp = []
save_path = 'V:/python/result/'
if not os.path.isdir(save_path):
os.makedirs(save_path)
filename = f'{site}-{frequency_to_check}.csv'
save_file = os.path.join(save_path, filename)
with open(save_file, 'w', newline='') as csv_file:
for file in total_files:
with open(file) as json_file:
output_json = json.load(json_file)
for key in output_json:
if key['start-freq'] == frequency_to_check:
csv.writer(csv_file).writerow([key['start-freq'], key['attenuation']])
csv_file.close()
print(f'Total files processed {len(total_files)}')
I guess this should work.

Return file parse status django ajax

I have a script which is parsing and doing some actions on a json dir.
I want to show file status with current number of file being parsing.
here is what i do.
def post(self, request, *args, **kwargs):
self.response = _.default_response()
unlabeled = _.path_to_dict(settings.UNLABELEDJSONS)
for dirs in unlabeled['children']:
for jsonFile in dirs['children']:
if jsonFile['type']=='file':
#file current path with generated path.
filepath = "%s/%s/%s" % (settings.UNLABELEDJSONS,dirs['name'], jsonFile['name'])
targetpath = "%s/%s" % (settings.DATAFILESGENERATED, jsonFile['name'])
#Let's generate file.
_.call_testing_script(settings.GENERATEDRDA, filepath, targetpath)
#File generated let's move file.
shutil.move(targetpath, settings.DATAFILES)
#file is moved now and new path is existpath.
existpath = "%s/%s" % (settings.DATAFILES, jsonFile['name'])
#let's know database what we generated.
_.processDataFile(existpath,jsonFile['name'])
#We are done
self.response['status'] = True
return _.serialize_response(self.response)
Now i want to show current number of file being parsing IN THE UI
total number of files IN THE UI
and other information IN THE UI
Q: what library or method should I use?

Download files from FTP or HTTP using filenames in CSV - Python 3

I have a csv file of products for an ecommerce site I'm working on, as well as FTP access to the corresponding images for each product (~15K products).
I would like to use Python to pull only the images listed in the csv from either the FTP or HTTP and save them locally.
import urllib.request
import urllib.parse
import re
url='http://www.fakesite.com/pimages/filename.jpg'
split = urllib.parse.urlsplit(url)
filename = split.path.split("/")[-1]
urllib.request.urlretrieve(url, filename)
print(filename)
saveFile = open(filename,'r')
saveFile.close()
import csv
with open('test.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=",")
images = []
for row in readCSV:
image = row[14]
print(image)
The code I have currently can pull the filename from the URL and save the file as that filename. It can also pull the filename of the image from the CSV file. (filename and image are the exact same) What I need it to do, is input the filename, from the CSV into the end of the URL, and then save that file as the filename.
I have graduated to this:
import urllib.request
import urllib.parse
import re
import os
import csv
with open('test.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=",")
images = []
for row in readCSV:
image = row[14]
images.append(image)
x ='http://www.fakesite.com/pimages/'
url = os.path.join (x,image)
split = urllib.parse.urlsplit(url)
filename = split.path.split("/")[-1]
urllib.request.urlretrieve(url,filename)
saveFile = open(filename,'r')
saveFile.close()
Now this is great. It works perfectly. It is pulling the correct filename out of the CSV file, adding it on to the end of the URL, downloading the file, and saving it as the filename.
However, I can't seem to figure out how to make this work for more than one line of the CSV file. As of now, it takes that last line, and pulls the relevant information. Ideally, I would use the CSV file with all of the products on it, and it would go through and download every single one, not just the last image.
You're doing strange things ...
import urllib.request
import csv
# the images list should be outside the with block
images = []
IMAGE_COLUMN = 14
with open('test.csv') as csvfile:
# read csv
readCSV = csv.reader(csvfile, delimiter=",")
for row in readCSV:
# I guess 14 is the column-index of the image-name like 'image.jpg'
# I've put it in some constant
# now append all the image-names into the list
images.append(row[IMAGE_COLUMN])
# no need for the following
# image = row[14]
# images.append(image)
# make sure, root_url ends with a slash
# x was some strange name for an url
root_url = 'http://www.fakesite.com/pimages/'
# iterate through the list
for image in images:
# you don't need os.path.join, because that's operating system dependent.
# you don't need to urlsplit, because you have created the url yourself.
# you don't need to split the filename as it is the image name
# with the following line, the root_url must end with a slash
url = root_url + image
# urlretrieve saves the file as whatever image is into the current directory
urllib.request.urlretrieve(url, image)
or in short, that's all you need:
import urllib.request
import csv
IMAGE_COLUMN = 14
ROOT_URL = 'http://www.fakesite.com/pimages/'
images = []
with open('test.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=",")
for row in readCSV:
images.append(row[IMAGE_COLUMN])
for image in images:
url = ROOT_URL + image
urllib.request.urlretrieve(url, image)

Python Import csv file as dataframe with File chooser

I would like to import a csv file into python with FileChooser and display it as dataframe. Here is the code and it didn't work. Thanks for your kind help.
def get_open_filename(self):
filename = None
chooser = gtk.FileChooserDialog("Open File...", self.window,
gtk.FILE_CHOOSER_ACTION_OPEN,
(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
gtk.STOCK_OPEN, gtk.RESPONSE_OK))
response = chooser.run()
if response == gtk.RESPONSE_OK:
with open(chooser.get_filename(), 'rb') as csvfile:
don = DataFrame.from_csvfile(csvfile) ## I am confused here !!!
print don
chooser.destroy()
return filename
I believe from_csv file takes a filename not a file, using these docs
Try replacing
with open(chooser.get_filename(), 'rb') as csvfile:
don = DataFrame.from_csvfile(csvfile) ## I am confused here !!!
print don
with
don = DataFrame.from_csvfile(chooser.get_filename())
print don