How to make and download pdf-file in Dash app in memory - plotly-dash

I have a dash app where I want the user to be able to download a pdf-report.
I am able to make it work locally with pdfkit and jinja based on this method, but to make it work I have to save the file on the server, then fetch it for download. My users might not want this, so I want to save the temporary file only in memory (client side).
This is my code:
from dash import Dash, dcc, html, Input, Output
import pdfkit
import jinja2
# Set up sample pdf with pdfkit and jinja based on this tutorial: https://www.youtube.com/watch?v=1IYtkkEOuoU
my_name = "Frank "
item1 = "TV"
today_date = "2022"
context = {'my_name': my_name,
'item1': item1,
'today_date': today_date}
template_loader = jinja2.FileSystemLoader('./')
template_env = jinja2.Environment(loader=template_loader)
html_template = 'my-basic-template.html'
template = template_env.get_template(html_template)
output = template.render(context)
# downloaded wkhtmltopdf locally:
path_wkhtmltopdf = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'
config = pdfkit.configuration(wkhtmltopdf=path_wkhtmltopdf)
output_pdf = 'pdf_generated.pdf'
pdfkit.from_string(output, output_pdf, configuration=config)
# Make Dash app
app = Dash(__name__)
app.layout = html.Div([
html.Button("Download pdf", id="btn-download-pdf"),
dcc.Download(id="download-pdf")
])
#app.callback(
Output("download-pdf", "data"),
Input("btn-download-pdf", "n_clicks"),
prevent_initial_call=True,
)
def func(n_clicks):
pdf = pdfkit.from_string(output, configuration=config)
print(output)
return dcc.send_file("pdf_generated.pdf", "test_pdf.pdf")
if __name__ == '__main__':
app.run_server(debug=True)
Is there a better way to do this?

Related

Flask form does not let me upload TSV files

I have the following flask app where I want to be able to upload a TXT or TSV file to a form. The problem is, when I try to upload a TXT file, it works, but when I try to upload a TSV file, I get the following error:
File "/Users/cdastmalchi/Desktop/author_script/main.py", line 89, in process_file
if not places_exist(os.path.join(app.config['UPLOAD_FOLDER'], filename)):
File "/Users/cdastmalchi/Desktop/author_script/main.py", line 27, in places_exist
infile = open(filename, 'rU')
IOError: [Errno 2] No such file or directory: './Authors_Template.tsv'
Authors_Template.tsv is a template file that gets downloaded from the form and goes into the Downloads, and then I want users to be able to edit this template and then re-upload it. When I make the template Authors_Template.txt instead and then Download and re-upload it, it works. How can I solve this problem? I've even tried narrowing down the ALLOWED_EXTENSIONS list to just TSV and I still get the same issue.
app.py
from werkzeug.utils import secure_filename
import flask, string, random
import json
import subprocess
import os
import re
import time
UPLOAD_FOLDER = '.'
ALLOWED_EXTENSIONS = set(['txt','tsv'])
app = flask.Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.secret_key = ''.join(random.choice(string.ascii_letters) for _ in range(20)) #needed to use flask.session
def allowed_file(filename):
return '.' in filename and \
filename.rsplit('.', 1)[1] in ALLOWED_EXTENSIONS
def places_exist(filename):
infile = open(filename, 'rU')
placeDict = {}
addresses_temp = []
addresses = []
places_temp =[]
places = []
places_exist = True
for i in infile:
item = i.rstrip("\n").split("\t")
places_temp.append(item[0])
addresses_temp.append(item[1])
p_index = (places_temp.index('Place')) + 1
a_index = (addresses_temp.index('Address')) + 1
places = places_temp[p_index:]
addresses = addresses_temp[a_index:]
infile.close()
infile = open(filename, 'rU')
return places_exist
#app.route('/', methods=['GET'])
def home():
return flask.render_template('index.html')
#app.route('/process_file', methods=['POST'])
def process_file():
#here, you can run all the checks as before, but instead of flash, you can return jsonified results to read in the front-end
if 'file' not in flask.request.files or not flask.request.files['file'].filename:
return flask.jsonify({'result':'False', 'message':'no files selected'})
return flask.redirect(url_for('home'))
file = flask.request.files['file']
filename = secure_filename(file.filename)
if not allowed_file(file.filename):
return flask.jsonify({'result':'False', 'message':'Must be TXT file!'})
return flask.redirect(url_for('home'))
if not places_exist(os.path.join(app.config['UPLOAD_FOLDER'], filename)):
return flask.jsonify({'result':'False', 'message':'There is an affiliation missing from your Place list. Please re-try.'})
return flask.redirect(url_for('home'))
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
flask.session['filename'] = filename
return flask.jsonify({'result':'True'})
UPDATE:
def process_file():
#here, you can run all the checks as before, but instead of flash, you can return jsonified results to read in the front-end
if 'file' not in flask.request.files or not flask.request.files['file'].filename:
return flask.jsonify({'result':'False', 'message':'no files selected'})
return flask.redirect(url_for('home'))
file = flask.request.files['file']
filename = secure_filename(file.filename)
if not allowed_file(file.filename):
return flask.jsonify({'result':'False', 'message':'Must be TXT file!'})
return flask.redirect(url_for('home'))
# Save the file in the temp folder
file.save(os.path.join(app.config['TEMP_FOLDER'], filename))
# Process the file
if not places_exist(os.path.join(app.config['TEMP_FOLDER'], filename)):
return flask.jsonify({'result':'False', 'message':'There is an affiliation missing from your Place list. Please re-try.'})
return flask.redirect(url_for('home'))
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
flask.session['filename'] = filename
return flask.jsonify({'result':'True'})
You are trying to read a file before its writing in your directory. First you need to save the file in your application upload directory then read it.
def process_file():
# here, you can run all the checks as before, but instead of flash, you can return jsonified results to read in the front-end
if 'file' not in flask.request.files or not flask.request.files['file'].filename:
return flask.jsonify({'result':'False', 'message':'no files selected'})
return flask.redirect(url_for('home'))
file = flask.request.files['file']
filename = secure_filename(file.filename)
if not allowed_file(file.filename):
return flask.jsonify({'result':'False', 'message':'Must be TXT file!'})
return flask.redirect(url_for('home'))
# Save the file in the correct Location
file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
# Process your file already saved
if not places_exist(os.path.join(app.config['UPLOAD_FOLDER'], filename)):
return flask.jsonify({'result':'False', 'message':'There is an affiliation missing from your Place list. Please re-try.'})
return flask.redirect(url_for('home'))
flask.session['filename'] = filename
return flask.jsonify({'result':'True'})
EDIT: You have to be careful if you need to check the file before saving, if you save right away you will override your old file, a good approach will be to save the file in a temp location, check that file and then save in the final directory, and obviously delete the file in the tmp folder.
PS: Also you have 2 return, if you need to respond as a HTML or JSON you have to check the headers of the request.

User to download file to their local directory

Backend - I wrote a python script that creates a csv file after some aggregation.
Frontend - Once the method finished running and the .csv file is generated and saved to a directory in the server, I want to be able to prompt the user to save the .csv file on their local computer (just like the windows prompt you get when you press "save as..." on a webpage).
This is an example of what I've done so far from what I learned in Return Excel file in Flask app and Download a file when button is pressed on web application? :
Sample code:
with open(save_path + unique_filename + ".csv", 'w', encoding = 'utf8') as g:
writer = csv.writer(g, lineterminator = '\n')
writer.writerow(['name', 'place', 'location'])
HTML:
#app.route('/login', method='POST')
def do_login():
category = request.forms.get('category')
return '''
<html><body>
Hello. Save Results
</body></html>
'''
#app.route("/getCSV", methods = ['GET', 'POST'])
def getPlotCSV():
return send_from_directory(save_path + unique_filename + ".csv", as_attachment=True)
if __name__ == "__main__":
run(app, host = 'localhost', port = 8000)
My questions are:
1) send_from_directory is from flask, what is the bottle equivalent?
2) Where in the code do I place the csv I created so the user can download it to their local machine?
3) What else is wrong with my code?
Bottle example: From https://bottlepy.org/docs/dev/tutorial.html
#route('/download/<filename:path>')
def download(filename):
return static_file(filename, root='/path/to/static/files', download=filename)

How to open multiple json files in Python?

I have to open a lot of json files in python. The following code works fine for a small amount of json files. But im already waiting 6 hours now and it's still not working. Im sure there should be a faster way than this.
base_dir = 'All Datasets EDIT/airlinesjson'
json_data_firstmonth2 = pd.DataFrame()
json_data_fmnoreset = pd.DataFrame()
for file in os.listdir(base_dir):
if 'json' in file:
json_path = os.path.join(base_dir, file)
json_data = pd.read_json(json_path, lines=True)
json_data_fmnoreset = pd.concat([json_data_fmnoreset,json_data], sort=False)
json_data_firstmonth2 = json_data_fmnoreset.reset_index()
Try to use this piece of code
json_list = [f for f in os.listdir(base_dir) if f.endswith('.json')]
for i in json_list:
with open(base_dir+ i) as json_file:
data = json.load(json_file)
...

Scrapinghub plugs my results in the log and not in item

I have a functioning spider project to extract urls content (no css). I crawled several set of data and stored them in a series of .csv files. Now I try to set it up to work on Scrapinghub in order to go for a long run scraping.
So far, I am able to get the spider uploaded and work on Scrapinghub. My problem is the result appears in the 'log' and not under the 'item'. The amount of data exceeds the log capacity and thus gives me an error.
How can I set my pipelines/extractor to work and return a js or csv file? I am happy with a solution that have the scraped data to be sent to a database. As I failed to achieve that too.
Any guidance is appreciated.
The spider:
class DataSpider(scrapy.Spider):
name = "Data_2018"
def url_values(self):
time = list(range(1538140980, 1538140820, -60))
return time
def start_requests(self):
allowed_domains = ["https://website.net"]
list_urls = []
for n in self.url_values():
list_urls.append("https://website.net/.../.../.../all/{}".format(n))
for url in list_urls:
yield scrapy.Request(url=url, callback=self.parse, dont_filter=True)
def parse(self, response):
data = response.body
items = positionsItem()
items['file'] = data
yield items
The pipeline
class positionsPipeline(object):
def process_item(self, item, spider):
return item
The settings
BOT_NAME = 'Positions'
SPIDER_MODULES = ['Positions.spiders']
NEWSPIDER_MODULE = 'Positions.spiders'
USER_AGENT = get_random_agent()
ROBOTSTXT_OBEY = True
CONCURRENT_REQUESTS = 32
DOWNLOAD_DELAY = 10
SPIDER_MIDDLEWARES = {
'Positions.middlewares.positionsSpiderMiddleware': 543,
}
DOWNLOADER_MIDDLEWARES = {
'Positions.middlewares.positionsDownloaderMiddleware': 543,
}
ITEM_PIPELINES = {
'Positions.pipelines.positionsPipeline': 300,
}
HTTPCACHE_ENABLED = True
HTTPCACHE_EXPIRATION_SECS = 0
HTTPCACHE_DIR = 'httpcache'
HTTPCACHE_IGNORE_HTTP_CODES = []
HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
the item
class positionsItem(scrapy.Item):
file = scrapy.Field()
Scrapinghub log shows:
13: 2019-02-28 07:46:13 ERROR Rejected message because it was too big: ITM {"_type":"AircraftpositionsItem","file":"{\"success\":true,\"payload\":{\"aircraft\":{\"0\":{\"000001\":[null,null,\"CFFAW\",9.95729,-84.1405,9500,90,136,1538140969,null,null,\"2000\",\"2-39710687\",[9.93233,-84.1386,277]],\"000023\":[\"ULAC\",null,\"PH4P4\",
From your settings file it looks like there isn't a predefined feed output mechanism for Scrapy to use. It's odd that it worked the first time locally (in producing a .csv file).
In any case, here's the extra lines in settings.py you need to add for the Scrapy to work. If you just want to feed the output locally to a .csv file:
# Local .csv version
FEED_URI = 'file://NAME_OF_FILE_PATH.csv'
FEED_FORMAT = 'csv'
I also use this version for uploading a json file to an S3 bucket
# Remote S3 .json version
AWS_ACCESS_KEY_ID = YOUR_AWS_ACCESS_KEY_ID
AWS_SECRET_ACCESS_KEY = YOUR_AWS_SECRET_ACCESS_KEY
FEED_URI = 's3://BUCKET_NAME/NAME_OF_FILE_PATH.json'
FEED_FORMAT = 'json'

Return file parse status django ajax

I have a script which is parsing and doing some actions on a json dir.
I want to show file status with current number of file being parsing.
here is what i do.
def post(self, request, *args, **kwargs):
self.response = _.default_response()
unlabeled = _.path_to_dict(settings.UNLABELEDJSONS)
for dirs in unlabeled['children']:
for jsonFile in dirs['children']:
if jsonFile['type']=='file':
#file current path with generated path.
filepath = "%s/%s/%s" % (settings.UNLABELEDJSONS,dirs['name'], jsonFile['name'])
targetpath = "%s/%s" % (settings.DATAFILESGENERATED, jsonFile['name'])
#Let's generate file.
_.call_testing_script(settings.GENERATEDRDA, filepath, targetpath)
#File generated let's move file.
shutil.move(targetpath, settings.DATAFILES)
#file is moved now and new path is existpath.
existpath = "%s/%s" % (settings.DATAFILES, jsonFile['name'])
#let's know database what we generated.
_.processDataFile(existpath,jsonFile['name'])
#We are done
self.response['status'] = True
return _.serialize_response(self.response)
Now i want to show current number of file being parsing IN THE UI
total number of files IN THE UI
and other information IN THE UI
Q: what library or method should I use?