I am making a dashboard in Python using Dash. I have a couple of questions about my code and how to improve it.
Context:
I have a lot of data (like 12M+ rows and ~30 columns) that I put into a SQLite database. The SQLite database is still pretty big (like 2GB). I don't need all of this data, and I have a query that does transformations in there (summing up columns grouped by a date field) which I then use for my dashboard for charts etc.
I want to give the user of the dashboard some flexibility to choose some filters to give a different type of view. For example, only selecting customers with "Product A" or only selecting male customers etc. So I have some dropdowns to select those options, which should then update the charts. This means that the SQL query gets re-run using these new filters.
Questions
Is there a faster way to do this? What is weird is that when I run the query on my local machine, it takes less than 1 minute to run. But when I run the dashboard, when I update the drop downs, it takes more like 5-6 minutes, which will be unfeasible to use.
It seems like when I quickly change two or more options on the drop downs, it takes even longer to update. I'm guessing because it does each update separately? So it would be running the query multiple times? Is there a way to incorporate a button that will run the callback only when I hit it with the new inputs?
Code for reference:
import os
import dash
import dash_table
import dash_core_components as dcc
import dash_html_components as html
import plotly.graph_objects as go
import pandas as pd
import sqlite3
app = dash.Dash()
app.layout = html.Div(children=[
html.H1(children='Dashboard'),
html.Div(children='''
Dashboard)
'''),
dcc.Dropdown(
id='filter1',
options=[
{'label': 'Filter1Option1', 'value': 1},
{'label': 'Filter1Option2', 'value': 0}
],
value=[0,1],
multi=True
),
dcc.Dropdown(
id='filter2',
options=[
{'label': 'Filter2Option1', 'value': 1},
{'label': 'Filter2Option2', 'value': 0}
],
value=[0,1],
multi=True
),
dcc.Graph(
id='myGraph'
)
])
#app.callback(
dash.dependencies.Output('myGraph', 'figure'),
[dash.dependencies.Input('filter1', 'value'),
dash.dependencies.Input('filter2', 'value')])
def update_everything(filter1, filter2):
conn = sqlite3.connect('dashboard_data.sqlite')
query = (
"""
QUERY BASED ON FILTERS. INVOLVES SOME CALCULATIONS AND GROUPING BY A DATE FIELD
"""
)
df = pd.read_sql(query, conn)
conn.close()
fig = go.Figure(data=go.Scatter(x=df['DATE'], y=df['METRIC']))
fig.update_layout(
yaxis_tickformat='%',
title='My Graph'
)
return fig
if __name__ == '__main__':
app.run_server(debug=True, host='0.0.0.0', port=8050)
Related
I have an animated graph that I update in a clientside callback. However, I want to update the text as well as the x and y values of the traces in Graph.extendData(), but it seems that that doesn't work. Is there something I'm missing? Alternatively, is there a different method I should be using instead?
Adopting the code from this post (Plotly/Dash display real time data in smooth animation), I'd like something like this, but where updating the text with extendData actually worked:
import dash
import dash_html_components as html
import dash_core_components as dcc
import numpy as np
from dash.dependencies import Input, Output, State
# Example data (a circle).
resolution = 1000
t = np.linspace(0, np.pi * 2, resolution)
x, y = np.cos(t), np.sin(t)
text = str(t)
# Example app.
figure = dict(data=[{'x': [], 'y': []}], text = [], layout=dict(xaxis=dict(range=[-1, 1]), yaxis=dict(range=[-1, 1])))
app = dash.Dash(__name__, update_title=None) # remove "Updating..." from title
app.layout = html.Div([
dcc.Graph(id='graph', figure=dict(figure)), dcc.Interval(id="interval", interval=25),
dcc.Store(id='offset', data=0), dcc.Store(id='store', data=dict(x=x, y=y, text=text, resolution=resolution)),
])
# This makes the graph fail to draw instead of just extending the text as wel!
app.clientside_callback(
"""
function (n_intervals, data, offset) {
offset = offset % data.x.length;
const end = Math.min((offset + 10), data.x.length);
return [[{x: [data.x.slice(offset, end)], y: [data.y.slice(offset, end)], text: [data.text.slice(offset, end)]}, [0], 500], end]
}
""",
[Output('graph', 'extendData'), Output('offset', 'data')],
[Input('interval', 'n_intervals')], [State('store', 'data'), State('offset', 'data')]
)
if __name__ == '__main__':
app.run_server()
Alternatively, is there a different method I should be using instead?
The dashboard include a dropdown, a table and a graph.
By selecting a item from dropdown, some rows of the table is shown and figure plot the data in the table.
I have two callback,
at the first one:
input : dropdown
output: table
second one:
input: table
output: figure
So the output of first callback is input of the second one.
The following code output look like this:
import dash
import dash_table
import pandas as pd
from copy import deepcopy
import plotly.graph_objects as go
import dash_core_components as dcc
import dash_html_components as html
import dash_bootstrap_components as dbc
from dash.dependencies import Input, Output
data = pd.DataFrame({"a": ["a1", "a2"],
"b": [1, 2]})
app = dash.Dash(
__name__,
external_stylesheets=[dbc.themes.BOOTSTRAP],
prevent_initial_callbacks=True,
)
app.layout = dbc.Container(
[
dbc.Row(dbc.Col([dcc.Dropdown(
id='dropdown1',
options=[
{'label': '1', 'value': 1},
{'label': '2', 'value': 2}
],
value=1)])),
dbc.Row(
dbc.Col([
dash_table.DataTable(
id="datatable",
columns=[dict(id=i,
name=j,
)
for i, j in zip(
["a", "b"],
["a", "b"],
)],
data=data.loc[data['a'] ==
"a1"].to_dict("records"),
sort_mode="single",
)
])),
dbc.Row([dbc.Col([html.Div(
children=dcc.Graph(id="graph"), className="card")
])]),
], fluid=True,
)
########################################################
#app.callback(
Output("datatable", "data"),
[Input("dropdown1", "value")],
prevent_initial_call=True
)
def update_current_table(value):
idx = int(value)
print(idx)
if idx == 1:
df = (data.loc[data["a"] == "a1"])
return df.to_dict("records")
else:
df = deepcopy(data.loc[data["a"] == "a2"])
return df.to_dict("records")
########################################################
# app.callback(
Output('graph', 'figure'),
[Input("datatable", "derived_virtual_data"),
Input("dropdown1", "value")
],
prevent_initial_call=True
)
def update_figure(table, value):
df = pd.DataFrame(table)
print(df)
fig = go.Figure()
return fig
if __name__ == "__main__":
app.run_server(debug=True, port=8000)
The problem here is callback run twice:
a b
0 a1 1
a b
0 a2 2
and for the first time print the previous values of the table
This cause error in the main code that I am working on.
How to prevent dash from runnig callback twice?
I found some other questions complaining about running the callback twic, but I could not found a proper solution for that.
The problem is that you have two inputs for the callback that's running twice:
# app.callback(
Output('graph', 'figure'),
[Input("datatable", "derived_virtual_data"),
Input("dropdown1", "value")
],
prevent_initial_call=True
)
def update_figure(table, value):
The dropdown changes, and triggers this callback, but it also triggers the table to update. The updated table causes the derived_virtual_datato trigger this callback again. You can fix this by making the dropdown value a State instead, like this:
# app.callback(
Output('graph', 'figure'),
[Input("datatable", "derived_virtual_data")],
[State("dropdown1", "value"),]
prevent_initial_call=True
)
def update_figure(table, value):
Edit:
On another read through, you don't even use the value from the dropdown in the second input. Just removing it entirely will also work:
# app.callback(
Output('graph', 'figure'),
[Input("datatable", "derived_virtual_data")],
prevent_initial_call=True
)
def update_figure(table):
I'm attempting to implement a live-update graph that is only displayed if the user selected it from the drop down menu. I've made a callback with input from "intervals" and from the dropdown menu "oxygen". I thought if "value" is "0" then no chart has been selected and I could return "null"(???), and if 'value' = '1', then the graph would update. Is this possible? or is my approach incorrect?
dcc.Graph(id='live-graph2', animate=False),
dcc.Interval(
id='graph-update',
interval=10000,
n_intervals=0
)),
dbc.Col(html.Div([
dcc.Dropdown(
id='oxygen',
options=[{'label': s, 'value': s}
for s in main_graph.keys()],
value=[],
multi=True),
#app.callback(
dash.dependencies.Output('live-graph2', 'figure'),
[dash.dependencies.Input('oxygen', 'value'),
dash.dependencies.Input('graph-update', 'n_intervals')],
)
def update_graph_scatter_2(n,value):
if value == 0:
....
else:
data = {}
I would approach the problem differently.
If you don't want to display the graph at all when no dropdown option is selected then the Graph component doesn't need to be in the layout by default.
Instead you could create a container component that your Graph component is appended to dynamically based on the option values of your Dropdown component.
Below is a simplified example using sample data, adjust per your requirements:
import dash
import dash_html_components as html
import dash_core_components as dcc
import plotly.express as px
import dash_bootstrap_components as dbc
df = px.data.iris()
app = dash.Dash(__name__)
app.layout = html.Div(
[
dcc.Interval(id="graph-update", interval=10000, n_intervals=0),
html.Div(id="graph-container"),
dbc.Col(
html.Div(
[
dcc.Dropdown(
id="oxygen",
options=[{"label": s, "value": s} for s in [1, 2, 3]],
value=[],
multi=True,
)
]
)
),
]
)
#app.callback(
dash.dependencies.Output("graph-container", "children"),
[
dash.dependencies.Input("oxygen", "value"),
dash.dependencies.Input("graph-update", "n_intervals"),
],
)
def update_graph_scatter_2(value, n):
if value:
# Change the line below to dynamically create the figure based on value
fig = px.scatter(df, x="sepal_width", y="sepal_length")
return dcc.Graph(id="live-graph2", figure=fig)
return html.Div()
I was googl'ing around trying to find solution for the following question (no luck so far). I am using Plotly Dash callback in order to build a graph:
#app.callback(
Output("graph", "figure"),
[Input("some-input", "value")],
[State("some-state", "value")])
def build_graph(input_value, state_value):
// computing data for graph_figure
return graph_figure
Now, I want to have another callback which will highlight a specific point on the graph (or add/remove a point) based on some input condition. I struggle to figure out what to use for Output in this case? Because I cannot output graph.figure again (Dash does not allow output to the same component from different callbacks). And re-drawing entire graph seems to be inefficient.
I will appreciate any suggestions.
It is possible to change the layout of your graph without redrawing the whole graph by using this library: https://github.com/jimmybow/mydcc
There is an example how to use it here: https://github.com/jimmybow/mydcc#3-mydccrelayout-
I have prepared a small example that adds an annotation on button click. Don't forget to pip install mydcc beforehand. I had to add a cache - in form of an invisible div - to preserve the old annotations when adding a new one.
import dash
import dash_core_components as dcc
import dash_html_components as html
import mydcc
import random
import json
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
initial_layout = {'title': 'Dash Data Visualization'}
app.layout = html.Div(children=[
html.H1(children='Hello Dash'),
html.Div(children='''
Dash: A web application framework for Python.
'''),
dcc.Graph(
id='example-graph',
figure={
'data': [{'x': [1, 2, 3], 'y': [4, 1, 2], 'name': 'Test'}],
'layout': initial_layout
}
),
mydcc.Relayout(id="rrr", aim='example-graph'),
html.Button('Add random annotation', id='button'),
html.Div(id='user-cache', style={'display': 'none'},
children=json.dumps(initial_layout)),
])
#app.callback(
[dash.dependencies.Output('rrr', 'layout'),
dash.dependencies.Output('user-cache', 'children')],
[dash.dependencies.Input('button', 'n_clicks')],
[dash.dependencies.State('user-cache', 'children')])
def update_graph_annotations(n_clicks, layout):
if n_clicks is not None:
layout = json.loads(layout)
if not 'annotations' in layout:
layout['annotations'] = []
layout['annotations'].append(dict(
x=random.uniform(0, 1) * 2 + 1,
y=random.uniform(0, 1) * 2 + 1,
xref="x",
yref="y",
text="Annotation" + str(n_clicks),
showarrow=True
))
return layout, json.dumps(layout)
return dash.no_update, dash.no_update
if __name__ == '__main__':
app.run_server(debug=True)
This is a very strange bug with plotly dash: downloaded files are not updated even when the source code is changed. I am using Mac OS X, and find the bug when using chrome or firefox but it works correctly using safari. The problem is that the downloaded file does not change after the code is updated.
import dash
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import numpy as np
import flask
import io
from dash.dependencies import Input, Output, State
# import plotly.graph_objs as go
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
app.layout = html.Div([
dcc.Dropdown(
id='dropdown',
options=[
{'label': 'New York City', 'value': 'NYC'},
{'label': 'Montreal', 'value': 'MTL'},
{'label': 'San Francisco', 'value': 'SF'}
],
value='NYC'
),
html.A('Download',
id='download',
href=''),
# dcc.Store(id='data_store',storage_type='memory'),
])
# Calculate the data and store it.
#app.callback(
Output('download', 'href'),
[Input('dropdown', 'value')])
def update(value):
return '/download_csv/get_it?value={}'.format(value)
#app.server.route('/download_csv/get_it')
def download_excel():
param = flask.request.args
print(param)
# Dummy dataframe for downloading.
d = {'col1': param['value'],'col2': 'Did it change?'}
df = pd.DataFrame(data=d,index=[0])
#Convert DF
str_io = io.StringIO()
df.to_csv(str_io, sep=",")
mem = io.BytesIO()
mem.write(str_io.getvalue().encode('utf-8'))
mem.seek(0)
str_io.close()
return flask.send_file(mem,
mimetype='text/csv',
attachment_filename='downloadFile.csv',
as_attachment=True)
if __name__ == '__main__':
app.run_server(debug=True)
Running this file, we are able to download a csv. However, when using chrome, if the attachment_filename is changed to 'donwloadFile_new.csv' and the download button is pressed again, I still get 'donwloadFile.csv' as the downloaded file! Also, the contents of the downloaded file don't change either.
However, in safari the download function works as expected: file names and contents are updated.
I just answered a similar question here.
you need to specify the cache_timeout parameter in send_file function.