Understand python code to create population pyramid - bar-chart

I've found this really cool plot (matplotlib visualizations) to make a population pyramid. I would like to change the condition for the colors. Instead of men/women, I want to have the whole bars in the same color and this color depends on the tag that I write on Gender. So, some whole bars in blue and the rest in purple.
But I don't understand how the colors are generated.
I imagined that removing the .unique() in df[group_col].unique() would work, but it doesn't plot anything when I switch the tag.
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
# Read data
df = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/email_campaign_funnel.csv")

# Draw Plot
plt.figure(figsize=(13, 10), dpi=80)
group_col = 'Gender'
order_of_bars = df.Stage.unique()[::-1]

# One colour per distinct value of `group_col`, sampled at evenly spaced
# positions of the Spectral colormap (0.0 for the first group, 1.0 for the
# last). The distinct values are computed once and reused below.
groups = df[group_col].unique()
# max(..., 1) avoids a ZeroDivisionError when the column holds a single group.
color_span = max(len(groups) - 1, 1)
colors = [plt.cm.Spectral(i / float(color_span)) for i in range(len(groups))]

# Each group is drawn by its own barplot call, using its entry in `colors`
# and its own legend label.
for c, group in zip(colors, groups):
    sns.barplot(x='Users', y='Stage', data=df.loc[df[group_col] == group, :],
                order=order_of_bars, color=c, label=group)

# Decorations
plt.xlabel("$Users$")
plt.ylabel("Stage of Purchase")
plt.yticks(fontsize=12)
plt.title("Population Pyramid of the Marketing Funnel", fontsize=22)
plt.legend()
plt.show()
Pyramid plot
Any idea how to do that?

Related

How to round the borders of a card in a Dash app

I have the following code which gives me a scatter plot on a dbc.Card element within Dash. I'm trying to figure out what I need to do in order to make the corners of the card rounded.
Here's the code I have currently:
import dash
from dash import dcc
from dash import html
from dash import Input, Output
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd
# Load the caffeine dataset
caffeine = pd.read_csv('caffeine.csv')

# Scatter plot of caffeine content against volume, coloured by the 'type'
# column; the 'drink' column is added to the hover data.
scatter = px.scatter(
    caffeine,
    x='Volume (ml)',
    y='Caffeine (mg)',
    color='type',
    title='Volume x Caffeine',
    height=800,
    hover_data=['drink'],
    template='plotly_dark',
)
# layout of Dash app
# NOTE(review): `app` is not created in this snippet; presumably a
# dash.Dash(...) instance is defined beforehand - confirm.
app.layout = dbc.Container([
    # Page heading spanning the full row width
    dbc.Row(
        dbc.Col(
            html.H1("Analysis of Caffeinated Drinks",
                    className='text-center mb-4'),
            width=12,
        )
    ),
    # The scatter plot, wrapped in a card
    dbc.Row(
        dbc.Col(
            dbc.Card(
                dbc.CardBody(
                    dcc.Graph(
                        figure=scatter
                    )
                )
            ),
            width={'size': 10, 'offset': 1},
        )
    ),
])
I've tried using the answer on this stackoverflow question (How to round the corners of a card (body and image) in Bootstrap 4?) and as such I tried inserting a style={border-radius: '20px'} in a couple of different places but haven't had any luck producing a change.
Is there another method I could/should use, or else could someone please show me exactly what to do? I'm pretty new to Dash as well as html and css.
Many, many thanks in advance!
If you set the style in a CSS file, you need to use border-radius: 20px;, but if you set the style directly on the dbc.Card component, you have to use the camelCase key in its style dictionary, e.g. style={'borderRadius': '20px'}.
I would suggest you add in your style.css a class called something like card-style and there, you will add all the CSS stuff
So, in your code, you will add the class in the className of the dbc.Card
app.layout = dbc.Container([
    # Page heading spanning the full row width
    dbc.Row(
        dbc.Col(
            html.H1(
                "Analysis of Caffeinated Drinks",
                className='text-center mb-4',
            ),
            width=12,
        )
    ),
    # Eight demo cards, each styled through the "card-style" CSS class
    dbc.Row([
        dbc.Col(
            dbc.Card(
                dbc.CardBody(html.Div(f"card {n}")),
                className="card-style",
            ),
            width={'size': 3, 'offset': 1},
        )
        for n in range(8)
    ]),
])
And in your assets/style.css you will add something like:
/* Shared card look: thin black outline, rounded corners, fixed height. */
.card-style {
border: solid black 1px;
border-radius: 8px;
height: 120px;
}
It will help you to not repeat yourself by simply creating this kind of class for your elements.
You can check and test the HTML elements by pressing F12 and selecting what you're interested in;
The output of the code above is shown in the screenshot.

Catboost calc_feature_statistics not plotting

When running the example script that is given on the catboost website (https://catboost.ai/docs/concepts/python-reference_catboost_calc_feature_statistics.html), no plot appears at the end, but only empty space. Any suggestions what could be the cause for this?
from catboost import CatBoost
import numpy as np
# Random data: 200 rows of 10 features, plus 200 random label values
train_data = np.random.rand(200, 10)
label_values = np.random.rand(200)

# Fit a CatBoost model created with no explicit parameters
model = CatBoost()
model.fit(train_data, label_values)

# Statistics for the feature at index 2, with plotting requested
res = model.calc_feature_statistics(
    train_data,
    label_values,
    feature=2,
    plot=True,
)
If you are not using Jupyter, the plot will not render by default in your console.
In my case, I use Spyder and ran into the same issue.
The easiest is to save the plot to disk instead of trying to show it in the terminal.
Just add the plot_file argument instead:
# Save the plot to an HTML file on disk instead of rendering it inline
res = model.calc_feature_statistics(train_data,
                                    label_values,
                                    feature=2,
                                    plot_file="nameofyourplot.html")

Bokeh: Slider is not updating results on Hbar plot

I wrote the following code for using a slider to filter and update values on a Hbar plot in Bokeh.
The plot (as shown in the picture) outputs correctly, but when I move the slider nothing happens.
I'd greatly appreciate any feedback.
import pandas as pd
from bokeh.core.properties import value
from IPython.display import display, HTML
from bokeh.plotting import figure, show
from bokeh.layouts import row, column, gridplot
from bokeh.io import output_notebook, save, curdoc
from bokeh.models import ColumnDataSource, HoverTool, DatetimeTickFormatter, FactorRange, DataTable, TableColumn, DateFormatter
from bokeh.models.widgets import Panel, Tabs, Slider
import matplotlib.pyplot as plt
# Open the workbook and load its 'test_data' sheet.
# NOTE(review): `path` is not defined in this snippet - it must be set beforehand.
xls = pd.ExcelFile(path)
test_data = pd.read_excel(xls, 'test_data')
display(test_data)
AREA counts
A 500
B 100
C 70
D 50
E 40
F 20
G 10
H 2
def myplot(doc):
    """Build the horizontal bar chart and its slider, and add both to *doc*."""
    source = ColumnDataSource(pd.DataFrame(data=test_data))
    # Reversed list of areas, used as the categorical y-range
    area_list = source.data['AREA'].tolist()[::-1]

    # Creating the Bar Chart
    p = figure(
        y_range=area_list, plot_height=500, plot_width=500,
        title="Total counts per area",
        x_axis_label='counts', y_axis_label='AREA',
    )
    p.hbar(
        y='AREA', right='counts', height=1,
        line_color="black", fill_color='red', line_width=1,
        source=source,
    )

    def update_plot(attr, old, new):
        """Slider callback: select the rows whose counts reach the slider value."""
        min_counts = slider.value
        new_data = test_data.loc[test_data['counts'] >= min_counts]
        # NOTE(review): this only binds a new local name `source`; the glyph
        # created by p.hbar above still holds the original ColumnDataSource,
        # so moving the slider does not change the chart.
        source = ColumnDataSource(data=new_data)

    # Make a slider object: slider
    slider = Slider(start=1, end=100, step=1, value=1, title='counts')
    # Attach the callback to the 'value' property of slider
    slider.on_change('value', update_plot)
    doc.add_root(column(slider, p))


show(myplot)
You're replacing the value of the source variable, but the old source is still there, being used by all the created models.
Instead of recreating the source, try to reassign the data attribute of the old source:
# source = ColumnDataSource(data=new_data)
# Reassign .data on the existing source, so the models already using that
# source receive the filtered rows.
source.data = ColumnDataSource.from_df(new_data)

calling a function which shows sub category list in a select box using observe function is working only first time

I have a simple df that holds main categories and their sub-items. I made two select boxes: one to select the main category, and another that automatically shows the items belonging to it, using 'observe'.
But it only works for the first category selection; after I select another category, the sub-items are no longer shown in the other select box.
If I put 'observe' inside the 'def', the function works as I wanted.
But this approach is troublesome if I have many other 'def's executing on observe on change.
I am also looking for a way to make this code simple without using global.
Any ideas to make this code simpler and lighter are also welcome.
import pandas as pd
import ipywidgets as wg
from ipywidgets import *
from IPython.display import display, clear_output
# One row per category; each 'name' cell holds that category's list of items
df_list = [
    {'Category': 'fruit', 'name': ['banana', 'apple', 'mango']},
    {'Category': 'animal', 'name': ['lion', 'monkey', 'tiger', 'cat', 'dog']},
    {'Category': 'body', 'name': ['eyes', 'hands', 'ears', 'arms']},
    {'Category': 'office', 'name': ['table', 'computer', 'printer']},
]
df = pd.DataFrame(df_list)

Category_box = wg.Select(
    options=list(df.Category),
    continuous_update=False,
    layout=Layout(width='30%'),
)
# Items of the currently selected category ([0] takes the first matching
# row's list of names)
name_box = wg.Select(
    options=list(df[df['Category'] == Category_box.value].name)[0],
    continuous_update=False,
    layout=Layout(width='30%'),
)
hbox = wg.HBox(children=[Category_box, name_box])
display(hbox)


def select_on_change(change):
    """Rebuild and redisplay both select boxes when a 'value' change arrives."""
    global Category_box
    global name_box
    global hbox
    if change['name'] != 'value':
        return
    # New widgets replace the module-level ones, keeping the selected category
    Category_box = wg.Select(
        options=list(df.Category),
        continuous_update=False,
        layout=Layout(width='30%'),
        value=Category_box.value,
    )
    name_box = wg.Select(
        options=list(df[df['Category'] == Category_box.value].name)[0],
        continuous_update=False,
        layout=Layout(width='30%'),
    )
    hbox = wg.HBox(children=[Category_box, name_box])
    clear_output()
    display(hbox)
    # NOTE(review): the new Category_box created above has no observer
    # attached unless the next line is enabled.
    #Category_box.observe(select_on_change)


Category_box.observe(select_on_change)
Firstly, thanks for the complete and clear example.
The solution is to not create new widgets in your function that you are observing. Instead just update the .options of your name_box widget. This way, no globals required.
import pandas as pd
import ipywidgets as wg
from ipywidgets import *
from IPython.display import display, clear_output
# One row per category; each 'name' cell holds that category's list of items
df_list = [
    {'Category': 'fruit', 'name': ['banana', 'apple', 'mango']},
    {'Category': 'animal', 'name': ['lion', 'monkey', 'tiger', 'cat', 'dog']},
    {'Category': 'body', 'name': ['eyes', 'hands', 'ears', 'arms']},
    {'Category': 'office', 'name': ['table', 'computer', 'printer']},
]
df = pd.DataFrame(df_list)

Category_box = wg.Select(
    options=list(df.Category),
    continuous_update=False,
    layout=Layout(width='30%'),
)
# Items of the currently selected category ([0] takes the first matching
# row's list of names)
name_box = wg.Select(
    options=list(df[df['Category'] == Category_box.value].name)[0],
    continuous_update=False,
    layout=Layout(width='30%'),
)
hbox = wg.HBox(children=[Category_box, name_box])
display(hbox)


def select_on_change(change):
    """Point name_box at the items of the category selected in Category_box."""
    if change['name'] != 'value':
        return
    # Update the existing widget in place; no new widgets, no globals
    matching_rows = df[df['Category'] == Category_box.value]
    name_box.options = list(matching_rows.name)[0]


Category_box.observe(select_on_change)

Bokeh: Link Hover tooltips geometrically to subplots

I have multiple categorical heatmap plots that are in a single display that have identical shapes and x,y coordinates. When hovering on any of the subplots I would like the inspection on one plot to trigger a new inspection on all other plots in the grid and display multiple tooltips simultaneously.
I have researched this topic and found similar posts such as:
Bokeh: Synchronizing hover tooltips in linked plots
Takeaway from link above: There are 2 suggested answers to this question, which attempt to mimic hover tooltips with text glyphs, however these implementations are not successful when I copy and run the code on my own computer (the graphs display correctly but the hover text glyphs don't appear). I assume this could be because of Bokeh API updates, but I am unsure. My reputation doesn't allow comments or I'd address this issue there.
Coordinate tooltips across multiple plots #1547
Takeaway from link above: There is no reproducible data so I am not able to recreate the plot listed here, however bryevdv summarizes what I am trying to do quite efficiently which I'll quote below:
Link on geometry. You might want the geometry of the inspection on one plot to trigger a completely new inspection (using that same geometry) on another plot. So if the cursor is at (10.5, 7) on one plot, then the additional plots do a hit test at (10.5, 7) and if there are glyphs that have any hovers at that point, then a hover gets drawn there.
I have created some generalized data to illustrate my problem:
from bokeh.io import show, output_notebook
from bokeh.layouts import gridplot
from bokeh.models import LinearColorMapper, HoverTool
from bokeh.plotting import figure, show, output_file
from bokeh.transform import transform
import numpy as np
import pandas as pd
# Each row is [Left, Right, Value] for one cell of a 2x2 grid
data1 = [['A', 'A', 100], ['A', 'B', 175], ['B', 'A', 75], ['B', 'B', 200]]
data2 = [['A', 'A', 25], ['A', 'B', 100], ['B', 'A', 50], ['B', 'B', 75]]
data3 = [['A', 'A', 150], ['A', 'B', 75], ['B', 'A', 25], ['B', 'B', 125]]

# One frame per dataset, all with the same three columns
df1, df2, df3 = (
    pd.DataFrame(rows, columns=['Left', 'Right', 'Value'])
    for rows in (data1, data2, data3)
)
def heatmap(df, title):
    """Return a 2x2 categorical heatmap figure of *df* with a hover tool.

    Parameters:
        df: data with 'Left', 'Right' and 'Value' columns; it is passed to
            the rect glyph as its source.
        title: title shown on the figure.

    Returns:
        The figure, with one rect per row of *df* coloured by 'Value'.
    """
    letters = ['A', 'B']
    # Four-colour palette mapped linearly over the value range 0..200
    mapper = LinearColorMapper(palette=['#225ea8', '#41b6c4', '#a1dab4', '#ffffcc'], low=0, high=200)
    TOOLS = 'reset'
    p = figure(plot_width=255, plot_height=250, title=title,
               x_range=letters,
               y_range=list(reversed(letters)), x_axis_location='above',
               tools=TOOLS, toolbar_location='below')
    p.grid.grid_line_color = None
    p.grid.grid_line_width = 0.5
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_text_font_size = '9pt'
    p.axis.major_label_standoff = 0
    p.xaxis.major_label_orientation = 0
    hover = HoverTool()
    p.rect(x='Right', y='Left', width=1, height=1, line_color=None, source=df,
           fill_color={'field': 'Value', 'transform': mapper})
    # Data-source columns are referenced with '@name' in tooltips; a '#'
    # prefix would be displayed as literal text.
    hover.tooltips = [('Group', '@Left @Right'), ('Value', '@Value')]
    p.tools.append(hover)
    return p
# Send Bokeh output to the notebook
output_notebook()

# One heatmap per dataset, placed side by side in a single grid row
p1, p2, p3 = (
    heatmap(frame, label)
    for frame, label in ((df1, 'Plot 1'), (df2, 'Plot 2'), (df3, 'Plot 3'))
)
grid = gridplot([[p1, p2, p3]])
show(grid)
Output:
My goal is to be able to observe the values across multiple plots at one time without having to be directed to another page or source, so I am open to alternative ways of doing this that doesn't involve hover tooltips. Thanks!