Vega Lite: Normalized Stacked Bar Chart + Overlay percentages as text - vega-lite

I have a stacked normalized bar chart similar to this:
https://vega.github.io/editor/#/examples/vega-lite/stacked_bar_normalize
I'm trying to show the related percentages (per bar segment) as text on the bars similar to: https://gist.github.com/pratapvardhan/00800a4981d43a84efdba0c4cf8ee2e1
I tried adding a transform field to calculate the percentages, but still couldn't get it to work after hours of trying.
I'm lost — please help 🥺
My best try:
{
"description":
"A bar chart showing the US population distribution of age groups and gender in 2000.",
"data": {
"url": "data/population.json"
},
"transform": [
{"filter": "datum.year == 2000"},
{"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"},
{
"stack": "people",
"offset": "normalize",
"as": ["v1", "v2"],
"groupby": ["age"],
"sort": [{"field": "gender", "order": "descending"}]
}
],
"encoding": {
"y": {
"field": "v1",
"type": "quantitative",
"title": "population"
},
"y2": {"field": "v2"},
"x": {
"field": "age",
"type": "ordinal"
},
"color": {
"field": "gender",
"type": "nominal",
"scale": {
"range": ["#675193", "#ca8861"]
}
}
},
"layer":[
{ "mark": "bar"},
{"mark": {"type": "text", "dx": 0, "dy": 0},
"encoding": {
"color":{"value":"black"},
"text": { "field": "v1", "type": "quantitative", "format": ".1f"}}
}
]
}

You can use a joinaggregate transform to normalize each group, and then use "format": ".1%" to display fractions as percents. Using this, there is no need to manually compute the stack transform; it is simpler to specify the stack via the encoding, as in the example you linked to.
Here is the result (open in editor):
{
"description": "A bar chart showing the US population distribution of age groups and gender in 2000.",
"data": {"url": "data/population.json"},
"transform": [
{"filter": "datum.year == 2000"},
{"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"},
{
"joinaggregate": [{"op": "sum", "field": "people", "as": "total"}],
"groupby": ["age"]
},
{"calculate": "datum.people / datum.total", "as": "fraction"}
],
"encoding": {
"y": {
"aggregate": "sum",
"field": "people",
"title": "population",
"stack": "normalize"
},
"order": {"field": "gender", "sort": "descending"},
"x": {"field": "age", "type": "ordinal"},
"color": {
"field": "gender",
"type": "nominal",
"scale": {"range": ["#675193", "#ca8861"]}
}
},
"layer": [
{"mark": "bar"},
{
"mark": {"type": "text", "dx": 20, "dy": 0, "angle": 90},
"encoding": {
"color": {"value": "white"},
"text": {"field": "fraction", "type": "quantitative", "format": ".1%"}
}
}
]
}

Related

How can I add a year slider to this Ranged Dot Plot in Vega Lite?

I have a dataset with relevant values from 2000-2019, and when I load up the graph with this specification:
"data": {
"name": "chart6",
"url": "https://raw.githubusercontent.com/sebaconstable/sebaconstable.github.io/main/chart6data.csv"
},
"height": 300,
"width": 450,
"encoding": {
"x": {
"field": "average years in school",
"type": "quantitative",
"scale": {"domain": [0, 20]},
"title": "Average Years in School"
},
"y": {
"field": "Country",
"type": "nominal",
"axis": {"offset": 5, "ticks": false, "minExtent": 70, "domain": false}
}
},
"layer": [
{
"mark": "line",
"encoding": {
"detail": {"field": "Country", "type": "nominal"},
"color": {"value": "#BBBBBB"}
}
},
{
"mark": {"type": "point", "filled": true},
"encoding": {
"tooltip": [
{"field": "Country", "type": "nominal", "title": "Country"},
{"field": "QuintGap", "type": "quantitative", "title": "Difference between richest and poorest quintile"},
{"field": "Median % Pop", "type": "nominal", "title": "Median % of population in CCT programmes (2000-2019)"}
],
"color": {
"field": "Quintile",
"type": "nominal",
"title": null,
"scale": {"scheme": "inferno"}
},
"size": {
"field": "Median % Pop",
"type": "quantitative",
"legend":null,
"scale": {"range": [10, 100]}
},
"opacity": {"value": 1}
}
}
]
}
The points for every year appear on each country. I want to make it so it has a year slider and then only the two points for the selected year show.
I have tried many things. I added:
"transform": [
{"filter": {"field": "Quintile", "oneOf": ["Poorest Quintile", "Richest Quintile"]}},
{"filter": "datum.Year==selecta"}
],
"params": [
{
"name": "selecta",
"value": [{"year":2019}],
"bind": {
"input": "range",
"min": 2000,
"max": 2019,
"step": 1,
"name": "Select year:"
}
}
],
I placed this code above the first encoding, and it successfully creates a slider (which filters to the relevant data correctly); however, the rest of the chart disappears. I also tried adding a "oneOf" filter listing all 20 years, but this made the visualisation disappear as well.
I feel that I'm quite close to the solution but after several hours can't quite figure it out. Any help is much appreciated!
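Here you go. Compared with your attempt, the slider parameter's initial value no longer wraps the year in an object (`{"year": 2019}`), so the `datum.Year==selecta` filter can match a year when the chart is first rendered; the `Quintile` filter has also been removed.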
{
"data": {
"name": "chart6",
"url": "https://raw.githubusercontent.com/sebaconstable/sebaconstable.github.io/main/chart6data.csv"
},
"transform": [{"filter": "datum.Year==selecta"}],
"params": [
{
"name": "selecta",
"value": 2019,
"bind": {
"input": "range",
"min": 2000,
"max": 2019,
"step": 1,
"name": "Select year:"
}
}
],
"height": 300,
"width": 450,
"encoding": {
"x": {
"field": "average years in school",
"type": "quantitative",
"scale": {"domain": [0, 20]},
"title": "Average Years in School"
},
"y": {
"field": "Country",
"type": "nominal",
"axis": {"offset": 5, "ticks": false, "minExtent": 70, "domain": false}
}
},
"layer": [
{
"mark": "line",
"encoding": {
"detail": {"field": "Country", "type": "nominal"},
"color": {"value": "#BBBBBB"}
}
},
{
"mark": {"type": "point", "filled": true},
"encoding": {
"tooltip": [
{"field": "Country", "type": "nominal", "title": "Country"},
{
"field": "QuintGap",
"type": "quantitative",
"title": "Difference between richest and poorest quintile"
},
{
"field": "Median % Pop",
"type": "nominal",
"title": "Median % of population in CCT programmes (2000-2019)"
}
],
"color": {
"field": "Quintile",
"type": "nominal",
"title": null,
"scale": {"scheme": "inferno"}
},
"size": {
"field": "Median % Pop",
"type": "quantitative",
"legend": null,
"scale": {"range": [10, 100]}
},
"opacity": {"value": 1}
}
}
]
}

Align area and line marks to same domain in Vega-Lite

I'm trying to build a line chart with error area in vega lite.
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {"url": "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/data_IC.csv"},
"transform": [
{"calculate": "toNumber(datum.x)", "as": "x2"},
{"calculate": "toNumber(datum.y)", "as": "y2"},
{"calculate": "toNumber(datum.CI_left)", "as": "l"},
{"calculate": "toNumber(datum.CI_right)", "as": "r"}
],
"params": [
{ "name": "scaleDomain", "expr": "[0, 10]"}
],
"encoding": {
"y": {
"field": "x2",
"type": "ordinal",
"sort": "descending"
}
},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "y",
"type": "quantitative",
"title": "Mean of Miles per Gallon (95% CIs)",
"scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
}
}
},
{
"mark": {"type": "area", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "l",
"scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
},
"x2": {
"field": "r"
},
"opacity": { "value": 0.3 }
}
}
]
}
So far, it's nice looking. But there's a problem: to get this to work I have had to manually constrain the scale domain for the two marks by setting a param called scaleDomain. This is a problem, because if ever the data changes I need to manually update the domain :/
However, look what would happen if I didn't manually set the scale to the same domain for the area plot and a line plot:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {"url": "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/data_IC.csv"},
"transform": [
{"calculate": "toNumber(datum.x)", "as": "x2"},
{"calculate": "toNumber(datum.y)", "as": "y2"},
{"calculate": "toNumber(datum.CI_left)", "as": "l"},
{"calculate": "toNumber(datum.CI_right)", "as": "r"}
],
"params": [
{ "name": "scaleDomain", "expr": "[0, 10]"}
],
"encoding": {
"y": {
"field": "x2",
"type": "ordinal",
"sort": "descending"
}
},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "y",
"type": "quantitative",
"title": "Mean of Miles per Gallon (95% CIs)",
// "scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
}
}
},
{
"mark": {"type": "area", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "l",
// "scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
},
"x2": {
"field": "r"
},
"opacity": { "value": 0.3 }
}
}
]
}
Yikes! The area plot gets a bit lost and doesn't track the line.
I can see two possible solutions to this problem:
1. Shared Scale: Coax the two mark layers to share the same scale.
2. Manually Calculate Scale Domain: Use a parameter or a signal to store the desired domain.
I don't know how to do #1, but it seems like the correct approach. One imagined solution is something like:
"scale": {"align": "shared"},
I tried adding an aggregation to transform, but that of course results in summarizing the whole data set.
"transform": [
{"calculate": "toNumber(datum.x)", "as": "x2"},
{"calculate": "toNumber(datum.y)", "as": "y2"},
{"calculate": "toNumber(datum.CI_left)", "as": "l"},
{"calculate": "toNumber(datum.CI_right)", "as": "r"},
{ "aggregate": [
{
"field": "l",
"op": "min",
"as": "min"
},
{
"field": "r",
"op": "max",
"as": "max"
}
]}
],
It seems like I'd want to somehow put the transform directly into the layer or the params, but it's not clear how to do that.
I have seen these answers ("finding max and min from dataset in vega" and "Post aggregation calculation & filter"), but I don't know how to use them to achieve this.
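You don't need any transforms, and scales are automatically shared between layers. The important difference from your second spec is that the area layer's x channel now declares "type": "quantitative" explicitly; once the "scale" with "type": "linear" was commented out, nothing told Vega-Lite that the field was quantitative. Try this: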
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"width":500,
"height":500,
"data": {
"url": "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/data_IC.csv"
},
"encoding": {"y": {"field": "x", "type": "quantitative", "sort": "ascending"}},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "y",
"sort": null,
"type": "quantitative",
"title": "Mean of Miles per Gallon (95% CIs)",
"axis": {"orient": "top"}
}
}
},
{
"mark": {"type": "area", "interpolate": "cardinal"},
"encoding": {
"x": {"field": "CI_left", "type": "quantitative"},
"x2": {"field": "CI_right"},
"opacity": {"value": 0.3}
}
}
]
}

Does anyone have an explanation as to why this graph is being made on vega?

I am trying to make a graph in Vega-Lite that shows the evolution of stock prices over time. Intuitively this should be very easy; however, for some reason only two lines seem to be drawn, and they don't reflect the stock prices at all. Is there something wrong with my data, or am I missing something quite basic?
{"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"title": {
"text": "Cases: UK nations",
"subtitle": [
"New cases by publish date, rolling rate"
],
"subtitleFontStyle": "italic",
"subtitleFontSize": 10,
"anchor": "start",
"color": "black"
},
"background": "whitesmoke",
"width": 800,
"height": 600,
"data": {
"url": "https://raw.githubusercontent.com/andrewsnowdon/andrewsnowdon.github.io/main/graph1megasheet.csv",
"format": {"type": "csv"}},
"layer": [
{
"encoding": {
"x": {"field": "Date", "type": "temporal"},
"y": {"field": "Open", "type": "quantitative"},
"color": {
"field":"Stockname",
"type": "nominal"
}
},
"layer": [
{"mark": "line"},
{
"params": [
{
"name": "label",
"select": {
"type": "point",
"encodings": ["x"],
"nearest": true,
"on": "mouseover"
}
}
],
"mark": "point",
"encoding": {
"opacity": {
"condition": {"param": "label", "empty": false, "value": 1},
"value": 0
}
}
}
]
},
{
"transform": [{"filter": {"param": "label", "empty": true}}],
"layer": [
{
"mark": {"type": "rule", "color": "grey"},
"encoding": {
"x": {"type": "temporal", "field": "Date", "aggregate": "min"}
}
},
{
"encoding": {
"text": {"type": "quantitative", "field": "Open"},
"x": {"type": "temporal", "field": "Date", "title": "Month"},
"y": {
"type": "quantitative",
"field": "Open",
"title": "Price"
}
},
"layer": [
{
"mark": {
"type": "text",
"stroke": "white",
"strokeWidth": 0.5,
"align": "left",
"dx": 5,
"dy": -5
}
},
{
"mark": {"type": "text", "align": "left", "dx": 5, "dy": -5},
"encoding": {"color": {"type": "quantitative"}}
}
]
}
]
}
],
"config": {}
}
Four of your stocks have identical data, so the lines are hidden below the last one drawn. You can see this by faceting your dataset:
{
"data": {
"url": "https://raw.githubusercontent.com/andrewsnowdon/andrewsnowdon.github.io/main/graph1megasheet.csv",
"format": {"type": "csv", "parse": {"Date": "date:'%d/%m/%Y'"}}
},
"mark": "line",
"encoding": {
"x": {"field": "Date", "type": "temporal"},
"y": {"field": "Open", "type": "quantitative"},
"facet": {"field": "Stockname", "type": "nominal", "columns": 3}
}
}
Notice the parse argument to the data format; this is required for correct parsing of your date entries (as mentioned in https://stackoverflow.com/a/70658380/2937831).

Highest Value Wrong Colour

Just made a simple bar chart, but for some reason, the final value is the wrong colour?
Code:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"width": 800,
"height": 600,
"title": "Death Rates Amongst Ages",
"data": {"url": "https://raw.githubusercontent.com/githubuser0099/Repo55/main/AgeBracket_DeathRate.csv"},
"transform": [
{"calculate":"parseInt(datum.Death_Rate)", "as": "Death_Rate"}
],
"mark": "bar",
"encoding": {
"x": {"field": "Death_Rate", "type": "quantitative", "title": ""},
"y": {"field": "Age", "type": "nominal", "title": "", "sort": "-x"},
"color": {
"field": "Age",
"type": "nominal",
"scale": {"scheme": "reds"}
}
}
}
The problem with your colour scale is: "Age" is currently encoded as a string (nominal variable). You define the type of "Age" as "nominal", but use a sequential colour scale ("reds"). Your data also has some issues - there are some empty spaces before 5-9, and 10-14.
In string comparison, white space < "0" < "100" < "15".
To solve the issue, we can get the first number from the range, and then define another channel to encode this first number (but hide the legend), then in the colour channel, you can define the colour order based on this additional channel.
Check the result and the code below.
The intermediate calculated fields (Age_new through Age_new_4) show how the calculation works step by step.
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"width": 800,
"height": 600,
"title": "Death Rates Amongst Ages",
"data": {"url": "https://raw.githubusercontent.com/githubuser0099/Repo55/main/AgeBracket_DeathRate.csv"},
"transform": [
{"calculate":"parseInt(datum.Death_Rate)", "as": "Death_Rate"},
{"calculate": "split(datum['Age'], '-')[0]", "as": "Age_new"},
{"calculate": "replace(datum['Age_new'], ' ', '')", "as": "Age_new_2"},
{"calculate": "replace(datum['Age_new_2'], ' ', '')", "as": "Age_new_3"},
{"calculate": "parseInt(datum['Age_new_3'])", "as": "Age_new_4"}
],
"mark": "bar",
"encoding": {
"x": {"field": "Death_Rate", "type": "quantitative", "title": ""},
"y": {"field": "Age", "type": "nominal", "title": "", "sort": "-x"},
"opacity":{"field": "Age_new_4", "legend": null},
"color": {
"field": "Age",
"type": "ordinal",
"sort": "opacity",
"scale": {"scheme": "reds"}
}
}
}
Cheers,
KL

How to use zero=false in vega-lite when also using a color encoding?

I am trying to figure out how to stop my y-axis from starting at zero. It works in general, but if I add the color encoding (see below) it no longer works, and the axis includes zero again.
{
"data": {"name": "d"},
"mark": {"type": "bar"},
"encoding": {
"color": {"type": "nominal", "field": "group"},
"x": {"type": "nominal", "field": "model"},
"y": {
"type": "quantitative",
"field": "inf_f1",
"scale": {"zero": false}
}
},
"$schema": "https://vega.github.io/schema/vega-lite/v4.0.2.json",
"datasets": {
"d": [
{
"model": "lr-bow",
"inf_f1": 0.7991841662090597,
"group" : "A"
},
{
"model": "fcn-bow",
"inf_f1": 0.8220151833558302,
"group" : "B"
}
]
}
}
The reason the scale includes zero is that bars are stacked by default, and each bar has an implicit stacked zero-height bar for the group that does not appear, but does affect the automatically chosen axis limits. You can address this by setting stack to "none" in the y encoding (view in editor):
{
"data": {"name": "d"},
"mark": {"type": "bar"},
"encoding": {
"color": {"type": "nominal", "field": "group"},
"x": {"type": "nominal", "field": "model"},
"y": {
"type": "quantitative",
"field": "inf_f1",
"stack": "none",
"scale": {"zero": false}
}
},
"datasets": {
"d": [
{"model": "lr-bow", "inf_f1": 0.7991841662090597, "group": "A"},
{"model": "fcn-bow", "inf_f1": 0.8220151833558302, "group": "B"}
]
}
}