Align area and line marks to same domain in Vega-Lite - vega-lite

I'm trying to build a line chart with error area in vega lite.
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {"url": "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/data_IC.csv"},
"transform": [
{"calculate": "toNumber(datum.x)", "as": "x2"},
{"calculate": "toNumber(datum.y)", "as": "y2"},
{"calculate": "toNumber(datum.CI_left)", "as": "l"},
{"calculate": "toNumber(datum.CI_right)", "as": "r"}
],
"params": [
{ "name": "scaleDomain", "expr": "[0, 10]"}
],
"encoding": {
"y": {
"field": "x2",
"type": "ordinal",
"sort": "descending"
}
},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "y",
"type": "quantitative",
"title": "Mean of Miles per Gallon (95% CIs)",
"scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
}
}
},
{
"mark": {"type": "area", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "l",
"scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
},
"x2": {
"field": "r"
},
"opacity": { "value": 0.3 }
}
}
]
}
So far, it's nice looking. But there's a problem: to get this to work I have had to manually constrain the scale domain for the two marks by setting a param called scaleDomain. This is a problem, because if ever the data changes I need to manually update the domain :/
However, look what would happen if I didn't manually set the scale to the same domain for the area plot and a line plot:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {"url": "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/data_IC.csv"},
"transform": [
{"calculate": "toNumber(datum.x)", "as": "x2"},
{"calculate": "toNumber(datum.y)", "as": "y2"},
{"calculate": "toNumber(datum.CI_left)", "as": "l"},
{"calculate": "toNumber(datum.CI_right)", "as": "r"}
],
"params": [
{ "name": "scaleDomain", "expr": "[0, 10]"}
],
"encoding": {
"y": {
"field": "x2",
"type": "ordinal",
"sort": "descending"
}
},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "y",
"type": "quantitative",
"title": "Mean of Miles per Gallon (95% CIs)",
// "scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
}
}
},
{
"mark": {"type": "area", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "l",
// "scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
},
"x2": {
"field": "r"
},
"opacity": { "value": 0.3 }
}
}
]
}
Yikes! The area plot gets a bit lost and doesn't track the line.
I can see one of two solutions to this problem:
Shared Scale: Coax the two mark layers to share the same scale
Manually Calculate Scale Domain: Use a parameter or a signal to store the desired domain.
I don't know how to do #1, but it seems like the correct approach. One imagined solution is something like:
"scale": {"align": "shared"},
I tried adding an aggregation to transform, but that of course results in summarizing the whole data set.
"transform": [
{"calculate": "toNumber(datum.x)", "as": "x2"},
{"calculate": "toNumber(datum.y)", "as": "y2"},
{"calculate": "toNumber(datum.CI_left)", "as": "l"},
{"calculate": "toNumber(datum.CI_right)", "as": "r"},
{ "aggregate": [
{
"field": "l",
"op": "min",
"as": "min"
},
{
"field": "r",
"op": "max",
"as": "max"
}
]}
],
It seems like I'd want to somehow put the transform directly into the layer or the params, but it's not clear how to do that.
I have seen these answers (finding max and min from dataset in vega and Post aggregation calculation & filter ##) but I don't know how to use them to achieve this.

You don't need any transforms and scales are automatically shared. Try this:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"width":500,
"height":500,
"data": {
"url": "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/data_IC.csv"
},
"encoding": {"y": {"field": "x", "type": "quantitative", "sort": "ascending"}},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "y",
"sort": null,
"type": "quantitative",
"title": "Mean of Miles per Gallon (95% CIs)",
"axis": {"orient": "top"}
}
}
},
{
"mark": {"type": "area", "interpolate": "cardinal"},
"encoding": {
"x": {"field": "CI_left", "type": "quantitative"},
"x2": {"field": "CI_right"},
"opacity": {"value": 0.3}
}
}
]
}

Related

Does anyone have an explanation as to why this graph is being made on vega?

Am trying to make a graph in vegalite whereby i show the evolution of stock prices overtime. Intuitively this should be very easy however for some reason only two lines seem to get output and they aren't reflective of the stock prices at all. Is there something wrong with my data or am i missing something quite basic?
{"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"title": {
"text": "Cases: UK nations",
"subtitle": [
"New cases by publish date, rolling rate"
],
"subtitleFontStyle": "italic",
"subtitleFontSize": 10,
"anchor": "start",
"color": "black"
},
"background": "whitesmoke",
"width": 800,
"height": 600,
"data": {
"url": "https://raw.githubusercontent.com/andrewsnowdon/andrewsnowdon.github.io/main/graph1megasheet.csv",
"format": {"type": "csv"}},
"layer": [
{
"encoding": {
"x": {"field": "Date", "type": "temporal"},
"y": {"field": "Open", "type": "quantitative"},
"color": {
"field":"Stockname",
"type": "nominal"
}
},
"layer": [
{"mark": "line"},
{
"params": [
{
"name": "label",
"select": {
"type": "point",
"encodings": ["x"],
"nearest": true,
"on": "mouseover"
}
}
],
"mark": "point",
"encoding": {
"opacity": {
"condition": {"param": "label", "empty": false, "value": 1},
"value": 0
}
}
}
]
},
{
"transform": [{"filter": {"param": "label", "empty": true}}],
"layer": [
{
"mark": {"type": "rule", "color": "grey"},
"encoding": {
"x": {"type": "temporal", "field": "Date", "aggregate": "min"}
}
},
{
"encoding": {
"text": {"type": "quantitative", "field": "Open"},
"x": {"type": "temporal", "field": "Date", "title": "Month"},
"y": {
"type": "quantitative",
"field": "Open",
"title": "Price"
}
},
"layer": [
{
"mark": {
"type": "text",
"stroke": "white",
"strokeWidth": 0.5,
"align": "left",
"dx": 5,
"dy": -5
}
},
{
"mark": {"type": "text", "align": "left", "dx": 5, "dy": -5},
"encoding": {"color": {"type": "quantitative"}}
}
]
}
]
}
],
"config": {}
}
Four of your stocks have identical data, so the lines are hidden below the last one drawn. You can see this by faceting your dataset:
{
"data": {
"url": "https://raw.githubusercontent.com/andrewsnowdon/andrewsnowdon.github.io/main/graph1megasheet.csv",
"format": {"type": "csv", "parse": {"Date": "date:'%d/%m/%Y'"}}
},
"mark": "line",
"encoding": {
"x": {"field": "Date", "type": "temporal"},
"y": {"field": "Open", "type": "quantitative"},
"facet": {"field": "Stockname", "type": "nominal", "columns": 3}
}
}
Notice the parse argument to the data format; this is required for correct parsing of your date entries (as mentioned in https://stackoverflow.com/a/70658380/2937831).

Set domainMin to 6 months before max date in data

I have the following Vega-Lite chart:
Open the Chart in the Vega Editor
Currently, I have the scale set as follows:
"scale": {"domainMin": "2021-06-01"}
However, what I really want is for the domainMin to be automatically calculated to be 6 months before the latest date in the notification_date field in the data.
I've looked at aggregate and expressions, but it's not exactly clear.
How can I get the maximum value of notification_date and subtract 6 months from it, and use that in "domainMin"?
Edit: To clarify, I don't want to filter the data. I want the user to be able to zoom out or pan to see the data outside the initial 6-month window. I get exactly what I want with "scale": {"domainMin": "2021-06-01"}, but this becomes out-of-date very quickly.
I have tried giving params and expr to domainMin, but I was unable to use the data fields in expr through datum.
The 2nd approach I tried will work for you, in this you will need to make use of joinaggregate/calculate/filter transforms. You will manually gather the max year and max months and then use it to filter your data.
Below is the modified config or refer the editor url:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"width": "container",
"height": "container",
"config": {
"group": {"fill": "#e5e5e5"},
"arc": {"fill": "#2b2c39"},
"area": {"fill": "#2b2c39"},
"line": {"stroke": "#2b2c39"},
"path": {"stroke": "#2b2c39"},
"rect": {"fill": "#2b2c39"},
"shape": {"stroke": "#2b2c39"},
"symbol": {"fill": "#2b2c39"},
"range": {
"category": [
"#2283a2",
"#003e6a",
"#a1ce5e",
"#FDBE13",
"#F2727E",
"#EA3F3F",
"#25A9E0",
"#F97A08",
"#41BFB8",
"#518DCA",
"#9460A8",
"#6F7D84",
"#D1DCA5"
]
}
},
"title": "South Western Sydney Cumulative and Daily COVID-19 Cases by LGA",
"data": {
"url": "https://davidwales.github.io/nsw-covid-19-data/confirmed_cases_table1_location.csv"
},
"transform": [
{
"filter": {
"and": [
{"field": "lhd_2010_name", "equal": "South Western Sydney"},
{"not": {"field": "lga_name19", "equal": "Penrith"}}
]
}
},
{"calculate": "utcyear(datum.notification_date)", "as": "yearNumber"},
{"calculate": "utcmonth(datum.notification_date)", "as": "monthNumber"},
{
"window": [
{"op": "count", "field": "notification_date", "as": "cumulative_count"}
],
"frame": [null, 0]
},
{
"joinaggregate": [
{"field": "monthNumber", "op": "max", "as": "max_month_count"},
{"field": "yearNumber", "op": "max", "as": "max_year"}
]
},
{"calculate": "abs(datum.max_month_count-6)", "as": "min_month_count"},
{
"filter": "datum.min_month_count < datum.monthNumber && datum.max_year === datum.yearNumber"
}
],
"layer": [
{
"selection": {
"date": {"type": "interval", "bind": "scales", "encodings": ["x"]}
},
"mark": {"type": "bar", "tooltip": true},
"encoding": {
"x": {
"timeUnit": "yearmonthdate",
"field": "notification_date",
"type": "temporal",
"title": "Date"
},
"color": {
"field": "lga_name19",
"type": "nominal",
"title": "LGA",
"legend": {"orient": "top", "columns": 4}
},
"y": {
"aggregate": "count",
"field": "lga_name19",
"type": "quantitative",
"title": "Cases",
"axis": {"title": "Daily Cases by SWS LGA"}
}
}
},
{
"mark": "line",
"encoding": {
"x": {
"timeUnit": "yearmonthdate",
"field": "notification_date",
"title": "Date",
"type": "temporal"
},
"y": {
"aggregate": "max",
"field": "cumulative_count",
"type": "quantitative",
"axis": {"title": "Cumulative Cases"}
}
}
}
],
"resolve": {"scale": {"y": "independent"}}
}
A bit simpler approach to filter approximately the last six months of data might look like this:
"transform": [
...,
{"joinaggregate": [{"op": "max", "field": "notification_date", "as": "last_date"}]},
{"filter": "datum.notification_date > datum.last_date - 6 * 30 * 24 * 60 * 60 * 1000"}
]
It makes use of the fact that dates are stored as millisecond time-stamps, and has the benefit that it will work across year boundaries.
This is not quite an answer to the question you asked, but if your data is reasonably up-to-date (that is, the most recent data point is close to the current date), you can do something like this:
"scale": { "domainMin": { "expr": "timeOffset('month', now(), -6)" } }

Vega Lite: Normalized Stacked Bar Chart + Overlay percentages as text

I have a stacked normalized bar chart similar to this:
https://vega.github.io/editor/#/examples/vega-lite/stacked_bar_normalize
I'm trying to show the related percentages (per bar segment) as text on the bars similar to: https://gist.github.com/pratapvardhan/00800a4981d43a84efdba0c4cf8ee2e1
I tried adding a transform field to calculate the percentages, but still couldn't get it to work after hours of trying.
I'm lost help 🥺
My best try:
{
"description":
"A bar chart showing the US population distribution of age groups and gender in 2000.",
"data": {
"url": "data/population.json"
},
"transform": [
{"filter": "datum.year == 2000"},
{"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"},
{
"stack": "people",
"offset": "normalize",
"as": ["v1", "v2"],
"groupby": ["age"],
"sort": [{"field": "gender", "order": "descending"}]
}
],
"encoding": {
"y": {
"field": "v1",
"type": "quantitative",
"title": "population"
},
"y2": {"field": "v2"},
"x": {
"field": "age",
"type": "ordinal"
},
"color": {
"field": "gender",
"type": "nominal",
"scale": {
"range": ["#675193", "#ca8861"]
}
}
},
"layer":[
{ "mark": "bar"},
{"mark": {"type": "text", "dx": 0, "dy": 0},
"encoding": {
"color":{"value":"black"},
"text": { "field": "v1", "type": "quantitative", "format": ".1f"}}
}
]
}
You can use a joinaggregate transform to normalize each group, and then use "format": ".1%" to display fractions as percents. Using this, there is no need to manually compute the stack transform; it is simpler to specify the stack via the encoding, as in the example you linked to.
Here is the result (open in editor):
{
"description": "A bar chart showing the US population distribution of age groups and gender in 2000.",
"data": {"url": "data/population.json"},
"transform": [
{"filter": "datum.year == 2000"},
{"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"},
{
"joinaggregate": [{"op": "sum", "field": "people", "as": "total"}],
"groupby": ["age"]
},
{"calculate": "datum.people / datum.total", "as": "fraction"}
],
"encoding": {
"y": {
"aggregate": "sum",
"field": "people",
"title": "population",
"stack": "normalize"
},
"order": {"field": "gender", "sort": "descending"},
"x": {"field": "age", "type": "ordinal"},
"color": {
"field": "gender",
"type": "nominal",
"scale": {"range": ["#675193", "#ca8861"]}
}
},
"layer": [
{"mark": "bar"},
{
"mark": {"type": "text", "dx": 20, "dy": 0, "angle": 90},
"encoding": {
"color": {"value": "white"},
"text": {"field": "fraction", "type": "quantitative", "format": ".1%"}
}
}
]
}

Vega-Lite layered chart: how to get tick marks and tick labels to span the entire axis?

I am working on a layered chart where I have both bars and rule lines. The issue I'm having is that on the x-axis, the tick marks and tick labels only appear under the bars and do not span the entire axis, making it so that there are no tick marks and labels underneath where the rule lines are located. Here is an example of what I'm seeing (link to Vega editor):
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"data": {"url": "data/movies.json"},
"transform": [
{"calculate": "2*datum.IMDB_Rating", "as": "UpperLimit"}
],
"layer": [
{
"mark": "bar",
"encoding": {
"x": {"bin": true, "field": "IMDB_Rating", "type": "quantitative"},
"y": {"aggregate": "count", "type": "quantitative"}
}
},
{
"mark": "rule",
"encoding": {
"x": {
"aggregate": "max",
"field": "UpperLimit",
"type": "quantitative"
},
"color": {"value": "red"},
"size": {"value": 5}
}
}
]
}
Image of issue
How do I get the tick marks and labels to span the entire axis? Thanks in advance for the help!
When you use a bin transform within an encoding, Vega-Lite adjusts the default axis properties to match the binning. You can re-adjust these manually via the encoding's scale and axis properties, but I think a simpler way is to move the bin transform to the chart's transform specification. Here is an example (Vega Editor):
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"data": {"url": "data/movies.json"},
"transform": [
{"calculate": "2*datum.IMDB_Rating", "as": "UpperLimit"},
{
"bin": true,
"field": "IMDB_Rating",
"as": ["IMDB_Rating_0", "IMDB_Rating_1"]
}
],
"layer": [
{
"mark": "bar",
"encoding": {
"x": {
"field": "IMDB_Rating_0",
"type": "quantitative",
"bin": "binned"
},
"x2": {"field": "IMDB_Rating_1"},
"y": {"aggregate": "count", "type": "quantitative"}
}
},
{
"mark": "rule",
"encoding": {
"x": {
"aggregate": "max",
"field": "UpperLimit",
"type": "quantitative"
},
"color": {"value": "red"},
"size": {"value": 5}
}
}
]
}

How do I add secondary Y axis in vega-lite with 2 series having the same scale?

I'm trying to build something like this:
example histogram with multiple independent series
I have 2 independent y-axis with orient left and right.
All series/layers using "orient":"right" should be sharing the same scale and all series/layers using "orient":"left" should be sharing the same scale.
I know of the "resolve" option as documented here but having read this How do I add a secondary Y axis to my vega-lite chart? and a bunch of other questions I couldn't find my particular use case.
My futile attempt so far looks like this:
example in online editor
example screenshot
{
"$schema": "https://vega.github.io/schema/vega-lite/v2.json",
"data": {"url": "data/movies.json"},
"transform":[
{"calculate":"datum.Production_Budget * 0.5","as":"y2"}
],
"layer":[
{
"mark": "bar",
"encoding": {
"x": {
"bin": true,
"field": "IMDB_Rating",
"type": "quantitative"
},
"y": {
"axis":{"orient":"left","title":"# of movies","grid":false},
"aggregate": "count",
"type": "quantitative"
}
}},
{
"mark": "line",
"encoding": {
"x": {
"bin": true,
"field": "IMDB_Rating",
"type": "quantitative"
},
"y": {
"field":"Production_Budget",
"aggregate": "average",
"type": "quantitative",
"axis":{"orient":"right","format":"s","title":"avg production budget in $"}
}
}
},
{
"mark": "line",
"encoding": {
"x": {
"bin": true,
"field": "IMDB_Rating",
"type": "quantitative"
},
"y": {
"field":"y2",
"aggregate": "average",
"type": "quantitative",
"axis":{"orient":"right","format":"s","title":"avg production budget in $"}
}
}
}
]
,"resolve": {
"scale":{"y":"independent"}
}
}
I've tried playing with the resolve option:
"resolve": {
"scale":{"axisLeft":"independent"}
}
"resolve": {
"axisLeft":{"y":"independent"}
}
"resolve": {
"axis":{"left":"independent"}
}
but none of them work.
You can do this by creating a layer within a layer: the two orient: "right" charts in a single layer with a shared axis, and the orient: "left" chart with an independent scale:
vega editor link
{
"$schema": "https://vega.github.io/schema/vega-lite/v2.json",
"data": {"url": "data/movies.json"},
"transform": [{"calculate": "datum.Production_Budget * 0.5", "as": "y2"}],
"layer": [
{
"mark": "bar",
"encoding": {
"x": {"bin": true, "field": "IMDB_Rating", "type": "quantitative"},
"y": {
"axis": {"orient": "left", "title": "# of movies", "grid": false},
"aggregate": "count",
"type": "quantitative"
}
}
},
{
"layer": [
{
"mark": "line",
"encoding": {
"x": {"bin": true, "field": "IMDB_Rating", "type": "quantitative"},
"y": {
"field": "Production_Budget",
"aggregate": "average",
"type": "quantitative",
"axis": {
"orient": "right",
"format": "s",
"title": "avg production budget in $"
}
}
}
},
{
"mark": "line",
"encoding": {
"x": {"bin": true, "field": "IMDB_Rating", "type": "quantitative"},
"y": {
"field": "y2",
"aggregate": "average",
"type": "quantitative",
"axis": {
"orient": "right",
"format": "s",
"title": "avg production budget in $"
}
}
}
}
]
}
],
"resolve": {"scale": {"y": "independent"}}
}