Create array for x axis with known min, max and step - vega-lite

I am trying to create values for x axis for each y data point and I can't create a formula.
This is what I am trying to achieve:
Here is my code:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data":{
"values":[
{
"days": {
"min" : 0,
"max" : 10,
"step" : 2,
"count" : [0.2,0.6,0.4,0.3,0.1]
}}
]},
"transform": [
{"calculate": "datum.days.count", "as": "y"},
{"flatten": ["y"]},
{"calculate": "datum.max/datum.step", "as": "x"}
],
"mark": "line",
"encoding": {
"x": { "scale": {"type": "linear", "domain":[0,10], "exponent": 2},"field": "x",
"type": "quantitative"
},
"y": {
"field": "y",
"type": "quantitative"
}
}}
Here count is on the y axis, and I need to generate an x axis value for each y point using the "min", "max" and "step" data (something like "x":[1,2,3,4,5]).
I tried scale function but it didn't work.
I need the x axis to have labels 0,2,4,6,8,10 - because my data max is 10 and step is 2 (10/2).

I'm not sure I fully understand what you're trying to achieve but does the following help?
{
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
  "description": "Line chart of days.count where each point's x value is derived from days.min and days.step.",
  "data": {
    "values": [
      {
        "days": {
          "min": 0,
          "max": 10,
          "step": 2,
          "count": [0.2, 0.6, 0.4, 0.3, 0.1]
        }
      }
    ]
  },
  "transform": [
    {"calculate": "datum.days.count", "as": "y"},
    {"flatten": ["y"]},
    {"window": [{"op": "row_number", "as": "i"}]},
    {
      "calculate": "datum.days.min + (datum.i - 1) * datum.days.step",
      "as": "x"
    }
  ],
  "mark": "line",
  "encoding": {
    "x": {
      "field": "x",
      "type": "quantitative",
      "scale": {"type": "linear", "domain": [0, 10]}
    },
    "y": {"field": "y", "type": "quantitative"}
  }
}

Related

how to align rule on x axis with line chart data in vega-lite

I am getting this chart where rule is not correctly aligned on x axis.
Group a has a value of 3.2, but it is not drawn between 2 and 4 on the x axis as it is supposed to be. The same goes for group b and group c.
I tried resolve, and scaling the line chart and the rule separately, without success.
The result should be that the rules for group a, group b and group c lie on the x axis at the positions of their values.
Here is my code:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"width": 606,
"height":384,
"data": {
"values": [
{
"datapoints": {
"division": [
{
"participation_duration": {
"total": {
"mean": 384,
"a": 190,
"b": 365,
"c": 495,
"distribution": {
"min": 0,
"max": 10,
"step": 2,
"data": [0,0.3,0.1,0.2,0.2, 0]
}
}
}
}
]
}
}
]
},
"transform": [
{ "calculate": "datum.datapoints.division", "as": "D" },
{ "flatten": ["D"] },
{"calculate": "datum.D.participation_duration.total.distribution.data", "as": "y"},
{"flatten": ["y"]},
{
"window": [{ "op": "count", "field": "y", "as": "i" }],
"frame": [null, 0]
},
{ "calculate": "(datum.i-1)*2", "as": "x" }
],
"layer": [
{
"mark": {"type":"line",
"point": false,
"interpolate": "cardinal",
"color":"blueviolet"
},
"encoding": {
"x": {
"field": "x",
"type": "quantitative",
"title": "",
"axis":{
"tickCount": 5,
"grid": true
}
},
"y": {
"scale":{"domain":[0,0.3]},
"field": "y",
"type": "quantitative",
"title": "",
"axis": {
"orient": "right",
"tickCount": 3,
"format": "%"
}
}
}
},
{
"transform": [
{"calculate": "datum.D.participation_duration.total", "as": "total"},
{"calculate": "datum.total.a/60", "as": "a group"},
{"calculate": "datum.total.b/60", "as": "b group"},
{"calculate": "datum.total.c/60", "as": "c group"},
{"fold": ["a group", "b group", "c group"]}
],
"encoding": {
"x":{
"field": "value",
"title":null,
"axis":{
"format":".2",
"grid": false,
"domain": false,
"labels":true,
"ticks": false,
"labelAlign":"center",
"labelAngle":0,
"labelPadding": 15
}
},
"color":{
"field": "key",
"legend":null
},
"text": {"field":"key"}
},
"layer": [
{
"mark":{"type":"rule",
"strokeWidth": 5,
"strokeDash":[3,16]
}},
{
"mark":{"type": "text", "align": "center", "dy":-220}
}
]
}
]
}
Please advise which part is missing.
Thank you
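You mean like this?
You need to explicitly set the type of the rule layer's x encoding to "quantitative" so that the rules are placed on the same numeric x scale as the line.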
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"width": 606,
"height": 384,
"data": {
"values": [
{
"datapoints": {
"division": [
{
"participation_duration": {
"total": {
"mean": 384,
"a": 190,
"b": 365,
"c": 495,
"distribution": {
"min": 0,
"max": 10,
"step": 2,
"data": [0, 0.3, 0.1, 0.2, 0.2, 0]
}
}
}
}
]
}
}
]
},
"transform": [
  {"calculate": "datum.datapoints.division", "as": "D"},
  {"flatten": ["D"]},
  {
    "calculate": "datum.D.participation_duration.total.distribution",
    "as": "dist"
  },
  {"calculate": "datum.dist.data", "as": "y"},
  {"flatten": ["y"]},
  {"window": [{"op": "row_number", "as": "i"}]},
  {"calculate": "datum.dist.min + (datum.i - 1) * datum.dist.step", "as": "x"}
],
"layer": [
{
"mark": {
"type": "line",
"point": false,
"interpolate": "cardinal",
"color": "blueviolet"
},
"encoding": {
"x": {
"field": "x",
"type": "quantitative",
"title": "",
"axis": {"tickCount": 5, "grid": true}
},
"y": {
"scale": {"domain": [0, 0.3]},
"field": "y",
"type": "quantitative",
"title": "",
"axis": {"orient": "right", "tickCount": 3, "format": "%"}
}
}
},
{
"transform": [
{"calculate": "datum.D.participation_duration.total", "as": "total"},
{"calculate": "datum.total.a/60", "as": "a group"},
{"calculate": "datum.total.b/60", "as": "b group"},
{"calculate": "datum.total.c/60", "as": "c group"},
{"fold": ["a group", "b group", "c group"]}
],
"encoding": {
"x": {
"field": "value",
"title": null,
"type": "quantitative",
"axis": {
"format": ".2",
"grid": false,
"domain": false,
"labels": true,
"ticks": false,
"labelAlign": "center",
"labelAngle": 0,
"labelPadding": 15
}
},
"color": {"field": "key", "legend": null},
"text": {"field": "key"}
},
"layer": [
{"mark": {"type": "rule", "strokeWidth": 5, "strokeDash": [3, 16]}},
{"mark": {"type": "text", "align": "center", "dy": -220}}
]
}
]
}

Align area and line marks to same domain in Vega-Lite

I'm trying to build a line chart with error area in vega lite.
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {"url": "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/data_IC.csv"},
"transform": [
{"calculate": "toNumber(datum.x)", "as": "x2"},
{"calculate": "toNumber(datum.y)", "as": "y2"},
{"calculate": "toNumber(datum.CI_left)", "as": "l"},
{"calculate": "toNumber(datum.CI_right)", "as": "r"}
],
"params": [
{ "name": "scaleDomain", "expr": "[0, 10]"}
],
"encoding": {
"y": {
"field": "x2",
"type": "ordinal",
"sort": "descending"
}
},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "y",
"type": "quantitative",
"title": "Mean of Miles per Gallon (95% CIs)",
"scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
}
}
},
{
"mark": {"type": "area", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "l",
"scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
},
"x2": {
"field": "r"
},
"opacity": { "value": 0.3 }
}
}
]
}
So far, it's nice looking. But there's a problem: to get this to work I have had to manually constrain the scale domain for the two marks by setting a param called scaleDomain. This is a problem, because if ever the data changes I need to manually update the domain :/
However, look what would happen if I didn't manually set the scale to the same domain for the area plot and a line plot:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {"url": "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/data_IC.csv"},
"transform": [
{"calculate": "toNumber(datum.x)", "as": "x2"},
{"calculate": "toNumber(datum.y)", "as": "y2"},
{"calculate": "toNumber(datum.CI_left)", "as": "l"},
{"calculate": "toNumber(datum.CI_right)", "as": "r"}
],
"params": [
{ "name": "scaleDomain", "expr": "[0, 10]"}
],
"encoding": {
"y": {
"field": "x2",
"type": "ordinal",
"sort": "descending"
}
},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "y",
"type": "quantitative",
"title": "Mean of Miles per Gallon (95% CIs)",
// "scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
}
}
},
{
"mark": {"type": "area", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "l",
// "scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
},
"x2": {
"field": "r"
},
"opacity": { "value": 0.3 }
}
}
]
}
Yikes! The area plot gets a bit lost and doesn't track the line.
I can see one of two solutions to this problem:
Shared Scale: Coax the two mark layers to share the same scale
Manually Calculate Scale Domain: Use a parameter or a signal to store the desired domain.
I don't know how to do #1, but it seems like the correct approach. One imagined solution is something like:
"scale": {"align": "shared"},
I tried adding an aggregation to transform, but that of course results in summarizing the whole data set.
"transform": [
{"calculate": "toNumber(datum.x)", "as": "x2"},
{"calculate": "toNumber(datum.y)", "as": "y2"},
{"calculate": "toNumber(datum.CI_left)", "as": "l"},
{"calculate": "toNumber(datum.CI_right)", "as": "r"},
{ "aggregate": [
{
"field": "l",
"op": "min",
"as": "min"
},
{
"field": "r",
"op": "max",
"as": "max"
}
]}
],
It seems like I'd want to somehow put the transform directly into the layer or the params, but it's not clear how to do that.
I have seen these answers ("finding max and min from dataset in vega" and "Post aggregation calculation & filter") but I don't know how to use them to achieve this.
You don't need any transforms and scales are automatically shared. Try this:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"width":500,
"height":500,
"data": {
"url": "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/data_IC.csv"
},
"encoding": {"y": {"field": "x", "type": "quantitative", "sort": "ascending"}},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "y",
"sort": null,
"type": "quantitative",
"title": "Mean of Miles per Gallon (95% CIs)",
"axis": {"orient": "top"}
}
}
},
{
"mark": {"type": "area", "interpolate": "cardinal"},
"encoding": {
"x": {"field": "CI_left", "type": "quantitative"},
"x2": {"field": "CI_right"},
"opacity": {"value": 0.3}
}
}
]
}

How to position text at the top edge of a VegaLite chart?

I am trying to create a VegaLite chart with labelled highlighted areas which are specified in a separate dataset using [min_x, max_x] coordinates.
I've managed to highlight the area itself using a rect mark, but I'm struggling to properly position the label at the top edge of the chart.
This is the end result I'm trying to achieve. In this example I'm using the dy property of the text mark to position the label. Unfortunately this only works when the chart height is known in advance, which doesn't work for my use case.
If there is a better way to achieve the result I want, please let me know. This is the Vegalite specification for the chart above, without previously mentioned dy property:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"datasets": {
"test_data": [
{"x": 0, "y": 1.5},
{"x": 5, "y": 2},
{"x": 9, "y": 4},
{"x": 14, "y": 0}
],
"highlight_data": [
{"from_x": 2.3, "to_x": 3, "label": "AAA"},
{"from_x": 6.3, "to_x": 8, "label": "BBB"}
]
},
"data": {"name": "test_data"},
"layer": [
{
"data": {"name": "highlight_data"},
"transform": [
{"calculate": "(datum.from_x + datum.to_x) / 2", "as": "mean_x"}
],
"layer": [
{
"mark": {"type": "rect", "opacity": 0.3},
"encoding": {
"x": {"field": "from_x", "type": "quantitative"},
"x2": {"field": "to_x"},
"color": {"value": "#fcfc00"}
}
},
{
"mark": {"type": "text"},
"encoding": {
"text": {"field": "label"},
"x": {"field": "mean_x", "type": "quantitative"}
}
}
]
},
{
"mark": {"type": "line"},
"encoding": {
"x": {"field": "x", "type": "quantitative", "title": "X Label"},
"y": {"field": "y", "type": "quantitative", "title": "Y Label"}
}
}
],
"title": "Title",
"width": 800,
"height": 500
}
You can reference height inside an expression to make it dynamic.
"dy": { "expr": "-height + (height/2) -10"}
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"datasets": {
"test_data": [
{"x": 0, "y": 1.5},
{"x": 5, "y": 2},
{"x": 9, "y": 4},
{"x": 14, "y": 0}
],
"highlight_data": [
{"from_x": 2.3, "to_x": 3, "label": "AAA"},
{"from_x": 6.3, "to_x": 8, "label": "BBB"}
]
},
"data": {"name": "test_data"},
"layer": [
{
"data": {"name": "highlight_data"},
"transform": [
{"calculate": "(datum.from_x + datum.to_x) / 2", "as": "mean_x"}
],
"layer": [
{
"mark": {"type": "rect", "opacity": 0.3},
"encoding": {
"x": {"field": "from_x", "type": "quantitative"},
"x2": {"field": "to_x"},
"color": {"value": "#fcfc00"}
}
},
{
"mark": {"type": "text", "dy": { "expr": "-height + (height/2) -10"}},
"encoding": {
"text": {"field": "label"},
"x": {"field": "mean_x", "type": "quantitative"}
}
}
]
},
{
"mark": {"type": "line"},
"encoding": {
"x": {"field": "x", "type": "quantitative", "title": "X Label"},
"y": {"field": "y", "type": "quantitative", "title": "Y Label"}
}
}
],
"title": "Title",
"width": 800,
"height": 500
}

Vega Lite: Normalized Stacked Bar Chart + Overlay percentages as text

I have a stacked normalized bar chart similar to this:
https://vega.github.io/editor/#/examples/vega-lite/stacked_bar_normalize
I'm trying to show the related percentages (per bar segment) as text on the bars similar to: https://gist.github.com/pratapvardhan/00800a4981d43a84efdba0c4cf8ee2e1
I tried adding a transform field to calculate the percentages, but still couldn't get it to work after hours of trying.
I'm lost help 🥺
My best try:
{
"description":
"A bar chart showing the US population distribution of age groups and gender in 2000.",
"data": {
"url": "data/population.json"
},
"transform": [
{"filter": "datum.year == 2000"},
{"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"},
{
"stack": "people",
"offset": "normalize",
"as": ["v1", "v2"],
"groupby": ["age"],
"sort": [{"field": "gender", "order": "descending"}]
}
],
"encoding": {
"y": {
"field": "v1",
"type": "quantitative",
"title": "population"
},
"y2": {"field": "v2"},
"x": {
"field": "age",
"type": "ordinal"
},
"color": {
"field": "gender",
"type": "nominal",
"scale": {
"range": ["#675193", "#ca8861"]
}
}
},
"layer":[
{ "mark": "bar"},
{"mark": {"type": "text", "dx": 0, "dy": 0},
"encoding": {
"color":{"value":"black"},
"text": { "field": "v1", "type": "quantitative", "format": ".1f"}}
}
]
}
You can use a joinaggregate transform to normalize each group, and then use "format": ".1%" to display fractions as percents. Using this, there is no need to manually compute the stack transform; it is simpler to specify the stack via the encoding, as in the example you linked to.
Here is the result (open in editor):
{
"description": "A bar chart showing the US population distribution of age groups and gender in 2000.",
"data": {"url": "data/population.json"},
"transform": [
{"filter": "datum.year == 2000"},
{"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"},
{
"joinaggregate": [{"op": "sum", "field": "people", "as": "total"}],
"groupby": ["age"]
},
{"calculate": "datum.people / datum.total", "as": "fraction"}
],
"encoding": {
"y": {
"aggregate": "sum",
"field": "people",
"title": "population",
"stack": "normalize"
},
"order": {"field": "gender", "sort": "descending"},
"x": {"field": "age", "type": "ordinal"},
"color": {
"field": "gender",
"type": "nominal",
"scale": {"range": ["#675193", "#ca8861"]}
}
},
"layer": [
{"mark": "bar"},
{
"mark": {"type": "text", "dx": 20, "dy": 0, "angle": 90},
"encoding": {
"color": {"value": "white"},
"text": {"field": "fraction", "type": "quantitative", "format": ".1%"}
}
}
]
}

Can I make it look more like a box plot

I am working with Elasticsearch which provides the min, max, Q1, Q2 and Q3 data. All I have to do is to plot it in form of a box plot. Kibana as of now only supports vega-lite version 2.6.0 and vega 4.3.0.
Here is a complete sample I have made.
{
"$schema": "https://vega.github.io/schema/vega-lite/v2.json",
"width": 100,
"height": 200,
"padding": 10,
"data": {
"name": "sample",
"values": [
{
"task": "A",
"min" : 72.66500091552734,
"max" : 139.54299926757812,
"q1" : 98.68599700927734,
"q2" : 120.12850189208984,
"q3" : 121.82099914550781
},
{
"task": "B",
"min" : 71.66500091552734,
"max" : 159.54299926757812,
"q1" : 88.68599700927734,
"q2" : 110.12850189208984,
"q3" : 141.82099914550781
},
{
"task": "c",
"min" : 45.66500091552734,
"max" : 169.54299926757812,
"q1" : 88.68599700927734,
"q2" : 110.12850189208984,
"q3" : 141.82099914550781
}
]
},
"layer": [
{
"width": 5,
"encoding": {
"x": {"type": "ordinal","field": "task"},
"y": {"type": "quantitative","field": "min"},
"y2": {"type": "quantitative","field": "max"},
"color": {"value": "#2CB5E8"}
},
"mark": {
"type": "bar"
}
},
{
"width": 20,
"encoding": {
"x": {"type": "ordinal","field": "task"},
"y": {"type": "quantitative","field": "q1"},
"y2": {"type": "quantitative","field": "q3"},
"color": {"value": "#EB985E"}
},
"mark": "bar"
},
{
"encoding": {
"x": {"type": "ordinal","field": "task"},
"y": {"type": "quantitative","field": "q2"},
"color": {"value": "#090502"}
},
"mark": "point"
}
]
}
This is what the plot looks like:
But a box plot looks something like this:
Newer versions of vega-lite do support the boxplot mark, but I am stuck with the older version.
I am trying to reduce the width of the bars for min and max while keeping the bars for Q1 and Q3 thick, but somehow it is not working.
Also, is it possible to plot Q2 as a flat line instead of a point?
You can construct a boxplot manually using a layer chart with a bar, tick, and rule mark. For example (view in editor):
{
"$schema": "https://vega.github.io/schema/vega-lite/v2.json",
"width": 100,
"height": 200,
"data": {
"name": "sample",
"values": [
{
"task": "A",
"min": 72.66500091552734,
"max": 139.54299926757812,
"q1": 98.68599700927734,
"q2": 120.12850189208984,
"q3": 121.82099914550781
},
{
"task": "B",
"min": 71.66500091552734,
"max": 159.54299926757812,
"q1": 88.68599700927734,
"q2": 110.12850189208984,
"q3": 141.8209991455078
},
{
"task": "c",
"min": 45.66500091552734,
"max": 169.54299926757812,
"q1": 88.68599700927734,
"q2": 110.12850189208984,
"q3": 141.8209991455078
}
]
},
"layer": [
{
"encoding": {
"x": {"type": "ordinal", "field": "task"},
"y": {"type": "quantitative", "field": "min"},
"y2": {"type": "quantitative", "field": "max"}
},
"mark": {"type": "rule", "color": "black"}
},
{
"encoding": {
"x": {"type": "ordinal", "field": "task"},
"y": {"type": "quantitative", "field": "q1"},
"y2": {"type": "quantitative", "field": "q3"}
},
"mark": {"type": "bar", "color": "#EB985E", "size": 20}
},
{
"encoding": {
"x": {"type": "ordinal", "field": "task"},
"y": {"type": "quantitative", "field": "q2"}
},
"mark": {"type": "tick", "color": "gray", "size": 20}
}
]
}