Can I make it look more like a box plot - vega-lite

I am working with Elasticsearch which provides the min, max, Q1, Q2 and Q3 data. All I have to do is to plot it in form of a box plot. Kibana as of now only supports vega-lite version 2.6.0 and vega 4.3.0.
Here is a complete sample I have made.
{
"$schema": "https://vega.github.io/schema/vega-lite/v2.json",
"width": 100,
"height": 200,
"padding": 10,
"data": {
"name": "sample",
"values": [
{
"task": "A",
"min" : 72.66500091552734,
"max" : 139.54299926757812,
"q1" : 98.68599700927734,
"q2" : 120.12850189208984,
"q3" : 121.82099914550781
},
{
"task": "B",
"min" : 71.66500091552734,
"max" : 159.54299926757812,
"q1" : 88.68599700927734,
"q2" : 110.12850189208984,
"q3" : 141.82099914550781
},
{
"task": "c",
"min" : 45.66500091552734,
"max" : 169.54299926757812,
"q1" : 88.68599700927734,
"q2" : 110.12850189208984,
"q3" : 141.82099914550781
}
]
},
"layer": [
{
"width": 5,
"encoding": {
"x": {"type": "ordinal","field": "task"},
"y": {"type": "quantitative","field": "min"},
"y2": {"type": "quantitative","field": "max"},
"color": {"value": "#2CB5E8"}
},
"mark": {
"type": "bar"
}
},
{
"width": 20,
"encoding": {
"x": {"type": "ordinal","field": "task"},
"y": {"type": "quantitative","field": "q1"},
"y2": {"type": "quantitative","field": "q3"},
"color": {"value": "#EB985E"}
},
"mark": "bar"
},
{
"encoding": {
"x": {"type": "ordinal","field": "task"},
"y": {"type": "quantitative","field": "q2"},
"color": {"value": "#090502"}
},
"mark": "point"
}
]
}
This is what the plot looks like:
But box plot look something like this
The current version of vega-lite does support the boxplot. But I am stuck with older version.
I am trying to reduce the width of the bar plot for min and max. And keep it thick for Q1 and Q3. Somehow it is not working.
Also is it possible to plot Q2 as a flat line instead of a point?

You can construct a boxplot manually using a layer chart with a bar, tick, and rule mark. For example (view in editor):
{
"$schema": "https://vega.github.io/schema/vega-lite/v2.json",
"width": 100,
"height": 200,
"data": {
"name": "sample",
"values": [
{
"task": "A",
"min": 72.66500091552734,
"max": 139.54299926757812,
"q1": 98.68599700927734,
"q2": 120.12850189208984,
"q3": 121.82099914550781
},
{
"task": "B",
"min": 71.66500091552734,
"max": 159.54299926757812,
"q1": 88.68599700927734,
"q2": 110.12850189208984,
"q3": 141.8209991455078
},
{
"task": "c",
"min": 45.66500091552734,
"max": 169.54299926757812,
"q1": 88.68599700927734,
"q2": 110.12850189208984,
"q3": 141.8209991455078
}
]
},
"layer": [
{
"encoding": {
"x": {"type": "ordinal", "field": "task"},
"y": {"type": "quantitative", "field": "min"},
"y2": {"type": "quantitative", "field": "max"}
},
"mark": {"type": "rule", "color": "black"}
},
{
"encoding": {
"x": {"type": "ordinal", "field": "task"},
"y": {"type": "quantitative", "field": "q1"},
"y2": {"type": "quantitative", "field": "q3"}
},
"mark": {"type": "bar", "color": "#EB985E", "size": 20}
},
{
"encoding": {
"x": {"type": "ordinal", "field": "task"},
"y": {"type": "quantitative", "field": "q2"}
},
"mark": {"type": "tick", "color": "gray", "size": 20}
}
]
}

Related

How to position text at the top edge of a VegaLite chart?

I am trying to create a VegaLite chart with labelled highlighted areas which are specified in a separate dataset using [min_x, max_x] coordinates.
I've managed to highlight the area itself using a rect mark, but I'm struggling to properly position the label at the top edge of the chart.
This is the end result I'm trying to achieve. In this example I'm using the dy property of the text mark to position the label. Unfortunately this only works when the chart height is known in advance, which doesn't work for my use case.
If there is a better way to achieve the result I want, please let me know. This is the Vegalite specification for the chart above, without previously mentioned dy property:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"datasets": {
"test_data": [
{"x": 0, "y": 1.5},
{"x": 5, "y": 2},
{"x": 9, "y": 4},
{"x": 14, "y": 0}
],
"highlight_data": [
{"from_x": 2.3, "to_x": 3, "label": "AAA"},
{"from_x": 6.3, "to_x": 8, "label": "BBB"}
]
},
"data": {"name": "test_data"},
"layer": [
{
"data": {"name": "highlight_data"},
"transform": [
{"calculate": "(datum.from_x + datum.to_x) / 2", "as": "mean_x"}
],
"layer": [
{
"mark": {"type": "rect", "opacity": 0.3},
"encoding": {
"x": {"field": "from_x", "type": "quantitative"},
"x2": {"field": "to_x"},
"color": {"value": "#fcfc00"}
}
},
{
"mark": {"type": "text"},
"encoding": {
"text": {"field": "label"},
"x": {"field": "mean_x", "type": "quantitative"}
}
}
]
},
{
"mark": {"type": "line"},
"encoding": {
"x": {"field": "x", "type": "quantitative", "title": "X Label"},
"y": {"field": "y", "type": "quantitative", "title": "Y Label"}
}
}
],
"title": "Title",
"width": 800,
"height": 500
}
You can reference height inside an expression to make it dynamic.
"dy": { "expr": "-height + (height/2) -10"}
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"datasets": {
"test_data": [
{"x": 0, "y": 1.5},
{"x": 5, "y": 2},
{"x": 9, "y": 4},
{"x": 14, "y": 0}
],
"highlight_data": [
{"from_x": 2.3, "to_x": 3, "label": "AAA"},
{"from_x": 6.3, "to_x": 8, "label": "BBB"}
]
},
"data": {"name": "test_data"},
"layer": [
{
"data": {"name": "highlight_data"},
"transform": [
{"calculate": "(datum.from_x + datum.to_x) / 2", "as": "mean_x"}
],
"layer": [
{
"mark": {"type": "rect", "opacity": 0.3},
"encoding": {
"x": {"field": "from_x", "type": "quantitative"},
"x2": {"field": "to_x"},
"color": {"value": "#fcfc00"}
}
},
{
"mark": {"type": "text", "dy": { "expr": "-height + (height/2) -10"}},
"encoding": {
"text": {"field": "label"},
"x": {"field": "mean_x", "type": "quantitative"}
}
}
]
},
{
"mark": {"type": "line"},
"encoding": {
"x": {"field": "x", "type": "quantitative", "title": "X Label"},
"y": {"field": "y", "type": "quantitative", "title": "Y Label"}
}
}
],
"title": "Title",
"width": 800,
"height": 500
}

How to Annotate a Line in line Chart in Vega-Lite?

How to annotate a line in line chart in vega-lite
For the below code https://vega.github.io/editor/#/
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"layer": [
{
"data": {"url": "data/stocks.csv"},
"mark": "line",
"encoding": {
"x": {"field": "date", "type": "temporal"},
"y": {"field": "price", "type": "quantitative"},
"color": {"field": "symbol", "type": "nominal"}
}
},
{
"data": {"values": [{}]},
"mark": {"type": "rule", "strokeDash": [2, 2], "size": 2},
"encoding": {"x": {"datum": {"year": 2006}}}
}
]
}
We get plot
If I want to annotate the line at a specific position like (2004,400)
I tried this, it is working, but I don't want to pass hardcoded values like "a": 2004, "b": 400,
{
"data": {
"values": [
{"a": 2004, "b": 400}
]
},
"mark": {"type": "text", "fontSize" : 16, "fontWeight":"bold", "align" : "left"},
"encoding": {
"text": {"value": "Optimum"},
"x": {"field": "a", "type": "quantitative", "title":""},
"y": {"field": "b", "type": "quantitative", "title":""}
}
},
How to pass specific values from the data like average value of date (say:2004) and average value of price (say:400)?
or
just next to the line in the middle of y-axis
Transform and Aggregate Worked, I needed only for Y axis average position, so below code worked good. For another axis we can use same transform.
{
"mark": {"type": "text", "fontSize" : 16, "fontWeight":"bold", "align" : "left"},
"transform": [
{
"aggregate": [{
"op": "mean",
"field": "price",
"as": "mean_y_axis"
}],
"groupby": ["date"]
}
],
"encoding": {
"text": {"value": "Optimum"},
"x": {"field": "date",
"type": "quantitative"},
"y": {"field": "mean_y_axis",
"type": "quantitative"}
}
}

Does anyone have an explanation as to why this graph is being made on vega?

Am trying to make a graph in vegalite whereby i show the evolution of stock prices overtime. Intuitively this should be very easy however for some reason only two lines seem to get output and they aren't reflective of the stock prices at all. Is there something wrong with my data or am i missing something quite basic?
{"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"title": {
"text": "Cases: UK nations",
"subtitle": [
"New cases by publish date, rolling rate"
],
"subtitleFontStyle": "italic",
"subtitleFontSize": 10,
"anchor": "start",
"color": "black"
},
"background": "whitesmoke",
"width": 800,
"height": 600,
"data": {
"url": "https://raw.githubusercontent.com/andrewsnowdon/andrewsnowdon.github.io/main/graph1megasheet.csv",
"format": {"type": "csv"}},
"layer": [
{
"encoding": {
"x": {"field": "Date", "type": "temporal"},
"y": {"field": "Open", "type": "quantitative"},
"color": {
"field":"Stockname",
"type": "nominal"
}
},
"layer": [
{"mark": "line"},
{
"params": [
{
"name": "label",
"select": {
"type": "point",
"encodings": ["x"],
"nearest": true,
"on": "mouseover"
}
}
],
"mark": "point",
"encoding": {
"opacity": {
"condition": {"param": "label", "empty": false, "value": 1},
"value": 0
}
}
}
]
},
{
"transform": [{"filter": {"param": "label", "empty": true}}],
"layer": [
{
"mark": {"type": "rule", "color": "grey"},
"encoding": {
"x": {"type": "temporal", "field": "Date", "aggregate": "min"}
}
},
{
"encoding": {
"text": {"type": "quantitative", "field": "Open"},
"x": {"type": "temporal", "field": "Date", "title": "Month"},
"y": {
"type": "quantitative",
"field": "Open",
"title": "Price"
}
},
"layer": [
{
"mark": {
"type": "text",
"stroke": "white",
"strokeWidth": 0.5,
"align": "left",
"dx": 5,
"dy": -5
}
},
{
"mark": {"type": "text", "align": "left", "dx": 5, "dy": -5},
"encoding": {"color": {"type": "quantitative"}}
}
]
}
]
}
],
"config": {}
}
Four of your stocks have identical data, so the lines are hidden below the last one drawn. You can see this by faceting your dataset:
{
"data": {
"url": "https://raw.githubusercontent.com/andrewsnowdon/andrewsnowdon.github.io/main/graph1megasheet.csv",
"format": {"type": "csv", "parse": {"Date": "date:'%d/%m/%Y'"}}
},
"mark": "line",
"encoding": {
"x": {"field": "Date", "type": "temporal"},
"y": {"field": "Open", "type": "quantitative"},
"facet": {"field": "Stockname", "type": "nominal", "columns": 3}
}
}
Notice the parse argument to the data format; this is required for correct parsing of your date entries (as mentioned in https://stackoverflow.com/a/70658380/2937831).

Vega Lite: Normalized Stacked Bar Chart + Overlay percentages as text

I have a stacked normalized bar chart similar to this:
https://vega.github.io/editor/#/examples/vega-lite/stacked_bar_normalize
I'm trying to show the related percentages (per bar segment) as text on the bars similar to: https://gist.github.com/pratapvardhan/00800a4981d43a84efdba0c4cf8ee2e1
I tried adding a transform field to calculate the percentages, but still couldn't get it to work after hours of trying.
I'm lost help 🥺
My best try:
{
"description":
"A bar chart showing the US population distribution of age groups and gender in 2000.",
"data": {
"url": "data/population.json"
},
"transform": [
{"filter": "datum.year == 2000"},
{"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"},
{
"stack": "people",
"offset": "normalize",
"as": ["v1", "v2"],
"groupby": ["age"],
"sort": [{"field": "gender", "order": "descending"}]
}
],
"encoding": {
"y": {
"field": "v1",
"type": "quantitative",
"title": "population"
},
"y2": {"field": "v2"},
"x": {
"field": "age",
"type": "ordinal"
},
"color": {
"field": "gender",
"type": "nominal",
"scale": {
"range": ["#675193", "#ca8861"]
}
}
},
"layer":[
{ "mark": "bar"},
{"mark": {"type": "text", "dx": 0, "dy": 0},
"encoding": {
"color":{"value":"black"},
"text": { "field": "v1", "type": "quantitative", "format": ".1f"}}
}
]
}
You can use a joinaggregate transform to normalize each group, and then use "format": ".1%" to display fractions as percents. Using this, there is no need to manually compute the stack transform; it is simpler to specify the stack via the encoding, as in the example you linked to.
Here is the result (open in editor):
{
"description": "A bar chart showing the US population distribution of age groups and gender in 2000.",
"data": {"url": "data/population.json"},
"transform": [
{"filter": "datum.year == 2000"},
{"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"},
{
"joinaggregate": [{"op": "sum", "field": "people", "as": "total"}],
"groupby": ["age"]
},
{"calculate": "datum.people / datum.total", "as": "fraction"}
],
"encoding": {
"y": {
"aggregate": "sum",
"field": "people",
"title": "population",
"stack": "normalize"
},
"order": {"field": "gender", "sort": "descending"},
"x": {"field": "age", "type": "ordinal"},
"color": {
"field": "gender",
"type": "nominal",
"scale": {"range": ["#675193", "#ca8861"]}
}
},
"layer": [
{"mark": "bar"},
{
"mark": {"type": "text", "dx": 20, "dy": 0, "angle": 90},
"encoding": {
"color": {"value": "white"},
"text": {"field": "fraction", "type": "quantitative", "format": ".1%"}
}
}
]
}

Is it possible to have columns of donut chart with gray arc indicating the missing part?

Currently, the donut chart assumed that the max value is 100%. What I would like is for the category Chinese to have a percentage of 50% with gray arc indicating the missing 50%. The two other donut charts will behave similarly in their respective column.
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"description": "A simple donut chart with embedded data.",
"data": {
"values": [
{"category": "English", "value": 4},
{"category": "Malay", "value": 6},
{"category": "Chinese", "value": 10}
]
},
"mark": {"type": "arc", "innerRadius": 80},
"encoding": {
"column": {"field": "category"},
"theta": {"field": "value", "type": "quantitative"},
"color": {"field": "category", "type": "nominal"}
},
"view": {"stroke": null}
}
It's a bit more complex, but one way to do this is to use a transform to calculate the total, and then a layer chart to plot that total in the background.
Here is an example (open in editor):
{
"data": {
"values": [
{"category": "English", "value": 4},
{"category": "Malay", "value": 6},
{"category": "Chinese", "value": 10}
]
},
"transform": [
{"joinaggregate": [{"op": "sum", "field": "value", "as": "total"}]}
],
"facet": {"column": {"field": "category"}},
"spec": {
"layer": [
{
"mark": {"type": "arc", "innerRadius": 80, "color": "lightgray"},
"encoding": {"theta": {"field": "total", "type": "quantitative"}}
},
{
"mark": {"type": "arc", "innerRadius": 80},
"encoding": {
"theta": {"field": "value", "type": "quantitative"},
"color": {"field": "category"}
}
}
],
"view": {"stroke": null}
}
}