How to use zero=false in vega-lite when also using a color encoding? - vega-lite

I am trying to figure out how to keep my y-axis from starting at zero. It works in general, but as soon as I add the color encoding (see below) it stops working and the axis includes zero again.
{
"data": {"name": "d"},
"mark": {"type": "bar"},
"encoding": {
"color": {"type": "nominal", "field": "group"},
"x": {"type": "nominal", "field": "model"},
"y": {
"type": "quantitative",
"field": "inf_f1",
"scale": {"zero": false}
}
},
"$schema": "https://vega.github.io/schema/vega-lite/v4.0.2.json",
"datasets": {
"d": [
{
"model": "lr-bow",
"inf_f1": 0.7991841662090597,
"group" : "A"
},
{
"model": "fcn-bow",
"inf_f1": 0.8220151833558302,
"group" : "B"
}
]
}
}

The reason the scale includes zero is that bars are stacked by default, and each bar has an implicit stacked zero-height bar for the group that does not appear, but does affect the automatically chosen axis limits. You can address this by setting stack to "none" in the y encoding (view in editor):
{
"data": {"name": "d"},
"mark": {"type": "bar"},
"encoding": {
"color": {"type": "nominal", "field": "group"},
"x": {"type": "nominal", "field": "model"},
"y": {
"type": "quantitative",
"field": "inf_f1",
"stack": "none",
"scale": {"zero": false}
}
},
"datasets": {
"d": [
{"model": "lr-bow", "inf_f1": 0.7991841662090597, "group": "A"},
{"model": "fcn-bow", "inf_f1": 0.8220151833558302, "group": "B"}
]
}
}

Related

Align area and line marks to same domain in Vega-Lite

I'm trying to build a line chart with an error area in Vega-Lite.
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {"url": "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/data_IC.csv"},
"transform": [
{"calculate": "toNumber(datum.x)", "as": "x2"},
{"calculate": "toNumber(datum.y)", "as": "y2"},
{"calculate": "toNumber(datum.CI_left)", "as": "l"},
{"calculate": "toNumber(datum.CI_right)", "as": "r"}
],
"params": [
{ "name": "scaleDomain", "expr": "[0, 10]"}
],
"encoding": {
"y": {
"field": "x2",
"type": "ordinal",
"sort": "descending"
}
},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "y",
"type": "quantitative",
"title": "Mean of Miles per Gallon (95% CIs)",
"scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
}
}
},
{
"mark": {"type": "area", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "l",
"scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
},
"x2": {
"field": "r"
},
"opacity": { "value": 0.3 }
}
}
]
}
So far, it looks nice. But there's a problem: to get this to work I had to manually constrain the scale domain for the two marks by setting a param called scaleDomain. This is a problem because, if the data ever changes, I need to update the domain manually :/
However, look what would happen if I didn't manually set the scale to the same domain for the area plot and a line plot:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {"url": "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/data_IC.csv"},
"transform": [
{"calculate": "toNumber(datum.x)", "as": "x2"},
{"calculate": "toNumber(datum.y)", "as": "y2"},
{"calculate": "toNumber(datum.CI_left)", "as": "l"},
{"calculate": "toNumber(datum.CI_right)", "as": "r"}
],
"params": [
{ "name": "scaleDomain", "expr": "[0, 10]"}
],
"encoding": {
"y": {
"field": "x2",
"type": "ordinal",
"sort": "descending"
}
},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "y",
"type": "quantitative",
"title": "Mean of Miles per Gallon (95% CIs)",
// "scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
}
}
},
{
"mark": {"type": "area", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "l",
// "scale": {"type": "linear", "domain": {"expr": "scaleDomain"}},
"axis": {
"orient": "top"
}
},
"x2": {
"field": "r"
},
"opacity": { "value": 0.3 }
}
}
]
}
Yikes! The area plot gets a bit lost and doesn't track the line.
I can see one of two solutions to this problem:
1. Shared Scale: Coax the two mark layers to share the same scale
2. Manually Calculate Scale Domain: Use a parameter or a signal to store the desired domain.
I don't know how to do #1, but it seems like the correct approach. One imagined solution is something like:
"scale": {"align": "shared"},
I tried adding an aggregation to transform, but that of course results in summarizing the whole data set.
"transform": [
{"calculate": "toNumber(datum.x)", "as": "x2"},
{"calculate": "toNumber(datum.y)", "as": "y2"},
{"calculate": "toNumber(datum.CI_left)", "as": "l"},
{"calculate": "toNumber(datum.CI_right)", "as": "r"},
{ "aggregate": [
{
"field": "l",
"op": "min",
"as": "min"
},
{
"field": "r",
"op": "max",
"as": "max"
}
]}
],
It seems like I'd want to somehow put the transform directly into the layer or the params, but it's not clear how to do that.
I have seen these answers ("finding max and min from dataset in vega" and "Post aggregation calculation & filter"), but I don't know how to use them to achieve this.
You don't need any transforms and scales are automatically shared. Try this:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"width":500,
"height":500,
"data": {
"url": "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/data_IC.csv"
},
"encoding": {"y": {"field": "x", "type": "quantitative", "sort": "ascending"}},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "y",
"sort": null,
"type": "quantitative",
"title": "Mean of Miles per Gallon (95% CIs)",
"axis": {"orient": "top"}
}
}
},
{
"mark": {"type": "area", "interpolate": "cardinal"},
"encoding": {
"x": {"field": "CI_left", "type": "quantitative"},
"x2": {"field": "CI_right"},
"opacity": {"value": 0.3}
}
}
]
}

Legend Series Doubled in Line Chart

I've been trying to edit my legend on a line chart to use different symbols for each field in the series. The chart itself renders correctly, with each series having a different symbol, but the legend duplicates the series: one copy shows the original shapes with the correct colors, and the other shows the correct shapes with the wrong color (just black). Am I missing something about how these properties need to be combined so that the legend is not duplicated? Thanks in advance for the help. Link to online editor
Doubled Legend Series Image
(also how do I get images to just show up in the post??)
Thanks,
Bryan
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {"url": "data/barley.json"},
"layer": [
{
"mark": {"type": "line", "tooltip": true, "interpolate": "linear"},
"encoding": {
"stroke": {"field": "site", "type": "nominal", "legend": null},
"opacity": {
"condition": {
"test": {"field": "__selected__", "equal": "false"},
"value": 0.3
},
"value": 1
}
}
},
{
"mark": {"type": "point"},
"encoding": {
"shape": {"field": "site", "type": "nominal"},
"color": {"field": "site", "type": "nominal"},
"opacity": {
"condition": {
"test": {"field": "__selected__", "equal": "false"},
"value": 0.3
},
"value": 1
}
}
}
],
"encoding": {
"y": {
"field": "variety",
"type": "nominal",
"axis": {"grid": true},
"sort": {
"op": "average",
"field": "All Except Difference",
"order": "descending"
}
},
"x": {
"field": "yield",
"type": "quantitative",
"sort": {
"op": "average",
"field": "All Except Difference",
"order": "descending"
},
"scale": {"zero": false}
}
}
}

how to make marker for tooltip bigger

Here's my wandb vega. The problem is, right now, it is very hard to mouse over my line and get the tooltip to show. It is like you must hover over the exact pixel of the line with your mouse. How do I make the activation radius larger, so my tooltip shows up if I am approximately on top of the point of my line?
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"description": "A plot for an arbitrary number of lines",
"data": {
"name": "wandb"
},
"transform": [
{"filter": {"field": "${field:lineVal}", "valid": true}},
{"filter": {"field": "${field:step}", "valid": true}}
],
"title": "${string:title}",
"layer": [
{
"selection": {
"grid": {
"type": "interval",
"bind": "scales"
}
},
"mark": {"type": "line", "strokeWidth": 5, "interpolate": "linear", "tooltip": true},
"encoding": {
"x":{
"field": "${field:step}",
"type": "quantitative",
"title": "${string:xname}"
},
"y": {
"field": "${field:lineVal}",
"title": "y",
"type": "quantitative"
},
"color": {
"type": "nominal",
"field": "${field:lineKey}"
},
"strokeDash": {
"type": "nominal",
"field": "name"
}
}
}
]
}
You haven't provided the data along with your spec, so it is difficult to answer your specific case. However, you should be able to adapt the example code from https://vega.github.io/vega-lite/examples/interactive_multi_line_pivot_tooltip.html to achieve what you want.

Vega Lite: Normalized Stacked Bar Chart + Overlay percentages as text

I have a stacked normalized bar chart similar to this:
https://vega.github.io/editor/#/examples/vega-lite/stacked_bar_normalize
I'm trying to show the related percentages (per bar segment) as text on the bars similar to: https://gist.github.com/pratapvardhan/00800a4981d43a84efdba0c4cf8ee2e1
I tried adding a transform field to calculate the percentages, but still couldn't get it to work after hours of trying.
I'm lost, please help 🥺
My best try:
{
"description":
"A bar chart showing the US population distribution of age groups and gender in 2000.",
"data": {
"url": "data/population.json"
},
"transform": [
{"filter": "datum.year == 2000"},
{"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"},
{
"stack": "people",
"offset": "normalize",
"as": ["v1", "v2"],
"groupby": ["age"],
"sort": [{"field": "gender", "order": "descending"}]
}
],
"encoding": {
"y": {
"field": "v1",
"type": "quantitative",
"title": "population"
},
"y2": {"field": "v2"},
"x": {
"field": "age",
"type": "ordinal"
},
"color": {
"field": "gender",
"type": "nominal",
"scale": {
"range": ["#675193", "#ca8861"]
}
}
},
"layer":[
{ "mark": "bar"},
{"mark": {"type": "text", "dx": 0, "dy": 0},
"encoding": {
"color":{"value":"black"},
"text": { "field": "v1", "type": "quantitative", "format": ".1f"}}
}
]
}
You can use a joinaggregate transform to normalize each group, and then use "format": ".1%" to display fractions as percents. Using this, there is no need to manually compute the stack transform; it is simpler to specify the stack via the encoding, as in the example you linked to.
Here is the result (open in editor):
{
"description": "A bar chart showing the US population distribution of age groups and gender in 2000.",
"data": {"url": "data/population.json"},
"transform": [
{"filter": "datum.year == 2000"},
{"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"},
{
"joinaggregate": [{"op": "sum", "field": "people", "as": "total"}],
"groupby": ["age"]
},
{"calculate": "datum.people / datum.total", "as": "fraction"}
],
"encoding": {
"y": {
"aggregate": "sum",
"field": "people",
"title": "population",
"stack": "normalize"
},
"order": {"field": "gender", "sort": "descending"},
"x": {"field": "age", "type": "ordinal"},
"color": {
"field": "gender",
"type": "nominal",
"scale": {"range": ["#675193", "#ca8861"]}
}
},
"layer": [
{"mark": "bar"},
{
"mark": {"type": "text", "dx": 20, "dy": 0, "angle": 90},
"encoding": {
"color": {"value": "white"},
"text": {"field": "fraction", "type": "quantitative", "format": ".1%"}
}
}
]
}

Vega lite select N number of objects (count)

I just started using Vega lite and was wondering how to cut out everything after my 10th object (I have thousands of rows and am just interested in the top 10).
This is what I have so far:
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"data": {
"url": "https://raw.githubusercontent.com/DanStein91/Info-vis/master/anage.csv",
"format": {
"type": "csv"
}
},
"transform": [
{
"filter": {
"field": "Female_maturity_(days)",
"gt": 0
}
}
],
"title": {
"text": "",
"anchor": "middle"
},
"mark": "bar",
"encoding": {
"y": {
"field": "Common_name",
"type": "nominal",
"sort": {
"op": "mean",
"field": "Female_maturity_(days)",
"order": "descending"
}
},
"x": {
"field": "Female_maturity_(days)",
"type": "quantitative"
}
},
"config": {}
}
You can follow the Filtering Top K Items example from the documentation. The result looks something like this (view in vega editor):
{
"data": {
"url": "https://raw.githubusercontent.com/DanStein91/Info-vis/master/anage.csv",
"format": {"type": "csv", "parse": {"Female_maturity_(days)": "number"}}
},
"transform": [
{
"window": [{"op": "rank", "as": "rank"}],
"sort": [{"field": "Female_maturity_(days)", "order": "descending"}]
},
{"filter": "datum.rank <= 10"}
],
"mark": "bar",
"encoding": {
"y": {
"field": "Common_name",
"type": "nominal",
"sort": {
"op": "mean",
"field": "Female_maturity_(days)",
"order": "descending"
}
},
"x": {"field": "Female_maturity_(days)", "type": "quantitative"}
},
"title": {"text": "", "anchor": "middle"}
}
One note: when doing transforms on CSV data (as opposed to JSON data), it's important to use format.parse to specify the desired data type for the columns: by default, CSV columns are interpreted as strings, which can cause sorting-based operations to behave in unexpected ways.