Cannot impute missing values - vega-lite

Have this image
Given this vega-lite
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {
"values": [
{
"timestamp": "2011-04-01T17:06:21.000Z",
"value": 0.44777528325189986
},
{
"timestamp": "2011-04-02T17:06:21.000Z",
"value": 0.44390285331388984
},
{
"timestamp": "2011-04-03T17:06:21.000Z",
"value": 0.44813958999449255
},
{
"timestamp": "2011-04-04T17:06:21.000Z",
"value": 0.4440416510172272
},
{
"timestamp": "2011-04-05T17:06:21.000Z",
"missing": "NO value KEY HERE!"
},
{
"timestamp": "2011-04-06T17:06:21.000Z",
"value": 0.3797480270068858
},
{
"timestamp": "2011-04-07T17:06:21.000Z",
"value": 0.31955288375970203
},
{
"timestamp": "2011-04-08T17:06:21.000Z",
"value": 0.3171368880067786
},
{
"timestamp": "2011-04-10T17:06:21.000Z",
"value": 0.30021395605134893
},
{
"timestamp": "2011-04-11T17:06:21.000Z",
"value": 0.3130485242947531
}
]
},
"encoding": {"y": {"field": "timestamp", "type": "temporal", "sort": "ascending"}},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "value",
"sort": null,
"type": "quantitative",
"axis": {"orient": "top"},
"impute": {"keyvals": ["value"], "method": "mean", "frame": [-5, 5]}
}
}
}
]
}
But I thought the impute line would cause it to fill that gap in the data:
"impute": {"keyvals": ["value"], "method": "mean", "frame": [-5, 5]}
Have tried many permutations of this, including:
changing keyvals to ["timestamp"]
Moving the impute line to inside the "encoding": {"y": ... definition
#2 but also switch keyvals to ["value"]
None of those seem to be working
Update
Also tried an impute in transform, and that doesn't work either:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {
"values": [
...
]
},
"transform": [
{
"impute": "value",
"key": "timestamp",
"frame": [-1, 1],
"method": "mean"
}
],
"encoding": {"y": {"field": "timestamp", "type": "temporal", "sort": "ascending"}},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "value",
"sort": null,
"type": "quantitative",
"axis": {"orient": "top"}
}
}
}
]
}
Update 2
Here's something that almost feels like progress, but doesn't behave how I would expect. This is the exact same data with the "transform" : [ "impute" : { ... approach, but now it's displaying imputed_value_value (which by the way is never mentioned in the docs) instead of value:
It does successfully impute, but it imputes (averages) everything, when I only want it to impute places with missing data. Is this how impute is supposed to work?
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {
"values": [
...
]
},
"transform": [
{
"impute": "value",
"key": "timestamp",
"frame": [-5, 5],
"method": "mean"
}
],
"encoding": {"y": {"field": "timestamp", "type": "temporal", "sort": "ascending"}},
"layer": [
{
"mark": {"type": "line", "interpolate": "cardinal"},
"encoding": {
"x": {
"field": "imputed_value_value",
"sort": null,
"type": "quantitative",
"axis": {"orient": "top"},
}
}
}
]
}

Related

Error: Undefined data set name: "source_1" in Vega-Lite interactive charts

I am making an interactive dashboard using Vega-Lite. The end graph has to look like this:
Vega-Lite Dashboard
The code I have so far works perfectly fine for the bar chart and map, but when I draw the histogram it gives following error:
Undefined data set name: "source_1"
This is the code I have so far:
"$schema": "https://vega.github.io/schema/vega-lite/v5.json"
"title": {
"text": "Exploring Irish",
"anchor": "middle",
"fontSize": 20,
"offset": 20,
"color": "brown"
},
"vconcat":[
{ "hconcat": [
{
"width": 500,
"height": 700,
"projection": {
"type": "conicConformal"
},
"layer": [
{
"data": {
"url": "https://gist.githubusercontent.com/carsonfarmer/9791524/raw/b27ca0d78d46a84664fe7ef709eed4f7621f7a25/irish-counties-segmentized.topojson",
"format": {
"type": "topojson",
"feature": "counties"
}
},
"transform": [{
"lookup": "id",
"from": {
"data": {"url": "https://raw.githubusercontent.com/colmr/vis_class/master/FakeAttractionDetails.csv"},
"key": "County",
"fields": ["Population"]
}
}],
"mark": {
"type": "geoshape",
"stroke": "white",
"fill":"#ccc"
}
},
{
"data": {
"url": "https://raw.githubusercontent.com/colmr/vis_class/master/FakeAttractionDetails.csv"
},
"mark": "circle",
"params": [{
"name": "Attrac",
"select": {"type": "point", "fields": ["Type"]},
"bind": "legend"
}],
"encoding": {
"longitude": {
"field": "Longitude",
"type": "quantitative"
},
"latitude": {
"field": "Latitude",
"type": "quantitative"
},
"color":{"field":"Type", "type":"nominal", "scale": {"range": ["#E69F00", "#0072B2", "#CC79A7","#009E73","#56B4E9"]}},
"size": {"value": 40},
"opacity": {"condition": {"param": "Attrac", "value": 1},
"value": 0},
"tooltip": [
{"field": "Name", "type": "nominal", "title": "Accommodation"},
{"field": "Type", "type": "nominal", "title": "Property Type"},
{"field": "Telephone", "type": "nominal", "title": "Contact"}
],
"href": {"field": "Url", "type": "nominal"}
}
}
]
},
{
"data": {
"url": "https://raw.githubusercontent.com/colmr/vis_class/master/FakeAttractionDetails.csv"
},
"width": 335,
"height": 700,
"mark": "bar",
"params": [{
"name": "Attrac",
"select": {"type": "point", "fields": ["Type"]},
"bind": "legend"
},
{ "name": "Attrac",
"select": {"type": "point", "encodings": ["y"]}
}
],
"encoding": {
"y": {
"field": "AddressRegion",
"type": "nominal",
"sort": {
"op": "count",
"field": "Type",
"order": "descending"
},
"axis":{"title":null, "labelFontSize": 15}
},
"x": {
"field": "Type",
"type": "nominal",
"aggregate":"count",
"axis":{"title":"Total Accommodations", "titleFontSize":15}
},
"color":{"field":"Type", "type":"nominal",
"scale": {"range": ["#E69F00", "#0072B2", "#CC79A7","#009E73","#56B4E9"]}
},
"opacity": {"condition": {"param": "Attrac", "value": 1},
"value": 0.05},
"order": {"aggregate": "count", "field": "Type", "type": "nominal", "sort": "descending"}
}
}
]
},
{
"data": {"url": "https://raw.githubusercontent.com/colmr/vis_class/master/FakeAttractionDetails.csv"},
"width": 950,
"height": 45,
"mark": "bar",
"params": [{
"name": "Attrac",
"select": {"type": "point", "fields": ["Type"]},
"bind": "legend"},
{ "name": "Attrac",
"select": {"type": "interval", "encodings": ["x"]}
}],
"encoding": {
"x": {
"field": "Popularity",
"bin": true,
"type": "quantitative"
},
"y": {"aggregate": "count"},
"color":{"field":"Type", "type":"nominal",
"scale": {"range": ["#E69F00", "#0072B2", "#CC79A7","#009E73","#56B4E9"]}},
"opacity": {"condition": {"param": "Attrac", "value": 1},
"value": 0.02}
}
}
],
"config": {
"legend": {
"orient":"top-left", "labelFontSize":15, "titleFontSize":15
}, "tick": {"thickness": 1.5, "bandSize": 18}
}
}
I tried making the histogram with another csv file and that worked fine. Any idea what is wrong with this dataset or this code?
It looks like there's some issue with specifying the same data URL in multiple places in the chart. If you move the data specification to the top level, it appears to work:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {
"url": "https://raw.githubusercontent.com/colmr/vis_class/master/FakeAttractionDetails.csv"
},
...

Axis label dissappear when adding zooming (Vega-Lite)

I try to show timeseries data as point charts with two x-axis labels (2 text marks as my main x-axis attribute should not be displayed), mainly one at the top and one at the bottom. This works with a layered approach but as soon as I add the zoomable parameter to the visual, the text mark for the axes labels disappear. Is there a way on how to solve this issue?
That's how the visual looks like - without adding the zooming feature:
Timeseries point visual with two measure attributes and top and bottom x-axis label
What I’ve tried so far
I tried to position the params at different positions in the code as I am also using a vertical rule but it did not work out.
I also tried to make use of the scale resolve but I was neither successful.
Within the resolve, I tried to make use of the labelBound axis information and set it to false.
Basically, here is the code that I am currently using
{
"data": {
"name": "dataset"
},
"encoding": {
"x": {
"field": "TIMESTAMP",
"timeUnit": "utcyearmonthdatehoursminutes",
"type": "ordinal",
"axis": {
"grid": false,
"title": null,
"orient": "bottom",
"labels": false
}
}
},
"vconcat": [
{
"hconcat": [
{
"layer": [
{
"transform": [
{
"fold": [
"ATTRIBUTE1",
"ATTRIBUTE2"
],
"as": [
"measure1",
"temp1"
]
}
],
"mark": {
"type": "point",
"filled": true,
"size": 20
},
"height": 150,
"width": 700,
"encoding": {
"x": {
"timeUnit": "utcyearmonthdatehoursminutes",
"field": "TIMESTAMP",
"type": "ordinal",
"axis": {
"title": null,
"labels": false,
"ticks": false
}
},
"y": {
"field": "temp1",
"type": "quantitative",
"axis": {
"title": null
},
"scale": {
"zero": false,
"domain": [
450,
490
]
}
},
"color": {
"field": "measure1",
"type": "nominal",
"legend": {
"title": "Measures",
"orient": "right"
}
},
"opacity": {
"condition": [
{
"param": "legendhighlight",
"value": 1,
"empty": false
},
{
"param": "hover",
"value": 1,
"empty": false
}
],
"value": 0.1
}
}
},
{
"mark": {
"type": "text",
"align": "left",
"angle": -90,
"fontSize": 10
},
"encoding": {
"x": {
"timeUnit": "utcyearmonthdatehoursminutes",
"field": "TIMESTAMP",
"type": "ordinal",
"axis": {
"title": null,
"labels": false
}
},
"text": {
"field": "Attribute_TopX"
},
"y": {
"value": -5
},
"color": {
"condition": [
{
"test": "datum['COLORATTRIBUTE']=='COLOR_ITEM1'",
"value": "green"
},
{
"test": "datum['COLORATTRIBUTE']=='COLOR_ITEM2'",
"value": "steelblue"
}
],
"value": "black"
}
}
},{
"mark": {
"type": "text",
"align": "right",
"angle": -90,
"fontSize": 10
},
"encoding": {
"x": {
"timeUnit": "utcyearmonthdatehoursminutes",
"field": "TIMESTAMP",
"type": "ordinal",
"axis": {
"title": null,
"labels": false
}
},
"text": {
"field": "Attribute_BottomX"
},
"y": {
"value": "height"
},
"color": {
"condition": [
{
"test": "datum['COLORATTRIBUTE']=='COLOR_ITEM1'",
"value": "green"
},
{
"test": "datum['COLORATTRIBUTE']=='COLOR_ITEM2'",
"value": "steelblue"
}
],
"value": "black"
}
}
},
{
"mark": "rule",
"encoding": {
"x": {
"field": "TIMESTAMP",
"type": "temporal"
},
"opacity": {
"condition": [
{
"param": "hover",
"value": 0.8,
"empty": false
}
],
"value": 0
},
"size": {
"value": 1
},
"params": [
{
"name": "hover",
"select": {
"type": "point",
"encodings": [
"x"
],
"nearest": true,
"on": "mouseover"
}
},
{
"name": "legendhighlight",
"select": {
"type": "point",
"fields": [
"measure1"
]
},
"bind": "legend"
}
]
}
]
},
{
"layer": [
{
"transform": [
{
"fold": [
"ATTRIBUTE1",
"ATTRIBUTE2"
],
"as": [
"measure1",
"temp1"
]
}
],
"mark": {
"type": "boxplot"
},
"height": 150,
"width": 100,
"encoding": {
"x": {
"field": "measure1",
"type": "nominal",
"axis": {
"labels": false,
"ticks": false,
"title": null
}
},
"y": {
"field": "temp1",
"type": "quantitative",
"axis": {
"labels": false,
"ticks": false,
"title": null
},
"scale": {
"zero": false
}
},
"color": {
"field": "measure1",
"type": "nominal",
"legend": null
}
}
}
]
}
]
}
],
"resolve": {
"scale": {
"y": "independent",
"x": "shared",
"color": "independent"
}
}
}
And here is the params code that I try to add using Vega-Lite v5:
"params": [
{
"name": "grid",
"select": "interval",
"bind": "scales"
}
],
Thank you for your help!

How to filter data in vega-lite?

I have a following code for line plot, I am not sure how to use the filter transform, I have the mark and encoding inside a layer to use the tooltip for the plot
{
"$schema": "https://vega.github.io/schema/vega-lite/v2.4.json",
"title": "Dashboard",
"data": {
"url" : {
"%context%": true,
"index": "paytrans",
"body": {
"size":10000,
"_source": ["Metrics","Value","ModelName"],
}
}
"format": {"property": "hits.hits"},
},
"layer": [
{
"mark": {
"type": "line",
"point": true
},
"encoding": {
"x": {"field": "_source.ModelName",
"type": "ordinal",
"title":"Models"
"axis": {
"labelAngle": 0
}
},
"y": {"field": "_source.Value", "type": "quantitative", "title":"Metric Score"
"scale": { "domain": [0.0, 1.0] }},
"color": {"field": "_source.Metrics", "type": "nominal", "title":"Metrics"},
"tooltip": [
{"field": "_source.Metrics", "type": "nominal", "title":"Metric"},
{"field": "_source.Value", "type": "quantitative", "title":"Value"}
]
}
}
]
}
If I add
"transform": [
{
"filter": "datum.Value <= 0.5"
}
],
Its not working, may I how to filter the Value Field
It appears that you don't have a field named Value; you have a field named _source.Value. So the correct way to filter would be:
"transform": [
{
"filter": "datum._source.Value <= 0.5"
}
],

How to highlight the zoomed in bar and know the details of that bar in vega-lite?

I am able to create the overview details bar graph and zoomed-in nicely when I select the range in the bottom graph. But I am having difficulty to highlight the bars in the zoomed-in graph and also know what bars have been selected.
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"actions": false,
"data": {
"values": [
{ "created": 1595053277243 },
{ "created": 1595053277244 },
{ "created": 1595055277243 },
{ "created": 1594880606860 },
{ "created": 1594880604261 }
]
},
"vconcat": [{
"width": 1500,
"height": 300,
"selection": {
"highlight": {"type": "single", "empty": "none", "on": "mouseover"},
"select": {"type": "multi"}
},
"mark": {
"type": "bar",
"fill": "#4C78A8",
"stroke": "black",
"cursor": "pointer"
},
"encoding": {
"x": {
"field": "created",
"type": "temporal",
"scale": {"domain": {"selection": "brush"}},
"axis": {"title": ""},
"timeUnit": "utcyearmonthdatehoursminutes",
"update": {
"fillOpacity": {
"condition": {"selection": "select", "value": 1},
"value": 0.3
},
"strokeWidth": {
"condition": [
{
"test": {
"and": [
{"selection": "select"},
"length(data(\"select_store\"))"
]
},
"value": 2
},
{"selection": "highlight", "value": 1}
],
"value": 0
}
}
},
"y": {
"field": "created",
"type": "quantitative",
"aggregate": "count"
}
},
"config": {
"scale": {
"bandPaddingInner": 0.2
}
}
}, {
"width": 1500,
"height": 100,
"padding": 10,
"mark": "bar",
"selection": {
"brush": {"type": "interval", "encodings": ["x"]}
},
"encoding": {
"x": {
"field": "created",
"type": "temporal",
"timeUnit": "utcyearmonthdatehours"
},
"y": {
"field": "created",
"type": "quantitative",
"aggregate": "count"
}
}
}]
}
I tried to put the fill-opacity and stroke-width in encoding but doesn't seem to work. I also tried to patch the compiled vega in vega-embed to listen to the bar click event but it doesn't listen to the top (zoomed in) graph.
Example of what I am trying to do
Vega-Lite encodings have no update property. You can specify the features directly in the encoding mapping:
"encoding": {
"x": {
"field": "created",
"type": "temporal",
"scale": {"domain": {"selection": "brush"}},
"axis": {"title": ""},
"timeUnit": "utcyearmonthdatehoursminutes"
},
"fillOpacity": {
"condition": {"selection": "select", "value": 1},
"value": 0.3
},
"strokeWidth": {
"condition": [
{
"test": {
"and": [
{"selection": "select"},
"length(data(\"select_store\"))"
]
},
"value": 2
},
{"selection": "highlight", "value": 1}
],
"value": 0
},
"y": {"field": "created", "type": "quantitative", "aggregate": "count"}
}
Open the Chart in the Vega Editor

Vega lite select N number of objects (count)

I just started using Vega lite and was wondering how to cut out everything after my 10th object (I have thousands of rows and am just interested in the top 10).
This is what I have so far:
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"data": {
"url": "https://raw.githubusercontent.com/DanStein91/Info-vis/master/anage.csv",
"format": {
"type": "csv"
}
},
"transform": [
{
"filter": {
"field": "Female_maturity_(days)",
"gt": 0
}
}
],
"title": {
"text": "",
"anchor": "middle"
},
"mark": "bar",
"encoding": {
"y": {
"field": "Common_name",
"type": "nominal",
"sort": {
"op": "mean",
"field": "Female_maturity_(days)",
"order": "descending"
}
},
"x": {
"field": "Female_maturity_(days)",
"type": "quantitative"
}
},
"config": {}
}
You can follow the Filtering Top K Items example from the documentation. The result looks something like this (view in vega editor):
{
"data": {
"url": "https://raw.githubusercontent.com/DanStein91/Info-vis/master/anage.csv",
"format": {"type": "csv", "parse": {"Female_maturity_(days)": "number"}}
},
"transform": [
{
"window": [{"op": "rank", "as": "rank"}],
"sort": [{"field": "Female_maturity_(days)", "order": "descending"}]
},
{"filter": "datum.rank <= 10"}
],
"mark": "bar",
"encoding": {
"y": {
"field": "Common_name",
"type": "nominal",
"sort": {
"op": "mean",
"field": "Female_maturity_(days)",
"order": "descending"
}
},
"x": {"field": "Female_maturity_(days)", "type": "quantitative"}
},
"title": {"text": "", "anchor": "middle"}
}
One note: when doing transforms on CSV data (as opposed to JSON data), it's important to use format.parse to specify the desired data type for the columns: by default, CSV columns are interpreted as strings, which can cause sorting-based operations to behave in unexpected ways.