Sankey Diagram (Alluvial Diagram) in Vega-lite - vega-lite

Anyone having an idea on how to create Sankey diagrams like those in Vega-lite?
https://observablehq.com/#d3/sankey-diagram
Input would be data like
From | To | Amount

Using the blog post from Yuri Astrakhan, I created the following in Vega.
{
"$schema": "https://vega.github.io/schema/vega/v5.2.json",
"height": 300,
"width": 600,
"data": [
{
"name": "rawData",
"values": [
{"key": {"stk1": "aa", "stk2": "cc"}, "doc_count": 7},
{"key": {"stk1": "aa", "stk2": "bb"}, "doc_count": 4},
{"key": {"stk1": "bb", "stk2": "aa"}, "doc_count": 8},
{"key": {"stk1": "bb", "stk2": "bb"}, "doc_count": 6},
{"key": {"stk1": "bb", "stk2": "cc"}, "doc_count": 3},
{"key": {"stk1": "cc", "stk2": "aa"}, "doc_count": 9}
],
"transform": [
{"type": "formula", "expr": "datum.key.stk1", "as": "stk1"},
{"type": "formula", "expr": "datum.key.stk2", "as": "stk2"},
{"type": "formula", "expr": "datum.doc_count", "as": "size"}
]
},
{
"name": "nodes",
"source": "rawData",
"transform": [
{
"type": "filter",
"expr": "!groupSelector || groupSelector.stk1 == datum.stk1 || groupSelector.stk2 == datum.stk2"
},
{"type": "formula", "expr": "datum.stk1+datum.stk2", "as": "key"},
{"type": "fold", "fields": ["stk1", "stk2"], "as": ["stack", "grpId"]},
{
"type": "formula",
"expr": "datum.stack == 'stk1' ? datum.stk1+' '+datum.stk2 : datum.stk2+' '+datum.stk1",
"as": "sortField"
},
{
"type": "stack",
"groupby": ["stack"],
"sort": {"field": "sortField", "order": "descending"},
"field": "size"
},
{"type": "formula", "expr": "(datum.y0+datum.y1)/2", "as": "yc"}
]
},
{
"name": "groups",
"source": "nodes",
"transform": [
{
"type": "aggregate",
"groupby": ["stack", "grpId"],
"fields": ["size"],
"ops": ["sum"],
"as": ["total"]
},
{
"type": "stack",
"groupby": ["stack"],
"sort": {"field": "grpId", "order": "descending"},
"field": "total"
},
{"type": "formula", "expr": "scale('y', datum.y0)", "as": "scaledY0"},
{"type": "formula", "expr": "scale('y', datum.y1)", "as": "scaledY1"},
{
"type": "formula",
"expr": "datum.stack == 'stk1'",
"as": "rightLabel"
},
{
"type": "formula",
"expr": "datum.total/domain('y')[1]",
"as": "percentage"
}
]
},
{
"name": "destinationNodes",
"source": "nodes",
"transform": [{"type": "filter", "expr": "datum.stack == 'stk2'"}]
},
{
"name": "edges",
"source": "nodes",
"transform": [
{"type": "filter", "expr": "datum.stack == 'stk1'"},
{
"type": "lookup",
"from": "destinationNodes",
"key": "key",
"fields": ["key"],
"as": ["target"]
},
{
"type": "linkpath",
"orient": "horizontal",
"shape": "diagonal",
"sourceY": {"expr": "scale('y', datum.yc)"},
"sourceX": {"expr": "scale('x', 'stk1') + bandwidth('x')"},
"targetY": {"expr": "scale('y', datum.target.yc)"},
"targetX": {"expr": "scale('x', 'stk2')"}
},
{
"type": "formula",
"expr": "range('y')[0]-scale('y', datum.size)",
"as": "strokeWidth"
},
{
"type": "formula",
"expr": "datum.size/domain('y')[1]",
"as": "percentage"
}
]
}
],
"scales": [
{
"name": "x",
"type": "band",
"range": "width",
"domain": ["stk1", "stk2"],
"paddingOuter": 0.05,
"paddingInner": 0.95
},
{
"name": "y",
"type": "linear",
"range": "height",
"domain": {"data": "nodes", "field": "y1"}
},
{
"name": "color",
"type": "ordinal",
"range": "category",
"domain": {"data": "rawData", "field": "stk1"}
},
{
"name": "stackNames",
"type": "ordinal",
"range": ["Source", "Destination"],
"domain": ["stk1", "stk2"]
}
],
"axes": [
{
"orient": "bottom",
"scale": "x",
"encode": {
"labels": {
"update": {"text": {"scale": "stackNames", "field": "value"}}
}
}
},
{"orient": "left", "scale": "y"}
],
"marks": [
{
"type": "path",
"name": "edgeMark",
"from": {"data": "edges"},
"clip": true,
"encode": {
"update": {
"stroke": [
{
"test": "groupSelector && groupSelector.stack=='stk1'",
"scale": "color",
"field": "stk2"
},
{"scale": "color", "field": "stk1"}
],
"strokeWidth": {"field": "strokeWidth"},
"path": {"field": "path"},
"strokeOpacity": {
"signal": "!groupSelector && (groupHover.stk1 == datum.stk1 || groupHover.stk2 == datum.stk2) ? 0.9 : 0.3"
},
"zindex": {
"signal": "!groupSelector && (groupHover.stk1 == datum.stk1 || groupHover.stk2 == datum.stk2) ? 1 : 0"
},
"tooltip": {
"signal": "datum.stk1 + ' → ' + datum.stk2 + ' ' + format(datum.size, ',.0f') + ' (' + format(datum.percentage, '.1%') + ')'"
}
},
"hover": {"strokeOpacity": {"value": 1}}
}
},
{
"type": "rect",
"name": "groupMark",
"from": {"data": "groups"},
"encode": {
"enter": {
"fill": {"scale": "color", "field": "grpId"},
"width": {"scale": "x", "band": 1}
},
"update": {
"x": {"scale": "x", "field": "stack"},
"y": {"field": "scaledY0"},
"y2": {"field": "scaledY1"},
"fillOpacity": {"value": 0.6},
"tooltip": {
"signal": "datum.grpId + ' ' + format(datum.total, ',.0f') + ' (' + format(datum.percentage, '.1%') + ')'"
}
},
"hover": {"fillOpacity": {"value": 1}}
}
},
{
"type": "text",
"from": {"data": "groups"},
"interactive": false,
"encode": {
"update": {
"x": {
"signal": "scale('x', datum.stack) + (datum.rightLabel ? bandwidth('x') + 8 : -8)"
},
"yc": {"signal": "(datum.scaledY0 + datum.scaledY1)/2"},
"align": {"signal": "datum.rightLabel ? 'left' : 'right'"},
"baseline": {"value": "middle"},
"fontWeight": {"value": "bold"},
"text": {
"signal": "abs(datum.scaledY0-datum.scaledY1) > 13 ? datum.grpId : ''"
}
}
}
},
{
"type": "group",
"data": [
{
"name": "dataForShowAll",
"values": [{}],
"transform": [{"type": "filter", "expr": "groupSelector"}]
}
],
"encode": {
"enter": {
"xc": {"signal": "width/2"},
"y": {"value": 30},
"width": {"value": 80},
"height": {"value": 30}
}
},
"marks": [
{
"type": "group",
"name": "groupReset",
"from": {"data": "dataForShowAll"},
"encode": {
"enter": {
"cornerRadius": {"value": 6},
"fill": {"value": "#f5f5f5"},
"stroke": {"value": "#c1c1c1"},
"strokeWidth": {"value": 2},
"height": {"field": {"group": "height"}},
"width": {"field": {"group": "width"}}
},
"update": {"opacity": {"value": 1}},
"hover": {"opacity": {"value": 0.7}}
},
"marks": [
{
"type": "text",
"interactive": false,
"encode": {
"enter": {
"xc": {"field": {"group": "width"}, "mult": 0.5},
"yc": {
"field": {"group": "height"},
"mult": 0.5,
"offset": 2
},
"align": {"value": "center"},
"baseline": {"value": "middle"},
"fontWeight": {"value": "bold"},
"text": {"value": "Show All"}
}
}
}
]
}
]
},
{
"type": "rect",
"from": {"data": "nodes"},
"encode": {
"enter": {
"stroke": {"value": "#000"},
"strokeWidth": {"value": 2},
"width": {"scale": "x", "band": 1},
"x": {"scale": "x", "field": "stack"},
"y": {"field": "y0", "scale": "y"},
"y2": {"field": "y1", "scale": "y"}
}
}
}
],
"signals": [
{
"name": "groupHover",
"value": {},
"on": [
{
"events": "#groupMark:mouseover",
"update": "{stk1:datum.stack=='stk1' && datum.grpId, stk2:datum.stack=='stk2' && datum.grpId}"
},
{"events": "mouseout", "update": "{}"}
]
},
{
"name": "groupSelector",
"value": false,
"on": [
{
"events": "#groupMark:click!",
"update": "{stack:datum.stack, stk1:datum.stack=='stk1' && datum.grpId, stk2:datum.stack=='stk2' && datum.grpId}"
},
{
"events": [
{"type": "click", "markname": "groupReset"},
{"type": "dblclick"}
],
"update": "false"
}
]
}
]
}

It's a little bit dated, but this blog post shows how to do this kind of 'Sankey' diagram within Kibana using the 'Vega' visualisations.
https://www.elastic.co/blog/sankey-visualization-with-vega-in-kibana
I've played with it a bit a few years ago; it's possible certainly, and rewarding, but that complexity does come at a cost.

Related

how to align rule on x axis with line chart data in vega-lite

I am getting this chart where rule is not correctly aligned on x axis.
group a has value 3.2 and it is not between 2 and 4 on x axis as supposed to. same for b group and c group.
I tried resolve, scale separately line chart and rule without success
Result should be that rule for group a, group b and group c lie on axis x aligned the value they have.
Here is my code:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"width": 606,
"height":384,
"data": {
"values": [
{
"datapoints": {
"division": [
{
"participation_duration": {
"total": {
"mean": 384,
"a": 190,
"b": 365,
"c": 495,
"distribution": {
"min": 0,
"max": 10,
"step": 2,
"data": [0,0.3,0.1,0.2,0.2, 0]
}
}
}
}
]
}
}
]
},
"transform": [
{ "calculate": "datum.datapoints.division", "as": "D" },
{ "flatten": ["D"] },
{"calculate": "datum.D.participation_duration.total.distribution.data", "as": "y"},
{"flatten": ["y"]},
{
"window": [{ "op": "count", "field": "y", "as": "i" }],
"frame": [null, 0]
},
{ "calculate": "(datum.i-1)*2", "as": "x" }
],
"layer": [
{
"mark": {"type":"line",
"point": false,
"interpolate": "cardinal",
"color":"blueviolet"
},
"encoding": {
"x": {
"field": "x",
"type": "quantitative",
"title": "",
"axis":{
"tickCount": 5,
"grid": true
}
},
"y": {
"scale":{"domain":[0,0.3]},
"field": "y",
"type": "quantitative",
"title": "",
"axis": {
"orient": "right",
"tickCount": 3,
"format": "%"
}
}
}
},
{
"transform": [
{"calculate": "datum.D.participation_duration.total", "as": "total"},
{"calculate": "datum.total.a/60", "as": "a group"},
{"calculate": "datum.total.b/60", "as": "b group"},
{"calculate": "datum.total.c/60", "as": "c group"},
{"fold": ["a group", "b group", "c group"]}
],
"encoding": {
"x":{
"field": "value",
"title":null,
"axis":{
"format":".2",
"grid": false,
"domain": false,
"labels":true,
"ticks": false,
"labelAlign":"center",
"labelAngle":0,
"labelPadding": 15
}
},
"color":{
"field": "key",
"legend":null
},
"text": {"field":"key"}
},
"layer": [
{
"mark":{"type":"rule",
"strokeWidth": 5,
"strokeDash":[3,16]
}},
{
"mark":{"type": "text", "align": "center", "dy":-220}
}
]
}
]
}````
Please advise which part is missing.
Thank you
You mean like this?
You need to explicitly set the type.
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"width": 606,
"height": 384,
"data": {
"values": [
{
"datapoints": {
"division": [
{
"participation_duration": {
"total": {
"mean": 384,
"a": 190,
"b": 365,
"c": 495,
"distribution": {
"min": 0,
"max": 10,
"step": 2,
"data": [0, 0.3, 0.1, 0.2, 0.2, 0]
}
}
}
}
]
}
}
]
},
"transform": [
{"calculate": "datum.datapoints.division", "as": "D"},
{"flatten": ["D"]},
{
"calculate": "datum.D.participation_duration.total.distribution.data",
"as": "y"
},
{"flatten": ["y"]},
{"window": [{"op": "count", "field": "y", "as": "i"}], "frame": [null, 0]},
{"calculate": "(datum.i-1)*2", "as": "x"}
],
"layer": [
{
"mark": {
"type": "line",
"point": false,
"interpolate": "cardinal",
"color": "blueviolet"
},
"encoding": {
"x": {
"field": "x",
"type": "quantitative",
"title": "",
"axis": {"tickCount": 5, "grid": true}
},
"y": {
"scale": {"domain": [0, 0.3]},
"field": "y",
"type": "quantitative",
"title": "",
"axis": {"orient": "right", "tickCount": 3, "format": "%"}
}
}
},
{
"transform": [
{"calculate": "datum.D.participation_duration.total", "as": "total"},
{"calculate": "datum.total.a/60", "as": "a group"},
{"calculate": "datum.total.b/60", "as": "b group"},
{"calculate": "datum.total.c/60", "as": "c group"},
{"fold": ["a group", "b group", "c group"]}
],
"encoding": {
"x": {
"field": "value",
"title": null,
"type": "quantitative",
"axis": {
"format": ".2",
"grid": false,
"domain": false,
"labels": true,
"ticks": false,
"labelAlign": "center",
"labelAngle": 0,
"labelPadding": 15
}
},
"color": {"field": "key", "legend": null},
"text": {"field": "key"}
},
"layer": [
{"mark": {"type": "rule", "strokeWidth": 5, "strokeDash": [3, 16]}},
{"mark": {"type": "text", "align": "center", "dy": -220}}
]
}
]
}

Error: Undefined data set name: "source_1" in Vega-Lite interactive charts

I am making an interactive dashboard using Vega-Lite. The end graph has to look like this:
Vega-Lite Dashboard
The code I have so far works perfectly fine for the bar chart and map, but when I draw the histogram it gives following error:
Undefined data set name: "source_1"
This is the code I have so far:
"$schema": "https://vega.github.io/schema/vega-lite/v5.json"
"title": {
"text": "Exploring Irish",
"anchor": "middle",
"fontSize": 20,
"offset": 20,
"color": "brown"
},
"vconcat":[
{ "hconcat": [
{
"width": 500,
"height": 700,
"projection": {
"type": "conicConformal"
},
"layer": [
{
"data": {
"url": "https://gist.githubusercontent.com/carsonfarmer/9791524/raw/b27ca0d78d46a84664fe7ef709eed4f7621f7a25/irish-counties-segmentized.topojson",
"format": {
"type": "topojson",
"feature": "counties"
}
},
"transform": [{
"lookup": "id",
"from": {
"data": {"url": "https://raw.githubusercontent.com/colmr/vis_class/master/FakeAttractionDetails.csv"},
"key": "County",
"fields": ["Population"]
}
}],
"mark": {
"type": "geoshape",
"stroke": "white",
"fill":"#ccc"
}
},
{
"data": {
"url": "https://raw.githubusercontent.com/colmr/vis_class/master/FakeAttractionDetails.csv"
},
"mark": "circle",
"params": [{
"name": "Attrac",
"select": {"type": "point", "fields": ["Type"]},
"bind": "legend"
}],
"encoding": {
"longitude": {
"field": "Longitude",
"type": "quantitative"
},
"latitude": {
"field": "Latitude",
"type": "quantitative"
},
"color":{"field":"Type", "type":"nominal", "scale": {"range": ["#E69F00", "#0072B2", "#CC79A7","#009E73","#56B4E9"]}},
"size": {"value": 40},
"opacity": {"condition": {"param": "Attrac", "value": 1},
"value": 0},
"tooltip": [
{"field": "Name", "type": "nominal", "title": "Accommodation"},
{"field": "Type", "type": "nominal", "title": "Property Type"},
{"field": "Telephone", "type": "nominal", "title": "Contact"}
],
"href": {"field": "Url", "type": "nominal"}
}
}
]
},
{
"data": {
"url": "https://raw.githubusercontent.com/colmr/vis_class/master/FakeAttractionDetails.csv"
},
"width": 335,
"height": 700,
"mark": "bar",
"params": [{
"name": "Attrac",
"select": {"type": "point", "fields": ["Type"]},
"bind": "legend"
},
{ "name": "Attrac",
"select": {"type": "point", "encodings": ["y"]}
}
],
"encoding": {
"y": {
"field": "AddressRegion",
"type": "nominal",
"sort": {
"op": "count",
"field": "Type",
"order": "descending"
},
"axis":{"title":null, "labelFontSize": 15}
},
"x": {
"field": "Type",
"type": "nominal",
"aggregate":"count",
"axis":{"title":"Total Accommodations", "titleFontSize":15}
},
"color":{"field":"Type", "type":"nominal",
"scale": {"range": ["#E69F00", "#0072B2", "#CC79A7","#009E73","#56B4E9"]}
},
"opacity": {"condition": {"param": "Attrac", "value": 1},
"value": 0.05},
"order": {"aggregate": "count", "field": "Type", "type": "nominal", "sort": "descending"}
}
}
]
},
{
"data": {"url": "https://raw.githubusercontent.com/colmr/vis_class/master/FakeAttractionDetails.csv"},
"width": 950,
"height": 45,
"mark": "bar",
"params": [{
"name": "Attrac",
"select": {"type": "point", "fields": ["Type"]},
"bind": "legend"},
{ "name": "Attrac",
"select": {"type": "interval", "encodings": ["x"]}
}],
"encoding": {
"x": {
"field": "Popularity",
"bin": true,
"type": "quantitative"
},
"y": {"aggregate": "count"},
"color":{"field":"Type", "type":"nominal",
"scale": {"range": ["#E69F00", "#0072B2", "#CC79A7","#009E73","#56B4E9"]}},
"opacity": {"condition": {"param": "Attrac", "value": 1},
"value": 0.02}
}
}
],
"config": {
"legend": {
"orient":"top-left", "labelFontSize":15, "titleFontSize":15
}, "tick": {"thickness": 1.5, "bandSize": 18}
}
}
I tried making the histogram with another csv file and that worked fine. Any idea what is wrong with this dataset or this code?
It looks like there's some issue with specifying the same data URL in multiple places in the chart. If you move the data specification to the top level, it appears to work:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"data": {
"url": "https://raw.githubusercontent.com/colmr/vis_class/master/FakeAttractionDetails.csv"
},
...

How to change Shape and Color of points in Vega Scatter Plot?

I have a scatter plot generated with contours code in Vega.
Plot looks like
Based on 3rd field, differentiated the points with colour as Blue and Green Dots, but couldn't see the difference very clearly.
Is it possible to Change the Shape and Colour (atleast one) of the points to make the difference more visible
Code
{
"$schema": "https://vega.github.io/schema/vega/v5.json",
"title": {
"text": "Outlier Distribution between Duration Vs Age",
"anchor": "middle",
"fontSize": 16,
"frame": "group",
"offset": 4
},
"signals": [
{
"name": "bandwidth", "value": 0.5,
"bind": {"input": "range", "min": -1, "max": 1, "step": 0.1}
}
],
"data": [
{
"name": "source",
"url" : {
"index": "tenant_id.model_training_artefact",
"body": {
"size":10000,
"_source": ["duration", "credit_amount", "asnm", "age", "outlier"]
}
}
"format": {"property": "hits.hits"},
},
{
"name": "density",
"source": "source",
"transform": [
{
"type": "kde2d",
"groupby": ["_source.outlier"],
"size": [{"signal": "width"}, {"signal": "height"}],
"x": {"expr": "scale('x', datum.duration)"},
"y": {"expr": "scale('y', datum.age)"},
"bandwidth": {"signal": "[bandwidth, bandwidth]"}
}
]
},
{
"name": "contours",
"source": "density",
"transform": [
{
"type": "isocontour",
"field": "grid",
"levels": 4
}
]
}
],
"scales": [
{
"name": "x",
"type": "linear",
"round": true,
"nice": true,
"zero": true,
"domain": {"data": "source", "field": "_source.duration"},
"range": "width"
},
{
"name": "y",
"type": "linear",
"round": true,
"nice": true,
"zero": true,
"domain": {"data": "source", "field": "_source.age"},
"range": "height"
},
{
"name": "color",
"type": "ordinal",
"domain": {
"data": "source", "field": "_source.outlier",
"sort": {"order": "descending"}
},
"range": "category"
}
],
"axes": [
{
"scale": "x",
"grid": true,
"domain": false,
"orient": "bottom",
"tickCount": 5,
"title": "Duration"
},
{
"scale": "y",
"grid": true,
"domain": false,
"orient": "left",
"titlePadding": 5,
"title": "Age"
}
],
"legends": [
{"stroke": "color", "symbolType": "stroke"}
],
"marks": [
{
"name": "marks",
"type": "symbol",
"from": {"data": "source"},
"encode": {
"update": {
"x": {"scale": "x", "field": "_source.duration"},
"y": {"scale": "y", "field": "_source.age"},
"size": {"value": 50},
"fill": {"scale": "color" , "field": "_source.outlier"}
}
}
},
{
"type": "image",
"from": {"data": "density"},
"encode": {
"update": {
"x": {"value": 0},
"y": {"value": 0},
"width": {"signal": "width"},
"height": {"signal": "height"},
"aspect": {"value": false}
}
},
"transform": [
{
"type": "heatmap",
"field": "datum.grid",
"color": {"expr": "scale('color', datum.datum.outlier)"}
}
]
},
{
"type": "path",
"clip": true,
"from": {"data": "contours"},
"encode": {
"enter": {
"strokeWidth": {"value": 1},
"strokeOpacity": {"value": 1},
"stroke": {"scale": "color", "field": "outlier"}
}
},
"transform": [
{ "type": "geopath", "field": "datum.contour" }
]
}
]
}
Try defining scales for mark symbol color, shape and size.
For example, assuming your field outlier has 3 possible values:
"scales": [
...
{
"name": "scale_symbol_color",
"type": "ordinal",
"domain": {
"data": "source", "field": "_source.outlier",
"sort": {"order": "descending"}
},
"range": ["red", "orange", "blue"]
},
{
"name": "scale_symbol_shape",
"type": "ordinal",
"domain": {
"data": "source", "field": "_source.outlier",
"sort": {"order": "descending"}
},
"range": ["triangle", "square", "circle"]
},
{
"name": "scale_symbol_size",
"type": "ordinal",
"domain": {
"data": "source", "field": "_source.outlier",
"sort": {"order": "descending"}
},
"range": [400, 300, 200]
}
],
...
"marks": [
{
"name": "marks",
"type": "symbol",
"from": {"data": "source"},
"encode": {
"update": {
"x": {"scale": "x", "field": "_source.duration"},
"y": {"scale": "y", "field": "_source.age"},
"size": {"value": 50},
"fill": {"scale": "scale_symbol_color" , "field": "_source.outlier"},
"shape": {"scale": "scale_symbol_shape" , "field": "_source.outlier"},
"size": {"scale": "scale_symbol_size" , "field": "_source.outlier"},
}
}
},
...
Vega docs:
https://vega.github.io/vega/docs/scales/#ordinal
https://vega.github.io/vega/docs/marks/symbol/

Text overlay in heatmap

Is it possible to overlay text in selectable heatmap
https://vega.github.io/editor/#/examples/vega-lite/selection_heatmap
If I plot a heatmap of actual vs predicted for binary in the selectable heatmap, can I overlay text as True Positives, FP, TN, FN with corresponding values
and turn off legend
Here full code
{
"$schema": "https://vega.github.io/schema/vega-lite/v2.4.json",
"data": {
"values": [
{"actual": "Good", "predicted": "Bad", "count": 6386},
{"actual": "Bad", "predicted": "Good", "count": 1261},
{"actual": "Bad", "predicted": "Bad", "count": 6386},
{"actual": "Good", "predicted": "Good", "count": 24030}
]
},
"mark": {"type": "rect", "strokeWidth": 2},
"encoding": {
"y": {
"field": "actual",
"type": "nominal"
},
"x": {
"field": "predicted",
"type": "nominal"
},
"fill": {
"field": "count",
"type": "quantitative"
},
"config": {
"scale": {
"bandPaddingInner": 0,
"bandPaddingOuter": 0
}
}
}
Output like
Is it possible to extract values from index and feed to count values
"url" : {
"%context%": true,
"index": "index",
"body": {
"size":1000,
"_source": ["modelMetrics"],
}
}
"format": {"property": "hits.hits"}
},
where index has
"_source" : {
"modelMetrics" : {
"TN" : 110868,
"FP" : 6386,
"FN" : 1261,
"TP" : 24030,
}
}
You can do this using a Layered chart construct. For example (view in editor):
{
"$schema": "https://vega.github.io/schema/vega-lite/v2.4.json",
"data": {
"values": [
{"actual": "Good", "predicted": "Bad", "label": "FP", "count": 6386},
{"actual": "Bad", "predicted": "Good", "label": "FN", "count": 1261},
{"actual": "Bad", "predicted": "Bad", "label": "TN", "count": 6386},
{"actual": "Good", "predicted": "Good", "label": "TP", "count": 24030}
]
},
"encoding": {
"y": {"field": "actual", "type": "nominal"},
"x": {"field": "predicted", "type": "nominal"}
},
"layer": [
{
"mark": {"type": "rect", "strokeWidth": 2},
"encoding": {
"fill": {"field": "count", "type": "quantitative", "legend": null}
}
},
{
"mark": {"type": "text", "dy": -5},
"encoding": {
"text": {"field": "label", "type": "nominal"},
"color": {
"condition": {"test": "datum.count < 10000", "value": "black"},
"value": "white"
}
}
},
{
"mark": {"type": "text", "dy": 5},
"encoding": {
"text": {"field": "count", "type": "nominal"},
"color": {
"condition": {"test": "datum.count < 10000", "value": "black"},
"value": "white"
}
}
}
],
"width": 100,
"height": 100
}

Pulling values from a csv file in JSON [SyntaxError: Unexpected token a in JSON at position 0]

I'm using VEGA with the code at the bottom of this post. The code works fine when I manually specify values for a, b, c and d.
Now I'm trying to change the code so that rather than manually specifying the values for a-d, it reads it from a csv file [which looks like this:
[
However my problem is that the graph fails to display any info, instead I get the following error in chrome's console:
This is my configuration file:
{
"$schema": "https://vega.github.io/schema/vega/v4.json",
"width": 400,
"padding": 5,
"autosize": "pad",
"data": [
{
"url": "data/summary3.csv",
"name": "tuples",
"transform": [
{
"type": "aggregate",
"groupby": ["a", "b", "d"],
"fields": ["c"],
"ops": ["average"],
"as": ["c"]
}
]
},
{
"name": "trellis",
"source": "tuples",
"transform": [
{"type": "aggregate", "groupby": ["a"]},
{
"type": "formula",
"as": "span",
"expr": "rangeStep * bandspace(datum.count, innerPadding, outerPadding)"
},
{"type": "stack", "field": "span"},
{"type": "extent", "field": "y1", "signal": "trellisExtent"}
]
}
],
"legends": [
{"fill": "color", "orient": "right", "title": "Objective Categories"}
],
"signals": [
{"name": "rangeStep", "value": 20},
{"name": "innerPadding", "value": 0.1},
{"name": "outerPadding", "value": 0.2},
{"name": "height", "update": "trellisExtent[1]"},
{"name": "colors", "value": ["#1f77b4", "#2ca02c", "#ff7f0e", "#1f77b4"]}
],
"scales": [
{
"name": "xscale",
"domain": {"data": "tuples", "field": "c"},
"nice": true,
"zero": true,
"round": true,
"range": "width"
},
{
"name": "color",
"type": "ordinal",
"range": {"signal": "colors"},
"domain": {"data": "trellis", "field": "a"}
}
],
"axes": [
{
"orient": "bottom",
"scale": "xscale",
"title": "Percent complete",
"ticks": true,
"labels": true,
"grid": true,
"domain": true
}
],
"marks": [
{
"type": "group",
"from": {
"data": "trellis",
"facet": {"name": "faceted_tuples", "data": "tuples", "groupby": "a"}
},
"encode": {
"enter": {"x": {"value": 0}, "width": {"signal": "width"}},
"update": {"y": {"field": "y0"}, "y2": {"field": "y1"}}
},
"scales": [
{
"name": "yscale",
"type": "band",
"paddingInner": {"signal": "innerPadding"},
"paddingOuter": {"signal": "outerPadding"},
"round": true,
"domain": {"data": "faceted_tuples", "field": "b"},
"range": {"step": {"signal": "rangeStep"}}
}
],
"axes": [
{
"orient": "left",
"scale": "yscale",
"ticks": false,
"domain": false,
"labelPadding": 4
}
],
"marks": [
{
"type": "rect",
"from": {"data": "faceted_tuples"},
"encode": {
"enter": {
"x": {"value": 0},
"x2": {"scale": "xscale", "field": "c"},
"fill": {"scale": "color", "field": "a"},
"strokeWidth": {"value": 2},
"tooltip": {"field": "d"},
"hover": {"fill": {"value": "red"}}
},
"update": {
"y": {"scale": "yscale", "field": "b"},
"height": {"scale": "yscale", "band": 1},
"stroke": {"value": null},
"zindex": {"value": 0}
},
"hover": {"stroke": {"value": "firebrick"}, "zindex": {"value": 1}}
}
}
]
}
]
}
Your example works if you add "format": {"type":"csv"} into the spec after line 9, i.e.
...
"name": "tuples",
"format": {"type":"csv"},
"transform": [
{
...
The error message is saying it's trying to parse the csv file as json. I'm not sure why it doesn't auto detect from the file extension like it does in vega lite, but it looks from the docs like this is expected behaviour:
https://vega.github.io/vega/docs/data/#format