Introduction to vegalite

vegalite is an R htmlwidget interface to the Vega-Lite JavaScript visualization library.

What is “Vega” and why “-Lite”? Vega is “a full declarative visualization grammar, suitable for expressive custom interactive visualization design and programmatic generation.” Vega-Lite “provides a higher-level grammar for visual analysis, comparable to ggplot or Tableau, that generates complete Vega specifications.” Vega-Lite compiles to Vega and is more compact and accessible than Vega. Both are just JSON data files with a particular schema that let you specify the data, encodings and aesthetics for statistical charts.

The following is a gallery of code & examples to help you get started with the package.

library(vegalite)

bar mark

# Parse an inline JSON array of records with fields "a" and "b"
# (jsonlite simplifies such an array to a data frame by default).
dat <- jsonlite::fromJSON('[
      {"a": "A","b": 28}, {"a": "B","b": 55}, {"a": "C","b": 43},
      {"a": "D","b": 91}, {"a": "E","b": 81}, {"a": "F","b": 53},
      {"a": "G","b": 19}, {"a": "H","b": 87}, {"a": "I","b": 52}
    ]')

# Bar marks over `dat`: field "a" on x (ordinal), field "b" on y (quantitative).
vegalite(viewport_height=250) %>%
  view_size(400, 200) %>%
  add_data(dat) %>%
  encode_x("a", "ordinal") %>%
  encode_y("b", "quantitative") %>%
  mark_bar()

point mark

# Point marks: Horsepower on x and Miles_per_Gallon on y (both quantitative),
# with the cars.json URL given as the data source.
vegalite(viewport_width=400, viewport_height=400) %>%
  view_size(400, 400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/cars.json") %>%
  encode_x("Horsepower", "quantitative") %>%
  encode_y("Miles_per_Gallon", "quantitative") %>%
  mark_point()

circle mark

# Same encodings as the point-mark example (Horsepower vs Miles_per_Gallon),
# but finished with mark_circle() instead of mark_point().
vegalite(viewport_width=400, viewport_height=400) %>%
  view_size(400, 400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/cars.json") %>%
  encode_x("Horsepower", "quantitative") %>%
  encode_y("Miles_per_Gallon", "quantitative") %>%
  mark_circle()

color and shape

# Horsepower vs Miles_per_Gallon point marks where the nominal field "Origin"
# is mapped to both the color and the shape channel.
vegalite(viewport_width=400, viewport_height=400) %>%
  view_size(400, 400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/cars.json") %>%
  encode_x("Horsepower", "quantitative") %>%
  encode_y("Miles_per_Gallon", "quantitative") %>%
  encode_color("Origin", "nominal") %>%
  encode_shape("Origin", "nominal") %>%
  mark_point()

size

# Horsepower vs Miles_per_Gallon point marks with the quantitative field
# "Acceleration" mapped to the size channel.
vegalite(viewport_width=400, viewport_height=400) %>%
  view_size(400, 400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/cars.json") %>%
  encode_x("Horsepower", "quantitative") %>%
  encode_y("Miles_per_Gallon", "quantitative") %>%
  encode_size("Acceleration", "quantitative") %>%
  mark_point()

filtered line

# Line mark of price over date from stocks.csv, with a filter expression
# that keeps only rows whose symbol is 'GOOG'.
vegalite(viewport_width=400, viewport_height=450) %>%
  view_size(400, 450) %>%
  add_data("https://vega.github.io/vega-editor/app/data/stocks.csv") %>%
  add_filter("datum.symbol==='GOOG'") %>%  # filter is a JavaScript-style expression over `datum`
  encode_x("date", "temporal") %>%
  encode_y("price", "quantitative") %>%
  mark_line()

ticks

# Tick marks: Horsepower on x (quantitative) against Cylinders on y (ordinal).
vegalite(viewport_height=200) %>%
  view_size(400, 200) %>%
  add_data("https://vega.github.io/vega-editor/app/data/cars.json") %>%
  encode_x("Horsepower", "quantitative") %>%
  encode_y("Cylinders", "ordinal") %>%
  mark_tick()

multi-series line

# Line mark of price over date from stocks.csv with no filter; the nominal
# field "symbol" is mapped to color.
vegalite(viewport_height=500) %>%
  view_size(400, 400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/stocks.csv") %>%
  encode_x("date", "temporal") %>%
  encode_y("price", "quantitative") %>%
  encode_color("symbol", "nominal") %>%
  mark_line()

facet col

# Worldwide_Gross vs US_DVD_Sales point marks from movies.json, faceted into
# columns by MPAA_Rating.
vegalite(viewport_height=350) %>%
  add_data("https://vega.github.io/vega-editor/app/data/movies.json") %>%
  encode_x("Worldwide_Gross", "quantitative") %>%
  encode_y("US_DVD_Sales", "quantitative") %>%
  facet_col("MPAA_Rating", "ordinal") %>%
  mark_point()

facet row

# Same encodings as the column-facet example, but faceted into rows by
# MPAA_Rating; a larger viewport_height (1400) is passed for this layout.
vegalite(viewport_height=1400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/movies.json") %>%
  encode_x("Worldwide_Gross", "quantitative") %>%
  encode_y("US_DVD_Sales", "quantitative") %>%
  facet_row("MPAA_Rating", "ordinal") %>%
  mark_point()

facet both

# Worldwide_Gross vs US_DVD_Sales point marks faceted in both directions:
# columns by MPAA_Rating and rows by Major_Genre.
vegalite(viewport_height=2900) %>%
  add_data("https://vega.github.io/vega-editor/app/data/movies.json") %>%
  encode_x("Worldwide_Gross", "quantitative") %>%
  encode_y("US_DVD_Sales", "quantitative") %>%
  facet_col("MPAA_Rating", "ordinal") %>%
  facet_row("Major_Genre", "ordinal") %>%
  mark_point()

log scale

# Inline data: x runs 0..7 and y is the matching power of ten (1 .. 10000000).
# Note this reassigns `dat` from the bar-mark example.
dat <- jsonlite::fromJSON('[
      {"x": 0, "y": 1}, {"x": 1, "y": 10},
      {"x": 2, "y": 100}, {"x": 3, "y": 1000},
      {"x": 4, "y": 10000}, {"x": 5, "y": 100000},
      {"x": 6, "y": 1000000}, {"x": 7, "y": 10000000}
    ]')

# Point marks of y against x, with scale_y_log_vl() applied to the y scale.
vegalite(viewport_height=300) %>%
  add_data(dat) %>%
  encode_x("x", "quantitative") %>%
  encode_y("y", "quantitative") %>%
  mark_point() %>%
  scale_y_log_vl()

aggregate bar chart

# Horizontal bars from population.json: sum of "people" on x, "age" (ordinal)
# on y, restricted by the filter expression to rows where year == 2000.
vegalite(viewport_width=500, viewport_height=400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/population.json") %>%
  encode_x("people", "quantitative", aggregate="sum") %>%
  encode_y("age", "ordinal") %>%
  scale_y_ordinal_vl(range_step=17) %>%
  add_filter("datum.year == 2000") %>%
  mark_bar()

binned scatterplot

# Binned scatterplot from movies.json: IMDB_Rating on x and
# Rotten_Tomatoes_Rating on y, each binned with maxbins=10; the size channel
# carries a "count" aggregate over the "*" field.
vegalite(viewport_width=400, viewport_height=400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/movies.json") %>%
  encode_x("IMDB_Rating", "quantitative") %>%
  encode_y("Rotten_Tomatoes_Rating", "quantitative") %>%
  encode_size("*", "quantitative", aggregate="count") %>%
  bin_x(maxbins=10) %>%
  bin_y(maxbins=10) %>%
  mark_point()

slope graph

# Slope graph from barley.json: "year" (ordinal) on x, median "yield" on y,
# one colored line per "site". The x scale is given range_step=50 and
# padding=0.5.
vegalite(viewport_width=400, viewport_height=600) %>%
  add_data("https://vega.github.io/vega-editor/app/data/barley.json") %>%
  encode_x("year", "ordinal") %>%
  encode_y("yield", "quantitative", aggregate="median") %>%
  encode_color("site", "nominal") %>%
  scale_x_ordinal_vl(range_step=50, padding=0.5) %>%
  mark_line()

histogram

# Histogram from movies.json: IMDB_Rating binned on x (maxbins=10) and a
# "count" aggregate over "*" on y, drawn as bars.
vegalite(viewport_width=400, viewport_height=400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/movies.json") %>%
  encode_x("IMDB_Rating", "quantitative") %>%
  encode_y("*", "quantitative", aggregate="count") %>%
  bin_x(maxbins=10) %>%
  mark_bar()

stacked bar chart

# Bars from seattle-weather.csv: "date" on x with time unit "month", record
# count on y, colored by the nominal field "weather".
vegalite(viewport_width=400, viewport_height=400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/seattle-weather.csv") %>%
  encode_x("date", "temporal") %>%
  encode_y("*", "quantitative", aggregate="count") %>%
  encode_color("weather", "nominal") %>%
  # Explicit color scale: five weather values in `domain`, and five hex
  # colors in `range` listed in the same order.
  scale_color_nominal_vl(domain=c("sun","fog","drizzle","rain","snow"),
                      range=c("#e7ba52","#c7c7c7","#aec7e8","#1f77b4","#9467bd")) %>%
  timeunit_x("month") %>%
  mark_bar()

horizontal stacked bar chart

# Horizontal bars from barley.json: summed "yield" on x, "variety" on y,
# colored by "site".
vegalite(viewport_width=400, viewport_height=400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/barley.json") %>%
  encode_x("yield", "quantitative", aggregate="sum") %>%
  encode_y("variety", "nominal") %>%
  encode_color("site", "nominal") %>%
  mark_bar()

stacked area chart

# Area marks from unemployment-across-industries.json: "date" on x with time
# unit "yearmonth", summed "count" on y, colored by "series" using the
# "category20b" scheme.
vegalite() %>%
  view_size(300, 200) %>%
  add_data("https://vega.github.io/vega-editor/app/data/unemployment-across-industries.json") %>%
  encode_x("date", "temporal") %>%
  encode_y("count", "quantitative", aggregate="sum") %>%
  encode_color("series", "nominal") %>%
  scale_color_nominal_vl(scheme="category20b") %>%
  timeunit_x("yearmonth") %>%
  scale_x_time_vl(nice="month") %>%
  axis_x(format="%Y", labelAngle=0) %>%  # x-axis labels use the "%Y" format string, angle 0
  mark_area()

streamgraph!

# Same data and encodings as the stacked area chart, with two differences:
# the y encoding passes stack="center", and the area mark passes
# interpolate="basis".
vegalite() %>%
  view_size(300, 200) %>%
  add_data("https://vega.github.io/vega-editor/app/data/unemployment-across-industries.json") %>%
  encode_x("date", "temporal") %>%
  encode_y("count", "quantitative", aggregate="sum", stack="center") %>%
  encode_color("series", "nominal") %>%
  scale_color_nominal_vl(scheme="category20b") %>%
  timeunit_x("yearmonth") %>%
  scale_x_time_vl(nice="month") %>%
  axis_x(format="%Y", labelAngle=0) %>%
  mark_area(interpolate="basis")

scatter text

# Text marks at Horsepower vs Miles_per_Gallon, colored by "Origin". The text
# shown is a calculated field "OriginInitial".
vegalite() %>%
  view_size(300, 200) %>%
  add_data("https://vega.github.io/vega-editor/app/data/cars.json") %>%
  encode_x("Horsepower", "quantitative") %>%
  encode_y("Miles_per_Gallon", "quantitative") %>%
  encode_color("Origin", "nominal") %>%
  # Derive "OriginInitial" from the expression datum.Origin[0], i.e. the
  # first character of Origin.
  calculate("OriginInitial", "datum.Origin[0]") %>%
  encode_text("OriginInitial", "nominal") %>%
  mark_text()

area chart

# Area mark from unemployment-across-industries.json with no color encoding:
# "date" on x with time unit "yearmonth", summed "count" on y.
vegalite() %>%
  view_size(300, 200) %>%
  add_data("https://vega.github.io/vega-editor/app/data/unemployment-across-industries.json") %>%
  encode_x("date", "temporal") %>%
  timeunit_x("yearmonth") %>%
  axis_x(format="%Y", labelAngle=0) %>%
  encode_y("count", "quantitative", aggregate="sum") %>%
  mark_area()

grouped bar chart

# Grouped bars from population.json (rows where year == 2000): within each
# "age" column facet, one bar per gender showing summed "people".
vegalite(viewport_width=600, viewport_height=400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/population.json") %>%
  add_filter("datum.year == 2000") %>%
  # Derive "gender": "Female" when sex == 2, otherwise "Male".
  calculate("gender", 'datum.sex == 2 ? "Female" : "Male"') %>%
  encode_x("gender", "nominal") %>%
  encode_y("people", "quantitative", aggregate="sum") %>%
  encode_color("gender", "nominal") %>%
  scale_x_ordinal_vl(range_step = 8) %>%
  scale_color_nominal_vl(range=c("#EA98D2", "#659CCA")) %>%
  facet_col("age", "ordinal") %>%
  # NOTE(review): remove=TRUE presumably drops the per-facet x axis; confirm
  # against the axis_x documentation.
  axis_x(remove=TRUE) %>%
  axis_y(title="population", grid=FALSE) %>%
  view_config(stroke_width=0) %>%
  mark_bar()

normalized stacked bar chart

# Normalized stacked bars from population.json: "age" on x, summed "people"
# on y with stack="normalize", colored by a derived "gender" field.
vegalite(viewport_width=400, viewport_height=400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/population.json") %>%
  # Restrict to a single year, as every other population.json example in this
  # gallery does; without the filter the sum aggregates across all years
  # present in the file.
  add_filter("datum.year == 2000") %>%
  # Derive "gender": "Female" when sex == 2, otherwise "Male".
  calculate("gender", 'datum.sex == 2 ? "Female" : "Male"') %>%
  encode_x("age", "ordinal") %>%
  encode_y("people", "quantitative", aggregate="sum", stack="normalize") %>%
  encode_color("gender", "nominal") %>%
  scale_x_ordinal_vl(range_step=17) %>%
  scale_color_nominal_vl(range=c("#EA98D2", "#659CCA")) %>%
  mark_bar()

normalized stacked area chart

# Area marks from unemployment-across-industries.json: "date" on x with time
# unit "yearmonth", summed "count" on y with stack="normalize", colored by
# "series" using the "category20b" scheme.
vegalite() %>%
  view_size(300, 300) %>%
  add_data("https://vega.github.io/vega-editor/app/data/unemployment-across-industries.json") %>%
  encode_x("date", "temporal") %>%
  encode_y("count", "quantitative", aggregate="sum", stack="normalize") %>%
  encode_color("series", "nominal") %>%
  scale_x_time_vl(nice="month") %>%
  scale_color_nominal_vl(scheme="category20b") %>%
  axis_x(format="%Y", labelAngle=0) %>%
  # NOTE(review): remove=TRUE presumably hides the y axis; confirm against
  # the axis_y documentation.
  axis_y(remove=TRUE) %>%
  timeunit_x("yearmonth") %>%
  mark_area()

layered bar chart

# Layered bars from population.json (rows where year == 2000): "age" on x,
# summed "people" on y, colored by a derived "gender" field, drawn with
# opacity=0.6.
vegalite(viewport_width=400, viewport_height=400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/population.json") %>%
  add_filter("datum.year == 2000") %>%
  # Derive "gender": "Female" when sex == 2, otherwise "Male".
  calculate("gender", 'datum.sex == 2 ? "Female" : "Male"') %>%
  encode_x("age", "ordinal") %>%
  # NOTE(review): stack=NA presumably turns stacking off so the two gender
  # bars overlap rather than stack; confirm against the encode_y documentation.
  encode_y("people", "quantitative", aggregate="sum", stack=NA) %>%
  encode_color("gender", "nominal") %>%
  scale_x_ordinal_vl(range_step=17) %>%
  scale_color_nominal_vl(range=c("#e377c2","#1f77b4")) %>%
  axis_y(title="Population") %>%
  mark_bar(opacity=0.6)

trellis bar chart

# Trellis of bar charts from population.json (rows where year == 2000):
# "age" on x, summed "people" on y, with the derived "gender" field used for
# both the color channel and the row facet.
vegalite(viewport_width=400, viewport_height=400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/population.json") %>%
  add_filter("datum.year == 2000") %>%
  # Derive "gender": "Female" when sex == 2, otherwise "Male".
  calculate("gender", 'datum.sex == 2 ? "Female" : "Male"') %>%
  encode_x("age", "ordinal") %>%
  encode_y("people", "quantitative", aggregate="sum") %>%
  encode_color("gender", "nominal") %>%
  facet_row("gender", "nominal") %>%
  scale_x_ordinal_vl(range_step=17) %>%
  scale_color_nominal_vl(range=c("#EA98D2","#659CCA")) %>%
  axis_y(title="Population") %>%
  mark_bar()

trellis stacked bar chart

# Same encodings as the horizontal stacked bar chart (summed "yield" by
# "variety", colored by "site"), additionally faceted into columns by "year".
vegalite(viewport_width=400, viewport_height=400) %>%
  add_data("https://vega.github.io/vega-editor/app/data/barley.json") %>%
  encode_x("yield", "quantitative", aggregate="sum") %>%
  encode_y("variety", "nominal") %>%
  encode_color("site", "nominal") %>%
  facet_col("year", "ordinal") %>%
  mark_bar()

trellis histograms

# Trellis of histograms from cars.json: Horsepower binned on x (maxbins=15),
# a "count" aggregate over "*" on y, and one row facet per "Origin".
# No color encoding is used: the previous encode_color("site", ...) named a
# field that exists in barley.json but not in cars.json.
vegalite(viewport_height=700) %>%
  add_data("https://vega.github.io/vega-editor/app/data/cars.json") %>%
  encode_x("Horsepower", "quantitative") %>%
  encode_y("*", "quantitative", aggregate="count") %>%
  facet_row("Origin", "nominal") %>%
  bin_x(maxbins=15) %>%
  mark_bar()

becker’s barley trellis plot

# Trellis plot from barley.json: mean "yield" on x, "variety" on y, colored
# by "year", with one row facet per "site".
vegalite(viewport_height=1200) %>%
  add_data("https://vega.github.io/vega-editor/app/data/barley.json") %>%
  encode_x("yield", "quantitative", aggregate="mean") %>%
  # The y categories get a sort definition built from ("yield", "mean").
  encode_y("variety", "ordinal", sort=sort_def("yield", "mean")) %>%
  encode_color("year", "nominal") %>%
  facet_row("site", "ordinal") %>%
  scale_y_ordinal_vl(range_step=12) %>%
  mark_point()

sorting line order

# Line mark from driving.json: "miles" on x and "gas" on y, with the order
# channel set to the temporal field "year". Both linear scales are created
# with zero=FALSE.
vegalite(viewport_width=300, viewport_height=300) %>%
  view_size(300, 300) %>%
  add_data("https://vega.github.io/vega-editor/app/data/driving.json") %>%
  encode_x("miles", "quantitative") %>%
  encode_y("gas", "quantitative") %>%
  encode_order("year", "temporal") %>%
  scale_x_linear_vl(zero=FALSE) %>%
  scale_y_linear_vl(zero=FALSE) %>%
  mark_line()

sort layer scatterplot

# Horsepower vs Miles_per_Gallon point marks colored by "Origin"; the order
# channel also uses "Origin", with sort="descending".
vegalite(viewport_width=200, viewport_height=200) %>%
  view_size(200, 200) %>%
  add_data("https://vega.github.io/vega-editor/app/data/cars.json") %>%
  encode_x("Horsepower", "quantitative") %>%
  encode_y("Miles_per_Gallon", "quantitative") %>%
  encode_color("Origin", "nominal") %>%
  encode_order("Origin", "ordinal", sort="descending") %>%
  mark_point()

detail lines

# Line mark of price over date from stocks.csv. "symbol" is placed on the
# detail channel rather than color, unlike the multi-series line example.
vegalite(viewport_width=200, viewport_height=200) %>%
  view_size(200, 200) %>%
  add_data("https://vega.github.io/vega-editor/app/data/stocks.csv") %>%
  encode_x("date", "temporal") %>%
  encode_y("price", "quantitative") %>%
  encode_detail("symbol", "nominal") %>%
  mark_line()

detail points

# Point marks from cars.json: mean Horsepower on x and mean Displacement on
# y, with "Origin" on the detail channel.
vegalite() %>%
  view_size(200, 200) %>%
  add_data("https://vega.github.io/vega-editor/app/data/cars.json") %>%
  encode_x("Horsepower", "quantitative", aggregate="mean") %>%
  encode_y("Displacement", "quantitative", aggregate="mean") %>%
  encode_detail("Origin", "nominal") %>%
  mark_point()