library(tidyverse)
isi_donor_url <- "https://www.isi-stats.com/isi/data/prelim/OrganDonor.txt"
donor <- read_delim(isi_donor_url) %>%
select(Default, Choice) %>%
dplyr::mutate(decision = ifelse(Choice == "donor", "donor (1)", "not (0)")) %>%
dplyr::mutate(decision = fct_rev(decision))
head(donor)
#> # A tibble: 6 × 3
#> Default Choice decision
#> <chr> <chr> <fct>
#> 1 opt-in donor donor (1)
#> 2 opt-in donor donor (1)
#> 3 opt-in donor donor (1)
#> 4 opt-in donor donor (1)
#> 5 opt-in donor donor (1)
#> 6 opt-in donor donor (1)library(ggprop.test)
snapshot <- ggplyr::intercept
donor |>
ggplot() +
aes(x = decision) +
geom_stack() +
geom_stack_label() + snapshot("p1") +
geom_support() + snapshot("p2") +
geom_prop() +
geom_prop_label() + snapshot("p3") +
stamp_prop() +
stamp_prop_label() + snapshot("p4") +
stamp_eq_norm_prop() +
geom_normal_prop_null() + snapshot("p5") +
geom_normal_prop_null_sds() + snapshot("p6")If we discuss each of the snapshot points, we could write something like this:
library(patchwork)
(p1 + p2) /
(p3 + p4) /
(p5 + p6) +
patchwork::plot_annotation(
tag_levels = 1,
title = "A prop test 'graphical poem' addressing the question:
For survey of 161 individuals on willingness to serve as
organ doners in the case of an accident, is there evidence
that responses rate differs from a 50/50 split,
when 53 individuals respond 'no' and 108 individuals respond 'yes'?
",
subtitle = "1. Draw bar chart (stacks) for each category.
2. Draw theoretical limits for proportion (0 to 1).
3. Calculate proportion and place (also the balancing point for stacks!).
4. Place null (look at the initial question, 'differs from 50/50' in this case)
5. Draw distribution for NULL, where SD = sqrt((p * (1-p))/n)
6. Calc z-score - where does observed prop fit into NULL distributions
(i.e. how many standard deviations fit between .5 and .67)"
) &
ggchalkboard:::theme_blackboard(base_size = 14) Some convenience functions from {statexpress} are used, because we want this to be a bit more self-contained at this point, so we just clone them for now. statexpress is evolving and is not on CRAN.
Details
qlayer <- function (mapping = NULL, data = NULL, geom = ggplot2::GeomPoint, stat = StatIdentity,
position = position_identity(), ..., na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE)
{
ggplot2::layer(data = data, mapping = mapping, geom = geom,
stat = stat, position = position, show.legend = show.legend,
inherit.aes = inherit.aes, params = rlang::list2(na.rm = na.rm,
...))
}
qstat <- function (compute_group = ggplot2::Stat$compute_group, ...)
{
ggplot2::ggproto(NULL, Stat, compute_group = compute_group,
...)
}
qstat_panel <- function (compute_panel, ...)
{
ggplot2::ggproto(NULL, Stat, compute_panel = compute_panel,
...)
}
proto_update <- function (`_class`, `_inherit`, default_aes_update = NULL, ...)
{
if (!is.null(default_aes_update)) {
default_aes <- aes(!!!modifyList(`_inherit`$default_aes,
default_aes_update))
}
ggplot2::ggproto(`_class` = `_class`, `_inherit` = `_inherit`,
default_aes = default_aes, ...)
}
qproto_update <- function (`_inherit`, default_aes_update = NULL, ...)
{
proto_update(NULL, `_inherit`, default_aes_update = default_aes_update,
...)
}Now let’s see the compute…
library(tidyverse)
# 1. layer stack of bricks
compute_group_bricks <- function(data, scales, width = .2){
data %>%
dplyr::mutate(row = row_number()) %>%
dplyr::mutate(y = row - .5) %>%
dplyr::mutate(width = width)
}
# 2. layer label stack with count
compute_group_count <- function(data, scales){
data %>%
dplyr::count(x) %>%
dplyr::mutate(y = n,
label = n)
}
# 3. layer add x span
compute_balance <- function(data, scales){
data %>%
dplyr::summarise(min_x = min(x),
xend = max(x),
y = 0,
yend = 0) %>%
dplyr::rename(x = min_x)
}
# 4. layer add balancing point
compute_xmean_at_y0 <- function(data, scales){
data %>%
dplyr::summarise(x = mean(x),
y = 0,
label = "^")
}
# 5. layer add balancing point value label
compute_xmean_at_y0_label <- function(data, scales){
data %>%
dplyr::summarise(x = mean(x),
y = 0,
label = after_stat(round(x, 2)))
}
# 6. Add 'point' for asserted balancing point (null)
compute_panel_prop_asserted <- function(data, scales, null = .5){
# stamp type layer - so ignor input data
data.frame(y = 0,
x = null,
label = "^"
)
}
# 6. Add label for asserted balancing point (null)
compute_panel_prop_asserted_label <- function(data, scales, null = .5){
# stamp type layer - so ignor input data
data.frame(y = 0,
x = null,
label = round(null, 2)
)
}
# 7. normal distribution based on null and n
compute_dnorm_prop <- function(data, scales, null = .5, dist_sds = seq(-3.5, 3.5, by = .1)
){
n <- data |> nrow()
n_max <- data |> dplyr::count(.by = x) |> dplyr::pull() |> max()
sd = sqrt(null * (1 - null)/n) # sd of the null distribution
q <- dist_sds * sd + null
data.frame(x = q) %>%
dplyr::mutate(height = dnorm(q, sd = sd, mean = null)) %>%
dplyr::mutate(height_max = dnorm(0, sd = sd, mean = 0)) %>%
dplyr::mutate(y = .55*n_max*height/height_max) %>% # This is a bit fragile...
dplyr::mutate(xend = x,
yend = 0) %>%
# @teunbrand ggplot2::GeomArea$setup_data() requires a group column. Your panel computation does not preserve groups, but it should.
dplyr::mutate(group = 1)
}
# 8. normal distribution mean and sds based on null and n
compute_dnorm_prop_sds <- function(data, scales, null = .5,
dist_sds = -4:4){
n <- data |> nrow()
n_max <- data |> dplyr::count(.by = x) |> dplyr::pull() |> max()
sd = sqrt(null * (1 - null)/n) # sd of the null distribution
q <- dist_sds * sd + null
data.frame(x = q) %>%
dplyr::mutate(height = dnorm(q, sd = sd, mean = null)) %>%
dplyr::mutate(height_max = dnorm(0, sd = sd, mean = 0)) %>%
dplyr::mutate(y = .55*n_max*height/height_max) %>% # This is a bit fragile...
dplyr::mutate(xend = x,
yend = 0)
} #' @export
stamp_eq_norm_prop <- function(x = I(.125),
y = I(.8), size = 3.5){
annotate(
"text",
x = x,
y = y,
label = latex2exp::TeX("sd = \\sqrt{\\frac{p*(1-p)}{n}}", output = "character"),
parse = TRUE,
size = size
)
}Sketch mode - where you don’t actually define functions, we just have proposals commented out.
donor |>
ggplot() +
aes(x = decision) +
# 1 geom_stack
qlayer(geom = qproto_update(GeomTile, aes(color = from_theme(paper))),
stat = qstat(compute_group_bricks)) +
# 2 geom_stack_label()
qlayer(geom = qproto_update(GeomText, aes(vjust = 0)),
stat = qstat(compute_group_count)) +
# 3 geom_xrange, show scale, range at y is zero
qlayer(geom = GeomSegment,
stat = qstat_panel(compute_balance)) +
# 4. geom_prop, show prop, i.e. balancing point
qlayer(geom = qproto_update(GeomText, aes(size = 6, vjust = 1)),
stat = qstat_panel(compute_xmean_at_y0)) +
scale_x_discrete(palette = scales::pal_manual(0:1)) +
# 5. geom_prop_label, labeling prop, balancing point
qlayer(geom = qproto_update(GeomLabel, aes(fill = from_theme(colour %||% paper), label.size = NA, vjust = 0)),
stat = qstat_panel(compute_xmean_at_y0_label)) +
# 6. stamp_prop, assertion, point
qlayer(geom = qproto_update(GeomText, aes(size = 6, vjust = 1, color = from_theme(colour %||% accent))),
stat = qstat_panel(compute_panel_prop_asserted)) +
# 7. stamp_prop_label, assertion, label
qlayer(geom = qproto_update(GeomLabel,
aes(fill = from_theme(colour %||% paper),
label.size = NA, vjust = 0,
color = from_theme(colour %||% accent))),
stat = qstat_panel(compute_panel_prop_asserted_label)) +
stamp_eq_norm_prop() +
# 8. geom_norm on prop plot
qlayer(geom = qproto_update(GeomArea, aes(alpha = .2)),
stat = qstat_panel(compute_dnorm_prop)) +
# 9. geom_prop_norm w/ sd marks
qlayer(geom = qproto_update(GeomSegment, aes(linetype = "dotted")),
stat = qstat_panel(compute_dnorm_prop_sds)) +
labs(title = "Is there statistical evidence that choice to between being an\nan organ donar or not differs from 50/50") Details
#' @export
geom_stack <- function(...){
qlayer(geom = qproto_update(ggplot2::GeomTile, ggplot2::aes(color = "white")),
stat = qstat(compute_group_bricks),
...)
}
#' @export
geom_stack_label <- function(...){
qlayer(geom = qproto_update(ggplot2::GeomText, ggplot2::aes(vjust = 0)),
stat = qstat(compute_group_count),
...)
}
#' @export
geom_support <- function(...){
qlayer(geom = ggplot2::GeomSegment,
stat = qstat_panel(compute_balance),
...)
}
scale_x_prop <- function(...){
scale_x_discrete(palette = scales::pal_manual(0:1), ...)
}
#' @export
geom_prop <- function(...){
list(
qlayer(geom = qproto_update(ggplot2::GeomText, ggplot2::aes(size = 6, vjust = 1)),
stat = qstat_panel(compute_xmean_at_y0),
...),
scale_x_prop()
)
}
#' @export
geom_prop_label <- function(...){
qlayer(geom = qproto_update(ggplot2::GeomLabel,
ggplot2::aes(fill = ggplot2::from_theme(colour %||% paper), label.size = NA, vjust = 0)),
stat = qstat_panel(compute_xmean_at_y0_label),
...)
}
#' @export
stamp_prop <- function(...){
qlayer(geom = qproto_update(ggplot2::GeomText,
ggplot2::aes(size = 6,
vjust = 1,
color = ggplot2::from_theme(colour %||% accent))),
stat = qstat_panel(compute_panel_prop_asserted),
...)
}
#' @export
stamp_prop_label <- function(...){
qlayer(geom = qproto_update(ggplot2::GeomLabel,
ggplot2::aes(fill = ggplot2::from_theme(colour %||% paper),
label.size = NA, vjust = 0,
color = ggplot2::from_theme(colour %||% accent))),
stat = qstat_panel(compute_panel_prop_asserted_label),
...)
}
#' @export
geom_normal_prop_null <- function(...){
qlayer(geom = qproto_update(ggplot2::GeomArea, ggplot2::aes(alpha = .2)),
stat = qstat_panel(compute_dnorm_prop),
...)
}
#' @export
geom_normal_prop_null_sds <- function(...){
qlayer(geom = qproto_update(ggplot2::GeomSegment, ggplot2::aes(linetype = "dotted")),
stat = qstat_panel(compute_dnorm_prop_sds),
...)
}Done! See below for all layers, and the introduction of packag for potential break-points along the prop test train of thought.
donor |>
ggplot() +
aes(x = decision) +
geom_stack() +
geom_stack_label() +
geom_support() +
geom_prop() +
geom_prop_label() +
stamp_prop() +
stamp_prop_label() +
geom_normal_prop_null() +
geom_normal_prop_null_sds() +
stamp_eq_norm_prop()# ?prop.test
prop.test(x = sum(donor$Choice == "donor"), # n true
n = length(donor$Choice), # num observations
p = .5) # null
#>
#> 1-sample proportions test with continuity correction
#>
#> data: sum(donor$Choice == "donor") out of length(donor$Choice), null probability 0.5
#> X-squared = 18.112, df = 1, p-value = 2.083e-05
#> alternative hypothesis: true p is not equal to 0.5
#> 95 percent confidence interval:
#> 0.5917808 0.7415370
#> sample estimates:
#> p
#> 0.6708075knitrExtra::chunk_to_dir("statexpress")
knitrExtra::chunk_to_dir("compute_for_prop_story")
knitrExtra::chunk_to_dir("prop_poem_functions")
knitrExtra::chunk_to_dir("stamp_eq_norm_prop")usethis::use_package("dplyr")
usethis::use_package("ggplot2")
devtools::document()
devtools::check(".")
devtools::install(pkg = ".", upgrade = "never")


