mtcars data

Note: The `type` argument in `generate()` is automatically filled based on the entries for `specify()` and `hypothesize()`. It can be removed throughout the examples that follow. It is left in to reiterate the type of generation process being performed.
library(infer)
library(dplyr)
# Recode the discrete columns as factors; the examples below use them as
# categorical response/explanatory variables.
mtcars <- mutate(
  mtcars,
  cyl = factor(cyl),
  vs = factor(vs),
  am = factor(am),
  gear = factor(gear),
  carb = factor(carb)
)
# For reproducibility
set.seed(2018)
One numerical variable (mean)
# Response mpg with a point null of mu = 25: draw 100 bootstrap replicates
# and compute the mean of each.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  hypothesize(null = "point", mu = 25) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "mean")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 24.7
## 2 2 23.1
## 3 3 26.9
## 4 4 24.8
## 5 5 25.6
## 6 6 23.2
## 7 7 24.2
## 8 8 24.9
## 9 9 23.3
## 10 10 26.5
## # … with 90 more rows
One numerical variable (median)
# Response mpg with a point null of med = 26: draw 100 bootstrap replicates
# and compute the median of each.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  hypothesize(null = "point", med = 26) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "median")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 26.5
## 2 2 26.5
## 3 3 24.6
## 4 4 25.0
## 5 5 26
## 6 6 26
## 7 7 25.0
## 8 8 27.2
## 9 9 25.2
## 10 10 28.2
## # … with 90 more rows
One categorical (2 level) variable
# Response am (success level "1") with a point null of p = 0.25: simulate
# 100 replicates and compute the proportion of successes in each.
mtcars %>%
  specify(am ~ NULL, success = "1") %>% # named-argument alt: response = am
  hypothesize(null = "point", p = 0.25) %>%
  generate(reps = 100, type = "simulate") %>%
  calculate(stat = "prop")
## # A tibble: 100 x 2
## replicate stat
## <fct> <dbl>
## 1 1 0.375
## 2 2 0.0625
## 3 3 0.125
## 4 4 0.25
## 5 5 0.188
## 6 6 0.406
## 7 7 0.219
## 8 8 0.375
## 9 9 0.344
## 10 10 0.188
## # … with 90 more rows
Two categorical (2 level) variables
# Response am (success level "1") against explanatory vs under an
# independence null: 100 permutations, difference in proportions ("0" - "1").
# Formula alt for specify(): am ~ vs
mtcars %>%
  specify(response = am, explanatory = vs, success = "1") %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in props", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.0397
## 2 2 0.0873
## 3 3 0.214
## 4 4 -0.167
## 5 5 -0.167
## 6 6 -0.0397
## 7 7 0.0873
## 8 8 -0.0397
## 9 9 -0.0397
## 10 10 -0.294
## # … with 90 more rows
One categorical (>2 level) - GoF
# Response cyl with a point null giving a probability to each level:
# simulate 100 replicates and compute the Chisq statistic of each.
mtcars %>%
  specify(response = cyl) %>% # formula alt: cyl ~ NULL
  hypothesize(
    null = "point",
    p = c("4" = 0.5, "6" = 0.25, "8" = 0.25)
  ) %>%
  generate(reps = 100, type = "simulate") %>%
  calculate(stat = "Chisq")
## # A tibble: 100 x 2
## replicate stat
## <fct> <dbl>
## 1 1 0.688
## 2 2 1.69
## 3 3 1.69
## 4 4 1.69
## 5 5 10.2
## 6 6 4.5
## 7 7 3
## 8 8 2.69
## 9 9 0.5
## 10 10 1.5
## # … with 90 more rows
Two categorical (>2 level) variables
# Response cyl against explanatory am under an independence null:
# 100 permutations, Chisq statistic of each.
mtcars %>%
  specify(response = cyl, explanatory = am) %>% # formula alt: cyl ~ am
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "Chisq")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 3.90
## 2 2 3.68
## 3 3 1.01
## 4 4 0.557
## 5 5 1.34
## 6 6 2.93
## 7 7 1.45
## 8 8 0.557
## 9 9 0.557
## 10 10 1.01
## # … with 90 more rows
One numerical variable, one categorical (2 levels) (diff in means)
# Response mpg against explanatory am under an independence null:
# 100 permutations, difference in means ("0" - "1").
mtcars %>%
  specify(response = mpg, explanatory = am) %>% # formula alt: mpg ~ am
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in means", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 3.12
## 2 2 -1.01
## 3 3 0.813
## 4 4 1.46
## 5 5 0.0101
## 6 6 1.94
## 7 7 -0.00283
## 8 8 -1.84
## 9 9 -2.24
## 10 10 -3.59
## # … with 90 more rows
One numerical variable, one categorical (2 levels) (diff in medians)
# Response mpg against explanatory am under an independence null:
# 100 permutations, difference in medians ("0" - "1").
mtcars %>%
  specify(response = mpg, explanatory = am) %>% # formula alt: mpg ~ am
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in medians", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -5
## 2 2 -2.3
## 3 3 4.10
## 4 4 0
## 5 5 0
## 6 6 1
## 7 7 1.90
## 8 8 -0.5
## 9 9 2.90
## 10 10 1.90
## # … with 90 more rows
One numerical, one categorical (>2 levels) - ANOVA
# Response mpg against explanatory cyl under an independence null:
# 100 permutations, F statistic of each.
mtcars %>%
  specify(response = mpg, explanatory = cyl) %>% # formula alt: mpg ~ cyl
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "F")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.842
## 2 2 0.800
## 3 3 0.232
## 4 4 0.0158
## 5 5 0.0488
## 6 6 0.466
## 7 7 1.26
## 8 8 5.13
## 9 9 1.67
## 10 10 0.469
## # … with 90 more rows
Two numerical vars - SLR
# Response mpg against explanatory hp under an independence null:
# 100 permutations, slope of each.
mtcars %>%
specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = hp
hypothesize(null = "independence") %>%
generate(reps = 100, type = "permute") %>%
calculate(stat = "slope")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.0158
## 2 2 -0.0104
## 3 3 0.00876
## 4 4 0.0291
## 5 5 -0.0000981
## 6 6 -0.0206
## 7 7 -0.00727
## 8 8 0.0167
## 9 9 0.00682
## 10 10 0.0116
## # … with 90 more rows
One numerical variable (standard deviation)
Not currently implemented
# Not run: as noted above, this test is not currently implemented.
# Intended shape: response mpg, point null sigma = 5, 100 bootstrap
# replicates, standard deviation of each.
mtcars %>%
  specify(response = mpg) %>% # formula alt: mpg ~ NULL
  hypothesize(null = "point", sigma = 5) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "sd")
One numerical (one mean)
# Response mpg, no null hypothesis: draw 100 bootstrap replicates and
# compute the mean of each.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "mean")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 19.2
## 2 2 18.9
## 3 3 18.7
## 4 4 19.3
## 5 5 20.9
## 6 6 18.5
## 7 7 20.5
## 8 8 18.8
## 9 9 23.1
## 10 10 18.6
## # … with 90 more rows
One numerical (one median)
# Response mpg, no null hypothesis: draw 100 bootstrap replicates and
# compute the median of each.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "median")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 17.0
## 2 2 19.2
## 3 3 19.4
## 4 4 19.4
## 5 5 17.1
## 6 6 18.2
## 7 7 20.4
## 8 8 22.8
## 9 9 19.0
## 10 10 21
## # … with 90 more rows
One numerical (standard deviation)
# Response mpg, no null hypothesis: draw 100 bootstrap replicates and
# compute the standard deviation of each.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "sd")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 6.11
## 2 2 5.27
## 3 3 4.82
## 4 4 4.35
## 5 5 5.55
## 6 6 7.83
## 7 7 6.28
## 8 8 5.68
## 9 9 7.19
## 10 10 5.67
## # … with 90 more rows
One categorical (one proportion)
# Response am (success level "1"), no null hypothesis: draw 100 bootstrap
# replicates and compute the proportion of successes in each.
mtcars %>%
  specify(am ~ NULL, success = "1") %>% # named-argument alt: response = am
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "prop")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.25
## 2 2 0.5
## 3 3 0.344
## 4 4 0.531
## 5 5 0.438
## 6 6 0.5
## 7 7 0.312
## 8 8 0.438
## 9 9 0.656
## 10 10 0.406
## # … with 90 more rows
One numerical variable, one categorical (2 levels) (diff in means)
# Response mpg against explanatory am, no null hypothesis: 100 bootstrap
# replicates, difference in means ("0" - "1").
mtcars %>%
  specify(response = mpg, explanatory = am) %>% # formula alt: mpg ~ am
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "diff in means", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -4.36
## 2 2 -5.64
## 3 3 -8.54
## 4 4 -9.26
## 5 5 -5.24
## 6 6 -5.55
## 7 7 -7.71
## 8 8 -7.68
## 9 9 -9.21
## 10 10 -7.17
## # … with 90 more rows
Two categorical variables (diff in proportions)
# Response am (success level "1") against explanatory vs, no null
# hypothesis: 100 bootstrap replicates, difference in proportions ("0" - "1").
# Formula alt for specify(): am ~ vs
mtcars %>%
  specify(response = am, explanatory = vs, success = "1") %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "diff in props", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.0648
## 2 2 -0.189
## 3 3 -0.208
## 4 4 -0.0952
## 5 5 -0.317
## 6 6 0.0317
## 7 7 0.143
## 8 8 -0.453
## 9 9 -0.212
## 10 10 -0.312
## # … with 90 more rows
Two numerical vars - SLR
# Response mpg against explanatory hp, no null hypothesis: 100 bootstrap
# replicates, slope of each.
mtcars %>%
  specify(response = mpg, explanatory = hp) %>% # formula alt: mpg ~ hp
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "slope")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.0878
## 2 2 -0.0691
## 3 3 -0.0866
## 4 4 -0.0518
## 5 5 -0.0593
## 6 6 -0.0711
## 7 7 -0.0588
## 8 8 -0.0776
## 9 9 -0.0615
## 10 10 -0.0464
## # … with 90 more rows
Two numerical vars - correlation
# Response mpg against explanatory hp, no null hypothesis: 100 bootstrap
# replicates, correlation of each.
mtcars %>%
  specify(response = mpg, explanatory = hp) %>% # formula alt: mpg ~ hp
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "correlation")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.765
## 2 2 -0.846
## 3 3 -0.789
## 4 4 -0.718
## 5 5 -0.748
## 6 6 -0.800
## 7 7 -0.744
## 8 8 -0.832
## 9 9 -0.752
## 10 10 -0.824
## # … with 90 more rows