nycflights13 flights data

Note: The `type` argument in `generate()` is automatically filled based on the entries for `specify()` and `hypothesize()`. It can be removed throughout the examples that follow. It is left in to reiterate the type of generation process being performed.
library(nycflights13)
library(dplyr)
library(ggplot2)
library(stringr)
library(infer)
# Fix the RNG seed so the random 500-row sample below is reproducible.
set.seed(2017)

# Build a small working sample of `flights` with two derived categorical
# variables (`season`, `day_hour`) alongside the delay and origin columns.
fli_small <- flights %>%
  na.omit() %>% # drop rows containing any missing value
  sample_n(size = 500) %>%
  # case_when() has no fallback branch here, so a value matching neither
  # condition is left as NA.
  mutate(season = case_when(
    month %in% c(10:12, 1:3) ~ "winter",
    month %in% c(4:9) ~ "summer"
  )) %>%
  mutate(day_hour = case_when(
    between(hour, 1, 12) ~ "morning",
    between(hour, 13, 24) ~ "not morning"
  )) %>%
  select(arr_delay, dep_delay, season,
         day_hour, origin, carrier)

Variables kept in fli_small:
- arr_delay, dep_delay
- season ("winter", "summer"), day_hour ("morning", "not morning")
- origin ("EWR", "JFK", "LGA")
- carrier

The recommended approach is to use specify() %>% calculate():
# Observed chi-squared statistic for the association between origin and season.
obs_chisq <- fli_small %>%
  specify(origin ~ season) %>% # alt: response = origin, explanatory = season
  calculate(stat = "Chisq")

| stat |
|---|
| 0.571898 |
.
Or using `chisq_test()` in infer:
# Same observed statistic, taken from the `statistic` column of the
# chisq_test() result.
obs_chisq <- fli_small %>%
  chisq_test(formula = origin ~ season) %>%
  dplyr::select(statistic)

| statistic |
|---|
| 0.571898 |
.
Or using another shortcut function in infer:
# Same observed statistic via the chisq_stat() shortcut.
# NOTE(review): newer infer releases appear to deprecate chisq_stat() in
# favour of specify() %>% calculate(); confirm against the installed version.
obs_chisq <- fli_small %>%
  chisq_stat(formula = origin ~ season)

| stat |
|---|
| 0.571898 |
.
# Simulate the null distribution of the chi-squared statistic with 1000
# permutation replicates under the independence hypothesis.
chisq_null_distn <- fli_small %>%
  specify(origin ~ season) %>% # alt: response = origin, explanatory = season
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "Chisq")

# Plot the simulated null distribution against the observed statistic.
# NOTE(review): recent infer releases seem to expect
# `visualize() + shade_p_value(...)` rather than obs_stat/direction arguments;
# confirm against the installed version before changing.
chisq_null_distn %>%
  visualize(obs_stat = obs_chisq, direction = "greater")

# Simulation-based p-value for the observed statistic.
chisq_null_distn %>%
  get_pvalue(obs_stat = obs_chisq, direction = "greater")

| p_value |
|---|
| 0.748 |
# Plot the theoretical distribution only: there is no generate() step,
# because no simulated replicates are needed for this method.
fli_small %>%
  specify(origin ~ season) %>%
  hypothesize(null = "independence") %>%
  # generate() ## Not used for theoretical
  calculate(stat = "Chisq") %>%
  visualize(
    method = "theoretical",
    obs_stat = obs_chisq,
    direction = "right"
  )
## Warning: Check to make sure the conditions have been met for the
## theoretical method. `infer` currently does not check these for you.
# Plot the permutation-based null distribution together with the theoretical
# one (method = "both").
fli_small %>%
  specify(origin ~ season) %>% # alt: response = origin, explanatory = season
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "Chisq") %>%
  visualize(method = "both", obs_stat = obs_chisq, direction = "right")
## Warning: Check to make sure the conditions have been met for the
## theoretical method. `infer` currently does not check these for you.
# Extract the p-value reported by chisq_test() as a bare numeric vector.
fli_small %>%
  chisq_test(formula = origin ~ season) %>%
  dplyr::pull(p_value)
## [1] 0.7513009