Gallery showing various tables possible with the {gtsummary} package. If you have created an interesting table using {gtsummary}, please submit it to the gallery via a pull request to the GitHub repository.
library(gtsummary); library(gt); library(survival)
library(dplyr); library(stringr); library(purrr); library(forcats); library(tidyr)
Add a spanning header over the group columns for increased clarity, and modify column headers.
trial %>%
select(trt, age, grade) %>%
tbl_summary(
by = trt,
missing = "no",
statistic = all_continuous() ~ "{median} ({p25}, {p75})"
) %>%
modify_header(all_stat_cols() ~ "**{level}**<br>N = {n} ({style_percent(p)}%)") %>%
add_n() %>%
bold_labels() %>%
modify_spanning_header(all_stat_cols() ~ "**Chemotherapy Treatment**")
| Characteristic | N | Chemotherapy Treatment | |
|---|---|---|---|
| Drug A N = 98 (49%)1 |
Drug B N = 102 (51%)1 |
||
| Age | 189 | 46 (37, 59) | 48 (39, 56) |
| Grade | 200 | ||
| I | 35 (36%) | 33 (32%) | |
| II | 32 (33%) | 36 (35%) | |
| III | 31 (32%) | 33 (32%) | |
|
1
Median (IQR); n (%)
|
|||
Show continuous summary statistics on multiple lines.
trial %>%
select(trt, age, marker) %>%
tbl_summary(
by = trt,
type = all_continuous() ~ "continuous2",
statistic = all_continuous() ~ c("{N_nonmiss}",
"{mean} ({sd})",
"{median} ({p25}, {p75})",
"{min}, {max}"),
missing = "no"
) %>%
italicize_levels()
| Characteristic | Drug A, N = 98 | Drug B, N = 102 |
|---|---|---|
| Age | ||
| N | 91 | 98 |
| Mean (SD) | 47 (15) | 47 (14) |
| Median (IQR) | 46 (37, 59) | 48 (39, 56) |
| Range | 6, 78 | 9, 83 |
| Marker Level (ng/mL) | ||
| N | 92 | 98 |
| Mean (SD) | 1.02 (0.89) | 0.82 (0.83) |
| Median (IQR) | 0.84 (0.24, 1.57) | 0.52 (0.19, 1.20) |
| Range | 0.00, 3.87 | 0.00, 3.64 |
Modify the function that formats the p-values, change the variable labels, update the tumor response header, and add a correction for multiple testing.
trial %>%
select(response, age, grade) %>%
mutate(response = factor(response, labels = c("No Tumor Response", "Tumor Responded"))) %>%
tbl_summary(
by = response,
missing = "no",
label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
) %>%
add_p(pvalue_fun = ~style_pvalue(.x, digits = 2)) %>%
add_q()
| Characteristic | No Tumor Response, N = 1321 | Tumor Responded, N = 611 | p-value2 | q-value3 |
|---|---|---|---|---|
| Patient Age | 46 (36, 55) | 49 (43, 59) | 0.091 | 0.18 |
| Tumor Grade | 0.93 | 0.93 | ||
| I | 46 (35%) | 21 (34%) | ||
| II | 44 (33%) | 19 (31%) | ||
| III | 42 (32%) | 21 (34%) | ||
|
1
Median (IQR); n (%)
2
Wilcoxon rank sum test; Pearson's Chi-squared test
3
False discovery rate correction for multiple testing
|
||||
Include missing tumor response as column using fct_explicit_na().
trial %>%
select(response, age, grade) %>%
mutate(
response = factor(response, labels = c("No Tumor Response", "Tumor Responded")) %>%
fct_explicit_na(na_level = "Missing Response Status")
) %>%
tbl_summary(
by = response,
label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
)
| Characteristic | No Tumor Response, N = 1321 | Tumor Responded, N = 611 | Missing Response Status, N = 71 |
|---|---|---|---|
| Patient Age | 46 (36, 55) | 49 (43, 59) | 52 (44, 57) |
| Unknown | 7 | 3 | 1 |
| Tumor Grade | |||
| I | 46 (35%) | 21 (34%) | 1 (14%) |
| II | 44 (33%) | 19 (31%) | 5 (71%) |
| III | 42 (32%) | 21 (34%) | 1 (14%) |
|
1
Median (IQR); n (%)
|
|||
Report treatment differences between two groups. This is often needed in randomized trials. In this example, we report the difference in tumor response and marker level between two chemotherapy treatments.
trial %>%
select(response, marker, trt) %>%
tbl_summary(
by = trt,
statistic = list(all_continuous() ~ "{mean} ({sd})",
all_categorical() ~ "{p}%"),
missing = "no"
) %>%
add_difference() %>%
add_n() %>%
modify_header(all_stat_cols() ~ "**{level}**") %>%
modify_footnote(all_stat_cols() ~ NA)
| Characteristic | N | Drug A | Drug B | Difference1 | 95% CI1,2 | p-value1 |
|---|---|---|---|---|---|---|
| Tumor Response | 193 | 29% | 34% | -4.2% | -18%, 9.9% | 0.6 |
| Marker Level (ng/mL) | 190 | 1.02 (0.89) | 0.82 (0.83) | 0.20 | -0.05, 0.44 | 0.12 |
|
1
Two sample test for equality of proportions; Welch Two Sample t-test
2
CI = Confidence Interval
|
||||||
Paired t-test and McNemar’s test. The data is expected in a long format with 2 rows per participant.
# pretend that every patient received both Drug A and Drug B: numbering the
# rows within each treatment arm yields an id that links the paired measurements
trial_paired <- select(trial, trt, marker, response)
trial_paired <- group_by(trial_paired, trt)
trial_paired <- mutate(trial_paired, id = row_number())
trial_paired <- ungroup(trial_paired)
# you must first delete incomplete pairs from the data, then you can build the table
trial_paired %>%
# delete missing values
filter(complete.cases(.)) %>%
# keep IDs with both measurements
group_by(id) %>%
filter(n() == 2) %>%
ungroup() %>%
# summarize data
tbl_summary(by = trt, include = -id) %>%
add_p(test = list(marker ~ "paired.t.test",
response ~ "mcnemar.test"),
group = id)
| Characteristic | Drug A, N = 831 | Drug B, N = 831 | p-value2 |
|---|---|---|---|
| Marker Level (ng/mL) | 0.82 (0.22, 1.63) | 0.53 (0.18, 1.26) | 0.2 |
| Tumor Response | 21 (25%) | 28 (34%) | 0.3 |
|
1
Median (IQR); n (%)
2
Paired t-test; McNemar's Chi-squared test with continuity correction
|
|||
Include p-values comparing all groups to a single reference group.
# descriptive table by tumor grade; this one carries no p-values
small_trial <- select(trial, grade, age, response)
t0 <- tbl_summary(small_trial, by = grade, missing = "no")
t0 <- modify_header(t0, all_stat_cols() ~ "**{level}**")
# grade I versus grade II: built only for its p-value column
t1 <- filter(small_trial, grade %in% c("I", "II"))
t1 <- tbl_summary(t1, by = grade, missing = "no")
t1 <- add_p(t1)
t1 <- modify_header(t1, p.value ~ md("**I vs. II**"))
# the summary statistic columns are hidden so that only the p-value remains
t1 <- modify_column_hide(t1, all_stat_cols())
# grade I versus grade III: built only for its p-value column
t2 <- filter(small_trial, grade %in% c("I", "III"))
t2 <- tbl_summary(t2, by = grade, missing = "no")
t2 <- add_p(t2)
t2 <- modify_header(t2, p.value ~ md("**I vs. III**"))
# the summary statistic columns are hidden so that only the p-value remains
t2 <- modify_column_hide(t2, all_stat_cols())
# merging the 3 tables together, and adding additional gt formatting
tbl_merge(list(t0, t1, t2)) %>%
modify_spanning_header(
list(
all_stat_cols() ~ "**Tumor Grade**",
starts_with("p.value") ~ "**p-values**"
)
)
| Characteristic | Tumor Grade | p-values | |||
|---|---|---|---|---|---|
| I1 | II1 | III1 | I vs. II2 | I vs. III2 | |
| Age | 47 (37, 56) | 48 (37, 57) | 47 (38, 58) | 0.7 | 0.5 |
| Tumor Response | 21 (31%) | 19 (30%) | 21 (33%) | >0.9 | 0.9 |
|
1
Median (IQR); n (%)
2
Wilcoxon rank sum test; Fisher's exact test
|
|||||
Add additional statistics as additional columns.
# limits of the confidence interval for the mean as reported by t.test():
# ll() returns the lower limit, ul() the upper limit
ll <- function(x) {
  t.test(x)[["conf.int"]][1]
}
ul <- function(x) {
  t.test(x)[["conf.int"]][2]
}
# first column: mean and standard deviation of each continuous variable
t1 <- select(trial, age, marker)
t1 <- tbl_summary(t1, statistic = all_continuous() ~ "{mean} ({sd})", missing = "no")
t1 <- modify_header(t1, stat_0 ~ "**Mean (SD)**")
# second column: confidence limits produced by the user-defined ll() and ul()
t2 <- select(trial, age, marker)
t2 <- tbl_summary(t2, statistic = all_continuous() ~ "{ll}, {ul}", missing = "no")
t2 <- modify_header(t2, stat_0 ~ "**95% CI for Mean**")
tbl_merge(list(t1, t2)) %>%
modify_footnote(everything() ~ NA_character_) %>%
modify_spanning_header(everything() ~ NA_character_)
| Characteristic | Mean (SD) | 95% CI for Mean |
|---|---|---|
| Age | 47 (14) | 45, 49 |
| Marker Level (ng/mL) | 0.92 (0.86) | 0.79, 1.04 |
It’s often necessary to summarize a continuous variable by one, two, or more categorical variables.
trial %>%
select(trt, grade, marker) %>%
tbl_continuous(variable = marker, by = trt) %>%
modify_spanning_header(all_stat_cols() ~ "**Treatment Assignment**")
| Characteristic | Treatment Assignment | |
|---|---|---|
| Drug A, N = 981 | Drug B, N = 1021 | |
| Grade | ||
| I | 0.96 (0.24, 1.70) | 1.05 (0.29, 1.49) |
| II | 0.66 (0.31, 1.23) | 0.21 (0.10, 0.94) |
| III | 0.84 (0.16, 1.91) | 0.58 (0.35, 1.36) |
|
1
Median (IQR)
|
||
Build a summary table stratified by more than one variable.
trial %>%
select(trt, grade, age, stage) %>%
mutate(grade = paste("Grade", grade)) %>%
tbl_strata(
strata = grade,
~.x %>%
tbl_summary(by = trt, missing = "no") %>%
modify_header(all_stat_cols() ~ "**{level}**")
)
| Characteristic | Grade I | Grade II | Grade III | |||
|---|---|---|---|---|---|---|
| Drug A1 | Drug B1 | Drug A1 | Drug B1 | Drug A1 | Drug B1 | |
| Age | 46 (36, 60) | 48 (42, 55) | 44 (31, 54) | 50 (43, 57) | 52 (42, 60) | 45 (36, 52) |
| T Stage | ||||||
| T1 | 8 (23%) | 9 (27%) | 14 (44%) | 9 (25%) | 6 (19%) | 7 (21%) |
| T2 | 8 (23%) | 10 (30%) | 8 (25%) | 9 (25%) | 9 (29%) | 10 (30%) |
| T3 | 11 (31%) | 7 (21%) | 5 (16%) | 6 (17%) | 6 (19%) | 8 (24%) |
| T4 | 8 (23%) | 7 (21%) | 5 (16%) | 12 (33%) | 10 (32%) | 8 (24%) |
|
1
Median (IQR); n (%)
|
||||||
Include number of observations and the number of events in a univariate regression table.
trial %>%
select(response, age, grade) %>%
tbl_uvregression(
method = glm,
y = response,
method.args = list(family = binomial),
exponentiate = TRUE
) %>%
add_nevent()
| Characteristic | N | Event N | OR1 | 95% CI1 | p-value |
|---|---|---|---|---|---|
| Age | 183 | 58 | 1.02 | 1.00, 1.04 | 0.10 |
| Grade | 193 | 61 | |||
| I | — | — | |||
| II | 0.95 | 0.45, 2.00 | 0.9 | ||
| III | 1.10 | 0.52, 2.29 | 0.8 | ||
|
1
OR = Odds Ratio, CI = Confidence Interval
|
|||||
Include two related models side-by-side with descriptive statistics. We also use the compact table theme that reduces cell padding and font size.
# logistic regression of tumor response on treatment and grade, shown as odds ratios
gt_r1 <- tbl_regression(
  glm(response ~ trt + grade, data = trial, family = binomial),
  exponentiate = TRUE
)
# Cox model of time to death on treatment and grade, shown as hazard ratios
gt_r2 <- tbl_regression(
  coxph(Surv(ttdeath, death) ~ trt + grade, data = trial),
  exponentiate = TRUE
)
# descriptive column for the two covariates: N, a relabelled n (%) header,
# and the footnote on that column removed
gt_t1 <- tbl_summary(trial[c("trt", "grade")], missing = "no")
gt_t1 <- add_n(gt_t1)
gt_t1 <- modify_header(gt_t1, stat_0 ~ "**n (%)**")
gt_t1 <- modify_footnote(gt_t1, stat_0 ~ NA_character_)
theme_gtsummary_compact()
#> Setting theme `Compact`
tbl_merge(
list(gt_t1, gt_r1, gt_r2),
tab_spanner = c(NA_character_, "**Tumor Response**", "**Time to Death**")
)
| Characteristic | N | n (%) | Tumor Response | Time to Death | ||||
|---|---|---|---|---|---|---|---|---|
| OR1 | 95% CI1 | p-value | HR1 | 95% CI1 | p-value | |||
| Chemotherapy Treatment | 200 | |||||||
| Drug A | 98 (49%) | — | — | — | — | |||
| Drug B | 102 (51%) | 1.21 | 0.66, 2.24 | 0.5 | 1.25 | 0.86, 1.81 | 0.2 | |
| Grade | 200 | |||||||
| I | 68 (34%) | — | — | — | — | |||
| II | 68 (34%) | 0.94 | 0.44, 1.98 | 0.9 | 1.28 | 0.80, 2.06 | 0.3 | |
| III | 64 (32%) | 1.09 | 0.52, 2.27 | 0.8 | 1.69 | 1.07, 2.66 | 0.024 | |
|
1
OR = Odds Ratio, CI = Confidence Interval, HR = Hazard Ratio
|
||||||||
Include the number of events at each level of a categorical predictor.
trial %>%
select(ttdeath, death, stage, grade) %>%
tbl_uvregression(
method = coxph,
y = Surv(ttdeath, death),
exponentiate = TRUE,
hide_n = TRUE
) %>%
add_nevent(location = "level")
| Characteristic | Event N | HR1 | 95% CI1 | p-value |
|---|---|---|---|---|
| T Stage | ||||
| T1 | 24 | — | — | |
| T2 | 27 | 1.18 | 0.68, 2.04 | 0.6 |
| T3 | 22 | 1.23 | 0.69, 2.20 | 0.5 |
| T4 | 39 | 2.48 | 1.49, 4.14 | <0.001 |
| Grade | ||||
| I | 33 | — | — | |
| II | 36 | 1.28 | 0.80, 2.05 | 0.3 |
| III | 43 | 1.69 | 1.07, 2.66 | 0.024 |
|
1
HR = Hazard Ratio, CI = Confidence Interval
|
||||
Regression model where the covariate remains the same, and the outcome changes.
trial %>%
select(age, marker, trt) %>%
tbl_uvregression(
method = lm,
x = trt,
show_single_row = "trt",
hide_n = TRUE
) %>%
modify_header(list(
label ~"**Model Outcome**",
estimate ~ "**Treatment Coef.**"
)) %>%
modify_footnote(estimate ~ "Values larger than 0 indicate larger values in the Drug B group.")
| Model Outcome | Treatment Coef.1 | 95% CI2 | p-value |
|---|---|---|---|
| Age | 0.44 | -3.7, 4.6 | 0.8 |
| Marker Level (ng/mL) | -0.20 | -0.44, 0.05 | 0.12 |
|
1
Values larger than 0 indicate larger values in the Drug B group.
2
CI = Confidence Interval
|
|||
Implement a custom tidier to report Wald confidence intervals. The Wald confidence intervals are calculated using confint.default().
# Custom tidier that reports Wald confidence intervals.
#
# Arguments:
#   x            a fitted model accepted by broom::tidy() and
#                stats::confint.default()
#   exponentiate if TRUE, the estimates (via broom::tidy()) and the confidence
#                limits are both returned on the exponentiated scale
#   conf.level   confidence level of the Wald interval (default 0.95)
#   ...          accepted so callers may pass extra arguments; not used here
#
# Returns the broom::tidy() output with `conf.low` and `conf.high` appended.
my_tidy <- function(x, exponentiate = FALSE, conf.level = 0.95, ...) {
  # Wald limits at the requested level (previously always the 95% default)
  wald_ci <- stats::confint.default(x, level = conf.level)

  # keep the limits on the same scale as the exponentiated estimates
  if (isTRUE(exponentiate)) {
    wald_ci <- exp(wald_ci)
  }

  # save the limits in a tibble under the column names a tidier must provide
  wald_ci <- rlang::set_names(
    tibble::as_tibble(wald_ci),
    c("conf.low", "conf.high")
  )

  dplyr::bind_cols(
    broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE),
    wald_ci
  )
}
lm(age ~ grade + marker, trial) %>%
tbl_regression(tidy_fun = my_tidy)
| Characteristic | Beta | 95% CI1 | p-value |
|---|---|---|---|
| Grade | |||
| I | — | — | |
| II | 0.64 | -4.6, 5.9 | 0.8 |
| III | 2.4 | -2.8, 7.6 | 0.4 |
| Marker Level (ng/mL) | -0.04 | -2.6, 2.5 | >0.9 |
|
1
CI = Confidence Interval
|
|||
Use significance stars on estimates with low p-values.
trial %>%
select(ttdeath, death, stage, grade) %>%
tbl_uvregression(
method = coxph,
y = Surv(ttdeath, death),
exponentiate = TRUE,
) %>%
add_significance_stars()
| Characteristic | N | HR1,2 | SE2 |
|---|---|---|---|
| T Stage | 200 | ||
| T1 | — | — | |
| T2 | 1.18 | 0.281 | |
| T3 | 1.23 | 0.295 | |
| T4 | 2.48*** | 0.260 | |
| Grade | 200 | ||
| I | — | — | |
| II | 1.28 | 0.241 | |
| III | 1.69* | 0.232 | |
|
1
*p<0.05; **p<0.01; ***p<0.001
2
HR = Hazard Ratio, SE = Standard Error
|
|||