Skip to content

Commit d1d0d58

Browse files
committed
Re-adding data pre-processing
1 parent 9f98ce1 commit d1d0d58

File tree

8 files changed

+114
-46
lines changed

8 files changed

+114
-46
lines changed

DESCRIPTION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: gtsummary
22
Title: Presentation-Ready Data Summary and Analytic Result Tables
3-
Version: 2.1.0.9011
3+
Version: 2.1.0.9012
44
Authors@R: c(
55
person("Daniel D.", "Sjoberg", , "[email protected]", role = c("aut", "cre"),
66
comment = c(ORCID = "0000-0003-0862-2018")),

NEWS.md

+2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# gtsummary (development version)
22

3+
* Data pre-processing has now been re-introduced for calculations in `add_p()` and `add_difference()`. Data pre-processing steps were removed in the v2.0 release; however, in some cases---particularly `add_difference()` for dichotomous variables---the reduced functionality was affecting the user experience. See `?tests` for details on data pre-processing. (#2165)
4+
35
* The `add_variable_group_header()` function has been generalized to work with gtsummary tables, where previously only `'tbl_summary'` were accepted. (#2197)
46

57
* The footnote placed on the p-value column by `add_significance_stars()` no longer replaces any existing footnote. Rather the footnote is added to any existing footnote. (#2184)

R/add_p.R

+2-1
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,8 @@ calculate_and_add_test_results <- function(x, include, group = NULL, test.args,
252252
test.args = test.args[[variable]],
253253
adj.vars = adj.vars,
254254
conf.level = conf.level,
255-
continuous_variable = continuous_variable
255+
continuous_variable = continuous_variable,
256+
tbl = x
256257
)
257258
)
258259
)

R/sysdata.rda

87 Bytes
Binary file not shown.

R/utils-add_p_tests.R

+26-22
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,21 @@
1+
.data_pre_processing <- function(data, numeric = character(), factor = character()) {
2+
for (v in numeric) {
3+
data[[v]] <- as.numeric(data[[v]])
4+
}
5+
for (v in factor) {
6+
data[[v]] <- as.factor(data[[v]])
7+
}
8+
data
9+
}
10+
111
# add_p.tbl_summary ------------------------------------------------------------
212
add_p_test_t.test <- function(data, variable, by, test.args, conf.level = 0.95, ...) {
313
check_pkg_installed("cardx")
414
check_empty(c("group", "adj.vars"), ...)
515

616
rlang::inject(
717
cardx::ard_stats_t_test(
8-
data = data,
18+
data = .data_pre_processing(data, factor = by),
919
variable = all_of(variable),
1020
by = all_of(by),
1121
conf.level = conf.level,
@@ -20,7 +30,7 @@ add_p_test_wilcox.test <- function(data, variable, by, test.args, conf.level = 0
2030

2131
rlang::inject(
2232
cardx::ard_stats_wilcox_test(
23-
data = data,
33+
data = .data_pre_processing(data, factor = by, numeric = variable),
2434
variable = all_of(variable),
2535
by = all_of(by),
2636
conf.int = TRUE,
@@ -75,7 +85,7 @@ add_p_test_chisq.test <- function(data, variable, by, test.args, ...) {
7585

7686
rlang::inject(
7787
cardx::ard_stats_chisq_test(
78-
data = data,
88+
data = .data_pre_processing(data, factor = by),
7989
variable = all_of(variable),
8090
by = all_of(by),
8191
!!!test.args
@@ -103,7 +113,7 @@ add_p_test_mood.test <- function(data, variable, by, test.args, ...) {
103113

104114
rlang::inject(
105115
cardx::ard_stats_mood_test(
106-
data = data,
116+
data = .data_pre_processing(data, factor = by),
107117
variable = all_of(variable),
108118
by = all_of(by),
109119
!!!test.args
@@ -117,7 +127,7 @@ add_p_test_kruskal.test <- function(data, variable, by, ...) {
117127
check_empty(c("group", "adj.vars", "test.args"), ...)
118128

119129
cardx::ard_stats_kruskal_test(
120-
data = data,
130+
data = .data_pre_processing(data, factor = by),
121131
variable = all_of(variable),
122132
by = all_of(by)
123133
)
@@ -129,7 +139,7 @@ add_p_test_fisher.test <- function(data, variable, by, test.args, conf.level = 0
129139

130140
rlang::inject(
131141
cardx::ard_stats_fisher_test(
132-
data = data,
142+
data = .data_pre_processing(data, factor = by),
133143
variable = all_of(variable),
134144
by = all_of(by),
135145
conf.level = conf.level,
@@ -152,7 +162,8 @@ add_p_test_aov <- function(data, variable, by, ...) {
152162
i = "The same functionality is covered in {.val oneway.test} with argument `var.equal = TRUE`."
153163
))
154164

155-
add_p_test_oneway.test(data = data, variable = variable, by = by, test.args = list(var.equal = TRUE))
165+
add_p_test_oneway.test(data = .data_pre_processing(data, factor = by),
166+
variable = variable, by = by, test.args = list(var.equal = TRUE))
156167
}
157168

158169
add_p_test_oneway.test <- function(data, variable, by, test.args, ...) {
@@ -162,25 +173,12 @@ add_p_test_oneway.test <- function(data, variable, by, test.args, ...) {
162173
rlang::inject(
163174
cardx::ard_stats_oneway_test(
164175
formula = cardx::reformulate2(termlabels = by, response = variable),
165-
data = data,
176+
data = .data_pre_processing(data, factor = by),
166177
!!!test.args
167178
)
168179
)
169180
}
170181

171-
add_p_test_mood.test <- function(data, variable, by, ...) {
172-
check_pkg_installed("cardx")
173-
check_empty(c("group", "adj.vars", "test.args"), ...)
174-
175-
rlang::inject(
176-
cardx::ard_stats_mood_test(
177-
data = data,
178-
variable = all_of(variable),
179-
by = all_of(by)
180-
)
181-
)
182-
}
183-
184182
add_p_test_lme4 <- function(data, variable, by, group, ...) {
185183
check_pkg_installed("cardx")
186184
check_pkg_installed("lme4", ref = "cardx")
@@ -247,10 +245,16 @@ add_p_tbl_summary_paired.wilcox.test <- function(data, variable, by, group, test
247245
)
248246
}
249247

250-
add_p_test_prop.test <- function(data, variable, by, test.args, conf.level = 0.95, ...) {
248+
add_p_test_prop.test <- function(data, variable, by, test.args, conf.level = 0.95, tbl, ...) {
251249
check_pkg_installed("cardx")
252250
check_empty(c("adj.vars", "group"), ...)
253251

252+
# convert variable to lgl using the `value` argument
253+
if (identical(tbl$inputs$type[[variable]], "dichotomous") &&
254+
!is_empty(tbl$inputs$value[[variable]])) {
255+
data[[variable]] <- data[[variable]] == tbl$inputs$value[[variable]]
256+
}
257+
254258
rlang::inject(
255259
cardx::ard_stats_prop_test(
256260
data = data,

data-raw/gtsummary_tests.csv

+10-10
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
class,add_p,add_difference,diff_statistic,test_package,test_name,test_fun,fun_to_run,accept_dots,pseudo_code,description,details
2-
tbl_summary,TRUE,TRUE,mean difference,stats,t.test,stats::t.test,gtsummary:::add_p_test_t.test,TRUE,"t.test(variable ~ by, data = data, conf.level = 0.95, ...)",t-test,
3-
tbl_summary,TRUE,FALSE,,stats,aov,stats::aov,gtsummary:::add_p_test_aov,FALSE,"aov(variable ~ by, data = data) %>% summary()",One-way ANOVA,
4-
tbl_summary,TRUE,FALSE,,stats,mood.test,stats::mood.test,gtsummary:::add_p_test_mood.test,TRUE,"mood.test(variable ~ by, data = data, ...) ",Mood two-sample test of scale,Not to be confused with the Brown-Mood test of medians
5-
tbl_summary,TRUE,FALSE,,stats,oneway.test,stats::oneway.test,gtsummary:::add_p_test_oneway.test,TRUE,"oneway.test(variable ~ by, data = data, ...) ",One-way ANOVA,
6-
tbl_summary,TRUE,FALSE,,stats,kruskal.test,stats::kruskal.test,gtsummary:::add_p_test_kruskal.test,FALSE,"kruskal.test(x = data[[variable]], g = data[[by]])",Kruskal-Wallis test,
7-
tbl_summary,TRUE,TRUE,,stats,wilcox.test,stats::wilcox.test,gtsummary:::add_p_test_wilcox.test,TRUE,"wilcox.test(variable ~ by, data = data, conf.int = TRUE, conf.level = conf.level, ...)",Wilcoxon rank-sum test,
8-
tbl_summary,TRUE,FALSE,,stats,chisq.test,stats::chisq.test,gtsummary:::add_p_test_chisq.test,TRUE,"chisq.test(x = data[[variable]], y = data[[by]], ...)",chi-square test of independence,
9-
tbl_summary,TRUE,FALSE,,stats,chisq.test.no.correct,,gtsummary:::add_p_test_chisq.test.no.correct,FALSE,"chisq.test(x = data[[variable]], y = data[[by]], correct = FALSE)",chi-square test of independence,
10-
tbl_summary,TRUE,FALSE,,stats,fisher.test,stats::fisher.test,gtsummary:::add_p_test_fisher.test,TRUE,"fisher.test(x = data[[variable]], y = data[[by]], conf.level = 0.95, ...)",Fisher's exact test,
2+
tbl_summary,TRUE,TRUE,mean difference,stats,t.test,stats::t.test,gtsummary:::add_p_test_t.test,TRUE,"t.test(variable ~ as.factor(by), data = data, conf.level = 0.95, ...)",t-test,
3+
tbl_summary,TRUE,FALSE,,stats,aov,stats::aov,gtsummary:::add_p_test_aov,FALSE,"aov(variable ~ as.factor(by), data = data) %>% summary()",One-way ANOVA,
4+
tbl_summary,TRUE,FALSE,,stats,mood.test,stats::mood.test,gtsummary:::add_p_test_mood.test,TRUE,"mood.test(variable ~ as.factor(by), data = data, ...)",Mood two-sample test of scale,Not to be confused with the Brown-Mood test of medians
5+
tbl_summary,TRUE,FALSE,,stats,oneway.test,stats::oneway.test,gtsummary:::add_p_test_oneway.test,TRUE,"oneway.test(variable ~ as.factor(by), data = data, ...)",One-way ANOVA,
6+
tbl_summary,TRUE,FALSE,,stats,kruskal.test,stats::kruskal.test,gtsummary:::add_p_test_kruskal.test,FALSE," kruskal.test(data[[variable]], as.factor(data[[by]]))",Kruskal-Wallis test,
7+
tbl_summary,TRUE,TRUE,,stats,wilcox.test,stats::wilcox.test,gtsummary:::add_p_test_wilcox.test,TRUE,"wilcox.test(as.numeric(variable) ~ as.factor(by), data = data, conf.int = TRUE, conf.level = conf.level, ...)",Wilcoxon rank-sum test,
8+
tbl_summary,TRUE,FALSE,,stats,chisq.test,stats::chisq.test,gtsummary:::add_p_test_chisq.test,TRUE," chisq.test(x = data[[variable]], y = as.factor(data[[by]]), ...)",chi-square test of independence,
9+
tbl_summary,TRUE,FALSE,,stats,chisq.test.no.correct,,gtsummary:::add_p_test_chisq.test.no.correct,FALSE,"chisq.test(x = data[[variable]], y = as.factor(data[[by]]), correct = FALSE)",chi-square test of independence,
10+
tbl_summary,TRUE,FALSE,,stats,fisher.test,stats::fisher.test,gtsummary:::add_p_test_fisher.test,TRUE,"fisher.test(data[[variable]], as.factor(data[[by]]), conf.level = 0.95, ...)",Fisher's exact test,
1111
tbl_summary,TRUE,FALSE,,stats,mcnemar.test,stats::mcnemar.test,gtsummary:::add_p_test_mcnemar.test,TRUE,"tidyr::pivot_wider(id_cols = group, ...); mcnemar.test(by_1, by_2, conf.level = 0.95, ...)",McNemar's test,
1212
tbl_summary,TRUE,FALSE,,stats,mcnemar.test.wide,,gtsummary:::add_p_test_mcnemar.test_wide,TRUE,"mcnemar.test(data[[variable]], data[[by]], conf.level = 0.95, ...)",McNemar's test,
1313
tbl_summary,TRUE,FALSE,,lme4,lme4,lme4::glmer,gtsummary:::add_p_test_lme4,FALSE,"lme4::glmer(by ~ (1 \UFF5C group), data, family = binomial) %>% anova(lme4::glmer(by ~ variable + (1 \UFF5C group), data, family = binomial))",random intercept logistic regression,
1414
tbl_summary,TRUE,TRUE,mean difference,stats,paired.t.test,,gtsummary:::add_p_tbl_summary_paired.t.test,TRUE,"tidyr::pivot_wider(id_cols = group, ...); t.test(by_1, by_2, paired = TRUE, conf.level = 0.95, ...)",Paired t-test,
1515
tbl_summary,TRUE,FALSE,,stats,paired.wilcox.test,,gtsummary:::add_p_tbl_summary_paired.wilcox.test,TRUE,"tidyr::pivot_wider(id_cols = group, ...); wilcox.test(by_1, by_2, paired = TRUE, conf.int = TRUE, conf.level = 0.95, ...)",Paired Wilcoxon rank-sum test,
16-
tbl_summary,TRUE,TRUE,rate difference,stats,prop.test,stats::prop.test,gtsummary:::add_p_test_prop.test,TRUE,"prop.test(x, n, conf.level = 0.95, ...)",Test for equality of proportions,
16+
tbl_summary,TRUE,TRUE,rate difference,stats,prop.test,stats::prop.test,gtsummary:::add_p_test_prop.test,TRUE,"prop.test(x, n, conf.level = 0.95, ...)",Test for equality of proportions,"For dichotomous comparisons, the 'variable' is first converted to a logical."
1717
tbl_summary,TRUE,TRUE,mean difference,stats,ancova,,gtsummary:::add_p_test_ancova,FALSE,lm(variable ~ by + adj.vars),ANCOVA,
1818
tbl_summary,FALSE,TRUE,mean difference,stats,ancova_lme4,,gtsummary:::add_p_test_ancova_lme4,FALSE,"lme4::lmer(variable ~ by + adj.vars + (1 \UFF5C group), data)",ANCOVA with random intercept,
1919
tbl_summary,FALSE,TRUE,standardized mean difference,effectsize,cohens_d,effectsize::cohens_d,gtsummary:::add_p_test_cohens_d,TRUE,"effectsize::cohens_d(variable ~ by, data, ci = conf.level, verbose = FALSE, ...)",Cohen's D,

man/tests.Rd

+12-12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-add_difference.tbl_summary.R

+61
Original file line numberDiff line numberDiff line change
@@ -634,3 +634,64 @@ test_that("ordering in add_difference.tbl_summary() with paired tests", {
634634
as.data.frame()
635635
)
636636
})
637+
638+
test_that("addressing GH #2165: Non-logical dichotomous comparisons using prop.test()", {
639+
# check the results are correct by matching ARDs
640+
expect_equal(
641+
trial |>
642+
dplyr::mutate(response = factor(response, levels = c(0, 1), labels = c("no", "yes"))) |>
643+
tbl_summary(
644+
by = trt,
645+
include = response
646+
) |>
647+
add_difference() |>
648+
gather_ard() |>
649+
getElement("add_difference") |>
650+
getElement("response") |>
651+
dplyr::select(-"fmt_fn"),
652+
trial |>
653+
dplyr::mutate(response = response == 1) |>
654+
cardx::ard_stats_prop_test(by = trt, variable = response) |>
655+
dplyr::select(-"fmt_fn")
656+
)
657+
658+
# check when the value presented is the opposite (FALSE)
659+
expect_equal(
660+
trial |>
661+
dplyr::mutate(response = as.logical(response)) |>
662+
tbl_summary(
663+
by = trt,
664+
include = response,
665+
value = list(response = FALSE)
666+
) |>
667+
add_difference() |>
668+
gather_ard() |>
669+
getElement("add_difference") |>
670+
getElement("response") |>
671+
dplyr::select(-"fmt_fn"),
672+
trial |>
673+
dplyr::mutate(response = response == 0) |>
674+
cardx::ard_stats_prop_test(by = trt, variable = response) |>
675+
dplyr::select(-"fmt_fn")
676+
)
677+
678+
# check results when variable has >2 levels
679+
expect_equal(
680+
trial |>
681+
tbl_summary(
682+
by = trt,
683+
include = grade,
684+
value = list(grade = "I")
685+
) |>
686+
add_difference() |>
687+
gather_ard() |>
688+
getElement("add_difference") |>
689+
getElement("grade") |>
690+
dplyr::select(-"fmt_fn"),
691+
trial |>
692+
dplyr::mutate(grade = grade == "I") |>
693+
cardx::ard_stats_prop_test(by = trt, variable = grade) |>
694+
dplyr::select(-"fmt_fn")
695+
)
696+
})
697+

0 commit comments

Comments
 (0)