library("testthat") library("stringr") library("dplyr") # I need to include this for unknown reason or the test fails in R CMD check mode data("Loblolly") set.seed(1) Loblolly$young <- Loblolly$age < 10 Loblolly$young <- factor(Loblolly$young, label = c("Yes", "No")) Loblolly$fvar <- factor(sample(letters[1:3], size = nrow(Loblolly), replace = TRUE)) Loblolly$young_w_missing <- Loblolly$young Loblolly$young_w_missing[sample(1:nrow(Loblolly), size = 4)] <- NA Loblolly$fvar_w_missing <- Loblolly$fvar Loblolly$fvar_w_missing[sample(1:nrow(Loblolly), size = 4)] <- NA Loblolly$height_w_missing <- Loblolly$height Loblolly$height_w_missing[sample(1:nrow(Loblolly), size = 4)] <- NA test_that("Check mean function", { stats <- by(Loblolly$height, Loblolly$young, mean) a <- getDescriptionStatsBy(Loblolly$height, Loblolly$young, statistics = TRUE, digits = 2, statistics.sig_lim = 10^-4) # Check that it contains the true mean expect_true(grepl(round(stats[["No"]], 2), a[1, "No"]), info = "Expected the mean" ) expect_true(grepl(round(stats[["Yes"]], 2), a[1, "Yes"]), info = "Expected the mean" ) # Check that it contains the sd stats <- by(Loblolly$height, Loblolly$young, sd) expect_true(grepl(round(stats[["No"]], 2), a[1, "No"]), info = "Expected the sd" ) expect_true(grepl(round(stats[["Yes"]], 2), a[1, "Yes"]), info = "Expected the sd" ) true_wilc_pv <- txtPval(wilcox.test(Loblolly$height ~ Loblolly$young)$p.value, statistics.sig_lim = 10^-4 ) expect_equal( as.character(a[1, "P-value"]), true_wilc_pv ) # Check P-value without truncation a <- getDescriptionStatsBy(Loblolly$height, Loblolly$age == 10, statistics = TRUE, digits = 2, statistics.sig_lim = 10^-4 ) true_wilc_pv <- txtPval(wilcox.test(Loblolly$height ~ Loblolly$age == 10)$p.value, statistics.sig_lim = 10^-4 ) expect_equal( as.character(a[1, "P-value"]), true_wilc_pv ) }) test_that("Check median function", { stats <- by(Loblolly$height, Loblolly$young, median) a <- getDescriptionStatsBy(Loblolly$height, Loblolly$young, continuous_fn = describeMedian, statistics = TRUE, digits = 2, statistics.sig_lim = 10^-4 ) # Check that it contains the true mean expect_true(grepl(round(stats[["No"]], 2), a[1, "No"]), info = "Expected the median" ) expect_true(grepl(round(stats[["Yes"]], 2), a[1, "Yes"]), info = "Expected the median" ) # Check that it contains the sd stats <- by( Loblolly$height, Loblolly$young, function(x) { str_trim(paste(format(quantile(x, probs = c(.25, .75)), digits = 2, nsmall = 2 ), collapse = " - ")) } ) expect_true(grepl(stats[["No"]], a[1, "No"]), info = "Expected the iqr range" ) expect_true(grepl(stats[["Yes"]], a[1, "Yes"]), info = "Expected the iqr range" ) true_wilc_pv <- txtPval(wilcox.test(Loblolly$height ~ Loblolly$young)$p.value, statistics.sig_lim = 10^-4 ) expect_equal( as.character(a[1, "P-value"]), true_wilc_pv ) a <- getDescriptionStatsBy(Loblolly$height, Loblolly$young, continuous_fn = function(...) { describeMedian(..., iqr = FALSE) }, statistics = TRUE, digits = 2, statistics.sig_lim = 10^-4 ) # Check that it contains the sd stats <- by( Loblolly$height, Loblolly$young, function(x) paste(round(range(x), 2), collapse = " - ") ) expect_true(grepl(stats[["No"]], a[1, "No"]), info = "Expected the range" ) expect_true(grepl(stats[["Yes"]], a[1, "Yes"]), info = "Expected the range" ) }) test_that("Check small proportions", { n <- 1e4 a <- getDescriptionStatsBy(LETTERS[c(rep(1, times = (n - 4)), rep(2, times = 4))], letters[c(rep(1, times = (n - 10)), rep(2, times = 10))], hrzl_prop = TRUE, statistics = TRUE, digits = 0, digits.nonzero = 2, statistics.sig_lim = 10^-4) expect_equivalent(as.character(a["A", "b"]), "6 (0.06%)") a <- getDescriptionStatsBy(LETTERS[c(rep(1, times = (n - 4)), rep(2, times = 4))], letters[c(rep(1, times = (n - 10)), rep(2, times = 10))], hrzl_prop = TRUE, statistics = TRUE, digits = 0, statistics.sig_lim = 10^-4) expect_equivalent(as.character(a["A", "b"]), "6 (0%)") # Make sure to handle character input and make it default to the # first factor level set.seed(1) fake_data <- data.frame(Large = sample(LETTERS[1:2], size = n, replace = TRUE), Small = sample(letters[1:2], size = n, replace = TRUE)) fake_data$Small_factor <- factor(fake_data$Small) a <- getDescriptionStatsBy(fake_data$Large, by = fake_data$Small) b <- getDescriptionStatsBy(fake_data$Large, by = fake_data$Small_factor) attr(a, "raw_data") <- NULL attr(b, "raw_data") <- NULL expect_identical(a, b) }) test_that("Check factor function", { stats <- table(Loblolly$fvar, Loblolly$young) a <- getDescriptionStatsBy(Loblolly$fvar, Loblolly$young, statistics = TRUE, digits = 2, statistics.sig_lim = 10^-4) # Check that it contains the true mean for (rn in rownames(a)) { for (cn in levels(Loblolly$young)) { expect_match(a[rn, cn], as.character(stats[rn, cn]), info = "Factor count don't match" ) } } # Check that character input is handled similarly b <- getDescriptionStatsBy(as.character(Loblolly$fvar), as.character(Loblolly$young), statistics = TRUE, digits = 2, statistics.sig_lim = 10^-4) label(a) <- "" label(b) <- "" for (colname in colnames(b)) { expect_equal(a[,colname], b[, colname]) } vertical_perc_stats <- format(apply(stats, 2, function(x) { x / sum(x) * 100 }), nsmall = 2, digits = 2) horizontal_perc_stats <- t(format(apply(stats, 1, function(x) { x / sum(x) * 100 }), nsmall = 2, digits = 2)) for (rn in rownames(a)) { for (cn in levels(Loblolly$young)) { expect_match(a[rn, cn], sprintf("%s%%", vertical_perc_stats[rn, cn]), info = "Factor percentagess don't match in vertical mode" ) } } a <- getDescriptionStatsBy(Loblolly$fvar, Loblolly$young, hrzl_prop = TRUE, statistics = TRUE, digits = 2, statistics.sig_lim = 10^-4) for (rn in rownames(a)) { for (cn in levels(Loblolly$young)) { expect_match(a[rn, cn], sprintf("%s%%", horizontal_perc_stats[rn, cn]), info = "Factor percentagess don't match in horizontal mode" ) } } }) test_that("Check total column position", { a <- getDescriptionStatsBy(Loblolly$fvar, Loblolly$young, hrzl_prop = TRUE, add_total_col = TRUE, continuous_fn = describeMedian, statistics = TRUE, digits = 2, statistics.sig_lim = 10^-4) expect_equivalent(colnames(a)[1], "Total") expect_equivalent(ncol(a), 4) a <- getDescriptionStatsBy(Loblolly$fvar, Loblolly$young, hrzl_prop = TRUE, add_total_col = "last", continuous_fn = describeMedian, digits = 2, statistics.sig_lim = 10^-4 ) expect_equivalent(tail(colnames(a), 1), "Total", info = "The last column without statistics should be the total column when the add_total_col is set to last" ) expect_equivalent(ncol(a), 3) a <- getDescriptionStatsBy(Loblolly$fvar, Loblolly$young, statistics = TRUE, hrzl_prop = TRUE, add_total_col = "last" ) expect_equivalent(tail(colnames(a), 2)[1], "Total", info = "The last should be the p-value if statistics is specified" ) expect_equivalent(ncol(a), 4) }) test_that("Check factor function with missing", { stats <- table(Loblolly$fvar, Loblolly$young_w_missing, useNA = "ifany") a <- expect_warning(getDescriptionStatsBy(Loblolly$fvar, Loblolly$young_w_missing, statistics = TRUE, digits = 2, statistics.sig_lim = 10^-4 )) for (rn in rownames(a)) { for (cn in levels(Loblolly$young)) { expect_match(a[rn, cn], as.character(stats[rn, cn]), info = "Factor count don't match" ) } } stats <- table(Loblolly$fvar, Loblolly$young_w_missing, useNA = "no") vertical_perc_stats <- format(apply(stats, 2, function(x) { x / sum(x) * 100 }), nsmall = 2, digits = 2) horizontal_perc_stats <- t(format(apply(stats, 1, function(x) { x / sum(x) * 100 }), nsmall = 2, digits = 2)) for (rn in rownames(a)) { for (cn in levels(Loblolly$young)) { expect_match(a[rn, cn], sprintf("%s%%", vertical_perc_stats[rn, cn]), info = "Factor vertical percentages don't match" ) } } a <- suppressWarnings(getDescriptionStatsBy(Loblolly$fvar, Loblolly$young_w_missing, hrzl_prop = TRUE, statistics = TRUE, digits = 2, statistics.sig_lim = 10^-4)) for (rn in rownames(a)) { for (cn in levels(Loblolly$young)) { expect_match(a[rn, cn], sprintf("%s%%", horizontal_perc_stats[rn, cn]), info = "Factor percentages don't match in horizontal mode" ) } } a <- suppressWarnings(getDescriptionStatsBy(Loblolly$fvar_w_missing, Loblolly$young_w_missing, useNA = "no", digits = 2, statistics.sig_lim = 10^-4)) stats <- table(Loblolly$fvar_w_missing, Loblolly$young_w_missing, useNA = "no") vertical_perc_stats <- format(apply(stats, 2, function(x) { x / sum(x) * 100 }), nsmall = 2, digits = 2) for (rn in rownames(a)) { for (cn in levels(Loblolly$young)) { expect_match(a[rn, cn], as.character(stats[rn, cn]), info = sprintf( "Factor '%s':'%s' count don't match", rn, cn ) ) expect_match(a[rn, cn], sprintf("%s%%", vertical_perc_stats[rn, cn]), info = sprintf( "Factor '%s':'%s' vertical percentages don't match", rn, cn ) ) } } a <- suppressWarnings(getDescriptionStatsBy(Loblolly$fvar_w_missing, Loblolly$young_w_missing, digits = 2, statistics.sig_lim = 10^-4)) stats <- table(Loblolly$fvar_w_missing, Loblolly$young_w_missing, useNA = "ifany") stats <- stats[, !is.na(colnames(stats))] rownames(stats)[is.na(rownames(stats))] <- "Missing" vertical_perc_stats <- format(apply(stats, 2, function(x) { x / sum(x) * 100 }), nsmall = 2, digits = 2) for (rn in rownames(a)) { for (cn in levels(Loblolly$young)) { expect_match(a[rn, cn], as.character(stats[rn, cn]), info = sprintf( "Factor '%s':'%s' count don't match", rn, cn ) ) expect_match(a[rn, cn], sprintf("%s%%", str_trim(vertical_perc_stats[rn, cn])), info = sprintf( "Factor '%s':'%s' vertical percentages don't match", rn, cn ) ) } } a <- suppressWarnings(getDescriptionStatsBy(Loblolly$fvar_w_missing, Loblolly$young_w_missing, hrzl_prop = TRUE, digits = 2, statistics.sig_lim = 10^-4)) horizontal_perc_stats <- t(format(apply(stats, 1, function(x) { x / sum(x) * 100 }), nsmall = 2, digits = 2)) for (rn in rownames(a)) { for (cn in levels(Loblolly$young)) { expect_match(a[rn, cn], sprintf("%s%%", str_trim(horizontal_perc_stats[rn, cn])), info = "Factor vertical percentages don't match" ) } } # When # - `x` has exactly 2 levels and some NAs # - add_total_col = TRUE # - show_missing = "no" # - show_all_values = FALSE # Then prGetStatistics should return the count of just the first factor level # use example: # a <- getDescriptionStatsBy(Loblolly$young_w_missing, Loblolly$fvar, # useNA = "no", digits = 2, # add_total_col = TRUE) a <- prGetStatistics(Loblolly$young_w_missing, useNA = "no", show_all_values = FALSE) lvl <- levels(Loblolly$young_w_missing)[1] target <- sum(stats[, lvl]) names(target) <- lvl expect_equal(a, target) }) test_that("Check txtInt application", { # Check for factors set.seed(10) test_var <- factor(sample(1:3, size = 10^4, replace = TRUE)) out <- prGetStatistics(test_var, useNA = "no", show_all_values = FALSE) expect_true(any(grepl(",", out))) }) test_that("Problem with boolean x", { set.seed(1) aa <- factor(sample(c("No", "Yes"), size = 50, replace = TRUE)) aaa <- sample(c(TRUE, FALSE), size = 50, replace = TRUE) ret <- getDescriptionStatsBy(x = aaa, by = aa, numbers_first = TRUE) expect_equivalent(nrow(ret), 1, info = "There should only be one alternative returned" ) expect_equivalent(ncol(ret), 2, info = "There should be two columns" ) expect_match(ret[TRUE, "No"], sprintf("^%d", table(aaa, aa)["TRUE", "No"]), info = "The value does not seem to match the raw table" ) }) test_that("test header", { data(mtcars) mtcars$am <- factor(mtcars$am, levels = 0:1, labels = c("Automatic", "Manual")) Hmisc::label(mtcars$am) <- "Transmission" set.seed(666) mtcars$col <- factor(sample(c("red", "black", "silver"), size = NROW(mtcars), replace = TRUE )) out <- getDescriptionStatsBy( x = mtcars$col, by = mtcars$am, header_count = TRUE, add_total_col = TRUE, statistics = TRUE ) expect_match( colnames(out)[1], sprintf("No. %d", nrow(mtcars)) ) expect_match( colnames(out)[2], sprintf( "No. %d", sum(mtcars$am == levels(mtcars$am)[1]) ) ) expect_match( colnames(out)[3], sprintf( "No. %d", sum(mtcars$am != levels(mtcars$am)[1]) ) ) out <- getDescriptionStatsBy( x = mtcars$col, by = mtcars$am, header_count = "(n = %s)", # custom header add_total_col = TRUE, statistics = TRUE ) expect_match( colnames(out)[1], sprintf("\\(n = %d\\)", nrow(mtcars)) ) expect_match( colnames(out)[2], sprintf( "\\(n = %d\\)", sum(mtcars$am == levels(mtcars$am)[1]) ) ) expect_match( colnames(out)[3], sprintf( "\\(n = %d\\)", sum(mtcars$am != levels(mtcars$am)[1]) ) ) }) test_that("Test use_units", { data(mtcars) mtcars$am <- factor(mtcars$am, levels = 0:1, labels = c("Automatic", "Manual")) Hmisc::label(mtcars$am) <- "Transmission" set.seed(666) units(mtcars$mpg) <- "mpg" out1 <- suppressWarnings( getDescriptionStatsBy( x = mtcars$mpg, by = mtcars$am, header_count = TRUE, add_total_col = TRUE, statistics = TRUE ) ) out2 <- suppressWarnings( getDescriptionStatsBy( x = mtcars$mpg, by = mtcars$am, use_units = TRUE, header_count = TRUE, add_total_col = TRUE, statistics = TRUE ) ) expect_equal(ncol(out1) + 1, ncol(out2)) out3 <- suppressWarnings( getDescriptionStatsBy( x = mtcars$mpg, by = mtcars$am, use_units = "name", header_count = TRUE, add_total_col = TRUE, statistics = TRUE ) ) expect_match(Hmisc::label(out3), "\\(mpg\\)") expect_false(grepl("\\(mpg\\)", label(out2))) }) data("mtcars") test_that("Non-factor variables where values are missing in only one of the by-groups", { table(as.character(mtcars$am), mtcars$gear) retAll <- getDescriptionStatsBy( as.character(mtcars$am), mtcars$gear, show_all_values = TRUE ) retOne <- getDescriptionStatsBy( as.character(mtcars$am), mtcars$gear, show_all_values = FALSE ) retAllRowDefault <- retAll["0", ] retAllRowOther <- retAll["1", ] # Delete label as the attributes(retAllRowDefault) <- NULL attributes(retAllRowOther) <- NULL attributes(retOne) <- NULL expect_equal(retAllRowDefault, retOne) expect_false(all(retAllRowOther == retOne)) }) ### checks for issue #32: display of p-values for multi-row summaries cars_missing <- mtcars cars_missing$mpg[3] <- NA test_that("p-values are displayed in multi-row summaries when rgroup and n.rgroup are specified", { expected <- structure( c( "27.1 (±4.6)", "1 (9.1%)", "19.7 (±1.5)", "0 (0%)", "15.1 (±2.6)", "0 (0%)", "", "" ), .Dim = c(2L, 4L), .Dimnames = list( c("Mean (SD)", "Missing"), c("4", "6", "8", "P-value") ), rgroup = structure("Gas", add = structure(list(`1` = "< 0.0001"), .Names = "1" ) ), n.rgroup = 2, htmlTable_args = structure(list(), .Names = character(0)), class = c("descMrg", class(matrix(1))) ) out <- mergeDesc(getDescriptionStatsBy( x = cars_missing$mpg, by = cars_missing$cyl, statistics = TRUE ), htmlTable_args = list(rgroup = c("Gas"), n.rgroup = 2) ) expect_equivalent(out, expected) }) test_that("Factors with 0 observations should be reported when show_all_values=TRUE, issue #61", { set.seed(1) n = 20 d <- data.frame(single = sample(LETTERS[1], size = n, replace = TRUE), two = sample(LETTERS[1:2], size = n, replace = TRUE), three = sample(LETTERS[1:3], size = n, replace = TRUE), by = sample(letters[1:2], size = n, replace = TRUE)) %>% mutate(across(everything(), function(x) factor(x, levels = c(unique(x), "no observation")))) out <- d %>% getDescriptionStatsBy(single, two, three, by = by, show_all_values = TRUE) expect_true(all(sapply(out, function(x) "no observation" %in% rownames(x)))) out <- d %>% getDescriptionStatsBy(single, two, three, by = by, show_all_values = FALSE) expect_false("no observation" %in% rownames(out[[1]])) expect_true(all(sapply(out[2:3], function(x) "no observation" %in% rownames(x)))) })