context("Compute summary statistics table with row variable") test_that("A summary table is correctly computed for a continuous variable by a row variable", { data <- data.frame( parent = c("A", "A", "A", "A", "B", "B"), child = c("a", "a", "a", "b", "c", "c"), x = rnorm(n = 6), USUBJID = seq.int(6) ) sumTableRowVar <- computeSummaryStatisticsTable( data, rowVar = c("parent", "child"), var = "x" ) expect_s3_class(sumTableRowVar, "summaryTable") stats <- c( "statN", "statm", "statMean", "statSD", "statSE", "statMedian", "statMin", "statMax", "statPercTotalN", "statPercN" ) expect_identical( names(sumTableRowVar), c("parent", "child", "isTotal", stats) ) lastRowIdx <- nrow(sumTableRowVar) expect_true(sumTableRowVar$isTotal[lastRowIdx]) expect_identical( sumTableRowVar$statPercN[lastRowIdx], 100 ) # check if summary statistics table with rowVar specification # corresponds to summary statistics computed for each unique rowVar rowVarUnique <- unique(data[, c("parent", "child")]) statsToCompare <- setdiff(stats , c("statPercTotalN", "statPercN")) for(iRow in seq_len(nrow(rowVarUnique))){ expect_equal( object = { dataIRow <- merge(rowVarUnique[!!iRow, ], data) sumTableIRow <- computeSummaryStatisticsTable(dataIRow, var = "x") subset(sumTableIRow, !isTotal)[, statsToCompare] }, expected = merge(rowVarUnique[!!iRow, ], sumTableRowVar)[, statsToCompare], check.attributes = FALSE ) } }) test_that("A warning is generated if the row variable is not available", { data <- data.frame( parent = c("A", "A", "A", "A", "B", "B"), child = c("a", "a", "a", "b", "c", "c"), USUBJID = seq.int(6) ) expect_warning( computeSummaryStatisticsTable(data, rowVar = "Y"), "Y.* in rowVar.*ignored" ) }) test_that("Row variables are labelled with the variable name by default", { data <- data.frame( parent = c("A", "A", "A", "A", "B", "B"), child = c("a", "a", "a", "b", "c", "c"), USUBJID = seq.int(6) ) rowVar <- c("parent", "child") summaryTable <- computeSummaryStatisticsTable(data, rowVar = rowVar) expect_identical( object = attr(summaryTable, "summaryTable")$rowVarLab[rowVar], expected = setNames(rowVar, rowVar) ) }) test_that("The labels of the row variables are correctly extracted from a specified labels", { data <- data.frame( parent = c("A", "A", "A", "A", "B", "B"), child = c("a", "a", "a", "b", "c", "c"), USUBJID = seq.int(6) ) rowVar <- c("parent", "child") rowVarLab <- c(parent = "Parent variable", child = "Child variable") summaryTable <- computeSummaryStatisticsTable( data = data, rowVar = rowVar, rowVarLab = rowVarLab ) expect_identical( object = attr(summaryTable, "summaryTable")$rowVarLab[rowVar], expected = rowVarLab ) }) test_that("The labels of the row variables are correctly extracted from the labels of all variables", { data <- data.frame( parent = c("A", "A", "A", "A", "B", "B"), child = c("a", "a", "a", "b", "c", "c"), USUBJID = seq.int(6) ) rowVar <- c("parent", "child") labelVars <- c(parent = "Parent variable", child = "Child variable") summaryTable <- computeSummaryStatisticsTable( data = data, rowVar = rowVar, labelVars = labelVars ) expect_identical( attr(summaryTable, "summaryTable")$rowVarLab[rowVar], labelVars[rowVar] ) }) test_that("The labels of the row variables are correctly extracted from the labels of all variables", { data <- data.frame( parent = c("A", "A", "A", "A", "B", "B"), child = c("a", "a", "a", "b", "c", "c"), USUBJID = seq.int(6) ) rowVar <- c("parent", "child") summaryTable <- computeSummaryStatisticsTable( data = data, rowVar = rowVar, rowVarLab = c(child = "Child variable") ) # no errors if labels are not specified for all row variables expect_identical( object = attr(summaryTable, "summaryTable")$rowVarLab[rowVar], expected = c(parent = "parent", child = "Child variable") ) }) test_that("Row elements are ordered alphabetically if the row variable is a character", { data <- data.frame( USUBJID = seq.int(6), SEX = rep(c("M", "F"), times = 3), stringsAsFactors = FALSE ) expect_silent( resAuto <- computeSummaryStatisticsTable( data, rowVar = "SEX", rowOrder = "auto" ) ) expect_identical( levels(resAuto$SEX), c("F", "M") ) }) test_that("Row elements are ordered alphabetically when requested", { data <- data.frame( USUBJID = seq.int(6), SEX = factor(rep(c("M", "F"), times = 3), levels = c("M", "F")) ) expect_silent( resAlphabet <- computeSummaryStatisticsTable( data, rowVar = "SEX", rowOrder = "alphabetical" ) ) expect_identical( levels(resAlphabet$SEX), c("F", "M") ) }) test_that("An error is generated if the order for the rows is not correctly specified", { data <- data.frame( USUBJID = seq.int(6), SEX = rep(c("M", "F"), times = 3), stringsAsFactors = FALSE ) expect_error( resAuto <- computeSummaryStatisticsTable( data = data, rowVar = "SEX", rowOrder = "test" ), '.*should be one of.*auto.*alphabetical.*total.*' ) }) test_that("Row elements are ordered as specified when the row variable is a factor", { data <- data.frame( USUBJID = seq.int(6), SEX = factor(rep(c("M", "F"), times = 3), levels = c("M", "F")), stringsAsFactors = FALSE ) expect_silent( resAuto <- computeSummaryStatisticsTable( data, rowVar = "SEX", rowOrder = "auto" ) ) expect_identical(levels(resAuto$SEX), c("M", "F")) }) test_that("Row elements are correctly ordered based on the total", { data <- data.frame( USUBJID = seq.int(6), TRT = c("B", "B", "B", "B", "A", "A"), stringsAsFactors = FALSE ) # Total expect_silent( resTotal <- computeSummaryStatisticsTable( data, rowVar = "TRT", rowOrder = "total" ) ) expect_identical( levels(resTotal$TRT), c("B", "A") ) }) test_that("Row elements are correctly ordered based on the column totals", { data <- data.frame( USUBJID = seq.int(6), SEX = c("F", "F", "M", "M", "M", "M"), TRT = c("B", "B", "B", "B", "A", "A"), stringsAsFactors = FALSE ) expect_silent( resTotal <- computeSummaryStatisticsTable( data, rowVar = "SEX", colVar = "TRT", rowOrder = "total", ) ) expect_identical( levels(resTotal$SEX), c("M", "F") ) # Total across columns included internally but not returned by the function expect_identical( levels(resTotal$TRT), c("A", "B") ) }) test_that("Row elements are correctly ordered when the order is specified for each row variable", { data <- data.frame( USUBJID = seq.int(6), SEX = c("F", "F", "M", "M", "M", "M"), TRT = c("B", "B", "B", "B", "A", "A"), stringsAsFactors = FALSE ) # Different for each row variable expect_silent( resDoubleOrder <- computeSummaryStatisticsTable( data, rowVar = c("SEX", "TRT"), rowOrder = c(SEX = "auto", TRT = "total") ) ) expect_identical( levels(resDoubleOrder$TRT), c("B", "A") ) expect_identical( levels(resDoubleOrder$SEX), c("F", "M") ) }) test_that("Row elements are correctly ordered based on a specified function", { data <- data.frame( USUBJID = seq.int(6), TRT = c("B", "B", "B", "B", "A", "A"), stringsAsFactors = FALSE ) resFct <- computeSummaryStatisticsTable( data, rowVar = "TRT", rowOrder = function(summaryTable){ data <- subset(summaryTable, !isTotal) data[order(data$statN, decreasing = TRUE), "TRT"] } ) expect_identical(levels(resFct$TRT), c("B", "A")) }) test_that("Row elements are correctly ordered when not all elements are returned by a specified function", { data <- data.frame( USUBJID = seq.int(6), TRT = c("B", "B", "B", "B", "A", "A"), stringsAsFactors = FALSE ) # if function is wrong, e.g. doesn't return all values in summaryTable # the remaining values are added anyway expect_silent( resFct <- computeSummaryStatisticsTable( data, rowVar = "TRT", rowOrderTotalFilterFct = function(summaryTable){ c("") } ) ) expect_identical(levels(resFct$TRT), c("A", "B")) }) test_that("Row elements are correctly ordered based on the total of a subset of the data", { # example order based on total would be different than based on specified subset data total data <- data.frame( USUBJID = seq.int(5), TRT = c("B", "B", "B", "A", "A"), SEX = c("M", "M", "F", "F", "F"), stringsAsFactors = FALSE ) expect_silent( resTotal <- computeSummaryStatisticsTable( data = data, rowVar = c("TRT", "SEX"), rowOrder = "total", rowOrderTotalFilterFct = function(x) subset(x, TRT == "B") ) ) expect_identical(levels(resTotal$SEX), c("M", "F")) }) test_that("The last element of the rows is correctly set when specified", { data <- data.frame( USUBJID = seq.int(5), TRT = c("Z", "B", "Other", "Y", "M"), catLast = "Other", stringsAsFactors = FALSE ) expect_silent( res <- computeSummaryStatisticsTable( data = data, rowVar = "TRT", rowOrder = "auto", rowOrderCatLast = "Other" ) ) expect_identical(levels(res$TRT), c("B", "M", "Y", "Z", "Other")) }) test_that("Specific combinations of row elements are included when requested", { # for example: table of lab abnormalities data <- data.frame( USUBJID = seq.int(7), PARAM = factor(c("A", "A", "A", "B", "B", "B", "B")), AVALC = factor(c(">10", ">10", ">10", "<=2", "]2, 10]", "]2, 10]", "]2, 10]")), stringsAsFactors = FALSE ) avalcFact <- c("<=10", ">10", "<=2", "]2, 10]", "> 10") rowVarDataLevels <- data.frame( PARAM = c(rep("A", 2), rep("B", 3)), AVALC = factor(avalcFact, levels = avalcFact) ) expect_silent( resSpecLevels <- computeSummaryStatisticsTable( data, rowVar = c("PARAM", "AVALC"), rowVarDataLevels = rowVarDataLevels ) ) # all levels specified in rowVarDataLevels are present: expect_identical(levels(resSpecLevels$AVALC), levels(rowVarDataLevels$AVALC)) # categories not in the data are also present with a count of 0: avalcNotInData <- setdiff(avalcFact, data$AVALC) expect_true(all(avalcNotInData %in% resSpecLevels$AVALC)) expect_identical( subset(resSpecLevels, AVALC %in% avalcNotInData)$statN, as.integer(rep(0, length(avalcNotInData))) ) }) test_that("The row totals are correctly computed", { data <- data.frame( USUBJID = c(1, 2, 3, 4, 5, 6, 6), SEX = c("M", "M", "M", "F", "F", "F", "F"), stringsAsFactors = FALSE ) sumTableRowTotal <- computeSummaryStatisticsTable( data = data, rowVar = "SEX", rowVarTotalInclude = "SEX" ) expect_true("Total" %in% sumTableRowTotal$SEX) sumTableTotal <- subset(sumTableRowTotal, SEX == "Total") expect_equal(sumTableTotal$statN, 6) expect_equal(sumTableTotal$statm, 7) }) test_that("A warning is generated when the variable for row totals is not available", { data <- data.frame( USUBJID = c(1, 2, 3, 4, 5, 6, 6), SEX = c("M", "M", "M", "F", "F", "F", "F"), stringsAsFactors = FALSE ) sumTableBase <- computeSummaryStatisticsTable(data = data, rowVar = "SEX") expect_warning( sumTableRowTotalWrong <- computeSummaryStatisticsTable( data = data, rowVar = "SEX", rowVarTotalInclude = TRUE ), "Variable.*TRUE.* are not available" ) expect_identical(sumTableRowTotalWrong, sumTableBase) }) test_that("The row totals are correctly extracted in separated rows when requested", { data <- data.frame( USUBJID = seq.int(6), SEX = rep(c("M", "F"), times = 3), stringsAsFactors = FALSE ) expect_silent( summaryTable <- computeSummaryStatisticsTable( data = data, rowVar = "SEX", rowVarTotalInclude = "SEX", rowVarTotalInSepRow = "SEX" ) ) # inclusion of row var in separated row during export step # currently only stored in the output expect_true("SEX" %in% attr(summaryTable, "summaryTable")$rowVarTotalInSepRow) }) test_that("A warning is generated if the variable for row totals to be included in separated rows is not among the row total variables", { data <- data.frame( USUBJID = seq.int(6), SEX = rep(c("M", "F"), times = 3), stringsAsFactors = FALSE ) expect_warning( summaryTable <- computeSummaryStatisticsTable( data = data, rowVar = "SEX", rowVarTotalInSepRow = "SEX" ), "SEX.*in rowVarTotalInSepRow are ignored.*" ) }) test_that("The row totals are correctly computed by variable", { # example where a subject: 'a' has same severity for 2 adverse events # so should be counted only once for the 'Mild' category data <- data.frame( USUBJID = c("a", "a", "b", "c", "c", "a"), ABODSYS = rep("X", 6), AEDECOD = c("A", "A", "A", "A", "B", "B"), AESEV = c("Mild", "Moderate", "Mild", "Severe", "Moderate", "Mild"), stringsAsFactors = FALSE ) sumTableRVBV <- computeSummaryStatisticsTable( data = data, rowVar = c("AEDECOD", "AESEV"), rowVarTotalInclude = "AEDECOD", rowVarTotalByVar = "AESEV" ) expect_true("Total" %in% sumTableRVBV$AEDECOD) sumTableRVBVRowTotal <- subset(sumTableRVBV, AEDECOD == "Total") expect_equal(as.character(sumTableRVBVRowTotal$AESEV), c("Mild", "Moderate", "Severe")) # subject 'a' is only counted once in the 'Mild' category expect_equal(subset(sumTableRVBVRowTotal, AESEV == "Mild")$statN, 2) # but 3 records are reported expect_equal(subset(sumTableRVBVRowTotal, AESEV == "Mild")$statm, 3) }) test_that("The row totals are correctly computed by variable for only a subset of the variable to summarize", { # example where a subject: 'a' has same severity for 2 adverse events # so should be counted only once for the 'Mild' category data <- data.frame( USUBJID = c("a", "a", "b", "c", "c", "a"), ABODSYS = rep("X", 6), AEDECOD = c("A", "A", "A", "A", "B", "B"), AESEV = c("Mild", "Moderate", "Mild", "Severe", "Moderate", "Mild"), stringsAsFactors = FALSE ) expect_silent( sumTableRVBVSpec <- computeSummaryStatisticsTable( data = data, rowVar = c("ABODSYS", "AEDECOD", "AESEV"), rowVarTotalInclude = c("ABODSYS", "AEDECOD"), rowVarTotalByVar = c(AEDECOD = "AESEV") ) ) # if rowVarTotalByVar is not specified, only one total row is included sumTableTotalRowVar1 <- subset(sumTableRVBVSpec, ABODSYS == "Total") expect_length(nrow(sumTableTotalRowVar1), 1) # otherwise, total row(s) for each element in rowVarTotalByVar sumTableTotalRowVar2 <- subset(sumTableRVBVSpec, ABODSYS == "X" & AEDECOD == "Total") # should match stats based on table on ADECOD sumTableRVBV <- computeSummaryStatisticsTable( data = data, rowVar = c("AEDECOD", "AESEV"), rowVarTotalInclude = "AEDECOD", rowVarTotalByVar = "AESEV" ) sumTableRVBVRowTotal <- subset(sumTableRVBV, AEDECOD == "Total") expect_equal( subset(sumTableTotalRowVar2, select = -ABODSYS), sumTableRVBVRowTotal, check.attributes = FALSE # row names might differ ) }) test_that("A warning is generated if the variable to compute row totals by is not available", { # example where a subject: 'a' has same severity for 2 adverse events # so should be counted only once for the 'Mild' category data <- data.frame( USUBJID = c("a", "a", "b", "c", "c", "a"), AEDECOD = c("A", "A", "A", "A", "B", "B"), stringsAsFactors = FALSE ) expect_warning( summaryTable <- computeSummaryStatisticsTable( data = data, rowVar = "AEDECOD", rowVarTotalInclude = "AEDECOD", rowVarTotalByVar = "a" ), ".*rowVarTotalByVar.*ignored" ) }) test_that("The row percentages are correctly computed by a row variable", { data <- data.frame( USUBJID = c("a", "b", "c", "d", "a", "b"), AEDECOD = c("A", "A", "A", "A", "B", "B"), stringsAsFactors = FALSE ) sumTableRVTPerc <- computeSummaryStatisticsTable( data = data, rowVar = "AEDECOD", rowVarTotalPerc = "AEDECOD" ) # sum percentage by rowVar == 100 expect_equal(subset(sumTableRVTPerc, AEDECOD == "A")$statPercTotalN, 4) expect_equal(subset(sumTableRVTPerc, AEDECOD == "B")$statPercTotalN, 2) expect_equal( unique(as.numeric(na.omit(sumTableRVTPerc$statPercN))), 100 ) }) test_that("A warning is generated if the variable to compute row percentages is not available", { data <- data.frame( USUBJID = c("a", "b", "c", "d", "a", "b"), AEDECOD = c("A", "A", "A", "A", "B", "B"), stringsAsFactors = FALSE ) # variable not in rowVar expect_warning( summaryTable <- computeSummaryStatisticsTable( data = data, rowVar = "AEDECOD", rowVarTotalPerc = "a" ), "rowVarTotalPerc.*ignored.*not available in: rowVar" ) }) test_that("The row percentages are correctly computed by the variable to summarize", { data <- data.frame( USUBJID = c("a", "b", "c", "d", "a", "b"), AEDECOD = c("A", "A", "A", "A", "B", "B"), AESEV = c("Mild", "Moderate", "Moderate", "Severe", "Moderate", "Mild"), AESEV2 = c("Mild", "Moderate", NA_character_, NA_character_, "Moderate", "Mild"), stringsAsFactors = FALSE ) expect_silent( sumTableRVTPerc <- computeSummaryStatisticsTable( data = data, rowVar = "AEDECOD", var = c("AESEV", "AESEV2"), rowVarTotalPerc = "variable", varLabInclude = TRUE # because 'variable' not included if only one variable ) ) expect_equal( unique(subset(sumTableRVTPerc, variable == "AESEV")$statPercTotalN), 4 ) expect_equal( unique(subset(sumTableRVTPerc, variable == "AESEV2")$statPercTotalN), 2 ) }) test_that("A warning is generated if row percentages should be computed by the variable to summarize but the variable is not included", { data <- data.frame( USUBJID = c("a", "b", "c", "d", "a", "b"), AEDECOD = c("A", "A", "A", "A", "B", "B"), AESEV = c("Mild", "Moderate", "Moderate", "Severe", "Moderate", "Mild"), AESEV2 = c("Mild", "Moderate", NA_character_, NA_character_, "Moderate", "Mild"), stringsAsFactors = FALSE ) # variable not included in summary table: expect_warning( computeSummaryStatisticsTable( data = data, rowVar = "AEDECOD", var = "AESEV", rowVarTotalPerc = "variable" ), "Percentages cannot be computed by.*variable.* because variable not included" ) }) test_that("The row percentages are correctly computed by the variable to summarize if only one variable is specified.", { data <- data.frame( USUBJID = c("a", "b", "c", "d", "a", "b"), AESEV = c("Mild", "Moderate", "Moderate", "Severe", "Moderate", "Mild"), stringsAsFactors = FALSE ) # variable not included in summary table: expect_silent( summaryTable <- computeSummaryStatisticsTable( data = data, var = "AESEV", varLabInclude = TRUE, rowVarTotalPerc = "variable" ) ) expect_setequal( object = na.omit(summaryTable$statPercTotalN), expect = length(unique(data$USUBJID)) ) })