# Tests for summariseObservationPeriod(), tableObservationPeriod() and
# plotObservationPeriod(). `connection()`, `schema()` and `checkResultType()`
# are helpers defined elsewhere in the test suite.

test_that("check summariseObservationPeriod works", {
  # Drop the "settings" attribute so two results can be compared on their
  # content only.
  removeSettings <- function(x) {
    attr(x, "settings") <- NULL
    x
  }
  # Number of points used in the density checks below.
  # NOTE(review): presumably the grid size of the density estimate produced by
  # summariseObservationPeriod(); confirm against the implementation.
  nPoints <- 512

  # Load mock database: 4 persons with 3, 2, 2 and 1 observation periods.
  cdm <- omopgenerics::cdmFromTables(
    tables = list(
      person = dplyr::tibble(
        person_id = as.integer(1:4),
        gender_concept_id = c(8507L, 8532L, 8532L, 8507L),
        year_of_birth = 2010L,
        month_of_birth = 1L,
        day_of_birth = 1L,
        race_concept_id = 0L,
        ethnicity_concept_id = 0L
      ),
      observation_period = dplyr::tibble(
        observation_period_id = as.integer(1:8),
        person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(),
        observation_period_start_date = as.Date(c(
          "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10",
          "2020-03-10", "2020-03-01", "2020-04-10", "2020-03-10"
        )),
        observation_period_end_date = as.Date(c(
          "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31",
          "2020-03-27", "2020-03-09", "2020-05-08", "2020-12-10"
        )),
        period_type_concept_id = 0L
      )
    ),
    cdmName = "mock data"
  )
  cdm <- CDMConnector::copyCdmTo(
    con = connection(), cdm = cdm, schema = schema()
  )

  # simple run
  expect_no_error(resAll <- summariseObservationPeriod(cdm$observation_period))
  expect_no_error(
    resAllD <- summariseObservationPeriod(
      cdm$observation_period,
      estimates = "density"
    )
  )
  expect_no_error(
    resAllN <- summariseObservationPeriod(
      cdm$observation_period,
      estimates = c(
        "mean", "sd", "min", "q05", "q25", "median", "q75", "q95", "max"
      )
    )
  )
  # Rows with a non-missing variable_level must be the same whether they come
  # from the default run or from the density-only run.
  expect_equal(
    resAllD |>
      dplyr::filter(!is.na(variable_level)) |>
      dplyr::mutate(estimate_value = as.numeric(estimate_value)) |>
      removeSettings(),
    resAll |>
      dplyr::filter(!is.na(variable_level)) |>
      dplyr::mutate(estimate_value = as.numeric(estimate_value)) |>
      removeSettings()
  )

  # test estimates: apart from the counts, only the requested estimates appear
  expect_no_error(
    resEst <- cdm$observation_period |>
      summariseObservationPeriod(estimates = c("mean", "median"))
  )
  expect_true(all(
    resEst |>
      dplyr::filter(
        !.data$variable_name %in% c("number records", "number subjects")
      ) |>
      dplyr::pull("estimate_name") |>
      unique() %in% c("mean", "median")
  ))

  # counts
  expect_identical(
    resAll$estimate_value[resAll$variable_name == "number records"],
    "8"
  )
  x <- dplyr::tibble(
    group_level = c("overall", "1st", "2nd", "3rd"),
    variable_name = "number subjects",
    estimate_value = c("4", "4", "3", "1")
  )
  # Every expected row must be found exactly once in the result.
  expect_identical(
    nrow(x),
    resAll |>
      dplyr::inner_join(x, by = colnames(x)) |>
      nrow()
  )

  # records per person
  expect_identical(
    resAll |>
      dplyr::filter(
        variable_name == "records per person", estimate_name == "mean"
      ) |>
      dplyr::pull("estimate_value"),
    "2"
  )

  # duration (expected order: overall, 1st, 2nd, 3rd)
  expect_identical(
    resAll |>
      dplyr::filter(
        variable_name == "duration in days", estimate_name == "mean"
      ) |>
      dplyr::pull("estimate_value"),
    as.character(c(
      mean(c(20, 6, 113, 144, 18, 9, 29, 276)),
      mean(c(20, 18, 9, 276)),
      mean(c(6, 29, 144)),
      113
    ))
  )

  # days to next observation period (expected order: overall, 1st, 2nd, 3rd)
  expect_identical(
    resAll |>
      dplyr::filter(
        variable_name == "days to next observation period",
        estimate_name == "mean"
      ) |>
      dplyr::pull("estimate_value"),
    as.character(c(
      mean(c(5, 32, 136, 26)),
      mean(c(5, 32, 136)),
      26,
      NA
    ))
  )

  # duration - density
  # `area` is sum(density_y) times the x step, where the step is taken as
  # range(density_x) / (nPoints - 1); this approximates the area under the
  # curve assuming density_x is an evenly spaced grid of nPoints values.
  xx <- resAllD |>
    dplyr::filter(
      variable_name == "duration in days", !is.na(variable_level)
    ) |>
    dplyr::group_by(group_level) |>
    dplyr::summarise(
      n = dplyr::n(),
      area = sum(as.numeric(estimate_value[estimate_name == "density_y"])) * (
        max(as.numeric(estimate_value[estimate_name == "density_x"])) -
          min(as.numeric(estimate_value[estimate_name == "density_x"]))
      ) / (nPoints - 1)
    )
  # Each group holds either 2 * nPoints rows (x and y of a full curve) or 6
  # rows. NOTE(review): the 6-row case is presumably the degenerate density of
  # a group with a single value; confirm.
  expect_identical(
    xx$n |> unique() |> sort(decreasing = TRUE),
    c(as.integer(nPoints * 2L), 6L)
  )
  expect_identical(
    xx$area |> round(2) |> unique() |> sort(decreasing = TRUE),
    c(1, 0)
  )

  # days to next observation period - density
  # Uses the default run (resAll); the expect_equal() above checks that its
  # density rows agree with the density-only run.
  xx <- resAll |>
    dplyr::filter(
      variable_name == "days to next observation period",
      !is.na(variable_level)
    ) |>
    dplyr::group_by(group_level) |>
    dplyr::summarise(
      n = dplyr::n(),
      area = sum(as.numeric(estimate_value[estimate_name == "density_y"])) * (
        max(as.numeric(estimate_value[estimate_name == "density_x"])) -
          min(as.numeric(estimate_value[estimate_name == "density_x"]))
      ) / (nPoints - 1)
    )
  expect_identical(
    xx$n |> unique() |> sort(decreasing = TRUE),
    c(as.integer(nPoints * 2L), 6L)
  )
  # The "2nd" group is excluded from the area check (it has a single value, 26,
  # in the mock data above).
  expect_identical(
    xx$area[xx$group_level != "2nd"] |> round(2) |> unique(),
    1
  )

  # only one exposure per individual: keep the record with the lowest
  # observation_period_id of each person
  cdm$observation_period <- cdm$observation_period |>
    dplyr::group_by(person_id) |>
    dplyr::filter(
      observation_period_id == min(observation_period_id, na.rm = TRUE)
    ) |>
    dplyr::ungroup() |>
    dplyr::compute(name = "observation_period", temporary = FALSE)

  expect_no_error(resOne <- summariseObservationPeriod(cdm$observation_period))

  # counts
  expect_identical(
    resOne$estimate_value[resOne$variable_name == "number records"],
    "4"
  )
  x <- dplyr::tibble(
    group_level = c("overall", "1st"),
    variable_name = "number subjects",
    estimate_value = c("4", "4")
  )
  expect_identical(
    nrow(x),
    resOne |>
      dplyr::inner_join(x, by = colnames(x)) |>
      nrow()
  )

  # Check result type
  checkResultType(resOne, "summarise_observation_period")

  # empty observation period (no person has person_id == 0)
  cdm$observation_period <- cdm$observation_period |>
    dplyr::filter(person_id == 0) |>
    dplyr::compute(name = "observation_period", temporary = FALSE)
  expect_no_error(
    resEmpty <- summariseObservationPeriod(cdm$observation_period)
  )
  # expect_equal() reports the actual row count on failure, unlike
  # expect_true(nrow(...) == 2).
  expect_equal(nrow(resEmpty), 2)
  expect_identical(unique(resEmpty$estimate_value), "0")

  # table works
  expect_no_error(tableObservationPeriod(resAll))
  expect_no_error(tableObservationPeriod(resOne))
  expect_no_error(tableObservationPeriod(resEmpty))

  # plot works
  expect_no_error(plotObservationPeriod(resAll))
  expect_no_error(plotObservationPeriod(resOne))
  # expect_warning(plotObservationPeriod(resEmpty)) THIS TEST NEEDS DISCUSSION

  # check all plots combinations
  # number subjects: only barplot is accepted
  expect_no_error(
    resAll |>
      plotObservationPeriod(
        variableName = "number subjects", plotType = "barplot"
      )
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "number subjects", plotType = "boxplot"
      )
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "number subjects", plotType = "densityplot"
      )
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "number subjects", plotType = "random"
      )
  )

  # duration in days: boxplot works, densityplot needs density estimates
  # (resAllN was computed without them)
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "duration in days", plotType = "barplot"
      )
  )
  expect_no_error(
    resAll |>
      plotObservationPeriod(
        variableName = "duration in days", plotType = "boxplot"
      )
  )
  expect_error(
    resAllN |>
      plotObservationPeriod(
        variableName = "duration in days", plotType = "densityplot"
      )
  )
  expect_no_error(
    resAllD |>
      plotObservationPeriod(
        variableName = "duration in days", plotType = "densityplot"
      )
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "duration in days", plotType = "random"
      )
  )

  # records per person
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "records per person", plotType = "barplot"
      )
  )
  expect_no_error(
    resAll |>
      plotObservationPeriod(
        variableName = "records per person", plotType = "boxplot"
      )
  )
  expect_error(
    resAllN |>
      plotObservationPeriod(
        variableName = "records per person", plotType = "densityplot"
      )
  )
  expect_no_error(
    resAllD |>
      plotObservationPeriod(
        variableName = "records per person", plotType = "densityplot"
      )
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "records per person", plotType = "random"
      )
  )

  # days to next observation period
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "days to next observation period",
        plotType = "barplot"
      )
  )
  expect_no_error(
    resAll |>
      plotObservationPeriod(
        variableName = "days to next observation period",
        plotType = "boxplot"
      )
  )
  expect_error(
    resAllN |>
      plotObservationPeriod(
        variableName = "days to next observation period",
        plotType = "densityplot"
      )
  )
  expect_no_error(
    resAllD |>
      plotObservationPeriod(
        variableName = "days to next observation period",
        plotType = "densityplot"
      )
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "days to next observation period",
        plotType = "random"
      )
  )

  PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("check it works with mockOmopSketch", {
  cdm <- mockOmopSketch(numberIndividuals = 5, seed = 1)

  sop <- summariseObservationPeriod(cdm$observation_period)

  # counts
  expect_identical(
    sop$estimate_value[sop$variable_name == "number records"],
    "5"
  )
  # The observation period ordinal ("1st") is reported in group_level, as in
  # the other tests of this file. Keying this tibble on strata_level would only
  # pass by coincidence: the "overall" row would match two result rows and the
  # "1st" row none.
  x <- dplyr::tibble(
    group_level = c("overall", "1st"),
    variable_name = "number subjects",
    estimate_value = c("5", "5")
  )
  expect_identical(
    nrow(x),
    sop |>
      dplyr::inner_join(x, by = colnames(x)) |>
      nrow()
  )

  # records per person: every remaining estimate is expected to be "1"
  expect_identical(
    sop |>
      dplyr::filter(
        variable_name == "records per person",
        estimate_name != "sd",
        !grepl("density", estimate_name)
      ) |>
      dplyr::pull("estimate_value"),
    rep("1", 8)
  )

  # duration: the distinct min/q25/median/q75/max values must be the sorted
  # durations (end - start + 1) of the observation periods
  expect_identical(
    sop |>
      dplyr::filter(
        variable_name == "duration in days",
        estimate_name %in% c("min", "q25", "median", "q75", "max")
      ) |>
      dplyr::pull("estimate_value") |>
      unique() |>
      sort(),
    cdm$observation_period |>
      dplyr::mutate(
        duration = observation_period_end_date -
          observation_period_start_date + 1
      ) |>
      dplyr::pull(duration) |>
      as.character() |>
      sort()
  )

  # days to next observation period: no next period, so the means are missing
  expect_identical(
    sop |>
      dplyr::filter(
        variable_name == "days to next observation period",
        estimate_name == "mean"
      ) |>
      dplyr::pull("estimate_value"),
    c(NA_character_, NA_character_)
  )

  # Check result type
  omopgenerics::validateResultArgument(sop)

  # table works
  expect_no_error(tableObservationPeriod(sop))

  # plot works
  expect_no_error(plotObservationPeriod(sop))

  PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("check summariseObservationPeriod strata works", {
  # Load mock database: same observation periods as the first test, but with
  # different birth years so that the age strata split the persons.
  cdm <- omopgenerics::cdmFromTables(
    tables = list(
      person = dplyr::tibble(
        person_id = as.integer(1:4),
        gender_concept_id = c(8507L, 8532L, 8532L, 8507L),
        year_of_birth = c(2010L, 2010L, 2011L, 2012L),
        month_of_birth = 1L,
        day_of_birth = 1L,
        race_concept_id = 0L,
        ethnicity_concept_id = 0L
      ),
      observation_period = dplyr::tibble(
        observation_period_id = as.integer(1:8),
        person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(),
        observation_period_start_date = as.Date(c(
          "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10",
          "2020-03-10", "2020-03-01", "2020-04-10", "2020-03-10"
        )),
        observation_period_end_date = as.Date(c(
          "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31",
          "2020-03-27", "2020-03-09", "2020-05-08", "2020-12-10"
        )),
        period_type_concept_id = 0L
      )
    ),
    cdmName = "mock data"
  )
  cdm <- CDMConnector::copyCdmTo(
    con = connection(), cdm = cdm, schema = schema()
  )

  # simple run
  expect_no_error(
    summariseObservationPeriod(
      cdm$observation_period,
      estimates = c("mean"),
      ageGroup = list(c(0, 9), c(10, Inf))
    )
  )
  expect_no_error(
    resAll <- summariseObservationPeriod(
      cdm$observation_period,
      estimates = c("mean", "sd", "min", "max", "median", "density")
    )
  )
  expect_no_error(
    resStrata <- summariseObservationPeriod(
      cdm$observation_period,
      estimates = c("mean", "sd", "min", "max", "median", "density"),
      ageGroup = list("<10" = c(0, 9), ">=10" = c(10, Inf)),
      sex = TRUE
    )
  )

  # test overall: the overall stratum must reproduce the unstratified result.
  # The join is a natural join on all shared columns (everything except the
  # two renamed estimate_value columns).
  x <- resStrata |>
    dplyr::filter(strata_name == "overall", strata_level == "overall") |>
    dplyr::rename("strata" = "estimate_value") |>
    dplyr::inner_join(
      resAll |>
        dplyr::rename("all" = "estimate_value")
    )
  expect_identical(x$strata, x$all)

  # check strata groups have the expected value
  expect_identical(
    resStrata |>
      dplyr::filter(
        variable_name == "number subjects",
        strata_level == "Female",
        group_level == "2nd"
      ) |>
      dplyr::pull("estimate_value"),
    "2"
  )
  expect_identical(
    resStrata |>
      dplyr::filter(
        variable_name == "number subjects",
        strata_level == ">=10 &&& Male",
        group_level == "3rd"
      ) |>
      dplyr::pull("estimate_value"),
    "1"
  )

  # duration
  # The expected values put persons 1 and 2 (born 2010) in ">=10" and persons
  # 3 and 4 (born 2011, 2012) in "<10"; values are ordered by period ordinal.
  expect_identical(
    resStrata |>
      dplyr::filter(
        variable_name == "duration in days",
        estimate_name == "mean",
        strata_level == ">=10"
      ) |>
      dplyr::pull("estimate_value"),
    as.character(c(
      mean(c(20, 18)),
      mean(c(6, 144)),
      mean(113)
    ))
  )
  expect_identical(
    resStrata |>
      dplyr::filter(
        variable_name == "duration in days",
        estimate_name == "mean",
        strata_level == "<10"
      ) |>
      dplyr::pull("estimate_value"),
    as.character(c(
      mean(c(9, 276)),
      mean(c(29))
    ))
  )

  # days to next observation period: person 3 (female, <10) has 32 days
  # between 2020-03-09 and 2020-04-10
  expect_identical(
    resStrata |>
      dplyr::filter(
        variable_name == "days to next observation period",
        estimate_name == "mean",
        strata_level == "<10 &&& Female",
        group_level == "1st"
      ) |>
      dplyr::pull("estimate_value"),
    "32"
  )

  PatientProfiles::mockDisconnect(cdm = cdm)
})