test_generate_concept_cohort_set <- function(con, cdm_schema, write_schema) { # if (dbms(con) == "bigquery") return(skip("failing test")) if (dbms(con) == "bigquery") { return(skip("failing test")) } # withr::local_options("CDMConnector.cohort_as_temp" = FALSE) # temp cohort tables are not implemented yet cdm <- cdm_from_con(con, cdm_schema = cdm_schema, write_schema = write_schema ) # check that we have records cdm$condition_occurrence %>% dplyr::filter(condition_concept_id == 192671) %>% dplyr::count() %>% dplyr::pull("n") %>% expect_gt(10) # default (no descendants) ---- cdm <- generateConceptCohortSet( cdm = cdm, conceptSet = list(gibleed = 192671), name = "gibleed", overwrite = TRUE ) cohort <- readCohortSet(system.file("cohorts3", package = "CDMConnector")) %>% dplyr::filter(cohort_name == "GiBleed_default") %>% dplyr::mutate(cohort_definition_id = 1L) cdm <- generateCohortSet(cdm, cohortSet = cohort, name = "gibleed2", overwrite = TRUE) expected <- dplyr::collect(cdm$gibleed2) %>% dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>% dplyr::mutate_if(~ "integer64" %in% class(.), as.integer) actual <- dplyr::collect(cdm$gibleed) %>% dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>% dplyr::mutate_if(~ "integer64" %in% class(.), as.integer) expect_true(nrow(expected) > 0) expect_true(nrow(actual) == nrow(expected)) # setdiff(unique(expected$subject_id), unique(actual$subject_id)) # setdiff(unique(actual$subject_id), unique(expected$subject_id)) expect_setequal(unique(expected$subject_id), unique(actual$subject_id)) expect_equal(actual, expected) expect_error({ # should be fail fast case generateConceptCohortSet( cdm = cdm, conceptSet = list(gibleed = 192671), name = "gibleed", overwrite = FALSE ) }) cdm <- generateConceptCohortSet(cdm, conceptSet = list(gibleed = 192671), name = "gibleed3", requiredObservation = c(2, 2), overwrite = TRUE ) cdm <- generateConceptCohortSet(cdm, conceptSet = list(gibleed = 192671), name = "gibleed4", requiredObservation = c(2, 200), overwrite = TRUE ) expect_identical(cohortSet(cdm$gibleed4)$limit, "first") expect_identical(cohortSet(cdm$gibleed4)$end, "observation_period_end_date") expect_identical(cohortSet(cdm$gibleed4)$prior_observation, 2) expect_identical(cohortSet(cdm$gibleed4)$future_observation, 200) expect_true({ cohort_count(cdm$gibleed3)$number_records >= cohort_count(cdm$gibleed4)$number_records }) # default (with descendants) ---- if (FALSE) { # if (rlang::is_installed("Capr")) { # failing for some reason. gives different results. # we need Capr to include descendants cdm <- generateConceptCohortSet( cdm = cdm, conceptSet = list(gibleed = Capr::cs(Capr::descendants(192671), name = "gibleed")), name = "gibleed", overwrite = TRUE ) cohort <- readCohortSet(system.file("cohorts3", package = "CDMConnector")) %>% dplyr::filter(cohort_name == "GiBleed_default_with_descendants") %>% dplyr::mutate(cohort_definition_id = 1L) cdm <- generateCohortSet(cdm, cohortSet = cohort, name = "gibleed2", overwrite = TRUE) expected <- dplyr::collect(cdm$gibleed2) %>% dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>% dplyr::mutate_if(~ "integer64" %in% class(.), as.integer) actual <- dplyr::collect(cdm$gibleed) %>% dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>% dplyr::mutate_if(~ "integer64" %in% class(.), as.integer) # setdiff(unique(expected$subject_id), unique(actual$subject_id)) # setdiff(unique(actual$subject_id), unique(expected$subject_id)) expect_true(nrow(expected) > 0) expect_true(nrow(actual) == nrow(expected)) expect_setequal(unique(expected$subject_id), unique(actual$subject_id)) expect_equal(actual, expected) } # all occurrences (no descendants) ---- cdm <- generateConceptCohortSet( cdm = cdm, conceptSet = list(gibleed = 192671), name = "gibleed", limit = "all", overwrite = TRUE ) cohort <- readCohortSet(system.file("cohorts3", package = "CDMConnector")) %>% dplyr::filter(cohort_name == "GiBleed_all") %>% dplyr::mutate(cohort_definition_id = 1L) cdm <- generateCohortSet(cdm, cohortSet = cohort, name = "gibleed2", overwrite = TRUE) expect_equal( as.integer(dplyr::pull(dplyr::tally(cdm$gibleed2), "n")), as.integer(dplyr::pull(dplyr::tally(cdm$gibleed), "n")) ) expected <- dplyr::collect(cdm$gibleed2) %>% dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>% dplyr::mutate_if(~ "integer64" %in% class(.), as.integer) actual <- dplyr::collect(cdm$gibleed) %>% dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>% dplyr::mutate_if(~ "integer64" %in% class(.), as.integer) # setdiff(unique(expected$subject_id), unique(actual$subject_id)) # setdiff(unique(actual$subject_id), unique(expected$subject_id)) expect_true(nrow(expected) > 0) expect_true(nrow(actual) == nrow(expected)) expect_setequal(unique(expected$subject_id), unique(actual$subject_id)) expect_equal(actual, expected) # all occurrences (no descendants) fixed end date ---- cdm <- generateConceptCohortSet( cdm = cdm, conceptSet = list(gibleed = 192671), name = "gibleed", limit = "all", end = 10, overwrite = TRUE ) cohort <- readCohortSet(system.file("cohorts3", package = "CDMConnector")) %>% dplyr::filter(cohort_name == "GiBleed_all_end10") %>% dplyr::mutate(cohort_definition_id = 1L) cdm <- generateCohortSet(cdm, cohortSet = cohort, name = "gibleed2", overwrite = TRUE) expect_equal( as.integer(dplyr::pull(dplyr::tally(cdm$gibleed2), "n")), as.integer(dplyr::pull(dplyr::tally(cdm$gibleed), "n")) ) expected <- dplyr::collect(cdm$gibleed2) %>% dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>% dplyr::mutate_if(~ "integer64" %in% class(.), as.integer) actual <- dplyr::collect(cdm$gibleed) %>% dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date, .data$cohort_end_date) %>% dplyr::mutate_if(~ "integer64" %in% class(.), as.integer) # setdiff(unique(expected$subject_id), unique(actual$subject_id)) # setdiff(unique(actual$subject_id), unique(expected$subject_id)) expect_true(nrow(expected) > 0) expect_true(nrow(actual) == nrow(expected)) expect_setequal(unique(expected$subject_id), unique(actual$subject_id)) expect_equal(actual, expected) # clean up CDMConnector::dropTable(cdm, dplyr::contains("gibleed")) } for (dbtype in dbToTest) { test_that(glue::glue("{dbtype} - generateConceptCohortSet"), { skip_if_not_installed("CirceR") con <- get_connection(dbtype) cdm_schema <- get_cdm_schema(dbtype) write_schema <- get_write_schema(dbtype) skip_if(any(write_schema == "") || any(cdm_schema == "") || is.null(con)) test_generate_concept_cohort_set(con, cdm_schema, write_schema) disconnect(con) }) } test_that("missing domains produce warning", { con <- DBI::dbConnect(duckdb::duckdb(), eunomia_dir()) cdm <- cdm_from_con(con, "main", "main") %>% cdm_select_tbl(-drug_exposure) expect_warning({ cdm <- generateConceptCohortSet(cdm, conceptSet = list(celecoxib = 1118084)) }) DBI::dbDisconnect(con, shutdown = TRUE) })