# ---- Helpers ---- # Build a minimal characterizationSettings with 1 domain, 1 window make_plan_for_sn <- function( baseFeatures = list( condition_occurrence = list(include = TRUE, type = "start"), drug_exposure = list(include = FALSE), condition_era = list(include = FALSE), drug_era = list(include = FALSE), procedure_occurrence = list(include = FALSE), observation = list(include = FALSE), device_exposure = list(include = FALSE), visit_occurrence = list(include = FALSE), measurement = list(include = FALSE) ), windows = defineAnalysisWindows(startDays = c(-30), endDays = c(-1)), cohort = list(include = FALSE), cs = list(include = FALSE, conceptSets = NULL, type = "binary") ) { planAnalysis( analysisWindows = windows, useBaseFeatures = baseFeatures, useCohortFeatures = cohort, useConceptSetFeatures = cs ) } # =========================================================================== # singleNodeSetting — input validation # =========================================================================== test_that("singleNodeSetting rejects non-characterizationSettings plan", { expect_error( singleNodeSetting( plan = list(foo = 1), cohortId = 1L, cohortDatabaseSchema = "s", cohortTable = "t", cdmDatabaseSchema = "cdm" ), "characterizationSettings" ) }) test_that("singleNodeSetting validates cohortId type", { plan <- make_plan_for_sn() expect_error( singleNodeSetting( plan = plan, cohortId = "not_int", cohortDatabaseSchema = "s", cohortTable = "t", cdmDatabaseSchema = "cdm" ), "cohortId|integerish" ) }) test_that("singleNodeSetting validates schema strings", { plan <- make_plan_for_sn() expect_error( singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = 123, cohortTable = "t", cdmDatabaseSchema = "cdm" ), "cohortDatabaseSchema|character" ) }) # =========================================================================== # singleNodeSetting — base features # =========================================================================== test_that("singleNodeSetting produces specs for single base domain", { plan <- make_plan_for_sn() specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "results", cohortTable = "cohort", cdmDatabaseSchema = "cdm" ) expect_s3_class(specs, "singleNodeSettingList") expect_length(specs, 1L) # 1 domain * 1 window s <- specs[[1]] expect_s3_class(s, "singleNodeSpec") expect_equal(s$domainTable, "condition_occurrence") expect_equal(s$conceptIdCol, "condition_concept_id") expect_equal(s$startDay, -30L) expect_equal(s$endDay, -1L) expect_equal(s$source, "base") expect_false(s$atc) expect_false(s$conceptSet) expect_equal(s$cohortId, 1L) expect_equal(s$cohortDatabaseSchema, "results") expect_equal(s$type, "start") expect_false(s$overlap) }) test_that("singleNodeSetting creates specs per domain x window", { plan <- make_plan_for_sn( baseFeatures = list( condition_occurrence = list(include = TRUE, type = "start"), measurement = list(include = TRUE), drug_exposure = list(include = FALSE), condition_era = list(include = FALSE), drug_era = list(include = FALSE), procedure_occurrence = list(include = FALSE), observation = list(include = FALSE), device_exposure = list(include = FALSE), visit_occurrence = list(include = FALSE) ), windows = defineAnalysisWindows( startDays = c(-30, 0), endDays = c(-1, 30) ) ) specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ) # 2 domains * 2 windows = 4 specs expect_length(specs, 4L) }) test_that("singleNodeSetting handles ATC — one spec per level per window", { plan <- make_plan_for_sn( baseFeatures = list( drug_exposure = list(include = TRUE, atc = TRUE, atcLevels = c(3L, 5L)), condition_occurrence = list(include = FALSE), condition_era = list(include = FALSE), drug_era = list(include = FALSE), procedure_occurrence = list(include = FALSE), observation = list(include = FALSE), device_exposure = list(include = FALSE), visit_occurrence = list(include = FALSE), measurement = list(include = FALSE) ), windows = defineAnalysisWindows(startDays = -30, endDays = -1) ) specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ) # 1 window * 2 ATC levels = 2 specs expect_length(specs, 2L) # Check ATC-related fields for (s in specs) { expect_true(s$atc) expect_true(grepl("ATC", s$analysisName)) } atcLevels <- vapply(specs, function(s) s$atcLevels, integer(1L)) expect_setequal(atcLevels, c(3L, 5L)) }) test_that("singleNodeSetting resolves conceptIdCol for drug tables without ATC", { plan <- make_plan_for_sn( baseFeatures = list( drug_exposure = list(include = TRUE, atc = FALSE), condition_occurrence = list(include = FALSE), condition_era = list(include = FALSE), drug_era = list(include = FALSE), procedure_occurrence = list(include = FALSE), observation = list(include = FALSE), device_exposure = list(include = FALSE), visit_occurrence = list(include = FALSE), measurement = list(include = FALSE) ) ) specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ) expect_equal(specs[[1]]$conceptIdCol, "drug_concept_id") }) test_that("singleNodeSetting overlap falls back to start when no end date", { # condition_occurrence has no dateColEnd plan <- make_plan_for_sn( baseFeatures = list( condition_occurrence = list(include = TRUE, type = "overlap"), drug_exposure = list(include = FALSE), condition_era = list(include = FALSE), drug_era = list(include = FALSE), procedure_occurrence = list(include = FALSE), observation = list(include = FALSE), device_exposure = list(include = FALSE), visit_occurrence = list(include = FALSE), measurement = list(include = FALSE) ) ) expect_warning( specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ), "Overlap type requested.*no end date" ) expect_false(specs[[1]]$overlap) }) test_that("singleNodeSetting overlap works for era tables", { plan <- make_plan_for_sn( baseFeatures = list( condition_era = list(include = TRUE, type = "overlap"), drug_exposure = list(include = FALSE), condition_occurrence = list(include = FALSE), drug_era = list(include = FALSE), procedure_occurrence = list(include = FALSE), observation = list(include = FALSE), device_exposure = list(include = FALSE), visit_occurrence = list(include = FALSE), measurement = list(include = FALSE) ) ) specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ) expect_true(specs[[1]]$overlap) expect_equal(specs[[1]]$type, "overlap") expect_equal(specs[[1]]$dateColEnd, "condition_era_end_date") }) test_that("singleNodeSetting aggregated param propagates", { plan <- make_plan_for_sn() specs_agg <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm", aggregated = TRUE ) expect_true(specs_agg[[1]]$aggregated) specs_raw <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm", aggregated = FALSE ) expect_false(specs_raw[[1]]$aggregated) }) test_that("singleNodeSetting analysis IDs are unique", { plan <- make_plan_for_sn( baseFeatures = list( condition_occurrence = list(include = TRUE, type = "start"), measurement = list(include = TRUE), drug_exposure = list(include = FALSE), condition_era = list(include = FALSE), drug_era = list(include = FALSE), procedure_occurrence = list(include = FALSE), observation = list(include = FALSE), device_exposure = list(include = FALSE), visit_occurrence = list(include = FALSE) ), windows = defineAnalysisWindows( startDays = c(-365, -30, 1), endDays = c(-1, -1, 30) ) ) specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ) ids <- vapply(specs, function(s) s$analysisId, integer(1L)) expect_equal(length(ids), length(unique(ids))) }) # =========================================================================== # singleNodeSetting — cohort features # =========================================================================== test_that("singleNodeSetting generates cohort feature specs", { plan <- make_plan_for_sn( baseFeatures = list( drug_exposure = list(include = FALSE), condition_occurrence = list(include = FALSE), condition_era = list(include = FALSE), drug_era = list(include = FALSE), procedure_occurrence = list(include = FALSE), observation = list(include = FALSE), device_exposure = list(include = FALSE), visit_occurrence = list(include = FALSE), measurement = list(include = FALSE) ), cohort = list( include = TRUE, type = "start", cohortIds = c(101L, 102L), cohortNames = c("T2DM", "HTN"), cohortTable = "feat_cohort", covariateSchema = "results" ) ) specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ) # 2 cohorts * 1 window = 2 expect_length(specs, 2L) expect_true(all(vapply(specs, function(s) s$source, character(1)) == "cohort")) expect_true(all(grepl("Cohort:", vapply(specs, function(s) s$analysisName, "")))) expect_equal(specs[[1]]$conceptIdCol, "cohort_definition_id") # Check table name and schema are separate expect_equal(specs[[1]]$domainTable, "feat_cohort") expect_equal(specs[[1]]$tableDatabaseSchema, "results") }) test_that("singleNodeSetting auto-generates cohortNames when NULL", { plan <- make_plan_for_sn( baseFeatures = list(drug_exposure = list(include = FALSE), condition_occurrence = list(include = FALSE), condition_era = list(include = FALSE), drug_era = list(include = FALSE), procedure_occurrence = list(include = FALSE), observation = list(include = FALSE), device_exposure = list(include = FALSE), visit_occurrence = list(include = FALSE), measurement = list(include = FALSE) ), cohort = list( include = TRUE, type = "start", cohortIds = c(5L), cohortNames = NULL, cohortTable = "ct", covariateSchema = "s" ) ) specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ) expect_true(grepl("Cohort_5", specs[[1]]$analysisName)) }) # =========================================================================== # singleNodeSetting — concept set features # =========================================================================== test_that("singleNodeSetting generates concept set specs", { plan <- make_plan_for_sn( baseFeatures = list( drug_exposure = list(include = FALSE), condition_occurrence = list(include = FALSE), condition_era = list(include = FALSE), drug_era = list(include = FALSE), procedure_occurrence = list(include = FALSE), observation = list(include = FALSE), device_exposure = list(include = FALSE), visit_occurrence = list(include = FALSE), measurement = list(include = FALSE) ), cs = list( include = TRUE, type = "binary", conceptSets = list( diabetes = list( items = list( list(concept = list(CONCEPT_ID = 201820L), includeDescendants = TRUE) ), tables = c("condition_occurrence") ) ) ) ) specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ) # 1 concept set * 1 table * 1 window = 1 expect_length(specs, 1L) s <- specs[[1]] expect_equal(s$source, "conceptSet") expect_true(s$conceptSet) expect_equal(s$conceptSetName, "diabetes") expect_true(grepl("Concept set: diabetes", s$analysisName)) }) test_that("singleNodeSetting concept set multiple tables", { plan <- make_plan_for_sn( baseFeatures = list( drug_exposure = list(include = FALSE), condition_occurrence = list(include = FALSE), condition_era = list(include = FALSE), drug_era = list(include = FALSE), procedure_occurrence = list(include = FALSE), observation = list(include = FALSE), device_exposure = list(include = FALSE), visit_occurrence = list(include = FALSE), measurement = list(include = FALSE) ), cs = list( include = TRUE, type = "counts", conceptSets = list( test_cs = list( items = list(list(concept = list(CONCEPT_ID = 100L))), tables = c("condition_occurrence", "measurement") ) ) ) ) specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ) # 1 concept set * 2 tables * 1 window = 2 expect_length(specs, 2L) tables <- vapply(specs, function(s) s$domainTable, character(1L)) expect_setequal(tables, c("condition_occurrence", "measurement")) }) # =========================================================================== # singleNodeSetting — empty specs # =========================================================================== test_that("singleNodeSetting warns when no specs generated", { plan <- make_plan_for_sn( baseFeatures = list( drug_exposure = list(include = FALSE), condition_occurrence = list(include = FALSE), condition_era = list(include = FALSE), drug_era = list(include = FALSE), procedure_occurrence = list(include = FALSE), observation = list(include = FALSE), device_exposure = list(include = FALSE), visit_occurrence = list(include = FALSE), measurement = list(include = FALSE) ) ) expect_warning( specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ), "No analysis specifications" ) expect_length(specs, 0L) }) # =========================================================================== # print methods # =========================================================================== test_that("print.singleNodeSettingList produces readable output", { plan <- make_plan_for_sn() specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ) output <- capture.output(print(specs)) expect_true(any(grepl("Single Node Analysis Specifications", output))) expect_true(any(grepl("Total specs:", output))) }) test_that("print.singleNodeSpec produces readable output", { plan <- make_plan_for_sn() specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ) output <- capture.output(print(specs[[1]])) expect_true(any(grepl("Single Node Spec", output))) expect_true(any(grepl("Analysis ID", output))) }) test_that("print.singleNodeSettingList returns invisibly", { plan <- make_plan_for_sn() specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ) result <- withVisible(print(specs)) expect_false(result$visible) }) test_that("print.singleNodeSpec returns invisibly", { plan <- make_plan_for_sn() specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ) result <- withVisible(print(specs[[1]])) expect_false(result$visible) }) test_that("print.singleNodeSettingList truncates at 5", { plan <- make_plan_for_sn( baseFeatures = list( condition_occurrence = list(include = TRUE, type = "start"), measurement = list(include = TRUE), procedure_occurrence = list(include = TRUE), drug_exposure = list(include = FALSE), condition_era = list(include = FALSE), drug_era = list(include = FALSE), observation = list(include = FALSE), device_exposure = list(include = FALSE), visit_occurrence = list(include = FALSE) ), windows = defineAnalysisWindows( startDays = c(-365, -30), endDays = c(-1, -1) ) ) specs <- singleNodeSetting( plan = plan, cohortId = 1L, cohortDatabaseSchema = "r", cohortTable = "c", cdmDatabaseSchema = "cdm" ) output <- capture.output(print(specs)) expect_true(any(grepl("and \\d+ more", output))) })