# ============================================================================
# test-covariateData.R — Tests for FeatureExtraction custom covariate builder
# ============================================================================

# ---- Helpers ----

# Build the four arguments that createOcmCovariateSettings() is called with in
# these tests. By default only condition_occurrence (type = "start") is
# included, evaluated over a single [-365, -1] analysis window; cohort and
# concept-set features are switched off.
make_plan_for_cd <- function(
    baseFeatures = list(
      condition_occurrence = list(include = TRUE, type = "start"),
      drug_exposure = list(include = FALSE),
      condition_era = list(include = FALSE),
      drug_era = list(include = FALSE),
      procedure_occurrence = list(include = FALSE),
      observation = list(include = FALSE),
      device_exposure = list(include = FALSE),
      visit_occurrence = list(include = FALSE),
      measurement = list(include = FALSE)
    ),
    windows = defineAnalysisWindows(startDays = c(-365), endDays = c(-1))
) {
  list(
    analysisWindows = windows,
    useBaseFeatures = baseFeatures,
    useCohortFeatures = list(include = FALSE),
    useConceptSetFeatures = list(
      include = FALSE,
      conceptSets = NULL,
      type = "binary"
    )
  )
}

# TRUE when `x` is an acceptable result container: CovariateData (if
# FeatureExtraction is loaded) or a plain Andromeda object.
is_covariate_data <- function(x) {
  inherits(x, "Andromeda") || inherits(x, "CovariateData")
}

# ===========================================================================
# createOcmCovariateSettings — class and attributes
# ===========================================================================

test_that("createOcmCovariateSettings returns correct class", {
  params <- make_plan_for_cd()
  settings <- createOcmCovariateSettings(
    analysisWindows = params$analysisWindows,
    useBaseFeatures = params$useBaseFeatures,
    useCohortFeatures = params$useCohortFeatures,
    useConceptSetFeatures = params$useConceptSetFeatures
  )

  expect_s3_class(settings, "covariateSettings")
  expect_equal(attr(settings, "fun"), "getDbOcmCovariateData")
})

test_that("createOcmCovariateSettings stores all parameters", {
  params <- make_plan_for_cd()
  settings <- createOcmCovariateSettings(
    analysisWindows = params$analysisWindows,
    useBaseFeatures = params$useBaseFeatures,
    useCohortFeatures = params$useCohortFeatures,
    useConceptSetFeatures = params$useConceptSetFeatures
  )

  # Comparing the set difference (rather than a bare TRUE/FALSE) makes a
  # failure report exactly which fields are missing.
  expectedFields <- c(
    "analysisWindows",
    "useBaseFeatures",
    "useCohortFeatures",
    "useConceptSetFeatures"
  )
  expect_equal(setdiff(expectedFields, names(settings)), character(0))

  # Windows are preserved
  expect_equal(settings$analysisWindows[[1]]$startDay, -365L)
  expect_equal(settings$analysisWindows[[1]]$endDay, -1L)
})

test_that("createOcmCovariateSettings with defaults does not error", {
  settings <- createOcmCovariateSettings()
  expect_s3_class(settings, "covariateSettings")
})

test_that("createOcmCovariateSettings validates bad input via planAnalysis", {
  expect_error(
    createOcmCovariateSettings(
      useBaseFeatures = list(unknown_table = list(include = TRUE))
    ),
    "Unknown domain"
  )
})

# ===========================================================================
# .domainIdFromTable — domain mapping
# ===========================================================================

test_that(".domainIdFromTable returns correct domain IDs", {
  # table name -> expected domain ID; unrecognised tables map to "Other".
  expectedDomains <- c(
    drug_exposure = "Drug",
    condition_occurrence = "Condition",
    procedure_occurrence = "Procedure",
    measurement = "Measurement",
    observation = "Observation",
    device_exposure = "Device",
    visit_occurrence = "Visit",
    unknown_table = "Other"
  )

  for (tableName in names(expectedDomains)) {
    expect_equal(
      OdysseusCharacterizationModule:::.domainIdFromTable(tableName),
      expectedDomains[[tableName]],
      info = tableName
    )
  }
})

# ===========================================================================
# .assembleCovariateData — structure validation
# ===========================================================================

test_that(".assembleCovariateData produces empty CovariateData when no results", {
  skip_if_not_installed("Andromeda")
  skip_if_not_installed("Eunomia")

  # The function takes a live connection even when there are no results to
  # assemble, so connect to the Eunomia database.
  connectionDetails <- Eunomia::getEunomiaConnectionDetails()
  conn <- DatabaseConnector::connect(connectionDetails)
  on.exit(DatabaseConnector::disconnect(conn), add = TRUE)

  covData <- OdysseusCharacterizationModule:::.assembleCovariateData(
    results = list(),
    specs = list(),
    connection = conn,
    cdmDatabaseSchema = "main",
    targetDialect = "sqlite",
    tempEmulationSchema = NULL
  )
  # Register the close immediately so the Andromeda object is released even
  # if a later statement errors.
  on.exit(Andromeda::close(covData), add = TRUE)

  # Should be CovariateData (if FeatureExtraction loaded) or Andromeda
  expect_true(is_covariate_data(covData))

  # Andromeda tables accessible via $
  covDf <- as.data.frame(covData$covariates)
  expect_equal(nrow(covDf), 0L)
  expect_true(
    all(c("rowId", "covariateId", "covariateValue") %in% names(covDf))
  )

  refDf <- as.data.frame(covData$covariateRef)
  expect_equal(nrow(refDf), 0L)
  expect_true(
    all(
      c("covariateId", "covariateName", "analysisId", "conceptId") %in%
        names(refDf)
    )
  )

  analysisDf <- as.data.frame(covData$analysisRef)
  expect_equal(nrow(analysisDf), 0L)
  expect_true(
    all(
      c(
        "analysisId", "analysisName", "domainId", "startDay", "endDay",
        "isBinary", "missingMeansZero"
      ) %in% names(analysisDf)
    )
  )
})

test_that(".assembleCovariateData correctly builds covariates from raw data", {
  skip_if_not_installed("Andromeda")
  skip_if_not_installed("Eunomia")

  connectionDetails <- Eunomia::getEunomiaConnectionDetails()
  conn <- DatabaseConnector::connect(connectionDetails)
  on.exit(DatabaseConnector::disconnect(conn), add = TRUE)

  # Simulate raw results from executeSpecs (non-aggregated)
  mockResult1 <- data.frame(
    COHORT_DEFINITION_ID = c(1L, 1L),
    ROW_ID = c(100L, 200L),
    SUBJECT_ID = c(100L, 200L),
    TABLE_CONCEPT_ID = c(317009L, 317009L),
    COHORT_START_DATE = as.Date(c("2020-01-01", "2020-06-01")),
    ANALYSIS_ID = c(1001L, 1001L),
    stringsAsFactors = FALSE
  )
  mockResult2 <- data.frame(
    COHORT_DEFINITION_ID = c(1L),
    ROW_ID = c(100L),
    SUBJECT_ID = c(100L),
    TABLE_CONCEPT_ID = c(4329847L),
    COHORT_START_DATE = as.Date("2020-01-01"),
    ANALYSIS_ID = c(1001L),
    stringsAsFactors = FALSE
  )
  mockResults <- list("1001" = mockResult1, "1001b" = mockResult2)

  # Build mock specs
  mockSpec <- list(
    analysisId = 1001L,
    analysisName = "Condition occurrence [-365, -1]",
    domainTable = "condition_occurrence",
    startDay = -365L,
    endDay = -1L,
    aggregated = FALSE
  )
  class(mockSpec) <- "singleNodeSpec"

  covData <- OdysseusCharacterizationModule:::.assembleCovariateData(
    results = mockResults,
    specs = list(mockSpec),
    connection = conn,
    cdmDatabaseSchema = "main",
    targetDialect = "sqlite",
    tempEmulationSchema = NULL
  )
  # Release the Andromeda object even if a later statement errors.
  on.exit(Andromeda::close(covData), add = TRUE)

  expect_true(is_covariate_data(covData))

  covDf <- as.data.frame(covData$covariates)
  expect_gt(nrow(covDf), 0L)
  expect_true(
    all(c("rowId", "covariateId", "covariateValue") %in% names(covDf))
  )

  # Check covariate_id formula: concept_id * 1000 + analysis_id
  expectedCovId1 <- 317009 * 1000 + 1001
  expect_true(expectedCovId1 %in% covDf$covariateId)

  # All values should be 1 (binary)
  expect_true(all(covDf$covariateValue == 1))

  # Check analysisRef
  analysisDf <- as.data.frame(covData$analysisRef)
  expect_true(1001L %in% analysisDf$analysisId)
  isMockAnalysis <- analysisDf$analysisId == 1001L
  expect_equal(analysisDf$domainId[isMockAnalysis], "Condition")
  expect_equal(analysisDf$isBinary[isMockAnalysis], "Y")

  # Check covariateRef
  refDf <- as.data.frame(covData$covariateRef)
  expect_true(expectedCovId1 %in% refDf$covariateId)
  # The name check below is only meaningful for a single row, so assert that
  # the covariate ID is referenced exactly once before inspecting the name.
  covName <- refDf$covariateName[refDf$covariateId == expectedCovId1]
  expect_length(covName, 1L)
  expect_true(nchar(covName) > 0)
})

# ===========================================================================
# getDbOcmCovariateData — input validation
# ===========================================================================

test_that("getDbOcmCovariateData rejects aggregated = TRUE", {
  skip_if_not_installed("Andromeda")

  settings <- createOcmCovariateSettings()

  # Use a NULL connection placeholder — should fail before connecting
  expect_error(
    getDbOcmCovariateData(
      connection = NULL,
      cdmDatabaseSchema = "cdm",
      covariateSettings = settings,
      aggregated = TRUE
    ),
    "Aggregated CovariateData is not yet supported"
  )
})

# ===========================================================================
# Integration test — full pipeline with Eunomia
# ===========================================================================

test_that("getDbOcmCovariateData produces valid CovariateData with Eunomia", {
  skip_if_not_installed("Andromeda")
  skip_if_not_installed("Eunomia")

  connectionDetails <- Eunomia::getEunomiaConnectionDetails()
  conn <- DatabaseConnector::connect(connectionDetails)
  on.exit(DatabaseConnector::disconnect(conn), add = TRUE)

  # Create Eunomia cohorts
  Eunomia::createCohorts(
    connectionDetails = connectionDetails,
    cdmDatabaseSchema = "main",
    cohortDatabaseSchema = "main",
    cohortTable = "cohort"
  )

  # The helper's defaults are the configuration wanted here: condition
  # occurrence only, over a single [-365, -1] window. Cohort and concept-set
  # features are deliberately left at createOcmCovariateSettings() defaults.
  params <- make_plan_for_cd()
  settings <- createOcmCovariateSettings(
    analysisWindows = params$analysisWindows,
    useBaseFeatures = params$useBaseFeatures
  )

  covData <- getDbOcmCovariateData(
    connection = conn,
    cdmDatabaseSchema = "main",
    cohortTable = "main.cohort",
    cohortIds = c(1L),
    rowIdField = "subject_id",
    covariateSettings = settings,
    aggregated = FALSE
  )
  # Release the Andromeda object even if a later statement errors.
  on.exit(Andromeda::close(covData), add = TRUE)

  expect_true(is_covariate_data(covData))

  covDf <- as.data.frame(covData$covariates)
  expect_gt(nrow(covDf), 0L)
  expect_true(
    all(c("rowId", "covariateId", "covariateValue") %in% names(covDf))
  )
  expect_true(all(covDf$covariateValue == 1))

  refDf <- as.data.frame(covData$covariateRef)
  expect_gt(nrow(refDf), 0L)

  analysisDf <- as.data.frame(covData$analysisRef)
  expect_gt(nrow(analysisDf), 0L)
})