# Tests for summariseMissingData() and tableMissingData().

# Runs summariseMissingData() over the Eunomia tables, checks that multi-table
# calls equal the row-bound single-table results, and exercises the `col`,
# `sex`, `ageGroup` and `sample` arguments.
test_that("summariseMissingData() works", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()

  # Check all tables work ----
  expect_true(
    inherits(summariseMissingData(cdm, "drug_exposure"), "summarised_result")
  )
  expect_no_error(y <- summariseMissingData(cdm, "observation_period"))
  checkResultType(y, "summarise_missing_data")
  expect_no_error(x <- summariseMissingData(cdm, "visit_occurrence"))
  expect_no_error(summariseMissingData(cdm, "condition_occurrence"))
  expect_no_error(summariseMissingData(cdm, "drug_exposure"))
  expect_no_error(
    summariseMissingData(cdm, "procedure_occurrence", interval = "years")
  )
  expect_warning(summariseMissingData(cdm, "device_exposure"))
  expect_no_error(z <- summariseMissingData(cdm, "measurement"))
  expect_no_error(s <- summariseMissingData(cdm, "observation"))
  expect_warning(de <- summariseMissingData(cdm, "death"))
  checkResultType(de, "summarise_missing_data")
  expect_warning(
    p <- summariseMissingData(cdm, "person", ageGroup = list(c(0, 50)))
  )
  expect_true(omopgenerics::settings(p)$strata == "")

  # A multi-table call must equal the row-bound single-table results.
  expect_no_error(
    combined <- summariseMissingData(
      cdm, c("observation_period", "visit_occurrence", "measurement")
    )
  )
  expect_equal(combined, dplyr::bind_rows(y, x, z))

  # Passing every column explicitly must match the default column selection.
  expect_equal(
    summariseMissingData(cdm, "observation"),
    summariseMissingData(
      cdm, "observation",
      col = colnames(cdm[["observation"]])
    )
  )

  # `col` values are applied per table when several tables are requested.
  x <- summariseMissingData(cdm, "procedure_occurrence", col = "procedure_date")
  expect_equal(
    summariseMissingData(
      cdm, c("procedure_occurrence", "observation"),
      col = "procedure_date"
    ),
    dplyr::bind_rows(x, s)
  )
  y <- summariseMissingData(cdm, "observation", col = "observation_date")
  expect_equal(
    summariseMissingData(
      cdm, c("procedure_occurrence", "observation"),
      col = c("procedure_date", "observation_date")
    ),
    dplyr::bind_rows(x, y)
  )

  # Check inputs ----
  # All estimates reported for person_id are expected to sum to zero.
  personIdTotal <- summariseMissingData(
    cdm, "procedure_occurrence",
    col = "person_id"
  ) |>
    dplyr::select(estimate_value) |>
    dplyr::mutate(estimate_value = as.numeric(estimate_value)) |>
    dplyr::summarise(sum = sum(estimate_value)) |>
    dplyr::pull()
  expect_true(personIdTotal == 0)

  # Sex plus two age groups is expected to give 9 distinct strata levels.
  nStrataSexAge <- summariseMissingData(
    cdm, "procedure_occurrence",
    col = "person_id",
    sex = TRUE,
    ageGroup = list(c(0, 50), c(51, Inf))
  ) |>
    dplyr::distinct(.data$strata_level) |>
    dplyr::tally() |>
    dplyr::pull()
  expect_true(nStrataSexAge == 9)

  # A single age group is expected to give 3 distinct strata levels.
  nStrataAge <- summariseMissingData(
    cdm, "procedure_occurrence",
    col = "person_id",
    ageGroup = list(c(0, 50))
  ) |>
    dplyr::distinct(.data$strata_level) |>
    dplyr::tally() |>
    dplyr::pull()
  expect_true(nStrataAge == 3)

  # Blank out procedure_concept_id for every row and persist the change.
  cdm$procedure_occurrence <- cdm$procedure_occurrence |>
    dplyr::mutate(procedure_concept_id = NA_integer_) |>
    dplyr::compute(name = "procedure_occurrence", temporary = FALSE)
  expect_warning(
    summariseMissingData(
      cdm, "procedure_occurrence",
      col = "procedure_concept_id",
      ageGroup = list(c(0, 50))
    )
  )
  expect_no_error(
    summariseMissingData(
      cdm, "procedure_occurrence",
      col = "procedure_concept_id",
      ageGroup = list(c(0, 50)),
      sample = 100
    )
  )

  # Release the mock database, as the other tests in this file do.
  PatientProfiles::mockDisconnect(cdm = cdm)
})

# Exercises the `dateRange` argument: valid ranges, a range ending after the
# data, a reversed range (error) and a range with no records (warning).
test_that("dateRange argument works", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()

  expect_no_error(
    summariseMissingData(
      cdm, "condition_occurrence",
      dateRange = as.Date(c("2012-01-01", "2018-01-01"))
    )
  )
  expect_message(
    x <- summariseMissingData(
      cdm, "drug_exposure",
      dateRange = as.Date(c("2012-01-01", "2025-01-01"))
    )
  )

  observationRange <- cdm$observation_period |>
    dplyr::summarise(
      minobs = min(.data$observation_period_start_date, na.rm = TRUE),
      maxobs = max(.data$observation_period_end_date, na.rm = TRUE)
    )
  # NOTE(review): c() of a string and a Date yields a character vector in
  # which the Date appears as its numeric value, so as.Date() probably parses
  # the end date as NA here -- confirm this is intended. Left unchanged
  # because the assertions below may depend on the current behaviour.
  expect_no_error(
    y <- summariseMissingData(
      cdm, "drug_exposure",
      dateRange = as.Date(
        c("2012-01-01", observationRange |> dplyr::pull("maxobs"))
      )
    )
  )
  expect_equal(x, y, ignore_attr = TRUE)
  expect_false(settings(x)$study_period_end == settings(y)$study_period_end)

  # A start date after the end date must be rejected.
  expect_error(
    summariseMissingData(
      cdm, "drug_exposure",
      dateRange = as.Date(c("2015-01-01", "2014-01-01"))
    )
  )

  # This range is expected to warn and return an empty summarised result.
  expect_warning(
    z <- summariseMissingData(
      cdm, "drug_exposure",
      dateRange = as.Date(c("2020-01-01", "2021-01-01"))
    )
  )
  expect_equal(z, omopgenerics::emptySummarisedResult(), ignore_attr = TRUE)

  # An open-ended range (NA end date) must give the same result as `y`.
  expect_equal(
    summariseMissingData(
      cdm, "drug_exposure",
      dateRange = as.Date(c("2012-01-01", NA))
    ),
    y,
    ignore_attr = TRUE
  )
  checkResultType(z, "summarise_missing_data")
  expect_equal(colnames(settings(z)), colnames(settings(x)))

  PatientProfiles::mockDisconnect(cdm = cdm)
})

# Checks that tableMissingData() returns a gt table for single-table,
# multi-table and warning-producing ("death") inputs.
test_that("tableMissingData() works", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()

  # Check that works ----
  expect_no_error(
    x <- tableMissingData(summariseMissingData(cdm, "condition_occurrence"))
  )
  expect_true(inherits(x, "gt_tbl"))
  expect_no_error(
    y <- tableMissingData(
      summariseMissingData(cdm, c("observation_period", "measurement"))
    )
  )
  expect_true(inherits(y, "gt_tbl"))

  expect_warning(deathResult <- summariseMissingData(cdm, "death"))
  # Capture the table inside expect_warning() and assert its class separately;
  # wrapping inherits() in expect_warning() never checks the class.
  expect_warning(deathTable <- tableMissingData(deathResult))
  expect_true(inherits(deathTable, "gt_tbl"))

  PatientProfiles::mockDisconnect(cdm = cdm)
})

# Checks that `col = NULL` on a hand-built cdm emits a message and no error.
test_that("col not present in table", {
  skip_on_cran()
  # Build a minimal mock cdm (person + observation_period) ----
  cdm <- omopgenerics::cdmFromTables(
    tables = list(
      person = dplyr::tibble(
        person_id = as.integer(1:4),
        gender_concept_id = c(8507L, 8532L, 8532L, 8507L),
        year_of_birth = 2010L,
        month_of_birth = 1L,
        day_of_birth = 1L,
        race_concept_id = 0L,
        ethnicity_concept_id = 0L
      ),
      observation_period = dplyr::tibble(
        observation_period_id = as.integer(1:8),
        person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(),
        observation_period_start_date = as.Date(c(
          "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10",
          "2020-03-10", "2020-03-01", "2020-04-10", "2020-03-10"
        )),
        observation_period_end_date = as.Date(c(
          "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31",
          "2020-03-27", "2020-03-09", "2020-05-08", "2020-12-10"
        )),
        period_type_concept_id = 0L
      )
    ),
    cdmName = "mock data"
  )
  cdm <- CDMConnector::copyCdmTo(
    con = connection(),
    cdm = cdm,
    schema = schema()
  )

  expect_no_error(
    expect_message(summariseMissingData(cdm, "person", col = NULL))
  )

  PatientProfiles::mockDisconnect(cdm = cdm)
})

# Checks that summariseMissingData() leaves no extra source tables behind.
test_that("no tables created", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()

  startNames <- CDMConnector::listSourceTables(cdm)

  results <- summariseMissingData(
    cdm = cdm,
    omopTableName = c("drug_exposure", "condition_occurrence"),
    interval = "years",
    sex = TRUE,
    ageGroup = list(c(0, 17), c(18, 65), c(66, 100)),
    dateRange = as.Date(c("2012-01-01", "2018-01-01")),
    sample = 100
  )

  endNames <- CDMConnector::listSourceTables(cdm)

  expect_true(length(setdiff(endNames, startNames)) == 0)

  PatientProfiles::mockDisconnect(cdm = cdm)
})

# Checks that NA counts are consistent across intervals: months roll up to
# quarters and years, quarters roll up to years, and years roll up to overall.
test_that("interval argument works", {
  skip_on_cran()
  # Load mock database ----
  cdm <- cdmEunomia()

  expect_no_error(
    yearly <- summariseMissingData(
      cdm = cdm, omopTableName = "drug_exposure", interval = "years"
    )
  )
  expect_no_error(
    overall <- summariseMissingData(
      omopTableName = "drug_exposure", cdm = cdm, interval = "overall"
    )
  )
  expect_no_error(
    quarterly <- summariseMissingData(
      omopTableName = "drug_exposure", cdm = cdm, interval = "quarters"
    )
  )
  expect_no_error(
    monthly <- summariseMissingData(
      omopTableName = "drug_exposure", cdm = cdm, interval = "months"
    )
  )

  # Drops the "overall" rows, derives the calendar year from the start date at
  # the beginning of `time_interval`, and pivots the estimates into columns.
  withStartYear <- function(result) {
    result |>
      omopgenerics::splitAdditional() |>
      dplyr::filter(time_interval != "overall") |>
      dplyr::mutate(
        # Extract the start date
        start_date = clock::date_parse(
          stringr::str_extract(time_interval, "^\\d{4}-\\d{2}-\\d{2}")
        ),
        # Convert start_date to a year-month-day object and extract the year
        year = clock::get_year(clock::as_year_month_day(start_date))
      ) |>
      omopgenerics::pivotEstimates()
  }

  # Sums `na_count` per year and variable.
  sumByYear <- function(result) {
    result |>
      dplyr::group_by(year, variable_name) |>
      dplyr::summarise(na_count = sum(na_count), .groups = "drop") |>
      dplyr::arrange(year)
  }

  # Months aggregated to quarters must match the quarterly result.
  m_quarters <- monthly |>
    omopgenerics::splitAdditional() |>
    omopgenerics::pivotEstimates() |>
    dplyr::filter(time_interval != "overall") |>
    dplyr::mutate(
      start_date = as.Date(sub(" to .*", "", time_interval)),
      quarter_start = lubridate::quarter(start_date, type = "date_first"),
      quarter_end = lubridate::quarter(start_date, type = "date_last"),
      quarter = paste(quarter_start, "to", quarter_end)
    ) |>
    dplyr::select(
      !c("time_interval", "start_date", "quarter_start", "quarter_end")
    ) |>
    dplyr::group_by(quarter, variable_name) |>
    dplyr::summarise(na_count = sum(na_count), .groups = "drop") |>
    dplyr::rename("time_interval" = quarter) |>
    dplyr::arrange(time_interval)

  q_quarters <- quarterly |>
    omopgenerics::splitAdditional() |>
    omopgenerics::pivotEstimates() |>
    dplyr::filter(time_interval != "overall") |>
    dplyr::select(time_interval, variable_name, na_count) |>
    dplyr::arrange(time_interval)

  expect_equal(m_quarters |> sortTibble(), q_quarters |> sortTibble())

  # Months aggregated to years must match the yearly result.
  m_year <- monthly |>
    withStartYear() |>
    sumByYear()

  y_year <- yearly |>
    withStartYear() |>
    dplyr::select(year, variable_name, na_count) |>
    dplyr::arrange(year)

  expect_equal(m_year |> sortTibble(), y_year |> sortTibble())

  # Years aggregated over all time must match the overall result.
  overall <- overall |>
    omopgenerics::splitAdditional() |>
    omopgenerics::pivotEstimates() |>
    dplyr::select(variable_name, na_count)

  expect_equal(
    y_year |>
      dplyr::group_by(variable_name) |>
      dplyr::summarise(na_count = sum(na_count), .groups = "drop") |>
      sortTibble(),
    overall |> sortTibble()
  )

  # Quarters aggregated to years must match the yearly result.
  q_year <- quarterly |>
    withStartYear() |>
    sumByYear()

  expect_equal(q_year |> sortTibble(), y_year |> sortTibble())

  PatientProfiles::mockDisconnect(cdm = cdm)
})