test_that("do_sampling works as expected when case exist", { data <- as.data.table(TrialEmulation::vignette_switch_data)[trial_period == 272 & followup_time == 5] set.seed(100) sample1 <- do_sampling(data, p_control = 0.1) expect_snapshot_value(as.data.frame(sample1), style = "json2") sample01 <- do_sampling(data, p_control = 0.01) expect_snapshot_value(as.data.frame(sample01), style = "json2") }) test_that("do_sampling works as expected when no cases exist", { data <- as.data.table(TrialEmulation::vignette_switch_data)[trial_period == 5 & followup_time == 5] set.seed(1100) result_rows <- do_sampling(data, p_control = 0.1) expect_snapshot_value(as.data.frame(result_rows), style = "json2") }) test_that("sample_from_period works as expected", { data <- as.data.table(TrialEmulation::vignette_switch_data)[trial_period == 272] set.seed(651) result <- sample_from_period(period_data = data, p_control = 0.01, use_subset = FALSE) expect_data_frame(result, nrow = 87, ncol = 15) expect_identical(unique(result$sample_id), 1L) result_cases <- result[outcome == 1, c("id", "trial_period", "followup_time")] expected_cases <- data[outcome == 1, c("id", "trial_period", "followup_time")] expect_equal( result_cases[order(result_cases$followup_time, result_cases$id)], expected_cases[order(expected_cases$followup_time, expected_cases$id)] ) expect_data_frame(result[followup_time == 2], nrows = 1) expect_snapshot_value( as.data.frame(result[outcome == 0, c("id", "trial_period", "followup_time")]), style = "json2" ) }) test_that("sample_from_period works as expected with multiple proportions", { data <- as.data.table(TrialEmulation::vignette_switch_data)[trial_period == 272] set.seed(209) result <- sample_from_period( period_data = data, p_control = c(0.01, 0.05), use_subset = FALSE ) expect_data_frame(result, nrow = 338, ncol = 15) expect_identical(unique(result$sample_id), c(1L, 2L)) expect_data_frame(result[sample_id == 1], nrow = 87, ncol = 15) expect_data_frame(result[sample_id == 2], nrow = 251, ncol = 15) expect_snapshot_value(as.data.frame(result[1:30, ]), style = "json2") result_cases_1 <- result[outcome == 1 & sample_id == 1, c("id", "trial_period", "followup_time")] expected_cases <- data[outcome == 1, c("id", "trial_period", "followup_time")] expect_equal( result_cases_1[order(result_cases_1$followup_time, result_cases_1$id)], expected_cases[order(expected_cases$followup_time, expected_cases$id)] ) result_cases_2 <- result[outcome == 1 & sample_id == 2, c("id", "trial_period", "followup_time")] expect_equal( result_cases_2[order(result_cases_2$followup_time, result_cases_2$id)], expected_cases[order(expected_cases$followup_time, expected_cases$id)] ) }) test_that("case_control_sampling_trials works with separate_files = TRUE", { set.seed(1001) save_dir <- withr::local_tempdir(pattern = "sampling", tempdir(TRUE)) dat <- trial_example[trial_example$id < 200, ] expanded_data <- data_preparation( data = dat, data_dir = save_dir, outcome_cov = c("nvarA", "nvarB", "nvarC"), estimand_type = "ITT", first_period = 260, last_period = 280, separate_files = TRUE, quiet = TRUE ) samples <- case_control_sampling_trials(expanded_data, p_control = 0.01) expect_data_frame(samples, nrow = 714, ncol = 11) expect_snapshot_value(as.data.frame(samples[1:30, ]), style = "json2") }) test_that("case_control_sampling_trials works with separate_files = FALSE", { set.seed(1001) dat <- trial_example[trial_example$id < 200, ] expanded_data <- data_preparation( data = dat, outcome_cov = c("nvarA", "nvarB", "nvarC"), estimand_type = "ITT", first_period = 260, last_period = 280, separate_files = FALSE, quiet = TRUE ) samples <- case_control_sampling_trials(expanded_data, p_control = 0.01) expect_data_frame(samples, nrow = 714, ncol = 11) expect_snapshot_value(as.data.frame(samples[1:30, ]), style = "json2") }) test_that("case_control_sampling_trials works with separate_files = TRUE is reproducible", { save_dir <- withr::local_tempdir(pattern = "sampling", tempdir(TRUE)) dat <- trial_example[trial_example$id < 200, ] expanded_data <- data_preparation( data = dat, data_dir = save_dir, outcome_cov = c("nvarA", "nvarB", "nvarC"), estimand_type = "ITT", first_period = 260, last_period = 280, separate_files = TRUE, quiet = TRUE ) set.seed(2090) samples_1 <- case_control_sampling_trials(expanded_data, p_control = 0.01) set.seed(2090) samples_2 <- case_control_sampling_trials(expanded_data, p_control = 0.01) expect_identical(samples_1, samples_2) }) test_that("case_control_sampling_trials works with subsetting", { save_dir <- withr::local_tempdir(pattern = "sampling", tempdir(TRUE)) data("te_data_ex") set.seed(2090) samples <- case_control_sampling_trials( te_data_ex, p_control = 0.01, subset_condition = catvarA == 2 ) expect_true(all(samples$catvarA == 2)) }) test_that("case_control_sampling_trials gives errors for arguments", { expect_error( case_control_sampling_trials( trial_example, p_control = 0.01, subset_condition = nvarC > 75 ), "Unknown data_prep object" ) expect_error( case_control_sampling_trials( trial_example, p_control = "a", subset_condition = nvarC > 75 ), "Must be of type 'numeric'" ) }) test_that("case_control_sampling_trials works with multiple p_control", { data("te_data_ex") set.seed(2090) samples <- case_control_sampling_trials( te_data_ex, p_control = c(0.01, 0.1) ) expect_list(samples, types = "data.frame", len = 2) expect_data_frame(samples[[1]], nrow = 696, ncol = 10) expect_data_frame(samples[[2]], nrow = 5259, ncol = 10) }) test_that("case_control_sampling_trials works with sort = TRUE", { skip_on_cran() save_dir <- withr::local_tempdir(pattern = "sampling", tempdir(TRUE)) dat <- trial_example[trial_example$id < 200, ] expanded_data_t <- data_preparation( data = dat, data_dir = save_dir, outcome_cov = c("nvarA", "nvarB", "nvarC"), estimand_type = "ITT", first_period = 260, last_period = 280, separate_files = TRUE, quiet = TRUE ) expanded_data_f <- data_preparation( data = dat, data_dir = save_dir, outcome_cov = c("nvarA", "nvarB", "nvarC"), estimand_type = "ITT", first_period = 260, last_period = 280, separate_files = FALSE, quiet = TRUE ) set.seed(9999) samples_t <- case_control_sampling_trials(expanded_data_t, p_control = 0.01, sort = TRUE) set.seed(9999) samples_f <- case_control_sampling_trials(expanded_data_f, p_control = 0.01, sort = TRUE) expect_identical(samples_f, samples_t) }) test_that("sample_controls works with trial_sequence objects containing te_datastore_datatable objects", { trial_itt_dir <- file.path(tempdir(), "trial_itt") dir.create(trial_itt_dir) trial_itt <- trial_sequence(estimand = "ITT") |> set_data( data = data_censored, id = "id", period = "period", treatment = "treatment", outcome = "outcome", eligible = "eligible" ) |> set_censor_weight_model( censor_event = "censored", numerator = ~ x1 + x2 + x3, denominator = ~x2, pool_models = "numerator", model_fitter = stats_glm_logit(save_path = file.path(trial_itt_dir, "switch_models")) ) |> calculate_weights() |> set_outcome_model(adjustment_terms = ~ x1 + x2) trial_itt_datatable <- set_expansion_options( trial_itt, output = save_to_datatable(), chunk_size = 500 ) |> expand_trials() # sample_controls works without additional arguments sc_01 <- sample_controls(trial_itt_datatable, p_control = 0.01, seed = 1221) expect_equal( sort(sc_01@outcome_data@data$id), c( 10, 14, 14, 15, 17, 21, 27, 29, 32, 38, 38, 44, 44, 49, 49, 54, 54, 54, 59, 61, 68, 71, 71, 71, 74, 74, 89, 98, 98, 99 ) ) random_01 <- sample_controls(trial_itt_datatable, p_control = 0.01) # seed gets reset sc_01_1 <- sample_controls(trial_itt_datatable, p_control = 0.01, seed = 1221) random_02 <- sample_controls(trial_itt_datatable, p_control = 0.01) expect_false(identical(sort(random_01@outcome_data@data$id), sort(random_02@outcome_data@data$id))) # sample_controls works with p_control sc_02 <- sample_controls(trial_itt_datatable, p_control = 0.5, seed = 5678) expect_equal(sc_02@outcome_data@n_rows, 765) # sample_controls works with p_control = 0 sc_03 <- sample_controls(trial_itt_datatable, p_control = 0) expect_equal(sc_03@outcome_data@n_rows, 14) # cases are kept expect_equal(sum(sc_01@outcome_data@data$outcome), 14) expect_equal(sum(sc_02@outcome_data@data$outcome), 14) expect_equal(sum(sc_03@outcome_data@data$outcome), 14) # all columns are kept and sample_weight column is added expect_equal( colnames(sc_01@outcome_data@data), c(colnames(trial_itt_datatable@expansion@datastore@data), "sample_weight") ) # sample_controls subsets data correctly sc_04 <- sample_controls( trial_itt_datatable, period = 1:10, subset_condition = "followup_time <= 20 & treatment == 1", p_control = 0.2, seed = 2332 ) expect_equal( sort(sc_04@outcome_data@data$id), c( 14, 16, 20, 27, 27, 33, 33, 33, 33, 34, 34, 34, 44, 44, 44, 44, 44, 44, 44, 44, 47, 54, 54, 54, 54, 59, 59, 59, 59, 59, 59, 59, 60, 60, 60, 65, 71, 73, 74, 74, 74, 83, 95, 95, 95, 95, 95, 95, 95, 96 ) ) # sample_controls returns the correct classes expect_class(sc_04, "trial_sequence_ITT") expect_class(sc_04@outcome_data, "te_outcome_data") expect_class(sc_04@outcome_data@data, "data.table") unlink(trial_itt_dir, recursive = TRUE) })