context("oc2S: 2-Sample Operating Characteristics")

## Test the analytical oc2S operating characteristics against a brute
## force Monte Carlo simulation and against published reference results.
set.seed(12354)

prior1 <- mixnorm(c(0.3, -0.2, 2), c(0.7, 0, 50), sigma=1)
prior2 <- mixnorm(c(1.0, 0, 50), sigma=1)

N1 <- 10
N2 <- 20

## type I error fairly large to 20% to make it easier to test (less
## simulations needed for accurate results)
pcrit <- 0.80
qcrit <- 0

## theta2 set such that we have about 75% power under this truth
theta1 <- 0
theta2 <- 0.5

Nsim <- 1e4

## TRUE when running on CRAN (NOT_CRAN is set to "true" by devtools/testthat
## in local and CI runs); used to skip expensive design construction on CRAN.
run_on_cran <- function() {
    !identical(Sys.getenv("NOT_CRAN"), "true")
}

## Monte Carlo reference implementation of the 2-sample OC: simulates
## Nsim trials with true means theta1/theta2, updates the priors with
## the observed sample means and evaluates the decision criterion
## P(theta_1 - theta_2 <= qcrit) > pcrit on the analytic difference
## distribution of the two posteriors. Returns the decision frequency.
oc2S_normal_MC <- function(prior1, prior2, N1, N2, theta1, theta2, pcrit=0.975, qcrit=0) {
    mean_sd1 <- sigma(prior1) / sqrt(N1)
    mean_sd2 <- sigma(prior2) / sqrt(N2)

    ## priors expressed on the scale of the sample means
    mean_prior1 <- prior1
    sigma(mean_prior1) <- mean_sd1
    mean_prior2 <- prior2
    sigma(mean_prior2) <- mean_sd2

    mean_samp1 <- rnorm(Nsim, theta1, mean_sd1)
    mean_samp2 <- rnorm(Nsim, theta2, mean_sd2)

    dec <- rep(NA, Nsim)
    for(i in seq_len(Nsim)) {
        post1 <- postmix(mean_prior1, m=mean_samp1[i], se=mean_sd1)
        post2 <- postmix(mean_prior2, m=mean_samp2[i], se=mean_sd2)
        dec[i] <- as.numeric(pmix(RBesT:::mixnormdiff(post1, post2), qcrit) > pcrit)
    }
    mean(dec)
}
Voc2S_normal_MC <- Vectorize(oc2S_normal_MC, c("theta1", "theta2"))

## first test that the analytic difference distribution for normal
## mixtures works as expected
test_that("Analytical convolution of normal mixture matches numerical integration result", {
    skip_on_cran()
    pdiff <- RBesT:::mixnormdiff(prior1, prior2)
    x <- seq(-20, 20, length=21)
    d1 <- dmix(pdiff, x)
    d2 <- dmixdiff(prior1, prior2, x)
    dres <- abs(d1 - d2)
    expect_equal(sum(dres > 1e-5), 0)
    p1 <- pmix(pdiff, x)
    p2 <- pmixdiff(prior1, prior2, x)
    pres <- 100 * abs(p1 - p2)
    expect_equal(sum(pres > 2), 0)
})

## test that the type I error is matching, i.e. is not off by more than 2%
test_that("Type I error is matching between MC and analytical computations in the normal mixture case", {
    skip_on_cran()
    x <- c(-2, 0)
    alpha <- oc2S(prior1, prior2, N1, N2, decision2S(pcrit, qcrit),
                  sigma1=sigma(prior1), sigma2=sigma(prior2))(x, x)
    alphaMC <- Voc2S_normal_MC(prior1, prior2, N1, N2, x, x, pcrit, qcrit)
    res <- 100 * abs(alpha - alphaMC)
    expect_equal(sum(res > 2), 0)
})

## test that the power is matching, i.e. is not off by more than 2%
test_that("Power is matching between MC and analytical computations in the normal mixture case", {
    skip_on_cran()
    power <- oc2S(prior1, prior2, N1, N2, decision2S(pcrit, qcrit))(theta1, theta2)
    powerMC <- oc2S_normal_MC(prior1, prior2, N1, N2, theta1, theta2, pcrit, qcrit)
    res <- 100 * abs(power - powerMC)
    expect_equal(sum(res > 2), 0)
})

## further test by cross-checking with Gsponer et. al, "A practical
## guide to Bayesian group sequential designs", Pharmaceut. Statist.
## (2014), 13 71-80, Table 1, Probability at interim
test_that("Gsponer et al. results match (normal end-point)", {
    skip_on_cran()
    ocRef <- data.frame(delta=c(0, 40, 50, 60, 70),
                        success=c(1.1, 32.2, 50.0, 67.6, 82.2),
                        futile=c(63.3, 6.8, 2.5, 0.8, 0.2))

    sigmaFixed <- 88

    priorT <- mixnorm(c(1, 0, 0.001), sigma=sigmaFixed, param="mn")
    priorP <- mixnorm(c(1, -49, 20), sigma=sigmaFixed, param="mn")

    ## the success criteria is for delta which are larger than some
    ## threshold value which is why we set lower.tail=FALSE
    successCrit <- decision2S(c(0.95, 0.5), c(0, 50), FALSE)
    ## the futility criterion acts in the opposite direction
    futilityCrit <- decision2S(c(0.90), c(40), TRUE)

    nT1 <- 20
    nP1 <- 10

    oc <- data.frame(delta=c(0, 40, 50, 60, 70))

    ## Note that due to the fact that only a single mixture component is
    ## used, the decision boundary is a linear function such that only few
    ## evaluations of the boundary are needed to estimate reliably the
    ## spline function

    ## Table 1, probability for interim for success
    oc$success <- oc2S(priorP, priorT, nP1, nT1, successCrit, Ngrid=1)(-49, -49 - oc$delta)

    ## Table 1, probability for interim for futility
    oc$futile <- oc2S(priorP, priorT, nP1, nT1, futilityCrit, Ngrid=1)(-49, -49 - oc$delta)

    ## Table 1, first three columns, page 74
    oc[-1] <- lapply(100 * oc[-1], round, 1)

    resFutility <- abs(ocRef$futile - oc$futile)
    resSuccess <- abs(ocRef$success - oc$success)

    expect_equal(sum(resFutility > 2), 0, info="futility")
    expect_equal(sum(resSuccess > 2), 0, info="success")
})

## failure when doing repeated evaluations which came up in consulting
test_that("Ensure that repeated oc2S evaluation works for normal case", {
    skip_on_cran()
    samp_sigma <- 3
    n_ia <- 38
    n_final <- 2 * n_ia
    n_ia_to_final <- n_final - n_ia
    sem_ia <- samp_sigma / sqrt(n_ia)
    theta_ctl <- 0
    delta <- 1.04
    obs_P <- 0.11
    obs_T <- 1.28
    prior <- mixnorm(c(1, 0, 0.001), sigma=samp_sigma, param="mn")
    postP_interim <- postmix(prior, m=obs_P, se=sem_ia)
    postT_interim <- postmix(prior, m=obs_T, se=sem_ia)
    successCrit <- decision2S(c(0.9), c(0), FALSE)
    interim_CP <- oc2S(postT_interim, postP_interim,
                       n_ia_to_final, n_ia_to_final,
                       successCrit, sigma1=samp_sigma, sigma2=samp_sigma)
    cpd_ia <- interim_CP(obs_T, obs_P)
    cpd_ia2 <- interim_CP(theta_ctl + delta, theta_ctl)
    expect_number(cpd_ia, lower=0, upper=1, finite=TRUE)
    expect_number(cpd_ia2, lower=0, upper=1, finite=TRUE)
    ## check that when calculating directly that the results
    ## are close enough
    interim_CPalt <- oc2S(postT_interim, postP_interim,
                          n_ia_to_final, n_ia_to_final,
                          successCrit, sigma1=samp_sigma, sigma2=samp_sigma)
    cpd_ia2alt <- interim_CPalt(theta_ctl + delta, theta_ctl)
    expect_number(abs(cpd_ia2 - cpd_ia2alt), lower=0, upper=1E-3, finite=TRUE)
})

## test against Schmidli et. al, "Robust Meta-Analytic-Predictive
## Priors", Table 2, unif and beta case
test_that("Schmidli et al. results (binary end-point)", {
    skip_on_cran()
    ocRef_inf <- expand.grid(pc=seq(0.1, 0.6, by=0.1), delta=c(0, 0.3))
    ocRef_inf$ref <- c(0, 1.6, 6.1, 13.7, 26.0, 44.4         ## beta/delta=0
                      ,81.6, 87.8, 93.4, 97.9, 99.6, 100.0   ## beta/delta=0.3
                       ) / 100
    ocRef_uni <- expand.grid(pc=seq(0.1, 0.6, by=0.1), delta=c(0, 0.3))
    ocRef_uni$ref <- c(1.8, 2.3, 2.4, 2.6, 2.8, 2.6          ## unif/delta=0
                      ,89.7, 82.1, 79.5, 79.5, 81.9, 89.8    ## unif/delta=0.3
                       ) / 100

    dec <- decision2S(0.975, 0, lower.tail=FALSE)
    N <- 40
    prior_inf <- mixbeta(c(1, 4, 16))
    prior_uni <- mixbeta(c(1, 1, 1))
    N_ctl_uni <- N - round(ess(prior_uni, method="morita"))
    N_ctl_inf <- N - round(ess(prior_inf, method="morita"))

    design_uni <- oc2S(prior_uni, prior_uni, N, N_ctl_uni, dec)
    design_inf <- oc2S(prior_uni, prior_inf, N, N_ctl_inf, dec)

    res_uni <- design_uni(ocRef_uni$pc + ocRef_uni$delta, ocRef_uni$pc)
    res_inf <- design_inf(ocRef_inf$pc + ocRef_inf$delta, ocRef_inf$pc)

    expect_true(all(abs(100 * (res_uni - ocRef_uni$ref)) < 2.5))
    expect_true(all(abs(100 * (res_inf - ocRef_inf$ref)) < 2.5))
})

## some additional, very simple type I error tests and tests for the
## discrete case of correct critical value behavior

## check that all operating characteristics are within eps of the
## reference value
test_scenario <- function(oc_res, ref) {
    resA <- oc_res - ref
    expect_true(all(abs(resA) < eps))
}

## assert that every element of test equals expected
expect_equal_each <- function(test, expected) {
    for(elem in test) {
        expect_equal(elem, expected)
    }
}

## design object, decision function, posterior function must return
## posterior after updating the prior with the given value; we assume
## that the priors are the same for sample 1 and 2
test_critical_discrete <- function(design, decision, posterior, y2) {
    lower.tail <- attr(decision, "lower.tail")
    crit <- design(y2=y2)
    post2 <- posterior(y2)
    ## the returned critical value must be the exact flip point of the
    ## decision in the discrete sample space
    if(lower.tail) {
        expect_equal(decision(posterior(crit-1), post2), 1)
        expect_equal(decision(posterior(crit  ), post2), 1)
        expect_equal(decision(posterior(crit+1), post2), 0)
    } else {
        expect_equal(decision(posterior(crit-1), post2), 0)
        expect_equal(decision(posterior(crit  ), post2), 0)
        expect_equal(decision(posterior(crit+1), post2), 1)
    }
}

## expect results to be 1% exact
eps <- 1e-2

alpha <- 0.05
dec <- decision2S(1-alpha, 0, lower.tail=TRUE)
decB <- decision2S(1-alpha, 0, lower.tail=FALSE)

## test binary case
beta_prior <- mixbeta(c(1, 1, 1))
if(!run_on_cran()) {
    design_binary <- oc2S(beta_prior, beta_prior, 100, 100, dec)
    design_binaryB <- oc2S(beta_prior, beta_prior, 100, 100, decB)
}
posterior_binary <- function(r) postmix(beta_prior, r=r, n=100)
p_test <- 1:9 / 10
test_that("Binary type I error rate", {
    skip_on_cran()
    test_scenario(design_binary(p_test, p_test), alpha)
})
test_that("Binary critical value, lower.tail=TRUE", {
    skip_on_cran()
    test_critical_discrete(design_binary, dec, posterior_binary, 30)
})
test_that("Binary critical value, lower.tail=FALSE", {
    skip_on_cran()
    test_critical_discrete(design_binaryB, decB, posterior_binary, 30)
})
test_that("Binary boundary case, lower.tail=TRUE", {
    skip_on_cran()
    expect_numeric(design_binary(1, 1), lower=0, upper=1, finite=TRUE, any.missing=FALSE)
})
test_that("Binary boundary case, lower.tail=FALSE", {
    skip_on_cran()
    expect_numeric(design_binaryB(0, 0), lower=0, upper=1, finite=TRUE, any.missing=FALSE)
})

## check case where decision never changes due to prior being too
## strong
beta_prior1 <- mixbeta(c(1, 0.9, 1000), param="mn")
beta_prior2 <- mixbeta(c(1, 0.1, 1000), param="mn")
if(!run_on_cran()) {
    design_lower <- oc2S(beta_prior1, beta_prior2, 20, 20, dec)  ## always 0
    design_upper <- oc2S(beta_prior1, beta_prior2, 20, 20, decB) ## always 1
}
test_that("Binary case, no decision change, lower.tail=TRUE, critical value", {
    skip_on_cran()
    expect_equal_each(design_lower(y2=0:20), -1)
})
test_that("Binary case, no decision change, lower.tail=FALSE, critical value", {
    skip_on_cran()
    expect_equal_each(design_upper(y2=0:20), 21)
})
test_that("Binary case, no decision change, lower.tail=TRUE, frequency=0", {
    skip_on_cran()
    expect_equal_each(design_lower(p_test, p_test), 0.0)
})
test_that("Binary case, no decision change, lower.tail=FALSE, frequency=1", {
    skip_on_cran()
    expect_equal_each(design_upper(p_test, p_test), 1.0)
})

if(!run_on_cran()) {
    design_lower_rev <- oc2S(beta_prior2, beta_prior1, 20, 20, dec)  ## always 1
    design_upper_rev <- oc2S(beta_prior2, beta_prior1, 20, 20, decB) ## always 0
}
test_that("Binary case, no decision change (reversed), lower.tail=TRUE, critical value", {
    skip_on_cran()
    expect_equal_each(design_lower_rev(y2=0:20), 20)
})
test_that("Binary case, no decision change (reversed), lower.tail=FALSE, critical value", {
    skip_on_cran()
    expect_equal_each(design_upper_rev(y2=0:20), -1)
})
test_that("Binary case, no decision change (reversed), lower.tail=TRUE, frequency=0", {
    skip_on_cran()
    expect_equal_each(design_lower_rev(p_test, p_test), 1.0)
})
test_that("Binary case, no decision change (reversed), lower.tail=FALSE, frequency=1", {
    skip_on_cran()
    expect_equal_each(design_upper_rev(p_test, p_test), 0.0)
})

test_that("Binary case, log-link", {
    skip_on_cran()
    success <- decision2S(pc=c(0.90, 0.50), qc=c(log(1), log(0.50)), lower.tail=TRUE, link="log")
    prior_pbo <- mixbeta(inf1=c(0.60, 19, 29), inf2=c(0.30, 4, 5), rob=c(0.10, 1, 1))
    prior_trt <- mixbeta(c(1, 1/3, 1/3))
    n_trt <- 50
    n_pbo <- 20
    design_suc <- oc2S(prior_trt, prior_pbo, n_trt, n_pbo, success)
    theta <- seq(0, 1, by=0.1)
    expect_numeric(design_suc(theta, theta), lower=0, upper=1, finite=TRUE, any.missing=FALSE)
})

test_that("Binary case, logit-link", {
    skip_on_cran()
    success <- decision2S(pc=c(0.90, 0.50), qc=c(log(1), log(0.50)), lower.tail=TRUE, link="logit")
    prior_pbo <- mixbeta(inf1=c(0.60, 19, 29), inf2=c(0.30, 4, 5), rob=c(0.10, 1, 1))
    prior_trt <- mixbeta(c(1, 1/3, 1/3))
    n_trt <- 50
    n_pbo <- 20
    design_suc <- oc2S(prior_trt, prior_pbo, n_trt, n_pbo, success)
    theta <- seq(0, 1, by=0.1)
    expect_numeric(design_suc(theta, theta), lower=0, upper=1, finite=TRUE, any.missing=FALSE)
})

## check approximate method
beta_prior <- mixbeta(c(1, 1, 1))
if(!run_on_cran()) {
    design_binary_eps <- oc2S(beta_prior, beta_prior, 100, 100, dec, eps=1E-3)
}
p_test <- seq(0.1, 0.9, by=0.1)
test_that("Binary type I error rate (approximate method)", {
    skip_on_cran()
    test_scenario(design_binary_eps(p_test, p_test), alpha)
})

## 22 Nov 2017: disabled test as we trigger always calculation of the
## boundaries as of now.
## test_that("Binary results cache expands", {
##     design_binary_eps <- oc2S(beta_prior, beta_prior, 100, 100, dec, eps=1E-3)
##     design_binary_eps(theta1=0.99, theta2=0.8)
##     ## in this case the cache boundaries do not cover the
##     ## critical value
##     expect_true(is.na(design_binary_eps(theta1=0.99, y2=80)))
##     ## while now they do as theta1 is set to 0.1 and 0.9
##     ## internally which triggers recalculation of the
##     ## internal boundaries
##     expect_true(!is.na(design_binary_eps(y2=80)))
## })

## test poisson case
gamma_prior <- mixgamma(c(1, 2, 2))
if(!run_on_cran()) {
    design_poisson <- oc2S(gamma_prior, gamma_prior, 100, 100, dec)
    design_poissonB <- oc2S(gamma_prior, gamma_prior, 100, 100, decB)
}
posterior_poisson <- function(m) postmix(gamma_prior, m=m/100, n=100)
lambda_test <- seq(0.5, 1.3, by=0.1)
test_that("Poisson type I error rate", {
    skip_on_cran()
    test_scenario(design_poisson(lambda_test, lambda_test), alpha)
})
test_that("Poisson critical value, lower.tail=TRUE", {
    skip_on_cran()
    test_critical_discrete(design_poisson, dec, posterior_poisson, 90)
})
test_that("Poisson critical value, lower.tail=FALSE", {
    skip_on_cran()
    test_critical_discrete(design_poissonB, decB, posterior_poisson, 90)
})

## 22 Nov 2017: disabled test as we trigger always calculation of the
## boundaries as of now.
## test_that("Poisson results cache expands", {
##     design_poisson <- oc2S(gamma_prior, gamma_prior, 100, 100, dec)
##     design_poisson(theta1=1, theta2=c(0.7,1))
##     expect_true(sum(is.na(design_poisson(y2=70:90))) == 4)
##     expect_true(sum(is.na(design_poisson(theta1=c(0.01, 1), y2=70:90))) == 0)
## })

## regression: a zero second-sample size (n2=0) must be handled when the
## second prior is a multi-component MAP mixture
test_that("Normal OC 2-sample case works for n2=0, crohn-1", {
    crohn_sigma <- 88
    map <- mixnorm(c(0.6, -50, 19), c(0.4, -50, 42), sigma=crohn_sigma)
    ## add a 20% non-informative mixture component
    map_robust <- robustify(map, weight=0.2, mean=-50, sigma=88)
    poc <- decision2S(pc=c(0.95, 0.5), qc=c(0, -50), lower.tail=TRUE)
    weak_prior <- mixnorm(c(1, -50, 1), sigma=crohn_sigma, param = 'mn')
    n_act <- 40
    ##n_pbo <- 20
    design_noprior_b <- oc2S(weak_prior, map, n_act, 0, poc,
                             sigma1=crohn_sigma, sigma2=crohn_sigma)
    expect_numeric(design_noprior_b(-20, -30), lower=0, upper=1, any.missing=FALSE)
})

## regression: same n2=0 setup with a single-component MAP prior
test_that("Normal OC 2-sample case works for n2=0, crohn-2", {
    crohn_sigma <- 88
    map <- mixnorm(c(1.0, -50, 19), sigma=crohn_sigma)
    ## add a 20% non-informative mixture component
    map_robust <- robustify(map, weight=0.2, mean=-50, sigma=88)
    poc <- decision2S(pc=c(0.95, 0.5), qc=c(0, -50), lower.tail=TRUE)
    weak_prior <- mixnorm(c(1, -50, 1), sigma=crohn_sigma, param = 'mn')
    n_act <- 40
    ##n_pbo <- 20
    design_noprior_b <- oc2S(weak_prior, map, n_act, 0, poc,
                             sigma1=crohn_sigma, sigma2=crohn_sigma)
    expect_numeric(design_noprior_b(-20, -30), lower=0, upper=1, any.missing=FALSE)
})

## regression: repeated evaluation of the same design, including inputs
## outside the initially evaluated grid, must stay well-defined
test_that("Normal OC 2-sample avoids undefined behavior, example 1", {
    skip_on_cran()
    sigma_ref <- 3.2
    ##map_ref <- mixnorm(c(0.51, -2.1, 0.39), c(0.42, -2.1, 0.995), c(0.06, -1.99, 2.32), sigma=sigma_ref)
    ## changed so that weights sum to 1
    map_ref <- mixnorm(c(0.52, -2.1, 0.39), c(0.42, -2.1, 0.995), c(0.06, -1.99, 2.32), sigma=sigma_ref)
    prior_flat <- mixnorm(c(1, 0, 100), sigma=sigma_ref)
    alpha <- 0.05
    dec <- decision2S(1-alpha, 0, lower.tail=FALSE)
    n <- 58
    k <- 2
    design_map <- oc2S(prior_flat, map_ref, n, n/k, dec)
    design_map_2 <- oc2S(prior_flat, map_ref, n, n/k, dec)
    x <- seq(-2.6, -1.6, by=0.1)
    expect_numeric(design_map(x, x), lower=0, upper=1, any.missing=FALSE)
    expect_silent(design_map(-3, -4))
    expect_numeric(design_map(-3, -4), lower=0, upper=1, any.missing=FALSE)
    expect_numeric(design_map(-3, 4), lower=0, upper=1, any.missing=FALSE)
    expect_numeric(design_map(-1.6, -1.6), lower=0, upper=1, any.missing=FALSE)
    expect_numeric(design_map_2(-3, -4), lower=0, upper=1, any.missing=FALSE)
    expect_numeric(design_map_2(-3, 4), lower=0, upper=1, any.missing=FALSE)
    expect_numeric(design_map_2(-1.6, -1.6), lower=0, upper=1, any.missing=FALSE)
    expect_numeric(design_map_2(x, x), lower=0, upper=1, any.missing=FALSE)
})