R Under development (unstable) (2024-02-05 r85863 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(pcalg) > > cat("doExtras:", (doExtras <- pcalg:::doExtras()), "\n") doExtras: FALSE > > ################################################## > ## Simple 2 Variables x1 -> x2 > ################################################## > set.seed(123) > > ## only 2 Variables > b0 <- 0 > b1 <- 1 > n <- 1000 > x1 <- rbinom(n, 1, p = 1/2) ## == sample(c(0,1), n, replace=TRUE) > x2 <- rbinom(n, 1, p <- plogis(b0 + b1* x1)) > > res1 <- fisher.test(x1,x2)$p.value # 6.14 e-21 > res2 <- gSquareBin(1,2,NULL, cbind(x1,x2)) # 3.74 e-21 > > (eq1 <- all.equal(c(res1, res2), + c(6.142e-21, 3.7401e-21), tol = 1e-5)) [1] TRUE > ok1 <- isTRUE(eq1) > > ################################################## > ## chain of 3 variables: x1 -> x2 -> x3 > ################################################## > b0 <- 0 > b1 <- 1 > b2 <- 1 > n <- 10000 > > set.seed(12) > x1 <- rbinom(n, 1, p=1/2) > x2 <- rbinom(n, 1, p2 <- plogis(b0 + b1* x1)) > x3 <- rbinom(n, 1, p3 <- plogis(b0 + b2* x2)) > ## > dat <- cbind(x1,x2,x3) > ## > system.time( + pv.2 <- c(gSquareBin(3,1, 2, dat), + gSquareBin(1,3, 2, dat), + gSquareBin(1,2, 3, dat), + gSquareBin(3,2, 1, dat)) + ) user system elapsed 0.01 0.00 0.02 > > (eq2 <- c(all.equal(pv.2, c(rep(0.89393365, 2), 0, 0), tol=1e-8), + all.equal(pv.2[1], pv.2[2], tol=1e-6))) [1] TRUE TRUE > > ok2 <- is.logical(eq2) && all(eq2) > > ok3 <- ok4 <- ok5 <- ok6 <- TRUE > if (doExtras) { + suppressWarnings(RNGversion("3.5.0")) + ################################################## + ## collider: x1 -> x3 <- x2 + ################################################## + b0 <- 0 + b1 <- 1.3 + b2 <- 1.7 + n <- 10000 + set.seed(13) + x1 <- rbinom(n, 1, p=1/2) + x2 <- rbinom(n, 1, p=1/2) + x3 <- rbinom(n, 1, p3 <- plogis(b0 + b1*x1 + b2*x2)) + ## + dat <- cbind(x1,x2,x3) + ## + system.time( + pv3 <- c(gSquareBin(1,2, 3, dat), + gSquareBin(1,2,NULL,dat), + gSquareBin(2,1, 3, dat), + gSquareBin(1,3, 2, dat)) + ) + (eq3 <- all.equal(pv3, c(0, 0.9054922, 0,0), tol=1e-7)) + ok3 <- isTRUE(eq3) + + + ################################################## + ## add noise variables to collider model + ################################################## + x4 <- sample(c(0,1),n, replace=TRUE) + x5 <- sample(c(0,1),n, replace=TRUE) + x6 <- sample(c(0,1),n, replace=TRUE) + + dat <- cbind(x1,x2,x3,x4,x5,x6) + + system.time( + pv4 <- c(gSquareBin(1,2, c(3,4,5,6), dat), + gSquareBin(1,3, c(2,4,5,6), dat), + gSquareBin(2,3, c(1,4,5,6), dat), + gSquareBin(2,4, c(1,3,5,6), dat), + gSquareBin(4,2, c(1,3,5,6), dat), # same + gSquareBin(3,4, c(1,2,5,6), dat), + ## + gSquareBin(4,2, c(3,5,6), dat)) + ) + + (eq4 <- c(all.equal(pv4, c(0, 0, 0, 0.0269101, 0.0269101, 0.7260011, 0.1121826), + tol = 2e-7), + all.equal(pv4[4], pv4[5], tol=1e-6))) + + ok4 <- is.logical(eq4) && all(eq4) + + + ################################################## + ## rectangle model + ################################################## + b0 <- 0 + b1 <- 1 + b2 <- 1 + b3 <- 0.5 + b4 <- 0.5 + n <- 10000 + set.seed(31) + x1 <- rbinom(n, size=1, prob = 1/2)# = sample(c(0,1), n, replace=TRUE) + x2 <- rbinom(n, size=1, prob = (p2 <- plogis(b0 + b1* x1))) + x3 <- rbinom(n, size=1, prob = (p3 <- plogis(b0 + b2* x1))) + x4 <- rbinom(n, size=1, prob = (p4 <- plogis(b0 + b3* x2 + b4* x3))) + ## + dat <- cbind(x1,x2,x3,x4) + + system.time( + pv5 <- c(gSquareBin(1,4,c(2,3),dat), + gSquareBin(1,4, 2, dat), + gSquareBin(1,4, 3, dat), + gSquareBin(1,4,NULL, dat), + gSquareBin(2,3, 4, dat), + gSquareBin(2,3, 1, dat), + gSquareBin(2,3,c(1,4),dat)) + ) + + (eq5 <- all.equal(pv5, c(0.38484, 0.13601, 0.10974, 0, 0.00023, 0.56828, 0.43436), 4e-4)) + ok5 <- isTRUE(eq5) + + ################################################## + ## rectangle model -- extended + ################################################## + x5 <- rbinom(n, size=1, prob = (p5 <- plogis(b0 + x2))) + x6 <- rbinom(n, size=1, prob = (p6 <- plogis(b0 + x3/2 - x4))) + x7 <- rbinom(n, size=1, prob = (p7 <- plogis(b0 + x5 - x6))) + x8 <- rbinom(n, size=1, prob = (p8 <- plogis(b0 - x3 + x6/2))) + x9 <- rbinom(n, size=1, prob = (p9 <- plogis(b0 + x2 - x7))) + ## + dat <- cbind(x1,x2,x3,x4,x5,x6,x7,x8,x9) + + ## do many tests: + p <- ncol(dat) # = 9 + ## all (x,y) pairs: + xy <- combn(p,2) + "%w/o%" <- function(x, y) x[!x %in% y] #-- x without y + ip <- 1:p + str(rest <- apply(xy, 2, function(e) ip %w/o% e)) # 7 x 36 + set.seed(101) + in2 <- seq_len(ncol(xy)) + args <- c(lapply(in2, function(i) list(x=xy[1,i], y=xy[2,i], S=rest[,i])), + lapply(in2, function(i) list(x=xy[1,i], y=xy[2,i], S=sample(rest[,i], size= p-2-1))), + lapply(in2, function(i) list(x=xy[1,i], y=xy[2,i], S=sample(rest[,i], size= sample(0:(p-2), 1)))), + lapply(in2, function(i) list(x=xy[1,i], y=xy[2,i], S=sample(rest[,i], size= sample(2:(p-2), 1))))) + + ia <- TRUE + + system.time( + pv6 <- vapply(args, function(L) do.call(gSquareBin, c(L, list(dm=dat))), 1) + )# if(doExtras) about one minute + + if(FALSE) + dput(signif(zapsmall(pv6), 4)) + pv6.ok <- c(0, 0, 0.6427, 0.4745, 0.9698, 0.4586, 0.5739, 0.6701, 0.1889, + 0, 0, 0.6476, 0.00601, 0.3586, 0, 0, 0.08916, 0, 0.7642, 0, 0.04021, + 0.4694, 0, 0.8799, 0.3966, 0.358, 0.01514, 0, 0.3615, 0.3244, + 0, 0, 0.6647, 0.6839, 0, 0.4136, 0, 0, 0.7023, 0.303, 0.9189, + 0.5501, 0.7896, 0.8239, 0.04095, 0, 0, 0.5531, 0.6374, 0.755, + 0, 0, 0.629, 0, 0.8377, 0, 0.5749, 0.6346, 0, 0.9513, 0.3138, + 0.6293, 0.000153, 0, 0.7855, 0.1503, 0, 0, 0.2033, 0.9071, 0, + 0.2587, 0, 0, 1.08e-05, 0.0002067, 0.03795, 0.5501, 3.5e-06, + 0.001827, 0.5683, 0, 0, 0.005468, 0.4085, 0.1476, 0, 0, 0.6822, + 0, 0.7642, 0, 0.7184, 0.4596, 0, 0.8799, 0.02051, 0.03413, 0.01689, + 0, 0.2786, 0.07084, 0, 0, 5.17e-05, 0.4915, 0, 0.5029, 0, 0, + 0.1891, 0.09992, 0.9698, 0.3937, 5.94e-05, 0.5473, 0.3894, 0, + 0, 0.2236, 2.3e-06, 0.784, 0, 0, 0.3504, 0, 0.7642, 0, 0.3572, + 0.4259, 0, 0.8139, 0.3326, 0.01518, 0.01514, 0, 0.3541, 0.021, + 0, 0, 0.6647, 0.6167, 0, 0.4136) + + (eq6 <- all.equal(pv6, pv6.ok[ia], tol= 1e-4)) + ok6 <- isTRUE(eq6) + + } > if (!all(ok1,ok2,ok3,ok4,ok5,ok6)) + stop("Test gSquareBin wrong: Some dependence was not estimated correctly!") > > proc.time() user system elapsed 1.12 0.14 1.25