R Under development (unstable) (2024-09-06 r87103 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(pcalg) > > cat("doExtras:", (doExtras <- pcalg:::doExtras()), "\n") doExtras: FALSE > showProc.time <- local({ + pct <- proc.time() + function() { ## CPU elapsed __since last called__ + ot <- pct ; pct <<- proc.time() + cat('Time elapsed: ', (pct - ot)[1:3],'\n') + } + }) > options(warn = 1)# show warnings immediately > > load("discr100k.rda")# in directory tests/ i.e., typically *not* installed > ## Note: --> ../man/gmD.Rd is _same_ data, only just the first 10'000 obs. > str(dat)# 100'000 x 5 data frame of integers 'data.frame': 100000 obs. of 5 variables: $ X1: int 2 2 1 1 1 2 2 0 2 0 ... $ X2: int 0 1 0 0 0 1 1 0 0 0 ... $ X3: int 0 1 1 2 0 1 0 1 2 0 ... $ X4: int 2 2 3 2 2 0 2 2 2 0 ... $ X5: int 1 1 0 1 1 1 1 1 1 0 ... > sapply(dat, table) $X1 0 1 2 19771 30138 50091 $X2 0 1 79875 20125 $X3 0 1 2 27282 47715 25003 $X4 0 1 2 3 24899 10715 47263 17123 $X5 0 1 20120 79880 > nlev.dat <- sapply(dat, function(v) nlevels(factor(v))) > suffStat <- list(dm = dat, nlev = nlev.dat, adaptDF = TRUE) > dat. <- data.matrix(dat) > > showProc.time() Time elapsed: 0.09 0.02 0.11 > > ## Check for reasonable error message: > Sys.setlocale("LC_MESSAGES", "C") Warning in Sys.setlocale("LC_MESSAGES", "C") : LC_MESSAGES does not exist on Windows [1] "" > (er <- tryCatch(gSquareDis(1,3, S=20, dm=dat.), error=conditionMessage)) [1] "x, y, and S must all be in {1,..,p}, p=5" > ## "x, y, and S must all be in {1,..,p}, p=5" > stopifnot(grepl("must all be in {1,..,p}", er, fixed=TRUE)) > > > ## Tests for |S| = 0 and |S| = 1 > > pv.5 <- c( + ## Collider + ## x y S + disCItest(1,2,NULL,suffStat),# 1 : Must be !=0 + disCItest(1,2, 3, suffStat),# 2 : 0 + disCItest(5,2, 4, suffStat),# 3 : 0 + disCItest(5,2,NULL,suffStat),# 4 : != 0 + ## Non-collider + disCItest(4,3, 2, suffStat),# 5 : !=0 + disCItest(4,3,NULL,suffStat),# 6 : 0 + ## Neighbours + disCItest(4,2,NULL,suffStat),# 7 : 0 + disCItest(1,3,NULL,suffStat),# 8 : 0 + disCItest(1,5,NULL,suffStat))# 9 : !=0 > > stopifnot(all.equal(pv.5, + c(0.5150866, + 0, + 0, + 0.289990516, + ## Non-collider + 0.818905483, + 0, + # Neighbours + 0, + 0, + 0.774158263), tol = 4e-9)) > > showProc.time() Time elapsed: 0.2 0.05 0.25 > > if(!doExtras) ## smaller data set + dat. <- dat.[1:4096,] > > ## |S| = 2 : > pv2.5 <- c( + gSquareDis(3,4, S = c(1,2), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(2,4, S = c(1,3), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(2,3, S = c(1,4), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(2,3, S = c(1,5), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,4, S = c(2,3), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,3, S = c(2,4), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,3, S = c(2,5), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,2, S = c(3,4), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,2, S = c(3,5), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,2, S = c(4,5), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + + gSquareDis(3,5, S = c(1,2), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(2,5, S = c(1,3), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(2,5, S = c(1,4), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(2,4, S = c(1,5), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,5, S = c(2,3), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,5, S = c(2,4), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,4, S = c(2,5), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,5, S = c(3,4), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,4, S = c(3,5), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,3, S = c(4,5), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE)) Edge 3 -- 4 with subset S = 1 2 Edge 2 -- 4 with subset S = 1 3 Edge 2 -- 3 with subset S = 1 4 Edge 2 -- 3 with subset S = 1 5 Edge 1 -- 4 with subset S = 2 3 Edge 1 -- 3 with subset S = 2 4 Edge 1 -- 3 with subset S = 2 5 Edge 1 -- 2 with subset S = 3 4 Edge 1 -- 2 with subset S = 3 5 Edge 1 -- 2 with subset S = 4 5 Edge 3 -- 5 with subset S = 1 2 Edge 2 -- 5 with subset S = 1 3 Edge 2 -- 5 with subset S = 1 4 Edge 2 -- 4 with subset S = 1 5 Edge 1 -- 5 with subset S = 2 3 Edge 1 -- 5 with subset S = 2 4 Edge 1 -- 4 with subset S = 2 5 Edge 1 -- 5 with subset S = 3 4 Edge 1 -- 4 with subset S = 3 5 Edge 1 -- 3 with subset S = 4 5 > > showProc.time() Time elapsed: 0.05 0.03 0.07 > > pv2.5.EXP <- { + if(doExtras) + c(0.98726, 0, 0, 0, 0.77097, 0, 0, 0, 0, 0.98974, + 0.95964, 0.99966, 0, 0, 0.99685, 0.99999, 0.42513, 1, 0.11179, 0) + else + c(0.73219, 0, 0.0003349, 0, 0.90758, 0, 0, 0.98106, 0.46011, 1, + 0.93245, 0.99998, 8e-07, 0, 0.99797, 0.99933, 0.86036, 0.99988, 0.84859, 0) + } > > > (eq52 <- all.equal(pv2.5, pv2.5.EXP, tol = 1e-5)) [1] TRUE > if(!isTRUE(eq52)) stop("gSquareDis(|S| = 2) not ok:", eq52) > > > ## |S| = 3 : > pv3.5 <- c( + gSquareDis(1,2, S = c(3,4,5), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,3, S = c(2,4,5), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,4, S = c(2,3,5), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(1,5, S = c(2,3,4), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(2,3, S = c(1,4,5), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(2,4, S = c(1,3,5), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(2,5, S = c(1,3,4), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(3,4, S = c(1,2,5), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(3,5, S = c(1,2,4), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE), + gSquareDis(4,5, S = c(1,2,3), dm = dat., nlev = nlev.dat, adaptDF = TRUE, verbose=TRUE)) Edge 1 -- 2 with subset S = 3 4 5 Edge 1 -- 3 with subset S = 2 4 5 Edge 1 -- 4 with subset S = 2 3 5 Edge 1 -- 5 with subset S = 2 3 4 Edge 2 -- 3 with subset S = 1 4 5 Edge 2 -- 4 with subset S = 1 3 5 Edge 2 -- 5 with subset S = 1 3 4 Edge 3 -- 4 with subset S = 1 2 5 Edge 3 -- 5 with subset S = 1 2 4 Edge 4 -- 5 with subset S = 1 2 3 > showProc.time() Time elapsed: 0.09 0.01 0.11 > > pv3.5.EXP <- { + if(doExtras) + c(0, 0, 0.95861, 1, 0, 0, 0, 0.99204, 1, 0) + else + c(0.99999, 0, 0.84316, 1, 0.15977, 0, 0.83432, 0.66329, 1, 0) + } > > (eq53 <- all.equal(pv3.5, pv3.5.EXP, tol=6e-6)) [1] TRUE > if(!isTRUE(eq53)) stop("gSquareDis(|S| = 3) not ok:", eq53) > > ### Extend data, and do (few) tests with |S| in {4, 5, 6} !! > ### --------------------------------------------------------- > > ## Idee: 1) marginal-table fuer die no-incoming (aka 'root') notes > ## 2) bivariate table --> X_j | X_i for all edges i -> j > ## ==> randomly generate generate the discrete data from these. > > ###--> Look at gR CRAN task view -- package 'gRain' > ###--- can easily construct such objects > > ##' Generate discrete (integer, in 0:k) random variable, given one parent (in a DAG) > ##' > ##' @title Generate Discrete Random Variable Given one parent > ##' @param pa.var > ##' @param Pr Markov / conditional probabability matrix Pr[i,j] = Prob{newX = i, pa.X = j} > ##' @return a discrete random variable \code{newX} of the same length as \code{pa.var} > ##' @author Martin Maechler > rDiscr1 <- function(pa.var, Pr, zero.based = TRUE) { + stopifnot(pa.var %% 1 == 0, # <- fast test for integer valued + is.matrix(Pr), is.numeric(Pr), + (nlev <- ncol(Pr)) >= 1, ## #{levels} of the (discrete) parent variable + nlev == nlevels(pa.f <- factor(pa.var))) + + pa.n <- as.integer(pa.f)## values in {1, 2,.., nlev} + X <- integer(n <- length(pa.n)) + FF <- + if(zero.based) + function(.) which(. == 1L) - 1L ## {1,2,3} - 1 ==> {0,1,2} + else function(.) which(. == 1L) + for(j in seq_len(nlev)) { + is.j <- pa.n == j + nj <- sum(is.j) + X[is.j] <- apply(rmultinom(nj, size = 1, prob = Pr[, j]), + 2L, FF) + } + X + } > > nrow(dat.) # 4096 or 100'000 depending on doExtras [1] 4096 > > ## Currently, already |S| = 4 is so slow, we need > ## smaller data set: > dat. <- dat.[ seq_len((if(doExtras) 8192 else 1024)), ] > (n <- nrow(dat.)) [1] 1024 > > set.seed(102) > P6.5 <- cbind(c(1,5,2), + c(5,2,1))/8 ## P6.[i,j] := P[X6 = i | X5 = j]: > X6 <- rDiscr1(dat.[,"X5"], Pr = P6.5) > P7.3 <- cbind(c(1,5,2), + c(5,2,1), + c(1,4,3) )/8 > X7 <- rDiscr1(dat.[,"X3"], Pr = P7.3) > X8 <- rDiscr1( X7 , Pr = P7.3) > X9 <- apply(rmultinom(n, size=1, prob = c(1,2,3,4,6)/16), + 2L, function(.) which(. == 1L) - 1L) # independent > P10.9 <- cbind(c(1,7), + c(6,2), + c(3,5), + c(4,4), + c(2,6))/8 > X10 <- rDiscr1( X9, Pr = P10.9) > X11 <- rDiscr1( X9, Pr = P10.9) > dat10 <- cbind(dat., X6, X7, X8, X9, X10, X11) > (nlev10 <- sapply(as.data.frame(dat10), function(v) nlevels(factor(v)))) X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 3 2 3 4 2 3 3 3 5 2 2 > showProc.time() Time elapsed: 0.04 0.02 0.05 > > ## Testing only (not good for sound inference!): > ## setting n.min = n-1 ==> the tests are also run for "too small" sample sizes > ## =========== > > gSquareDis.. <- function(x,y, S, dm, nlev, verbose = TRUE) + gSquareDis(x,y, S=S, dm=dm, nlev=nlev, + n.min=nrow(dm)-1L, adaptDF=TRUE, verbose=verbose) > > ## |S| = 4 : > pv4.10 <- c( + gSquareDis..(1,2, S = 3:6 , dm= dat10, nlev=nlev10), + gSquareDis..(1,3, S = 4:7 , dm= dat10, nlev=nlev10), + gSquareDis..(1,4, S = 5:8 , dm= dat10, nlev=nlev10), + gSquareDis..(1,5, S = 6:9 , dm= dat10, nlev=nlev10), + gSquareDis..(2,3, S = 4:7 , dm= dat10, nlev=nlev10), + gSquareDis..(2,4, S = c(3,5:7), dm= dat10, nlev=nlev10), + gSquareDis..(2,5, S = c(4,6:8), dm= dat10, nlev=nlev10), + gSquareDis..(2,5, S = c(4,7:9), dm= dat10, nlev=nlev10), + gSquareDis..(2,5, S = 6:9 , dm= dat10, nlev=nlev10), + gSquareDis..(3,4, S = 5:8 , dm= dat10, nlev=nlev10), + gSquareDis..(3,4, S = 6:9 , dm= dat10, nlev=nlev10), + gSquareDis..(3,5, S = 6:9 , dm= dat10, nlev=nlev10), + gSquareDis..(4,5, S = 6:9 , dm= dat10, nlev=nlev10), + gSquareDis..(4,5, S = c(1:3,6), dm= dat10, nlev=nlev10), + gSquareDis..(4,5, S = c(1:3,7), dm= dat10, nlev=nlev10), + gSquareDis..(4,5, S = c(1:3,8), dm= dat10, nlev=nlev10)) Edge 1 -- 2 with subset S = 3 4 5 6 Edge 1 -- 3 with subset S = 4 5 6 7 Edge 1 -- 4 with subset S = 5 6 7 8 Edge 1 -- 5 with subset S = 6 7 8 9 Edge 2 -- 3 with subset S = 4 5 6 7 Edge 2 -- 4 with subset S = 3 5 6 7 Edge 2 -- 5 with subset S = 4 6 7 8 Edge 2 -- 5 with subset S = 4 7 8 9 Edge 2 -- 5 with subset S = 6 7 8 9 Edge 3 -- 4 with subset S = 5 6 7 8 Edge 3 -- 4 with subset S = 6 7 8 9 Edge 3 -- 5 with subset S = 6 7 8 9 Edge 4 -- 5 with subset S = 6 7 8 9 Edge 4 -- 5 with subset S = 1 2 3 6 Edge 4 -- 5 with subset S = 1 2 3 7 Edge 4 -- 5 with subset S = 1 2 3 8 > showProc.time() Time elapsed: 0.09 0.08 0.17 > dput(signif(zapsmall(pv4.10), 4)) c(0.9999, 0, 1e-07, 1, 1, 0.0001146, 1, 1, 1, 5.9e-06, 0, 1, 0.2205, 0, 0, 0) > > pv4.10.EXP <- { + if(doExtras) + c(1, 0, 0.9757, 1, 0.9981, 0, 1,1,1, 0.6949, 0.7201, 1, 0, 0,0,0) + else + c(0.9999, 0, 1e-07, 1, 1, 0.0001146, 1,1,1, 5.9e-06, 0, 1, 0.2205, 0,0,0) + } > > (eq4.10 <- all.equal(pv4.10, pv4.10.EXP, tol=5e-5)) [1] TRUE > if(!isTRUE(eq4.10)) stop("gSquareDis(|S| = 4) not ok:", eq4.10) > > ## |S| = 5 : --- verbose=2 gives many messages > ## -------- Adding a new combination of parents at sample " > pv5.10 <- c( + gSquareDis..(1,2, S = 3:7 , dm= dat10, nlev=nlev10), + gSquareDis..(1,3, S = 4:8 , dm= dat10, nlev=nlev10), + gSquareDis..(1,4, S = 5:9 , dm= dat10, nlev=nlev10), + gSquareDis..(1,5, S = 6:10 , dm= dat10, nlev=nlev10), + gSquareDis..(2,3, S = 4:8 , dm= dat10, nlev=nlev10), + gSquareDis..(2,4, S = c(3,5:8), dm= dat10, nlev=nlev10), + gSquareDis..(2,5, S = c(4,6:9), dm= dat10, nlev=nlev10), + gSquareDis..(3,4, S = 5:9 , dm= dat10, nlev=nlev10), + gSquareDis..(3,4, S = 6:10 , dm= dat10, nlev=nlev10), + gSquareDis..(3,5, S = 5:9 , dm= dat10, nlev=nlev10), + gSquareDis..(4,5, S = 6:10 , dm= dat10, nlev=nlev10)) Edge 1 -- 2 with subset S = 3 4 5 6 7 added a total of 3 new parents Edge 1 -- 3 with subset S = 4 5 6 7 8 added a total of 3 new parents Edge 1 -- 4 with subset S = 5 6 7 8 9 added a total of 3 new parents Edge 1 -- 5 with subset S = 6 7 8 9 10 added a total of 3 new parents Edge 2 -- 3 with subset S = 4 5 6 7 8 added a total of 2 new parents Edge 2 -- 4 with subset S = 3 5 6 7 8 added a total of 2 new parents Edge 2 -- 5 with subset S = 4 6 7 8 9 added a total of 2 new parents Edge 3 -- 4 with subset S = 5 6 7 8 9 added a total of 3 new parents Edge 3 -- 4 with subset S = 6 7 8 9 10 added a total of 3 new parents Edge 3 -- 5 with subset S = 5 6 7 8 9 added a total of 3 new parents Edge 4 -- 5 with subset S = 6 7 8 9 10 added a total of 4 new parents > showProc.time() Time elapsed: 0.14 0.01 0.16 > dput(signif(zapsmall(pv5.10), 5)) c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) > > pv5.10.EXP <- { + if(doExtras) + c(1, 0, 0.037821, 1, 1, 0, 1, 0.0005184, 0.0013204, 1, 0) + else + c(1, 0.10938, 0.94484, 1, 1, 0.79863, 1, 0.99495, 0.68719, 1, 1) + } > > (eq5.10 <- all.equal(pv5.10, pv5.10.EXP, tol= 5e5)) [1] TRUE > if(!isTRUE(eq5.10)) stop("gSquareDis(|S| = 5) not ok:", eq5.10) > > ## |S| = 6 : --- (see |S|=5): > > pv6.10 <- c( + gSquareDis..(1,2, S = 3:8 , dm= dat10, nlev=nlev10), + gSquareDis..(1,3, S = 4:9 , dm= dat10, nlev=nlev10), + gSquareDis..(1,4, S = 5:10 , dm= dat10, nlev=nlev10), + gSquareDis..(1,5, S = c(2:4,6:9), dm= dat10, nlev=nlev10), + gSquareDis..(2,3, S = 4:9 , dm= dat10, nlev=nlev10), + gSquareDis..(2,4, S = 5:10 , dm= dat10, nlev=nlev10), + gSquareDis..(2,5, S = c(4,6:9) , dm= dat10, nlev=nlev10), + gSquareDis..(3,4, S = 5:10 , dm= dat10, nlev=nlev10), + gSquareDis..(3,4, S = c(1:2,5:8), dm= dat10, nlev=nlev10), + gSquareDis..(3,5, S = c(1:2,6:9), dm= dat10, nlev=nlev10), + gSquareDis..(4,5, S = c(1:3,7:9), dm= dat10, nlev=nlev10)) Edge 1 -- 2 with subset S = 3 4 5 6 7 8 added a total of 3 new parents Edge 1 -- 3 with subset S = 4 5 6 7 8 9 added a total of 3 new parents Edge 1 -- 4 with subset S = 5 6 7 8 9 10 added a total of 3 new parents Edge 1 -- 5 with subset S = 2 3 4 6 7 8 9 added a total of 3 new parents Edge 2 -- 3 with subset S = 4 5 6 7 8 9 added a total of 2 new parents Edge 2 -- 4 with subset S = 5 6 7 8 9 10 added a total of 2 new parents Edge 2 -- 5 with subset S = 4 6 7 8 9 added a total of 2 new parents Edge 3 -- 4 with subset S = 5 6 7 8 9 10 added a total of 3 new parents Edge 3 -- 4 with subset S = 1 2 5 6 7 8 added a total of 3 new parents Edge 3 -- 5 with subset S = 1 2 6 7 8 9 added a total of 3 new parents Edge 4 -- 5 with subset S = 1 2 3 7 8 9 added a total of 4 new parents > showProc.time() Time elapsed: 0.11 0.03 0.14 > dput(signif(zapsmall(pv6.10), 5)) c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) > > pv6.10.EXP <- { + if(doExtras) + c(1, 0.023908, 0.000208, 1, 1, 0.94747, 1, 0.0002593, 5.1e-06, 1, 0) + else + c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) + } > > (eq6.10 <- all.equal(pv6.10, pv6.10.EXP, tol= 5e5)) [1] TRUE > if(!isTRUE(eq6.10)) stop("gSquareDis(|S| = 5) not ok:", eq6.10) > > proc.time() user system elapsed 1.59 0.43 1.95