R version 4.3.1 (2023-06-16) -- "Beagle Scouts" Copyright (C) 2023 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > pkgname <- "abn" > source(file.path(R.home("share"), "R", "examples-header.R")) > options(warn = 1) > base::assign(".ExTimings", "abn-Ex.timings", pos = 'CheckExEnv') > base::cat("name\tuser\tsystem\telapsed\n", file=base::get(".ExTimings", pos = 'CheckExEnv')) > base::assign(".format_ptime", + function(x) { + if(!is.na(x[4L])) x[1L] <- x[1L] + x[4L] + if(!is.na(x[5L])) x[2L] <- x[2L] + x[5L] + options(OutDec = '.') + format(x[1L:3L], digits = 7L) + }, + pos = 'CheckExEnv') > > ### * > library('abn') abn version 3.0.0 (2022-09-01) is loaded. To cite the package 'abn' in publications call: citation('abn'). Attaching package: ‘abn’ The following object is masked from ‘package:base’: factorial > > base::assign(".oldSearch", base::search(), pos = 'CheckExEnv') > base::assign(".old_wd", base::getwd(), pos = 'CheckExEnv') > cleanEx() > nameEx("abn-package") > ### * abn-package > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: abn-package > ### Title: 'abn' Package > ### Aliases: abn abn-package > ### Keywords: internal > > ### ** Examples > > ## Citations: > print(citation('abn'), bibtex=TRUE) To cite abn in publications use: Kratzer G, Lewis F, Comin A, Pittavino M, Furrer R (2023). “Additive Bayesian Network Modeling with the R Package abn.” _Journal of Statistical Software_, *105*(8), 1-41. doi:10.18637/jss.v105.i08 . A BibTeX entry for LaTeX users is @Article{, title = {Additive {B}ayesian Network Modeling with the {R} Package {abn}}, author = {Gilles Kratzer and Fraser Lewis and Arianna Comin and Marta Pittavino and Reinhard Furrer}, journal = {Journal of Statistical Software}, year = {2023}, volume = {105}, number = {8}, pages = {1--41}, doi = {10.18637/jss.v105.i08}, } To cite an example of a typical ABN analysis in publications use: Kratzer, G., Lewis, F.I., Willi, B., Meli, M.L., Boretti, F.S., Hofmann-Lehmann, R., Torgerson, P., Furrer, R. and Hartnack, S. (2020). Bayesian Network Modeling Applied to Feline Calicivirus Infection Among Cats in Switzerland. Frontiers in Veterinary Science, 7, 73 A BibTeX entry for LaTeX users is @Article{, title = {{B}ayesian {N}etwork {M}odeling {A}pplied to {F}eline {C}alicivirus {I}nfection {A}mong {C}ats in {S}witzerland}, author = {Gilles Kratzer and Fraser Iain Lewis and Barbara Willi and Marina Meli and Felicitas Boretti and Regina Hofmann-Lehmann and Paul Torgerson and Reinhard Furrer and Sonja Hartnack}, year = {2020}, journal = {Frontiers in Veterinary Science}, } To cite package of the R package 'abn' in publications use: Furrer, R., Kratzer, G. and Lewis, F.I. (2023). abn: Modelling Multivariate Data with Additive Bayesian Networks. R package version 2.7-2. https://CRAN.R-project.org/package=abn A BibTeX entry for LaTeX users is @Manual{, title = {abn: Modelling Multivariate Data with Additive {B}ayesian Networks}, author = {Reinhard Furrer and Gilles Kratzer and Fraser Iain Lewis}, year = {2023}, note = {R package version 2.7-2}, url = {https://CRAN.R-project.org/package=abn}, } > > ## Installing the R package Rgraphviz: > # if (!requireNamespace("BiocManager", quietly = TRUE)) > # install.packages("BiocManager") > # BiocManager::install("Rgraphviz") > > ## README.md in the directory `bootstrapping_example/`: > # edit(file=paste0( path.package('abn'),'/bootstrapping_example/README.md')) > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("abn-package", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("abn.version") > ### * abn.version > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: abn.version > ### Title: abn Version Information > ### Aliases: abn.version > > ### ** Examples > > abn.version()$version.string [1] "abn version 3.0.0 (2022-09-01)" > ## Not run: > ##D abn.version("system") > ## End(Not run) > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("abn.version", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("bern_bugs") > ### * bern_bugs > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: bern_bugs > ### Title: Bugs code for Bernoulli response > ### Aliases: bern_bugs bern_bugsGroup > ### Keywords: internal utilities > > ### ** Examples > > bern_bugs(nodename = "a", + parentnames = c("b", "c"), + nodesintercept = c(0.318077), + parentcoefs = list("b"=c(b=0.3059395), + "c"=c(c=0.5555))) a ~ dbern(p.a) # Bernoulli response logit(p.a) <- 0.318077 + 0.3059395*b + 0.5555*c # logistic regression > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("bern_bugs", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("build.control") > ### * build.control > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: build.control > ### Title: Control the iterations in 'buildScoreCache' > ### Aliases: build.control > > ### ** Examples > > ctrlmle <- abn::build.control(method = "mle", + ncores = 0, + max.irls = 100, + tol = 10^-11, + tolPwrss = 1e-7, + check.rankX = "message+drop.cols", + check.scaleX = "message+rescale", + check.conv.grad = "message", + check.conv.singular = "message", + check.conv.hess = "message", + xtol_abs = 1e-6, + ftol_abs = 1e-6, + trace.mblogit = FALSE, + catcov.mblogit = "free", + epsilon = 1e-6, + seed = 9062019L) > ctrlbayes <- abn::build.control(method = "bayes", + max.mode.error = 10, + mean = 0, prec = 0.001, + loggam.shape = 1, + loggam.inv.scale = 5e-05, + max.iters = 100, + epsabs = 1e-07, + error.verbose = FALSE, + epsabs.inner = 1e-06, + max.iters.inner = 100, + finite.step.size = 1e-07, + hessian.params = c(1e-04, 0.01), + max.iters.hessian = 10, + max.hessian.error = 0.5, + factor.brent = 100, + maxiters.hessian.brent = 100, + num.intervals.brent = 100, + tol = 10^-8, + seed = 9062019L) > > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("build.control", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("buildScoreCache") > ### * buildScoreCache > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: buildScoreCache > ### Title: Build a cache of goodness of fit metrics for each node in a DAG, > ### possibly subject to user-defined restrictions > ### Aliases: buildScoreCache buildScoreCache.bayes forLoopContent > ### buildScoreCache.mle > ### Keywords: buildScoreCache.bayes buildScoreCache.mle calc.node.inla.glm > ### calc.node.inla.glmm fitAbn.bayes fitAbn.mle models > > ### ** Examples > > ################################################################# > ## Example 1 > ################################################################# > > ## Subset of the build-in dataset, see ?ex0.dag.data > mydat <- ex0.dag.data[,c("b1","b2","g1","g2","b3","g3")] ## take a subset of cols > > ## setup distribution list for each node > mydists <- list(b1="binomial", b2="binomial", g1="gaussian", + g2="gaussian", b3="binomial", g3="gaussian") > > # Structural constraints > # ban arc from b2 to b1 > # always retain arc from g2 to g1 > > ## parent limits > max.par <- list("b1"=2, "b2"=2, "g1"=2, "g2"=2, "b3"=2, "g3"=2) > > ## now build the cache of pre-computed scores accordingly to the structural constraints > > res.c <- buildScoreCache(data.df=mydat, data.dists=mydists, + dag.banned= ~b1|b2, dag.retained= ~g1|g2, max.parents=max.par) > > > ## repeat but using R-INLA. The mlik's should be virtually identical. > ## now build cache: > if(requireNamespace("INLA", quietly = TRUE)){ + res.inla <- buildScoreCache(data.df=mydat, data.dists=mydists, + dag.banned= ~b1|b2, dag.retained= ~g1|g2, max.parents=max.par, + control=list(max.mode.error=100)) + + ## comparison - very similar + difference <- res.c$mlik - res.inla$mlik + } > > ## Not run: > ##D ## Comparison Bayes with MLE (unconstrained): > ##D res.mle <- buildScoreCache(data.df=mydat, data.dists=mydists, > ##D max.parents=3, method="mle") > ##D res.abn <- buildScoreCache(data.df=mydat, data.dists=mydists, > ##D max.parents=3, method="Bayes") > ##D ## of course different, but smame order: > ##D plot(-res.mle$bic, res.abn$mlik) > ##D > ##D ################################################################# > ##D ## Example 2 - mle with several cores > ##D ################################################################### > ##D > ##D ## Many variables, few observations > ##D mydat <- ex0.dag.data > ##D mydists <- as.list(rep(c("binomial", "gaussian", "poisson"), each=10)) > ##D names(mydists) <- names(mydat) > ##D > ##D # system.time( { > ##D # res.mle1 <- buildScoreCache(data.df=mydat, data.dists=mydists, > ##D # max.parents=2, method="mle", ncores=2) }) > ##D # system.time( { > ##D # res.mle2 <- buildScoreCache(data.df=mydat, data.dists=mydists, > ##D # max.parents=2, method="mle") }) > ##D > ##D > ##D ################################################################# > ##D ## Example 3 - grouped data - random effects example e.g. glmm > ##D ################################################################### > ##D > ##D mydat <- ex3.dag.data ## this data comes with abn see ?ex3.dag.data > ##D > ##D mydists <- list(b1="binomial", b2="binomial", b3="binomial", > ##D b4="binomial", b5="binomial", b6="binomial", b7="binomial", > ##D b8="binomial", b9="binomial", b10="binomial",b11="binomial", > ##D b12="binomial", b13="binomial" ) > ##D max.par <- 2 > ##D > ##D ## in this example INLA is used as default since these are glmm nodes > ##D ## when running this at node-parent combination 71 the default accuracy check on the > ##D ## INLA modes is exceeded (default is a max. of 10 percent difference from > ##D ## modes estimated using internal code) and a message is given that internal code > ##D ## will be used in place of INLA's results. > ##D > ##D # mycache <- buildScoreCache(data.df=mydat, data.dists=mydists, group.var="group", > ##D # cor.vars=c("b1","b2","b3","b4","b5","b6","b7", > ##D # "b8","b9","b10","b11","b12","b13"), > ##D # max.parents=max.par, which.nodes=c(1)) > ## End(Not run) > > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("buildScoreCache", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("categorical_bugs") > ### * categorical_bugs > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: categorical_bugs > ### Title: Bugs code for Categorical response > ### Aliases: categorical_bugs categorical_bugsGroup > ### Keywords: internal utilities > > ### ** Examples > > # A -> B > # Where B is a categorical variable with 4 levels. > categorical_bugs(nodename = "b", + nodesCatIdx = c(2, 3, 4), + parentnames = "a", + nodesintercepts = c(2.188650, 3.133928, 3.138531), + parentcoefs = list("a"=c(a=1.686432, a=3.134161, a=5.052104))) b ~ dcat(p.b) # Categorical response p.b[1] <- phi.b[1]/sum(phi.b) # soft-max log(phi.b[1]) <- 0 # Reference category p.b[2] <- phi.b[2]/sum(phi.b) # soft-max log(phi.b[2]) <- 2.18865 + 1.686432*a p.b[3] <- phi.b[3]/sum(phi.b) # soft-max log(phi.b[3]) <- 3.133928 + 3.134161*a p.b[4] <- phi.b[4]/sum(phi.b) # soft-max log(phi.b[4]) <- 3.138531 + 5.052104*a > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("categorical_bugs", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("compareDag") > ### * compareDag > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: compareDag > ### Title: Compare two DAGs or EGs > ### Aliases: compareDag > ### Keywords: utilities > > ### ** Examples > > test.m <- matrix(data = c(0,1,0, + 0,0,0, + 1,0,0), nrow = 3, ncol = 3) > ref.m <- matrix(data = c(0,0,0, + 1,0,0, + 1,0,0), nrow = 3, ncol = 3) > > colnames(test.m) <- rownames(test.m) <- colnames(ref.m) <- colnames(ref.m) <- c("a", "b", "c") > > unlist(compareDag(ref = ref.m, test = test.m)) TPR FPR Accuracy FDR 0.5000000 0.1428571 0.7777778 0.5000000 G-measure F1-score PPV FOR 0.5000000 2.0000000 0.5000000 0.5000000 Hamming-distance 1.0000000 > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("compareDag", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("compareEG") > ### * compareEG > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: compareEG > ### Title: Compare two DAGs or EGs > ### Aliases: compareEG > > ### ** Examples > > test.m <- matrix(data = c(0,1,0, + 0,0,0, + 1,0,0), nrow = 3, ncol = 3) > ref.m <- matrix(data = c(0,0,0, + 1,0,0, + 1,0,0), nrow = 3, ncol = 3) > > colnames(test.m) <- rownames(test.m) <- colnames(ref.m) <- colnames(ref.m) <- c("a", "b", "c") > > unlist(compareDag(ref = ref.m, test = test.m)) TPR FPR Accuracy FDR 0.5000000 0.1428571 0.7777778 0.5000000 G-measure F1-score PPV FOR 0.5000000 2.0000000 0.5000000 0.5000000 Hamming-distance 1.0000000 > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("compareEG", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("createAbnDag") > ### * createAbnDag > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: createAbnDag > ### Title: Make DAG of class "abnDag" > ### Aliases: createAbnDag > ### Keywords: internal utilities > > ### ** Examples > > createAbnDag(dag = ~a+b|a, data.df = data.frame("a"=1, "b"=1)) a b a 0 0 b 1 0 Class 'abnDag'. > plot(createAbnDag(dag = matrix(c(0,1,0,0), 2, 2))) > > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("createAbnDag", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("discretization") > ### * discretization > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: discretization > ### Title: Discretization of a Possibly Continuous Data Frame of Random > ### Variables based on their distribution > ### Aliases: discretization > ### Keywords: utilities > > ### ** Examples > > ## Generate random variable > rv <- rnorm(n = 100, mean = 5, sd = 2) > dist <- list("gaussian") > names(dist) <- c("rv") > > ## Compute the entropy through discretization > entropyData(freqs.table = discretization(data.df = rv, data.dists = dist, + discretization.method = "sturges", nb.states = FALSE)) [1] 2.654889 > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("discretization", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("entropyData") > ### * entropyData > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: entropyData > ### Title: Computes an Empirical Estimation of the Entropy from a Table of > ### Counts > ### Aliases: entropyData > ### Keywords: utilities > > ### ** Examples > > ## Generate random variable > rv <- rnorm(n = 100, mean = 5, sd = 2) > dist <- list("gaussian") > names(dist) <- c("rv") > > ## Compute the entropy through discretization > entropyData(freqs.table = discretization(data.df = rv, data.dists = dist, + discretization.method = "sturges", nb.states = FALSE)) [1] 2.654889 > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("entropyData", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("essentialGraph") > ### * essentialGraph > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: essentialGraph > ### Title: Construct the essential graph > ### Aliases: essentialGraph > ### Keywords: utilities > > ### ** Examples > > dag <- matrix(c(0,0,0, 1,0,0, 1,1,0), nrow = 3, ncol = 3) > dist <- list(a="gaussian", b="gaussian", c="gaussian") > colnames(dag) <- rownames(dag) <- names(dist) > > essentialGraph(dag) a b c a 0 1 1 b 0 0 1 c 0 1 0 > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("essentialGraph", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("ex0.dag.data") > ### * ex0.dag.data > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: ex0.dag.data > ### Title: Synthetic validation data set for use with abn library examples > ### Aliases: ex0.dag.data > ### Keywords: datasets > > ### ** Examples > > ## Not run: > ##D ## The dataset was (essentially) generated using the following code: > ##D datasize <- 300 > ##D tmp <- c(rep("y", as.integer(datasize/2)), rep("n", as.integer(datasize/2))) > ##D set.seed(1) > ##D > ##D ex0.dag.data <- data.frame(b1=sample(tmp, size=datasize, replace=TRUE), > ##D b2=sample(tmp, size=datasize, replace=TRUE), > ##D b3=sample(tmp, size=datasize, replace=TRUE), > ##D b4=sample(tmp, size=datasize, replace=TRUE), > ##D b5=sample(tmp, size=datasize, replace=TRUE), > ##D b6=sample(tmp, size=datasize, replace=TRUE), > ##D b7=sample(tmp, size=datasize, replace=TRUE), > ##D b8=sample(tmp, size=datasize, replace=TRUE), > ##D b9=sample(tmp, size=datasize, replace=TRUE), > ##D b10=sample(tmp, size=datasize, replace=TRUE), > ##D g1=rnorm(datasize, mean=0,sd=1), > ##D g2=rnorm(datasize, mean=0,sd=1), > ##D g3=rnorm(datasize, mean=0,sd=1), > ##D g4=rnorm(datasize, mean=0,sd=1), > ##D g5=rnorm(datasize, mean=0,sd=1), > ##D g6=rnorm(datasize, mean=0,sd=1), > ##D g7=rnorm(datasize, mean=0,sd=1), > ##D g8=rnorm(datasize, mean=0,sd=1), > ##D g9=rnorm(datasize, mean=0,sd=1), > ##D g10=rnorm(datasize, mean=0,sd=1), > ##D p1=rpois(datasize, lambda=10), > ##D p2=rpois(datasize, lambda=10), > ##D p3=rpois(datasize, lambda=10), > ##D p4=rpois(datasize, lambda=10), > ##D p5=rpois(datasize, lambda=10), > ##D p6=rpois(datasize, lambda=10), > ##D p7=rpois(datasize, lambda=10), > ##D p8=rpois(datasize, lambda=10), > ##D p9=rpois(datasize, lambda=10), > ##D p10=rpois(datasize, lambda=10)) > ## End(Not run) > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("ex0.dag.data", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("ex1.dag.data") > ### * ex1.dag.data > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: ex1.dag.data > ### Title: Synthetic validation data set for use with abn library examples > ### Aliases: ex1.dag.data > ### Keywords: datasets > > ### ** Examples > > ## The data is one realisation from the the underlying DAG: > ex1.true.dag <- matrix(data=c( + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 1,1,0,0,0,0,0,0,0,0, + 1,0,1,1,0,0,0,0,0,0, + 0,1,1,1,0,0,0,0,0,0, + 0,0,1,0,1,0,0,0,0,0, + 0,0,1,0,0,0,1,0,0,0, + 0,0,1,1,0,0,0,0,0,0), ncol=10, byrow=TRUE) > > colnames(ex1.true.dag) <- rownames(ex1.true.dag) <- + c("b1","p1","g1","b2","p2","b3","g2","b4","b5","g3") > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("ex1.dag.data", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("ex2.dag.data") > ### * ex2.dag.data > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: ex2.dag.data > ### Title: Synthetic validation data set for use with abn library examples > ### Aliases: ex2.dag.data > ### Keywords: datasets > > ### ** Examples > > ## The true underlying stochastic model has DAG - this data is a single realisation. > ex2.true.dag <- matrix(data = c( + 0,1,0,1,0,0,1,0,1,1,1,0,1,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, + 0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1, + 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,0,1,1,0,1,1,0,1,0,0,0,0,0,0,0, + 0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0, + 0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, + 0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0, + 0,0,0,1,1,0,1,0,1,0,1,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0 + ), ncol = 18, byrow = TRUE) > > colnames(ex2.true.dag) <- rownames(ex2.true.dag) <- c("b1","g1","p1","b2", + "g2","p2","b3","g3", + "p3","b4","g4","p4", + "b5","g5","p5","b6", + "g6","p6") > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("ex2.dag.data", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("fit.control") > ### * fit.control > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: fit.control > ### Title: Control the iterations in 'fitAbn' > ### Aliases: fit.control > > ### ** Examples > > ctrlmle <- abn::fit.control(method = "mle", + max.irls = 100, + tol = 10^-11, + tolPwrss = 1e-7, + xtol_abs = 1e-6, + ftol_abs = 1e-6, + epsilon = 1e-6, + ncores = 2, + seed = 9062019L) > ctrlbayes <- abn::fit.control(method = "bayes", + mean = 0, + prec = 0.001, + loggam.shape = 1, + loggam.inv.scale = 5e-05, + max.mode.error = 10, + max.iters = 100, + epsabs = 1e-07, + error.verbose = FALSE, + epsabs.inner = 1e-06, + max.iters.inner = 100, + finite.step.size = 1e-07, + hessian.params = c(1e-04, 0.01), + max.iters.hessian = 10, + max.hessian.error = 1e-04, + factor.brent = 100, + maxiters.hessian.brent = 10, + num.intervals.brent = 100, + min.pdf = 0.001, + n.grid = 100, + std.area = TRUE, + marginal.quantiles = c(0.025, 0.25, 0.5, 0.75, 0.975), + max.grid.iter = 1000, + marginal.node = NULL, + marginal.param = NULL, + variate.vec = NULL, + ncores = 1, + seed = 9062019L) > > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("fit.control", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("fitAbn") > ### * fitAbn > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: fitAbn > ### Title: Fit an additive Bayesian network model > ### Aliases: fitAbn fitAbn.bayes fitAbn.mle regressionLoop > ### Keywords: models > > ### ** Examples > > ## Built-in dataset with a subset of cols > mydat <- ex0.dag.data[,c("b1","b2","b3","g1","b4","p2","p4")] > > ## setup distribution list for each node > mydists <- list(b1="binomial", b2="binomial", b3="binomial", g1="gaussian", + b4="binomial", p2="poisson", p4="poisson") > > ## Null model - all independent variables > mydag.empty <- matrix(0, nrow=7, ncol=7) > colnames(mydag.empty) <- rownames(mydag.empty) <- names(mydat) > > ## Now fit the model to calculate its goodness-of-fit > myres <- fitAbn(dag=mydag.empty, data.df=mydat, data.dists=mydists) > > ## Log-marginal likelihood goodness-of-fit for complete DAG > print(myres$mlik) [1] -2855.45 > > ## fit using the formula statement > # including the creation of the graph of the DAG via Rgraphviz > myres <- fitAbn(dag=~b1|b2+b2|p4:g1+g1|p2+b3|g1+b4|b1+p4|g1, + data.df=mydat, data.dists=mydists) > print(myres$mlik) ## a much weaker fit than full independence DAG [1] -2897.506 > > plotAbn(dag=myres$abnDag$dag, data.dists=mydists, fitted.values=myres$modes) > > ## Or equivalentelly using the formula statement, with plotting > ## Now repeat but include some dependencies first > mydag <- mydag.empty > mydag["b1","b2"] <- 1 # b1<-b2 and so on > mydag["b2","p4"] <- mydag["b2","g1"] <- mydag["g1","p2"] <- 1 > mydag["b3","g1"] <- mydag["b4","b1"] <- mydag["p4","g1"] <- 1 > myresAlt <- fitAbn(dag=mydag, data.df=mydat, data.dists=mydists) > plot(myresAlt) > > ## ----------------------------------------------------------------------------------- > ## This function contains an MLE implementation accessible through a method parameter > ## use built-in simulated data set > ## ----------------------------------------------------------------------------------- > > myres.mle <- fitAbn(dag=~b1|b2+b2|p4+g1+g1|p2+b3|g1+b4|b1+p4|g1, + data.df=mydat, data.dists=mydists, method="mle") > > ## Print the output for mle first then for Bayes: > print(myres.mle) The ABN model was fitted using an mle approach. The estimated coefficients are: $b1 b1|intercept b2 [1,] 0.11 0.0465 $b2 b2|intercept p4 [1,] -0.252 0.0301 $b3 b3|intercept g1 [1,] 0.0267 0.0319 $g1 g1|intercept p2 [1,] -0.0435 0.00421 $b4 b4|intercept b1 [1,] 0.114 0.0863 $p2 p2|intercept [1,] 2.34 $p4 p4|intercept g1 [1,] 2.32 0.00941 Number of nodes in the network: 7. > print(myres) The ABN model was fitted using a Bayesian approach. The estimated modes are: $b1 b1|(Intercept) b1|b2 0.1097 0.0465 $b2 b2|(Intercept) b2|g1 b2|p4 -0.2560 -0.0415 0.0305 $b3 b3|(Intercept) b3|g1 0.0267 0.0319 $g1 g1|(Intercept) g1|p2 g1|precision -0.04350 0.00421 1.00353 $b4 b4|(Intercept) b4|b1 0.1144 0.0863 $p2 p2|(Intercept) 2.34 $p4 p4|(Intercept) p4|g1 2.31644 0.00941 Number of nodes in the network: 7. > > > ## Not run: > ##D ## A simple plot of some posterior densities the algorithm which chooses > ##D ## density points is very simple any may be rather sparse so also recompute > ##D ## the density over an equally spaced grid of 50 points between the two > ##D ## end points which had at f=min.pdf > ##D ## max.mode.error=0 foces to use the internal c code > ##D myres.c <- fitAbn(dag=mydag, data.df=mydat, data.dists=mydists, > ##D compute.fixed=TRUE, > ##D control=list(max.mode.error=0)) > ##D > ##D print(names(myres.c$marginals)) ## gives all the different parameter names > ##D > ##D ## Repeat but use INLA for the numerics using max.mode.error=100 > ##D ## as using internal code is the default here rather than INLA > ##D myres.inla <- fitAbn(dag=mydag, data.df=mydat, data.dists=mydists, > ##D compute.fixed=TRUE, > ##D control=list(max.mode.error=100)) > ##D > ##D ## Plot posterior densities > ##D par(mfrow=c(2,2), mai=c(.7,.7,.2,.1)) > ##D plot(myres.c$marginals$b1[["b1|(Intercept)"]], type="l", xlab="b1|(Intercept)") > ##D lines(myres.inla$marginals$b1[["b1|(Intercept)"]], col="blue") > ##D plot(myres.c$marginals$b2[["b2|p4"]], type="l", xlab="b2|p4") > ##D lines(myres.inla$marginals$b2[["b2|p4"]], col="blue") > ##D plot(myres.c$marginals$g1[["g1|precision"]], type="l", xlab="g1|precision") > ##D lines(myres.inla$marginals$g1[["g1|precision"]], col="blue") > ##D plot(myres.c$marginals$b4[["b4|b1"]], type="l", xlab="b4|b1") > ##D lines(myres.inla$marginals$b4[["b4|b1"]], col="blue") > ##D > ##D ## An elementary mixed model example using built-in data specify DAG, > ##D ## only two variables using a subset of variables from ex3.dag.data > ##D ## both variables are assumed to need (separate) adjustment for the > ##D ## group variable, i.e., a binomial GLMM at each node > ##D > ##D > ##D mydists <- list(b1="binomial", b2="binomial") > ##D > ##D ## Compute marginal likelihood - use internal code via max.mode.error=0 > ##D ## as using INLA is the default here. > ##D ## Model where b1 <- b2 > ##D myres.c <- fitAbn(dag=~b1|b2, data.df=ex3.dag.data[,c(1,2,14)], data.dists=mydists, > ##D group.var="group", cor.vars=c("b1","b2"), > ##D control=list(max.mode.error=0)) > ##D print(myres.c) ## show all the output > ##D > ##D ## compare mode for node b1 with glmer(), lme4::glmer is automatically attached. > ##D > ##D ## Now for marginals - INLA is strongly preferable for estimating marginals for nodes > ##D ## with random effects as it is far faster, but may not be reliable > ##D ## see http://r-bayesian-networks.org > ##D > ##D ## INLA's estimates of the marginals, using high n.grid=500 > ##D ## as this makes the plots smoother - see below. > ##D ## myres.inla <- fitAbn(dag=~b1|b2, data.df=ex3.dag.data[,c(1,2,14)], > ##D # data.dists=mydists, > ##D # group.var="group", cor.vars=c("b1", "b2"), > ##D # compute.fixed=TRUE, n.grid=500, > ##D # control=list(max.mode.error=100, max.hessian.error=10E-02)) > ##D > ##D ## this is NOT recommended - marginal density estimation using fitAbn in mixed models > ##D ## is really just for diagnostic purposes, better to use fitAbn.inla() here > ##D ## but here goes...be patient > ##D # myres.c <- fitAbn(dag=~b1|b2, data.df=ex3.dag.data[,c(1,2,14)], data.dists=mydists, > ##D # group.var="group", cor.vars=c("b1", "b2"), compute.fixed=TRUE, > ##D # control=list(max.mode.error=0, max.hessian.error=10E-02)) > ##D > ##D ## compare marginals between internal and INLA. > ##D # par(mfrow=c(2,3)) > ##D ## 5 parameters - two intercepts, one slope, two group level precisions > ##D # plot(myres.inla$marginals$b1[[1]], type="l", col="blue") > ##D # lines(myres.c$marginals$b1[[1]], col="brown", lwd=2) > ##D # plot(myres.inla$marginals$b1[[2]], type="l", col="blue") > ##D # lines(myres.c$marginals$b1[[2]], col="brown", lwd=2) > ##D ## the precision of group-level random effects > ##D # plot(myres.inla$marginals$b1[[3]],type="l", col="blue", xlim=c(0,2)) > ##D # lines(myres.c$marginals$b1[[3]],col="brown",lwd=2) > ##D # plot(myres.inla$marginals$b2[[1]],type="l", col="blue") > ##D # lines(myres.c$marginals$b2[[1]],col="brown",lwd=2) > ##D # plot(myres.inla$marginals$b2[[1]], type="l", col="blue") > ##D # lines(myres.c$marginals$b2[[1]], col="brown", lwd=2) > ##D ## the precision of group-level random effects > ##D # plot(myres.inla$marginals$b2[[2]], type="l", col="blue", xlim=c(0,2)) > ##D # lines(myres.c$marginals$b2[[2]], col="brown", lwd=2) > ##D > ##D ### these are very similar although not exactly identical > ##D > ##D ## use internal code but only to compute a single parameter over a specified grid > ##D ## This can be necessary if the simple auto grid finding functions does a poor job > ##D > ##D #myres.c <- fitAbn(dag=~b1|b2, data.df=ex3.dag.data[,c(1,2,14)], data.dists=mydists, > ##D # group.var="group", cor.vars=c("b1", "b2"), > ##D # centre=FALSE, compute.fixed=TRUE, > ##D # control=list(marginal.node=1, marginal.param=3,## precision term in node 1 > ##D # variate.vec=seq(0.05, 1.5, len=25), max.hessian.error=10E-02)) > ##D > ##D #par(mfrow=c(1,2)) > ##D #plot(myres.c$marginals[[1]], type="l", col="blue")## still fairly sparse > ##D ## An easy way is to use spline to fill in the density without recomputing other > ##D ## points provided the original grid is not too sparse. > ##D #plot(spline(myres.c$marginals[[1]], n=100), type="b", col="brown") > ## End(Not run) > > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("fitAbn", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("gauss_bugs") > ### * gauss_bugs > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: gauss_bugs > ### Title: Bugs code for Gaussian response > ### Aliases: gauss_bugs gauss_bugsGroup > ### Keywords: internal utilities > > ### ** Examples > > gauss_bugs(nodename = "a", + parentnames = c("b", "c"), + nodesintercept = c(0.318077), + parentcoefs = list("b"=c(b=0.3059395), + "c"=c(c=0.5555)), + std = c(0.05773503)) a ~ dnorm(mu.a, precision.a) # Gaussian response mu.a <- 0.318077 + 0.3059395*b + 0.5555*c # Linear regression precision.a <- inverse(0.05773503) # precision tau = 1/standard_dev > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("gauss_bugs", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("infoDag") > ### * infoDag > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: infoDag > ### Title: Compute standard information for a DAG. > ### Aliases: infoDag > ### Keywords: utilities > > ### ** Examples > > ## Creating a dag: > dag <- matrix(c(0,0,0,0, 1,0,0,0, 1,1,0,1, 0,1,0,0), nrow = 4, ncol = 4) > dist <- list(a="gaussian", b="gaussian", c="gaussian", d="gaussian") > colnames(dag) <- rownames(dag) <- names(dist) > > infoDag(dag) $n.nodes [1] 4 $n.arcs [1] 5 $mb.average [1] 3 $nh.average [1] 2.5 $parent.average [1] 1.25 $children.average [1] 1.25 > plot(createAbnDag(dag)) dev.new(): using pdf(file="Rplots1.pdf") > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("infoDag", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("linkStrength") > ### * linkStrength > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: linkStrength > ### Title: Returns the strengths of the edge connections in a Bayesian > ### Network learned from observational data > ### Aliases: linkStrength > ### Keywords: utilities > > ### ** Examples > > # Gaussian > N <- 1000 > mydists <- list(a="gaussian", + b="gaussian", + c="gaussian") > a <- rnorm(n = N, mean = 0, sd = 1) > b <- 1 + 2*rnorm(n = N, mean = 5, sd = 1) > c <- 2 + 1*a + 2*b + rnorm(n = N, mean = 2, sd = 1) > mydf <- data.frame("a" = a, + "b" = b, + "c" = c) > mycache.mle <- buildScoreCache(data.df = mydf, + data.dists = mydists, + method = "mle", + max.parents = 2) > mydag.mp <- mostProbable(score.cache = mycache.mle, verbose = FALSE) > linkstr <- linkStrength(dag = mydag.mp$dag, + data.df = mydf, + data.dists = mydists, + method = "ls", + discretization.method = "sturges") > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("linkStrength", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("makebugs") > ### * makebugs > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: makebugs > ### Title: Make BUGS model from fitted DAG > ### Aliases: makebugs > ### Keywords: internal utilities > > ### ** Examples > > mydists <- list(a="gaussian", + b="multinomial", + c="binomial", + d="poisson") > mydag <- matrix(0, 4, 4, byrow = TRUE, + dimnames = list(c("a", "b", "c", "d"), + c("a", "b", "c", "d"))) > mydag[2,1] <- mydag[3,2] <- mydag[4,3] <- 1 > # plotAbn(mydag, data.dists = mydists) > mycoefs <- list("a"=matrix(-6.883383e-17, byrow = TRUE, + dimnames = list(NULL, + "a|intercept")), + "b"=matrix(c(2.18865, 3.133928, 3.138531, 1.686432, 3.134161, 5.052104), + nrow= 1, byrow = TRUE, + dimnames = list(c(NULL), + c("b|intercept.2", "b|intercept.3", "b|intercept.4", + "a.2", "a.3", "a.4"))), + "c"=matrix(c(1.11, 2.22, 3.33, 4.44, 5.55), + nrow= 1, byrow = TRUE, + dimnames = list(c(NULL), + c("c|intercept", "b1", "b2", "b3", "b4"))), + "d"=matrix(c(3.33, 4.44), + nrow= 1, byrow = TRUE, + dimnames = list(c(NULL), + c("d|intercept", "c")))) > mymse <- c("a"=0,"b"=1,"c"=2,"d"=3) > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("makebugs", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("mb") > ### * mb > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: mb > ### Title: Compute the Markov blanket > ### Aliases: mb > ### Keywords: utilities > > ### ** Examples > > ## Defining distribution and dag > dist <- list(a="gaussian", b="gaussian", c="gaussian", d="gaussian", + e="binomial", f="binomial") > dag <- matrix(c(0,1,1,0,1,0, + 0,0,1,1,0,1, + 0,0,0,0,0,0, + 0,0,0,0,0,0, + 0,0,0,0,0,1, + 0,0,0,0,0,0), nrow = 6L, ncol = 6L, byrow = TRUE) > colnames(dag) <- rownames(dag) <- names(dist) > > mb(dag, node = "b") [1] "a" "c" "d" "f" "e" > mb(dag, node = c("b","e")) [1] "a" "c" "d" "f" "e" "b" > > mb(~a|b:c:e+b|c:d:f+e|f, node = "e", data.dists = dist) [1] "a" "f" "b" "c" > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("mb", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("miData") > ### * miData > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: miData > ### Title: Empirical Estimation of the Entropy from a Table of Counts > ### Aliases: miData > ### Keywords: utilities > > ### ** Examples > > ## Generate random variable > Y <- rnorm(n = 100, mean = 0, sd = 2) > X <- rnorm(n = 100, mean = 5, sd = 2) > > dist <- list(Y="gaussian", X="gaussian") > > miData(discretization(data.df = cbind(X,Y), data.dists = dist, + discretization.method = "fd", nb.states = FALSE), + method = "mi.raw") [1] 0 > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("miData", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("mostProbable") > ### * mostProbable > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: mostProbable > ### Title: Find most probable DAG structure > ### Aliases: mostProbable > ### Keywords: models > > ### ** Examples > > ############################## > ## Example 1 > ############################## > > ## This data comes with `abn` see ?ex1.dag.data > mydat <- ex1.dag.data[1:5000, c(1:7,10)] > > ## Setup distribution list for each node: > mydists <- list(b1="binomial", p1="poisson", g1="gaussian", b2="binomial", + p2="poisson", b3="binomial", g2="gaussian", g3="gaussian") > > ## Parent limits, for speed purposes quite specific here: > max.par <- list("b1"=0,"p1"=0,"g1"=1,"b2"=1,"p2"=2,"b3"=3,"g2"=3,"g3"=2) > ## Now build cache (no constraints in ban nor retain) > mycache <- buildScoreCache(data.df=mydat, data.dists=mydists, max.parents=max.par) > > ## Find the globally best DAG: > mp.dag <- mostProbable(score.cache=mycache) Step1. completed max alpha_i(S) for all i and S Total sets g(S) to be evaluated over: 256 > myres <- fitAbn(object=mp.dag,create.graph=TRUE) > myres$mlik [1] -38274.7 > plot(myres) # plot the best model > > ## last line is essentially equivalent to: > # plotAbn(dag=mp.dag$dag, data.dists=mydists, fitted.values=myres$modes) > > ## Fit the known true DAG (up to variables 'b4' and 'b5'): > true.dag <- matrix(data=0, ncol=8, nrow=8) > colnames(true.dag) <- rownames(true.dag) <- names(mydists) > > true.dag["p2",c("b1","p1")] <- 1 > true.dag["b3",c("b1","g1","b2")] <- 1 > true.dag["g2",c("p1","g1","b2")] <- 1 > true.dag["g3",c("g1","b2")] <- 1 > > fitAbn(dag=true.dag, data.df=mydat, data.dists=mydists)$mlik [1] -38274.7 > > ## Not run: > ##D ################################################################# > ##D ## Example 2 - models with random effects > ##D ################################################################# > ##D > ##D ## This data comes with abn see ?ex3.dag.data > ##D # mydat <- ex3.dag.data[,c(1:4,14)] > ##D # mydists <- list(b1="binomial", b2="binomial", b3="binomial", b4="binomial") > ##D > ##D ## This takes a few seconds and requires INLA: > ##D # mycache.mixed <- buildScoreCache(data.df=mydat, data.dists=mydists, > ##D # group.var="group", cor.vars=c("b1","b2","b3","b4"), > ##D # max.parents=2, which.nodes=c(1:4)) > ##D > ##D ## Find the most probable DAG: > ##D # mp.dag <- mostProbable(score.cache=mycache.mixed) > ##D > ##D ## and get goodness of fit: > ##D # fitAbn(object=mp.dag, data.df=mydat, data.dists=mydists, > ##D # group.var="group", cor.vars=c("b1","b2","b3","b4"))$mlik > ## End(Not run) > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("mostProbable", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("plot.abnDag") > ### * plot.abnDag > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: plot.abnDag > ### Title: Plots DAG from an object of class 'abnDag' > ### Aliases: plot.abnDag > > ### ** Examples > > mydag <- createAbnDag(dag = ~a+b|a, data.df = data.frame("a"=1, "b"=1)) > plot(mydag) dev.new(): using pdf(file="Rplots2.pdf") > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("plot.abnDag", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("plotAbn") > ### * plotAbn > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: plotAbn > ### Title: Plot an ABN graphic > ### Aliases: plotAbn > ### Keywords: hplot models > > ### ** Examples > > #Define distribution list > dist <- list(a="gaussian", b="gaussian", c="gaussian", d="gaussian", e="binomial", f="binomial") > > #Define a matrix formulation > edge.strength <- matrix(c(0,0.5,0.5,0.7,0.1,0, + 0,0,0.3,0.1,0,0.8, + 0,0,0,0.35,0.66,0, + 0,0,0,0,0.9,0, + 0,0,0,0,0,0.8, + 0,0,0,0,0,0),nrow = 6L, ncol = 6L, byrow = TRUE) > > ## Naming of the matrix > colnames(edge.strength) <- rownames(edge.strength) <- names(dist) > > ## Plot form a matrix > plotAbn(dag = edge.strength, data.dists = dist) > > ## Edge strength > plotAbn(dag = ~a|b:c:d:e+b|c:d:f+c|d:e+d|e+e|f, data.dists = dist, edge.strength = edge.strength) > > ## Plot from a formula for a different DAG! > plotAbn(dag = ~a|b:c:e+b|c:d:f+e|f, data.dists = dist) > > ## Markov blanket > plotAbn(dag = ~a|b:c:e+b|c:d:f+e|f, data.dists = dist, markov.blanket.node = "e") > > ## Change col for all edges > tmp <- plotAbn(dag = ~a|b:c:e+b|c:d:f+e|f, data.dists = dist, plot=FALSE) > graph::edgeRenderInfo(tmp) <- list(col="blue") > Rgraphviz::renderGraph(tmp) > > ## Change lty for individual ones. Named vector is necessary > tmp <- plotAbn(dag = ~a|b:c:e+b|c:d:f+e|f, data.dists = dist, plot=FALSE) > edgelty <- rep(c("solid","dotted"), c(6,1)) > names(edgelty) <- names( graph::edgeRenderInfo(tmp, "col")) > graph::edgeRenderInfo(tmp) <- list(lty=edgelty) > Rgraphviz::renderGraph(tmp) > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("plotAbn", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("pois_bugs") > ### * pois_bugs > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: pois_bugs > ### Title: Bugs code for Poisson response > ### Aliases: pois_bugs pois_bugsGroup > ### Keywords: internal utilities > > ### ** Examples > > pois_bugs(nodename = "a", + parentnames = c("b", "c"), + nodesintercept = c(0.318077), + parentcoefs = list("b"=c(b=0.3059395), + "c"=c(c=0.5555))) a ~ dpois(lambda.a) # Poisson response log(lambda.a) <- 0.318077 + 0.3059395*b + 0.5555*c # logistic regression > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("pois_bugs", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("print.abnCache") > ### * print.abnCache > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: print.abnCache > ### Title: Print objects of class 'abnCache' > ### Aliases: print.abnCache > > ### ** Examples > > ## Subset of the build-in dataset, see ?ex0.dag.data > mydat <- ex0.dag.data[,c("b1","b2","g1","g2","b3","g3")] ## take a subset of cols > > ## setup distribution list for each node > mydists <- list(b1="binomial", b2="binomial", g1="gaussian", + g2="gaussian", b3="binomial", g3="gaussian") > > # Structural constraints > # ban arc from b2 to b1 > # always retain arc from g2 to g1 > > ## parent limits > max.par <- list("b1"=2, "b2"=2, "g1"=2, "g2"=2, "b3"=2, "g3"=2) > > ## now build the cache of pre-computed scores accordingly to the structural constraints > > res.c <- buildScoreCache(data.df=mydat, data.dists=mydists, + dag.banned= ~b1|b2, dag.retained= ~g1|g2, max.parents=max.par) > print(res.c) Number of nodes in the network: 6. Distribution of the marginal likelihood: Min. 1st Qu. Median Mean 3rd Qu. Max. -455 -454 -225 -328 -223 -213 > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("print.abnCache", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("print.abnDag") > ### * print.abnDag > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: print.abnDag > ### Title: Print objects of class 'abnDag' > ### Aliases: print.abnDag > > ### ** Examples > > mydag <- createAbnDag(dag = ~a+b|a, data.df = data.frame("a"=1, "b"=1)) > print(mydag) a b a 0 0 b 1 0 Class 'abnDag'. > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("print.abnDag", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("scoreContribution") > ### * scoreContribution > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: scoreContribution > ### Title: Compute the score's contribution in a network of each > ### observation. > ### Aliases: scoreContribution > ### Keywords: utilities > > ### ** Examples > > ## Use a subset of a built-in simulated data set > mydat <- ex1.dag.data[,c("b1","g1","p1")] > > ## setup distribution list for each node > mydists <- list(b1="binomial", g1="gaussian", p1="poisson") > > ## now build cache > mycache <- buildScoreCache(data.df = mydat, data.dists = mydists, max.parents = 2, method = "mle") > > ## Find the globally best DAG > mp.dag <- mostProbable(score.cache=mycache, score="bic", verbose = FALSE) > > out <- scoreContribution(object = mp.dag) > > ## Observations contribution per network node > boxplot(out$bic) > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("scoreContribution", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("searchHeuristic") > ### * searchHeuristic > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: searchHeuristic > ### Title: A family of heuristic algorithms that aims at finding high > ### scoring directed acyclic graphs > ### Aliases: searchHeuristic > > ### ** Examples > > ## Not run: > ##D ############################################## > ##D ## example: use built-in simulated data set > ##D ############################################## > ##D > ##D mydat <- ex1.dag.data ## this data comes with abn see ?ex1.dag.data > ##D > ##D ## setup distribution list for each node > ##D mydists<-list(b1="binomial", p1="poisson", g1="gaussian", b2="binomial", > ##D p2="poisson", b3="binomial", g2="gaussian", b4="binomial", > ##D b5="binomial", g3="gaussian") > ##D > ##D mycache <- buildScoreCache(data.df = mydat, data.dists = mydists, max.parents = 2) > ##D > ##D ## Now peform 10 greedy searches > ##D heur.res <- searchHeuristic(score.cache = mycache, data.dists = mydists, > ##D start.dag = "random", num.searches = 10, > ##D max.steps = 50) > ##D > ##D ## Plot (one) dag > ##D plotAbn(heur.res$dags[[1]], data.dists = mydists) > ## End(Not run) > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("searchHeuristic", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("simulateAbn") > ### * simulateAbn > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: simulateAbn > ### Title: Simulate data from a fitted additive Bayesian network. > ### Aliases: simulateAbn > > ### ** Examples > > df <- FCV[, c(12:15)] > mydists <- list(Outdoor="binomial", + Sex="multinomial", + GroupSize="poisson", + Age="gaussian") > > ## buildScoreCache -> mostProbable() -> fitAbn() > suppressWarnings({ + mycache.mle <- buildScoreCache(data.df = df, data.dists = mydists, method = "mle", + adj.vars = NULL, cor.vars = NULL, + dag.banned = NULL, dag.retained = NULL, + max.parents = 1, + which.nodes = NULL, defn.res = NULL) + }) # ignore non-convergence warnings > mp.dag.mle <- mostProbable(score.cache = mycache.mle, verbose = FALSE) > myres.mle <- fitAbn(object = mp.dag.mle, method = "mle") > > myres.sim <- simulateAbn(object = myres.mle, + run.simulation = TRUE, + bugsfile = NULL, + verbose = FALSE) > str(myres.sim) 'data.frame': 1000 obs. of 4 variables: $ Age : num -0.796 -0.6201 0.0929 -0.1999 -0.9388 ... $ GroupSize: num 4 4 2 5 3 3 3 2 3 4 ... $ Outdoor : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 2 2 ... $ Sex : Factor w/ 4 levels "1","2","3","4": 3 1 4 1 3 1 4 2 3 1 ... > prop.table(table(myres.sim$Outdoor)) 0 1 0.4 0.6 > prop.table(table(df$Outdoor)) 0 1 0.4233333 0.5766667 > > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("simulateAbn", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("simulateDag") > ### * simulateDag > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: simulateDag > ### Title: Simulate a DAG with with arbitrary arcs density > ### Aliases: simulateDag > ### Keywords: utilities > > ### ** Examples > > simdag <- simulateDag(node.name = c("a", "b", "c", "d"), + edge.density = 0.5, + data.dists = list(a = "gaussian", + b = "binomial", + c = "poisson", + d = "multinomial")) > > ## Example using Ozon entries: > dist <- list(Ozone="gaussian", Solar.R="gaussian", Wind="gaussian", + Temp="gaussian", Month="gaussian", Day="gaussian") > out <- simulateDag(node.name = names(dist), data.dists = dist, edge.density = 0.8) > plot(out) dev.new(): using pdf(file="Rplots3.pdf") > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("simulateDag", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("summary.abnDag") > ### * summary.abnDag > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: summary.abnDag > ### Title: Prints summary statistics from an object of class 'abnDag' > ### Aliases: summary.abnDag > > ### ** Examples > > mydag <- createAbnDag(dag = ~a+b|a, data.df = data.frame("a"=1, "b"=1)) > summary(mydag) $n.nodes [1] 2 $n.arcs [1] 1 $mb.average [1] 1 $nh.average [1] 1 $parent.average [1] 0.5 $children.average [1] 0.5 > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("summary.abnDag", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("toGraphviz") > ### * toGraphviz > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: toGraphviz > ### Title: Convert a DAG into graphviz format > ### Aliases: toGraphviz > > ### ** Examples > > ## On a typical linux system the following code constructs a nice > ## looking pdf file 'graph.pdf'. > ## Not run: > ##D ## Subset of a build-in dataset > ##D mydat <- ex0.dag.data[,c("b1","b2","b3","g1","b4","p2","p4")] > ##D > ##D ## setup distribution list for each node > ##D mydists <- list(b1="binomial", b2="binomial", b3="binomial", > ##D g1="gaussian", b4="binomial", p2="poisson", > ##D p4="poisson") > ##D ## specify DAG model > ##D mydag <- matrix(c( 0,1,0,0,1,0,0, # > ##D 0,0,0,0,0,0,0, # > ##D 0,1,0,0,1,0,0, # > ##D 1,0,0,0,0,0,1, # > ##D 0,0,0,0,0,0,0, # > ##D 0,0,0,1,0,0,0, # > ##D 0,0,0,0,1,0,0 # > ##D ), byrow=TRUE, ncol=7) > ##D > ##D colnames(mydag) <- rownames(mydag) <- names(mydat) > ##D > ##D ## create file for processing with graphviz > ##D outfile <- paste(tempdir(), "graph.dot", sep="/") > ##D toGraphviz(dag=mydag, data.df=mydat, data.dists=mydists, outfile=outfile) > ##D ## and then process using graphviz tools e.g. on linux > ##D # system(paste( "dot -Tpdf -o graph.pdf", outfile)) > ##D # system("evince graph.pdf") > ##D > ##D ## Example using data with a group variable where b1<-b2 > ##D mydag <- matrix(c(0,1, 0,0), byrow=TRUE, ncol=2) > ##D > ##D colnames(mydag) <- rownames(mydag) <- names(ex3.dag.data[,c(1,2)]) > ##D ## specific distributions > ##D mydists <- list(b1="binomial", b2="binomial") > ##D > ##D ## create file for processing with graphviz > ##D outfile <- paste0(tempdir(), "/graph.dot") > ##D toGraphviz(dag=mydag, data.df=ex3.dag.data[,c(1,2,14)], data.dists=mydists, > ##D group.var="group", > ##D outfile=outfile, directed=FALSE) > ##D ## and then process using graphviz tools e.g. on linux: > ##D # pdffile <- paste0(tempdir(), "/graph.pdf")" > ##D # system(paste("dot -Tpdf -o ", pdffile, outfile)) > ##D # system(paste("evince ", pdffile, " &") ## or some other viewer > ## End(Not run) > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("toGraphviz", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > cleanEx() > nameEx("var33") > ### * var33 > > flush(stderr()); flush(stdout()) > > base::assign(".ptime", proc.time(), pos = "CheckExEnv") > ### Name: var33 > ### Title: simulated dataset from a DAG comprising of 33 variables > ### Aliases: var33 > ### Keywords: datasets > > ### ** Examples > > ## Constructing the DAG of the dataset: > dag33 <- matrix(0, 33, 33) > dag33[2,1] <- 1 > dag33[4,3] <- 1 > dag33[6,4] <- 1; dag33[6,7] <- 1 > dag33[5,6] <- 1 > dag33[7,8] <- 1 > dag33[8,9] <- 1 > dag33[9,10] <- 1 > dag33[11,10] <- 1; dag33[11,12] <- 1; dag33[11,19] <- 1; > dag33[14,13] <- 1 > dag33[17,16] <- 1; dag33[17,20] <- 1 > dag33[15,14] <- 1; dag33[15,21] <- 1 > dag33[18,20] <- 1 > dag33[19,20] <- 1 > dag33[21,20] <- 1 > dag33[22,21] <- 1 > dag33[23,21] <- 1 > dag33[24,23] <- 1 > dag33[25,23] <- 1; dag33[25,26] <- 1 > dag33[26,20] <- 1 > dag33[33,31] <- 1 > dag33[33,31] <- 1 > dag33[32,21] <- 1; dag33[32,31] <- 1; dag33[32,29] <- 1 > dag33[30,29] <- 1 > dag33[28,27] <- 1; dag33[28,29] <- 1; dag33[28,31] <- 1 > > > > > base::assign(".dptime", (proc.time() - get(".ptime", pos = "CheckExEnv")), pos = "CheckExEnv") > base::cat("var33", base::get(".format_ptime", pos = 'CheckExEnv')(get(".dptime", pos = "CheckExEnv")), "\n", file=base::get(".ExTimings", pos = 'CheckExEnv'), append=TRUE, sep="\t") > ### *