#***********************IMPORTANT****************************************************** # if a multithreaded backend openBLAS for matrix multiplications # is installed on your machine, please force it to use 1 thread explicitly # library(RhpcBLASctl) # blas_set_num_threads(1) # omp_set_num_threads(1) #***********************IMPORTANT****************************************************** data.example = read.csv("https://raw.githubusercontent.com/aliaksah/EMJMCMC2016/master/supplementaries/BGNLM/abalone%20age/abalone.data",header = FALSE) data.example$MS=as.integer(data.example$V1=="M") data.example$FS=as.integer(data.example$V1=="F") data.example$V1=data.example$V9 data.example$V9 = NULL names(data.example) = c("Age","Length", "Diameter","Height","WholeWeight","ShuckedWeight","VisceraWeight","ShellWeight","Male","Femele") set.seed(040590) teid = read.csv("https://raw.githubusercontent.com/aliaksah/EMJMCMC2016/master/supplementaries/BGNLM/abalone%20age/teid.csv",sep = ";")[,2] test = data.example[teid,] data.example = data.example[-teid,] test_that("Input dataset is still roughly the same", { expect_equal(sum(test$Age), 9801) }) #specify the initial formula formula1 = as.formula(paste(colnames(test)[1],"~ 1 +",paste0(colnames(test)[-1],collapse = "+"))) #define the number or CPUs M = 2 #define the size of the simulated samples NM= 100 #define \k_{max} + 1 from the paper compmax = 16 #define treshold for preinclusion of the tree into the analysis th=(10)^(-5) #define a final treshold on the posterior marginal probability for reporting a tree thf=0.05 #specify tuning parameters of the algorithm for exploring DBRM of interest #notice that allow_offsprings=3 corresponds to the GMJMCMC runs and # g = function(x) x results=array(0,dim = c(2,100,5)) res1_seq <- suppressMessages(runemjmcmc( formula = formula1, data = data.example, estimator = estimate.gamma.cpen, estimator.args = list(data = data.example), recalc_margin = 249, save.beta = TRUE, interact = TRUE, outgraphs = FALSE, relations = c("to25", "expi", "logi", "to35", "troot", "sigmoid"), relations.prob = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1), interact.param = list( allow_offsprings = 3, mutation_rate = 250, last.mutation = 100, max.tree.size = 5, Nvars.max =15, p.allow.replace = 0.9, p.allow.tree = 0.01, p.nor = 0.9, p.and = 0.9 ), n.models = 1000, unique = TRUE, max.cpu = 4, max.cpu.glob = 4, create.table = FALSE, create.hash = TRUE, pseudo.paral = TRUE, burn.in = 100, print.freq = 0L, advanced.param = list( max.N.glob = 10L, min.N.glob = 5L, max.N = 3L, min.N = 1L, printable = FALSE ) )) test_that("runemjmcmc outputs with correct elements", { expect_named(res1_seq, c("p.post", "m.post", "s.mass")) expect_length(res1_seq[["p.post"]], 15L) expect_gt(length(res1_seq[["m.post"]]), 1000L) expect_length(res1_seq[["s.mass"]], 1L) expect_lte(mean(res1_seq[["p.post"]]), 1) expect_lte(mean(res1_seq[["m.post"]]), 1) expect_equal(res1_seq[["s.mass"]], 0) }) set.seed(2915224) res1_par <- suppressMessages(pinferunemjmcmc( n.cores = M, report.level = 0.2, num.mod.best = NM, simplify = TRUE, predict = TRUE, test.data = as.data.frame(test), link.function = g, runemjmcmc.params = list( formula = formula1, data = data.example, estimator = estimate.gamma.cpen, estimator.args = list(data = data.example), recalc_margin = 1000, save.beta = TRUE, interact = TRUE, outgraphs = FALSE, relations = c("to25", "expi", "logi", "to35", "troot", "sigmoid"), relations.prob = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1), interact.param = list( allow_offsprings = 3, mutation_rate = 250, last.mutation = 100, max.tree.size = 5, Nvars.max =15, p.allow.replace = 0.9, p.allow.tree = 0.01, p.nor = 0.9, p.and = 0.9 ), n.models = 500, unique = TRUE, max.cpu = 1L, max.cpu.glob = 1L, create.table = FALSE, create.hash = TRUE, pseudo.paral = TRUE, burn.in = 100, print.freq = 0L, advanced.param = list( max.N.glob = 10L, min.N.glob = 5L, max.N = 3L, min.N = 1L, printable = FALSE ) ) )) test_that("pinferunemjmcmc outputs with correct elements", { expect_named( res1_par, c("feat.stat", "predictions", "allposteriors", "threads.stats") ) expect_gte(ncol(res1_par[["feat.stat"]]), 2L) expect_lte(ncol(res1_par[["feat.stat"]]), 2L) expect_equal(length(res1_par[["predictions"]]), 1000L) expect_equal(length(res1_par[["allposteriors"]]), 2L) expect_gte(length(res1_par[["threads.stats"]]), 1L) expect_equal(length(res1_par[["threads.stats"]]), M) expect_equal(mean(res1_par[["predictions"]]), 9.9, tolerance = 1e-1) if (length(res1_par[["threads.stats"]]) == 5) { expect_equal( res1_par[["threads.stats"]][[1]][["cterm"]], -6573, tolerance = 1e-1 ) expect_gte(res1_par[["threads.stats"]][[2]][["preds"]][1], 6) expect_gte(res1_par[["threads.stats"]][[3]][["p.post"]][1], .7) expect_lte(res1_par[["threads.stats"]][[4]][["post.populi"]], .03) expect_equal( res1_par[["threads.stats"]][[5]][["mliks"]][1], -7762, tolerance = 1e-1 ) } }) if (interactive()) { J <- seq_len(1L) # 1L to save time, but results are basically the same for 10L for(j in J) { #specify the initial formula set.seed(j) res1 <- suppressMessages( pinferunemjmcmc( n.cores = M, report.level = 0.2, num.mod.best = NM, simplify = TRUE, predict = TRUE, test.data = as.data.frame(test), link.function = g, runemjmcmc.params = list( formula = formula1, data = data.example, estimator = estimate.gamma.cpen, estimator.args = list(data = data.example), recalc_margin = 249, save.beta = TRUE, interact = TRUE, outgraphs = FALSE, relations = c("to25", "expi", "logi", "to35", "troot", "sigmoid"), relations.prob = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1), interact.param = list( allow_offsprings = 3, mutation_rate = 250, last.mutation = 100, max.tree.size = 5, Nvars.max =15, p.allow.replace = 0.9, p.allow.tree = 0.01, p.nor = 0.9, p.and = 0.9 ), n.models = 1000, unique = TRUE, max.cpu = 4, max.cpu.glob = 4, create.table = FALSE, create.hash = TRUE, pseudo.paral = TRUE, burn.in = 100, print.freq = 0L, advanced.param = list( max.N.glob = 10L, min.N.glob = 5L, max.N = 3L, min.N = 1L, printable = FALSE ) ) ) ) results[1,j,1]= sqrt(mean((res1$threads.stats[[1]]$preds - test$Age)^2)) results[1,j,2]= sqrt(mean(abs(res1$threads.stats[[1]]$preds - test$Age))) results[1,j,3] = cor(res1$threads.stats[[1]]$preds,test$Age) results[2,j,1]= sqrt(mean((res1$predictions - test$Age)^2)) results[2,j,2]= sqrt(mean(abs(res1$predictions - test$Age))) results[2,j,3] = cor(res1$predictions,test$Age) posteriorshell_1.csv <- res1$feat.stat # replaced write.csv() #print the run's metrics and clean the results resultsrun_1.csv <- results[, j, ] # replaces write.csv() sqrt_mean_preds <- sqrt(mean((res1$predictions - test$Age)^2)) test_that(paste("Iteration", j, "results look ok"), { expect_named( res1, c("feat.stat", "predictions", "allposteriors", "threads.stats") ) expect_equal(mean(res1[["predictions"]]), 9.9, tolerance = 1e-1) expect_equal(dim(results), c(2L, 100L, 5L)) expect_equal(results[1, 1:10, 1:3][1, ], c(2.04, 1.23, 0.75), tolerance = 1e-1) expect_equal(results[2, 1:10, 1:3][1, ], c(2.01, 1.22, 0.76), tolerance = 1e-1) expect_gte(nrow(posteriorshell_1.csv), 10L) expect_lte(nrow(posteriorshell_1.csv), 15L) expect_equal(ncol(posteriorshell_1.csv), 2L) expect_equal(dim(resultsrun_1.csv), c(2L, 5L)) expect_equal(mean(resultsrun_1.csv), .80, tolerance = 1e-1) expect_equal(sqrt_mean_preds, 2.01, tolerance = 1e-1) }) } for(j in J) { tmp = resultsrun_1.csv results[1,j,1]= tmp[1,2] results[1,j,2]= tmp[1,3] results[1,j,3] = tmp[1,4] results[2,j,1]= tmp[2,2] results[2,j,2]= tmp[2,3] results[2,j,3] = tmp[2,4] } #make the joint summary of the runs, including min, max and medians of the performance metrics summary.results=array(data = NA,dim = c(2,15)) for(i in 1:2) { for(j in 1:5) { summary.results[i,(j-1)*3+1]=min(results[i,,j]) summary.results[i,(j-1)*3+2]=median(results[i,,j]) summary.results[i,(j-1)*3+3]=max(results[i,,j]) } } summary.results=as.data.frame(summary.results) test_that("summary has reasonable values", { expect_gte(mean(summary.results[, "V3"]), 1) expect_lte(mean(summary.results[, "V3"]), 2) expect_gte(mean(summary.results[, "V6"]), 0) expect_lte(mean(summary.results[, "V6"]), 1) }) featgmj = hash::hash() for (j in J) { tmpf <- res1$feat.stat for (feat in as.character(tmpf[[1]])) { if (!hash::has.key(hash = featgmj, key = feat)) { featgmj[[feat]] = as.numeric(1) } else { featgmj[[feat]] = as.numeric(featgmj[[feat]]) + 1 } } } tmp <- simplifyposteriors( X = data.example, posteriors = data.frame(hash::keys(featgmj), hash::values(featgmj)), resp = "Age" ) test_that("Final result is achieved", { expect_s3_class(tmp, "data.frame") expect_named(tmp, c("posterior", "tree")) }) }