x = matrix(c(1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,0,1,0,0,0,0,1,0,1,0,1,1,0,1,1,1,0,1,1,0,1,1,1,1,1, 2,4.7,9.9,3,9.9,7.5,2.9,2.6,2.4,2.8,3.7,0.6,9.8 ,0.7,5.8,6.3,6.2,5.1,8.1,4,3.2,1.6,7.9,8.9,4.6,2.2,7.3,8.5,3.1,4.6,4.7,4.2,9.6 ,3,3.1,9.6,9,2.6,1.5,1.9,4.7,6.3,6.1,7.2,1,6.4,7.1,1.6,2.9,8.1,2,1,9,2.4,4.9,0.4,6.8,1,8.4,7.8,0.2,4.5,9.5,7.8,8,9.5 ,0.8,6.9,7.6,6.2,7.3,3.7,3.7,6.2,9.1 ,6.9,5.5,0.7,7.9,8.1 ,5.1,5.7,5.6,5,2.6,3.7,5.2,7.3,8.1,2.7,2.8,8,4.2,3.7,1.7,0.3,6.8,8.3 ,9.1 ,1.5,5.6,0.2,4,6.7,0.8,8.3 ,7.4,2.8,3.2,7.9,4.9,6.1,3.2,0.2,2.7,4.5,8.9,4.5,5.2,4.3,4.3,3.6,8.8 ,4.9,3.4,9.4,9.5,9.9,4.5,9,0.2,7.1,1.6,6.3,2.6,4.8,1.7,8.9,4.2,4.3,6.3,2.5,4.1,0.2,7.8,6.7,4.8,3.9,1,7.2,1.4,2.2,3.1,4.9,7.3,2,1.5,9.1,3.3,1.4,8.8 ,7.4,8.3 ,4.2,7.7,7.8,4.5,6.9,7.6,7.8,2.8,0,0.3,1,7.9,1.8,9.5,8.6,7.1,6.7,4.6,5.6,8.9,7.3,7.6,8,4.1,6.7,9.1,0.6,3.4,2.6,5.6,2.8,5.5,8.9,4.1,9.8 ,2.4,10,2.2,7.9,6.3,3.2,4.4,1.3,1.9,2.1,6.9,5.5,2.5,2.9,2,6.6,6.9,3.3,8.5, 1.1,6.6,9.8 ,3.7,7.8,7.8,7,4.5,0.2,4.4,7.5,5.5,9,7.7,9.6 ,6.2,7.1,4.5,6.7,9.3 ,5.7,2.8,6.3,6,2,9.6 ,3.6,8.1,0.5,0.6,9,5.9,2.2,0.4,3,3.2,2.6,6.3,4.4,0.1,9.3 ,8.6 ,4.2,6.9,4.4,6.6,7.9,7.8,2.8,6.7,4.8,2.1,0.2,4,2.7,8.1 ,0.3,3.8,1.7,8.1,4.2,8.9,4,9.6,5.4,1.6,3.3,8.6,4.7,6.4,3.6,0.8,0.1,6.6,10,7,8.6,9.4,1.6,5.3,1.8,5.8,5.3,3.9,5.8,6.9,5.7,8.1 ,7.7,4.8,2.9,8.9,4.6,6.3,5.9,0.3,4.7,4.6,1.4,2.8,3.4,9.3 ,3.1,6.4,3.1,4,2.1,3.6,4,5.7,9.6,0.9,2.2,6.4,6.3,8.4,3.6,0.1,3.8, 8.6,5.9,5.1,0.9,9.9,3.5,4.7,6.3,3.8,0.7,9.6,2.3,4,8,8.8 ,1.9,2.4,2.7,8.9,0.8,1.7,4.8,2.1,4.2,2,5.2,2,6.7,8.1,9.9,7.4,6.7,4.6,0.8,2.9,6.8,8.3 ,8.4,3.3,2.8,8.4,7.7,8.1 ,2.9,4.4,3.4,5.5,3,3.3,4.5,3.9,5.8,4.7,9.4,2.1,3.8,3.8,0.9,8.9,4.7,1.6,8,8.1 ,3.9,3.3,2.9,7.7,3.3,5.1,2,3.6,4,9.4,6.1,2.3,4,6.6,9.1 ,9.8 ,8.6,9.6,9,3.4,3.6,6,1.7,2,5.3,8.6 ,9.1,3,7.4,9.5,8.6,1.3,0.4,9.6,4.3,9.6,1.5,5.7,2.3,7.9,1.3,0.8,9.9,6.4,8.6 ,7,8.1,7.2,4.6,1.1,0.3,5,2.5,4.6,4.5,7.8,1.6,1,2.4,4.6,6,9,6.2,4.5,0.8,4.8,1.9,4.4,2.2,9.6,8.3 ,2.9,7.4,1.2,2,2.8,6.2,5.3,3.3,8.3 ,0.6,1.9,8.4,6.3,9.1 ,3.5,8.8 ,5.7,5.1,6.5,8,0.5,3.3,7.7,1.8,2.3,4.6,5.3,5.5,3.9,5.1,3.6,5,5,2.8,7.5,7.7,1.4,2.6,1.7,1.7,6.3,7.7,4.2,4,4.2,0.6,9.9,5.4,7.3,6,6.2,9.4,5.8,1.6,7.8,9.6,9.6 ,9.4,9.6 ,8.1 ,0.8,9.8 ,9.3 ,3.2,7,6.9,3.4,3.5,1.7,5.9,7.4,3.1,3.4,4.8,1.7,2.8,9.1,3.7,7.6,1.8,4.7,10,8.1 ,7.9,8.9,4.3,9.1 ,3.5,5,7.8,8,2,2.9,3.5,0.8,6.6,8.4,1,7,7.3,7.2,6.4,3.7,1.5,8.6,3.1,6,0.9,9.9,5,0.5,6,1.7,7.9,3.5,0.9,1.6,1.4,4.3,2.7,7.7,9.9,8.5,3.9,7.9,5.4,0.2,9.8 ,4,5.1,4.4,8.1 ,6.6,0.3,4.5,8.3 ,6.3,6.7,9.3 ,5.8,8.3 ,8.3 ,3.5,10,7.4,1.9,0.3,5,0,10,5,6.9,6.6,4.3,9.6,7.3,8,9.8 ,3.6,3.2,2.9,6.2,6.9, 4.8,0.6,2.3,9.4,1.4,3.4,5.2,4.6,4.9,2.1,8.5,4.4,9.4,6.4,8.5,6.5,6,5.1,5.2,9.8 ,4,0.1,7.5,1.5,2.3,0.8,7.2,0.3,6.5,3.2,6.1,2.9,2.1,1.3,8.6 ,9.4,5,2.5,8.9,1.6,4.8,5.4,0.1,4.9,8.1,9.9,0.1,1,9.5,3.3,7.6,8.1,7.6,6.3,2.9,4.7,3.1,0.8,7.1,0.6,6.2,4.9,8,1.7,6,5.6,2.3,8.1,7.7,9.1,1.5,3.5,6.2,3.6,5,8.6,10,2.6,3.4,2.4,0.6,6.3,0.8,3.2,5.6,8,5.3,5.8,8.4,4,6.3,4.2,5.4,8.3 ,5.4,9.9,7.4,8.4,8.9,2.5,7.6,2,3.3,0.4,1.8,4.1,1.8,4,1.3,4.3,4.7,0.5,7.4,4.7,10,9.9,4.1,4.8,6.1,7.9,0.3,6.6,9.6 ,3.7,2.8,7.2,8.1,8,8.1 ,5.1,1.3,1.6,3.7,0.8,4.8,8.6,3,0.4,1.4,1.5,6.2,1.3,4.4,4.2,9.3 ,5.8,6.8,0.6,8.3 ,5.6,5.8,2.5,2.3,1, 2.3,1,2.3,9.6,7.6,4.6,6,3.9,5.9,6.2,3,3.1,6.8,6.6,9.9,1.4,6.9,3.7,3.5,4.6,1.7,9,2.1,5.6,4.4,7,0.5,7.5,9.6 ,2.6,9.3 ,8.8 ,9.3 ,2.8,8.6,3.8,6.4,3.3,2.8,6.8,5.5,2.3,5.4,8.1 ,1.2,6.1,9,0.2,0.7,6.9,8.6,2.1,9,7.7,9.1 ,3.2,8.9,6.7,5.3,0.6,4.2,8.5,4.1,3.3,6.9,4.2,3.9,4,8,6.2,5,3.6,0.9,6.7,9.1,9.4,3.6,3.9,4.4,8.3 ,8,0.2,6.8,2.8,4,7.2,9.6 ,1.5,3.5,5.3,4.8,9.6 ,8.1,2.2,2.6,7.1,8.1 ,2.1,1.3,7,2.2,5,2.2,4.5,1.9,5,1.3,2.3,8.6,9.8 
,9.6,5,2.3,7,2.6,3.9,1.6,1.3,9.3 ,9.6 ,4.5,2.4,0.5,2.7,1.4,5.3,5.5,8.6,4.1,2,1.2,1.5,2.6,7.4,6,3.1,8,2.3,0.9,8.9,5.9,7.4,2.9,3.7,5.7,9.8 ,7.4,1.1,9.6 ,8.6 ,5.4,6.9, 2.8,1.6,2.5,9.9,4.5,9.5,3.1,6.6,9.5,2.6,8.1 ,6.7,8.6,8.1,2.5,4.9,1,9.5,5.9,9.3 ,0.4,0.4,3.8,7,9.5,6.8,7,1.6,7.9,4.4,9,5.4,6.5,4.3,8.6 ,0.9,4.8,6.2,8.6,8.5,9.1 ,4.3,9.4,3.3,9.1 ,6.2,5,7.6,7.3,2.4,0.1,0.2,2.8,6.1,6.5,2.5,7.8,5.5,0.4,7.7,2.6,1.6,8.6 ,5.1,3.6,2.8,0.1,4.4,9.5,1.5,0.1,4.8,0.9,7.3,0.1,6.2,1,0.6,7.8,3.2,1.8,9.4,4.2,5.5,4.3,8.8 ,2.6,0.4,7.8,6.2,3.2,9.3 ,5.7,0.9,6,7.6,5.5,4.3,9.6,6.4,6.1,0.7,8.1,5.8,2.3,5.3,9.3 ,5.5,6,0.3,1.5,7.2,7.7,7.2,5.9,3.2,7.3,3.3,0.8,4.8,0.2,9.6 ,6.6,3.3,8.4,0.8,9.1,9.9,1.4,2.5,0.6,6.7,1,1.4,7.1,1.1,1.3,8.5,3.1,9,9.5,7.1,4.5,7.4,4.7,5.9,1.6,8.6,5, 2.8,0.3,2.2,3.4,4.8,6.2,6.4,4.1,4.8,7.4,1.5,6.2,9.4,4.1,3.5,9.6 ,4.8,6.3,6.3,0,4.8,1.8,8.9,0.1,1.8,5.3,3.1,5.2,9.1,8.6,2.4,9.1,0.5,4.9,8.6,2.2,8.4,3.3,6.2,5.1,0.7,7,6.6,1.1,1.6,2.7,0.2,3,1,7.7,9.9,9.6 ,5.3,8.6,0.2,1,1.4,2.1,1.6,9.4,3.1,1.2,5,6.1,5,2.4,5.8,4.5,5.5,1.2,1.3,1.5,0.1,1.2,9.3 ,7.9,5.3,7.8,8.5,9.1,5.2,0.1,4.2,1.4,8.6 ,2.7,3.8,1.9,4.7,7.6,0.8,6.3,1.7,6.7,2,9.5,2, 0.4,6.1,9.1 ,7,5.7,7.5,9.8 ,6.4,8.5,9.6,4.2,9.3 ,2.3,5.7,3.7) , 40,31) colnames(x) <- paste0("V", c(1:ncol(x))) dx <- as.data.frame(x) test_that("Discrete choice (binary) estimation works", { for (linkFunc in c("logit", "probit")){ res <- estim.bin(data = get.data(x[,1:4]), linkFunc = linkFunc) resR <- glm(V1 ~ V2 + V3 + V4, data = dx, family = binomial(link = linkFunc)) expect_equal(as.numeric(resR$coefficients), as.numeric(res$estimations$gamma), tolerance = 1e-5) sum_resR=summary(resR) expect_equal(as.numeric(sum_resR$cov.scaled), as.numeric(res$estimations$gammaVar), tolerance = 1e-1) expect_equal(as.numeric(resid(res, pearson = FALSE)), as.numeric(resid(resR, type="response")), tolerance = 1e-5) expect_equal(as.numeric(resid(res, pearson = TRUE)), as.numeric(resid(resR, type="pearson")), tolerance = 1e-5) glm_pp <- predict(resR, type = "response") brier_score <- mean((dx$V1 - glm_pp)^2) expect_equal(brier_score, res$metrics[4,1], tolerance = 1e-1) auc <- ldt::s.roc(dx$V1, 1 - glm_pp) # 1- glm_pp because of s.roc behaviour expect_equal(auc$auc, res$metrics[5,1], tolerance = 1e-1) } }) test_that("Discrete choice (binary) estimation works with PCA", { y = as.data.frame(cbind(as.matrix(x[,1,drop=FALSE]), prcomp(x[,2:ncol(x)], scale. = TRUE)$x)) # orthogonal, will be used in R glm pcaOp = get.options.pca() pcaOp$ignoreFirst = 0 #It automatically adds and ignores intercept pcaOp$exactCount = 3 for (linkFunc in c("logit", "probit")){ res <- estim.bin(data = get.data(x[,1:ncol(x)]), linkFunc = linkFunc, pcaOptions = pcaOp) resR <- glm(V1 ~ PC1 + PC2 + PC3, data = y, family = binomial(link = linkFunc)) expect_equal(abs(as.numeric(resR$coefficients)), abs(as.numeric(res$estimations$gamma)), tolerance = 1e-4) # abs? 
    # they are related to PCs, whose signs are arbitrary, so only magnitudes are compared
    sum_resR <- summary(resR)
    expect_equal(as.numeric(sum_resR$cov.scaled), as.numeric(res$estimations$gammaVar),
                 tolerance = 1e-1)
  }
})

test_that("Discrete choice (binary, weighted) estimation works", {
  for (linkFunc in c("logit", "probit")) {
    res <- estim.bin(data = get.data(x[, 1:4], weights = x[, 7]), linkFunc = linkFunc)
    # quasibinomial avoids the 'non-integer #successes in a binomial glm!' warning,
    # see: https://stackoverflow.com/a/12954119
    resR <- glm(V1 ~ V2 + V3 + V4, data = dx, weights = x[, 7],
                family = quasibinomial(link = linkFunc))
    expect_equal(as.numeric(resR$coefficients), as.numeric(res$estimations$gamma),
                 tolerance = 1e-4)
    sum_resR <- summary(resR)
    expect_equal(as.numeric(sum_resR$cov.scaled), as.numeric(res$estimations$gammaVar),
                 tolerance = 1e-1)
  }
})

test_that("Discrete choice (binary, weighted) estimation works with PCA", {
  # orthogonal scores, will be used in the R glm
  y <- as.data.frame(cbind(as.matrix(x[, 1, drop = FALSE]),
                           prcomp(x[, 2:ncol(x)], scale. = TRUE)$x))
  pcaOp <- get.options.pca()
  pcaOp$ignoreFirst <- 0
  pcaOp$exactCount <- 3
  for (linkFunc in c("logit", "probit")) {
    res <- estim.bin(data = get.data(x[, 1:ncol(x)], weights = x[, 7]),
                     linkFunc, pcaOptions = pcaOp)
    resR <- glm(V1 ~ PC1 + PC2 + PC3, data = y, weights = x[, 7, drop = FALSE],
                family = quasibinomial(link = linkFunc))
    # abs()? they are related to PCs, whose signs are arbitrary
    expect_equal(abs(as.numeric(resR$coefficients)), abs(as.numeric(res$estimations$gamma)),
                 tolerance = 1e-5)
    sum_resR <- summary(resR)
    expect_equal(as.numeric(sum_resR$cov.scaled), as.numeric(res$estimations$gammaVar),
                 tolerance = 1e-1)
  }
})

test_that("Discrete choice (binary, weighted, logL) estimation works", {
  y0 <- x[, 1, drop = FALSE]
  w0 <- x[, 7, drop = FALSE]
  Z0 <- x[, 3:5]
  # version 1: append 4 identical observations, each with weight 1
  y <- as.matrix(append(y0, c(1, 1, 1, 1)))
  colnames(y) <- "V1"
  w <- as.matrix(append(w0, c(1, 1, 1, 1)))
  m <- matrix(c(3, 3, 3, 3, 5, 5, 5, 5, 6, 6, 6, 6), 4, 3)
  colnames(m) <- colnames(Z0)
  Z <- rbind(Z0, m)
  # version 2: append that observation once, with a weight of 4
  yw <- as.matrix(append(y0, c(1)))
  colnames(yw) <- "V1"
  ww <- as.matrix(append(w0, c(4)))
  m <- matrix(c(3, 5, 6), 1, 3)
  colnames(m) <- colnames(Z0)
  Zw <- rbind(Z0, m)
  for (linkFunc in c("logit", "probit")) {
    res <- estim.bin(data = get.data(cbind(y, Z), weights = w), linkFunc)
    resw <- estim.bin(data = get.data(cbind(yw, Zw), weights = ww), linkFunc)
    expect_equal(res$estimations$gamma, resw$estimations$gamma)
    expect_equal(res$estimations$gammaVar, resw$estimations$gammaVar)
    expect_equal(res$metrics[1, 1], resw$metrics[1, 1])
    # While logL is the same, aic and sic differ, because the number of observations
    # (rather than, e.g., the sum of weights) is used in the weighted case.
    # TODO: not sure whether using the sum of weights would be the right choice here.
    # The following therefore fails:
    # expect_equal(res$metrics[2, 1], resw$metrics[2, 1])
  }
})
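
# A minimal standalone sketch (not part of the original tests, added for illustration):
# for a binomial likelihood, an integer case weight w is equivalent to repeating the
# observation w times, which is why the two estimations above agree on gamma and logL.
# The data frame and variable names below are made up for the illustration.
d0 <- data.frame(yy = c(1, 0, 1, 0), zz = c(0.2, 1.5, 2.1, 3.0))
fit_rep <- glm(yy ~ zz, family = binomial(), data = d0[c(1, 1, 1, 2, 3, 4), ])       # row 1 repeated 3 times
fit_wgt <- glm(yy ~ zz, family = quasibinomial(), data = d0, weights = c(3, 1, 1, 1)) # weight 3 instead
# coef(fit_rep) and coef(fit_wgt) agree up to numerical tolerance; information criteria
# may still differ, since the two fits report different numbers of observations.
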
test_that("Discrete choice (binary, weighted) projection works", {
  for (linkFunc in c("logit", "probit")) {
    res <- estim.bin(data = get.data(x[, 1:4], newData = x[c(6, 7), ]), linkFunc = linkFunc)
    resR <- glm(V1 ~ V2 + V3 + V4, data = dx, family = binomial(link = linkFunc))
    resRp <- predict(resR, newdata = as.data.frame(x[c(6, 7), ]), type = "response")
    expect_equal(resRp[[1]], res$projection[[1, 2]], tolerance = 1e-5)
    expect_equal(resRp[[2]], res$projection[[2, 2]], tolerance = 1e-5)
  }
})

test_that("Discrete choice (binary, weighted) projection works with PCA", {
  pr <- prcomp(x[, 2:ncol(x)], scale. = TRUE)
  # orthogonal scores, will be used in the R glm
  y <- as.data.frame(cbind(as.matrix(x[, 1, drop = FALSE]), pr$x))
  pcaOp <- get.options.pca()
  pcaOp$ignoreFirst <- 0
  pcaOp$exactCount <- 3
  newX <- x[6:10, 2:ncol(x)]
  projY <- as.data.frame(predict(pr, newdata = newX))
  for (linkFunc in c("logit", "probit")) {
    res <- estim.bin(data = get.data(x[, 1:ncol(x)], weights = x[, 7, drop = FALSE], newData = newX),
                     linkFunc, pcaOptions = pcaOp)
    resR <- glm(V1 ~ PC1 + PC2 + PC3, data = y, weights = x[, 7],
                family = quasibinomial(link = linkFunc))
    resRp <- predict(resR, newdata = projY, type = "response")
    expect_equal(resRp[[1]], res$projection[[1, 2]], tolerance = 1e-5)
    expect_equal(resRp[[2]], res$projection[[2, 2]], tolerance = 1e-5)
  }
})
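
# A minimal standalone sketch (not part of the original tests, added for illustration):
# predict() on a prcomp object projects new rows onto the fitted components by applying
# the stored centering/scaling and the rotation matrix, which is what projY relies on above.
pr0 <- prcomp(x[, 2:5], scale. = TRUE)
newRows <- x[6:10, 2:5]
manualScores <- scale(newRows, center = pr0$center, scale = pr0$scale) %*% pr0$rotation
# manualScores should equal predict(pr0, newdata = newRows) up to numerical tolerance.
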
test_that("Discrete choice (binary) projection works", {
  for (linkFunc in c("logit", "probit")) {
    newX <- dx[5:6, ]
    res <- estim.bin(data = get.data(x[, 1:4], newData = newX), linkFunc = linkFunc)
    resR <- glm(V1 ~ V2 + V3 + V4, data = dx, family = binomial(link = linkFunc))
    resRp <- predict(resR, newdata = newX, type = "response")
    expect_equal(resRp[[1]], res$projection[[1, 2]], tolerance = 1e-5)
    expect_equal(resRp[[2]], res$projection[[2, 2]], tolerance = 1e-5)
  }
})

test_that("Discrete choice (binary) scoring works", {
  for (linkFunc in c("logit", "probit")) {
    c1 <- matrix(c(0.5, 1, 1, 0, 1, 0), 2, 3) # if probability is < 0.5, count it as an error (cost of 1)
    c2 <- matrix(c(0.5, 1, 1, 0, 1, 0), 2, 3) # same as c1
    c3 <- matrix(c(0.5, 1, 0, 1, 0, 1), 2, 3) # reversed costs
    res <- estim.bin(data = get.data(x[, 1:4], newData = x[c(5, 6), ]), linkFunc,
                     costMatrices = list(c1, c2, c3), simFixSize = 50)
    expect_equal(res$simulation$costRatios[[1]], res$simulation$costRatios[[2]], tolerance = 1e-16)
    expect_equal(res$simulation$costRatios[[1]], 1 - res$simulation$costRatios[[3]], tolerance = 1e-14)
    # TODO: test the actual scores and probability frequencies
  }
})

test_that("Discrete choice (binary) scoring works with PCA", {
  pcaOp <- get.options.pca()
  pcaOp$ignoreFirst <- 0
  pcaOp$exactCount <- 3
  newX <- x[6:10, 2:ncol(x)]
  for (linkFunc in c("logit", "probit")) {
    c1 <- matrix(c(0.5, 1, 1, 0, 1, 0), 2, 3) # if probability is < 0.5, count it as an error (cost of 1)
    c2 <- matrix(c(0.5, 1, 1, 0, 1, 0), 2, 3) # same as c1
    c3 <- matrix(c(0.5, 1, 0, 1, 0, 1), 2, 3) # reversed costs
    res <- estim.bin(data = get.data(x[, 1:4], newData = x[c(5, 6), ]), linkFunc,
                     list(c1, c2, c3), pcaOptions = pcaOp, simFixSize = 50)
    expect_equal(res$simulation$costRatios[[1]], res$simulation$costRatios[[2]], tolerance = 1e-16)
    expect_equal(res$simulation$costRatios[[1]], 1 - res$simulation$costRatios[[3]], tolerance = 1e-14)
    # TODO: test the actual scores and probability frequencies
  }
})

test_that("Discrete choice search (avgCost,all) works", {
  skip_on_cran()
  c1 <- matrix(c(0.5, 1, 1, 0, 1, 0), 2, 3)
  c2 <- matrix(c(0.5, 1, 0, 1, 0, 1), 2, 3) # reversed costs
  g1 <- c(1, 2)
  g2 <- c(3, 4)
  res <- search.bin(data = get.data(x[, 1:6], newData = x[c(5, 6), ], weights = x[, 7]),
                    combinations = get.combinations(sizes = c(1, 2)),
                    metrics = get.search.metrics(typesIn = c("aic"), typesOut = c("frequencyCost")),
                    costMatrices = list(c1, c2),
                    items = get.search.items(bestK = 4, all = TRUE))
  sapply(res$results[which(sapply(res$results, function(r) r$typeName == "model" && r$evalName == "aic"))],
         function(d) d$value$metric)
  sumRes <- summary(res, test = TRUE)
  for (a in res$results[which(sapply(res$results, function(r) r$typeName == "model" && r$evalName == "frequencyCostOut"))]) {
    expect_equal(0.5, a$value$weight, tolerance = 1e-16) # because of the structure of the cost tables
  }
})

test_that("Discrete choice search (NA) works", {
  skip_on_cran()
  y0 <- x[, 1, drop = FALSE]
  w0 <- x[, 5, drop = FALSE]
  Z0 <- x[, 2:6]
  yNA <- as.matrix(append(y0, c(1, 1, 1, 1)))
  colnames(yNA) <- c("yNA")
  wNA <- as.matrix(append(w0, c(1, 1, 1, 1)))
  m <- matrix(c(1, 1, 1, 1, 3, 3, 3, 3, 5, 5, 5, 1, 4, 4, 4, 4, 7, 7, 7, 7), 4, 5) * NA
  colnames(m) <- colnames(Z0)
  ZNA <- rbind(Z0, m)
  g1 <- c(2)
  g2 <- c(3, 4)
  res1 <- search.bin(data = get.data(cbind(y0, Z0), weights = w0),
                     combinations = get.combinations(sizes = c(2), partitions = list(c(1), g1, g2)),
                     metrics = get.search.metrics(typesIn = c("aic"), typesOut = NULL),
                     items = get.search.items(bestK = 0, all = TRUE))
  res2 <- search.bin(data = get.data(cbind(yNA, ZNA), weights = wNA),
                     combinations = get.combinations(sizes = c(2), partitions = list(c(1), g1, g2)),
                     metrics = get.search.metrics(typesIn = c("aic"), typesOut = c()),
                     items = get.search.items(bestK = 0, all = TRUE))
  allWeights1 <- sort(sapply(res1$results, function(x) { x$value$metric }))
  allWeights2 <- sort(sapply(res2$results, function(x) { x$value$metric }))
  expect_equal(allWeights1, allWeights2)
})
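
# A minimal standalone sketch (not part of the original tests, added for illustration):
# glm() drops incomplete rows by default (na.action = na.omit), so appending rows whose
# regressors are all NA does not change the estimates; the search above relies on the
# same complete-case behaviour.
dNA <- rbind(dx[, 1:4], c(1, NA, NA, NA))
fit_full <- glm(V1 ~ V2 + V3 + V4, data = dx, family = binomial())
fit_na   <- glm(V1 ~ V2 + V3 + V4, data = dNA, family = binomial())
# coef(fit_full) and coef(fit_na) should be identical; the extra row is simply ignored.
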
test_that("Discrete choice search works with inclusion weights", {
  skip_on_cran()
  res <- search.bin(data = get.data(x[, 1:7], weights = x[, 7]),
                    combinations = get.combinations(sizes = c(1, 2, 3)),
                    metrics = get.search.metrics(typesIn = c("aic"), typesOut = NULL),
                    items = get.search.items(all = TRUE, inclusion = TRUE))
  inclusion <- matrix(0, 8, 2, dimnames = list(colnames(res$info$data$data)[-2], NULL))
  for (m in res$results[which(sapply(res$results, function(r) r$evalName == "aic" && r$typeName == "model" && r$targetName == "V1"))]) {
    for (d in (m$value$endogenous)) {
      inclusion[d, 1] <- inclusion[d, 1] + m$value$weight
      inclusion[d, 2] <- inclusion[d, 2] + 1
    }
    for (d in (m$value$exogenous)) {
      inclusion[d, 1] <- inclusion[d, 1] + m$value$weight
      inclusion[d, 2] <- inclusion[d, 2] + 1
    }
  }
  inclusion[, 1] <- inclusion[, 1] / inclusion[, 2]
  searchInclusion <- res$results[which(sapply(res$results, function(r) r$evalName == "aic" && r$targetName == "V1" && r$typeName == "inclusion"))]
  expect_equal(as.numeric(searchInclusion[[1]]$value), as.numeric(inclusion), tolerance = 1e-10)
})

test_that("Discrete choice search works with coefficients (bests)", {
  skip_on_cran()
  res <- search.bin(data = get.data(x[, 1:7]),
                    combinations = get.combinations(sizes = c(1, 2, 3)),
                    metrics = get.search.metrics(typesIn = c("aic"), typesOut = NULL),
                    items = get.search.items(all = TRUE, type1 = TRUE, bestK = 2))
  sumRes <- summary(res, test = TRUE)
  expect_equal(res$counts, sumRes$counts)
})

test_that("Discrete choice search works with coefficients (cdfs)", {
  skip_on_cran()
  res <- search.bin(data = get.data(x[, 1:7]),
                    combinations = get.combinations(sizes = c(1, 2, 3)),
                    metrics = get.search.metrics(typesIn = c("aic"), typesOut = NULL),
                    items = get.search.items(all = TRUE, type1 = TRUE, cdfs = c(0, 1)))
  sumRes <- summary(res, test = TRUE)
  sum <- 0
  c <- 0
  cc <- 0
  i <- 0
  for (m in sumRes$results) {
    i <- i + 1
    if (m$evalName != "aic" || m$typeName != "model" || m$targetName != "V1")
      next()
    ind <- which(rownames(m$value$estimations$coefs) == "V4")
    if (length(ind) == 1) {
      coef <- m$value$estimations$gamma[ind]
      sd <- sqrt(m$value$estimations$gammaVar[ind, ind])
      w <- res$results[[i]]$value$weight
      # note the NORMAL distribution; with a t distribution, the d.o.f. would be nrow(y) - length(gamma)
      sum <- sum + w * pnorm(0, coef, sd)
      c <- c + w
      cc <- cc + 1
    }
  }
  cdfs <- res$results[which(sapply(res$results, function(r) r$evalName == "aic" && r$targetName == "V1" && r$typeName == "cdf"))]
  expect_equal(cdfs[[1]]$value[4, 1], sum / c, tolerance = 1e-10)
  expect_equal(cdfs[[1]]$value[4, 2], cc, tolerance = 1e-10)
})
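
# A minimal standalone sketch (not part of the original tests, added for illustration):
# the cdf item checked above evaluates P(coefficient <= 0) under a normal approximation,
# pnorm(0, coef, sd). The comment in that test mentions the t-based alternative, which
# would be pt((0 - coef) / sd, df = n - k) with n observations and k estimated
# coefficients; the numbers below are made up.
coef0 <- 0.8
sd0   <- 0.5
n0    <- 40
k0    <- 3
p_normal <- pnorm(0, mean = coef0, sd = sd0)
p_t      <- pt((0 - coef0) / sd0, df = n0 - k0)
# p_normal and p_t are close for moderate sample sizes and converge as n0 grows.
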
test_that("Discrete choice search works with coefficients (extreme bounds)", {
  skip_on_cran()
  res <- search.bin(data = get.data(x[, 1:7]),
                    combinations = get.combinations(sizes = c(1, 2, 3)),
                    metrics = get.search.metrics(typesIn = c("aic"), typesOut = NULL),
                    items = get.search.items(all = TRUE, type1 = TRUE, extremeMultiplier = 2))
  sumRes <- summary(res, test = TRUE)
  mn <- Inf
  mx <- -Inf
  for (m in sumRes$results) {
    if (m$evalName != "aic" || m$typeName != "model" || m$targetName != "V1")
      next()
    ind <- which(rownames(m$value$estimations$coefs) == "V4")
    if (length(ind) == 1) {
      coef <- m$value$estimations$gamma[ind]
      sd <- sqrt(m$value$estimations$gammaVar[ind, ind])
      mn <- min(mn, coef - 2 * sd)
      mx <- max(mx, coef + 2 * sd)
    }
  }
  extremeB <- res$results[which(sapply(res$results, function(r) r$evalName == "aic" && r$targetName == "V1" && r$typeName == "extreme bound"))]
  expect_equal(extremeB[[1]]$value[4, 1], mn, tolerance = 1e-10)
  expect_equal(extremeB[[1]]$value[4, 2], mx, tolerance = 1e-10)
})

test_that("Discrete choice search works with coefficients (mixture)", {
  skip_on_cran()
  res <- search.bin(data = get.data(x[, 1:7], weights = x[, 7]),
                    combinations = get.combinations(sizes = c(1, 2, 3)),
                    metrics = get.search.metrics(typesIn = c("aic"), typesOut = NULL),
                    items = get.search.items(all = TRUE, type1 = TRUE, mixture4 = TRUE))
  sumRes <- summary(res, test = TRUE)
  coefs <- c()
  vars <- c()
  weights <- c()
  i <- 0
  for (m in sumRes$results) {
    i <- i + 1
    if (m$evalName != "aic" || m$typeName != "model" || m$targetName != "V1")
      next()
    ind <- which(rownames(m$value$estimations$coefs) == "V4")
    if (length(ind) == 1) {
      coefs <- append(coefs, m$value$estimations$gamma[ind])
      vars <- append(vars, m$value$estimations$gammaVar[ind, ind])
      w <- res$results[[i]]$value$weight
      weights <- append(weights, w)
    }
  }
  mixture <- res$results[which(sapply(res$results, function(r) r$evalName == "aic" && r$targetName == "V1" && r$typeName == "mixture"))]
  # note that we need the weighted mean, variance, etc., assuming a normal distribution
  len <- length(coefs)
  expect_equal(mixture[[1]]$value[4, 5], len)
  me <- weighted.mean(coefs, weights)
  expect_equal(mixture[[1]]$value[4, 1], me, tolerance = 1e-14)
})

test_that("Discrete choice SplitSearch works (no subsetting)", {
  skip_on_cran()
  data <- get.data(x[, 1:7, drop = FALSE])
  combinations <- get.combinations()
  items <- get.search.items(inclusion = TRUE #, all = TRUE
                            , bestK = 4 # TODO: test fails for bestK > 1; best coefficients for info > 0 are not included!
                            , type1 = TRUE
                            , cdfs = c(0, 0.3)
                            , mixture4 = TRUE
                            , extremeMultiplier = 2
                            )
  # Don't test with out-of-sample metrics: it seems we get different models with equal
  # weights (the result changes when the call is repeated?!).
  metrics <- get.search.metrics(c("sic", "aic"))
  options <- get.search.options(FALSE, reportInterval = 0)
  combinations$sizes <- c(1, 2, 3)
  whole <- search.bin(data = data, combinations = combinations, items = items,
                      metrics = metrics, options = options)
  combinations$sizes <- list(c(1, 2), c(3))
  combinations$stepsNumVariables <- c(NA, NA)
  split <- search.bin(data = data, combinations = combinations, items = items,
                      metrics = metrics, options = options)
  expect_equal(whole$counts, split$counts)
  expect_equal(length(whole$results), length(split$results))
  pastedList_w <- unlist(lapply(whole$results, function(x) paste(x[1:4], collapse = "")))
  pastedList_s <- unlist(lapply(split$results, function(x) paste(x[1:4], collapse = "")))
  sortedList_w <- whole$results[order(pastedList_w)]
  sortedList_s <- split$results[order(pastedList_s)]
  for (i in 1:length(sortedList_w)) {
    if (sortedList_s[[i]]$typeName == "mixture") {
      expect_equal(sortedList_s[[i]]$value[, c(1:3, 5, 6)], sortedList_w[[i]]$value[, c(1:3, 5, 6)])
      expect_equal(sortedList_s[[i]]$value[, c(4)], sortedList_w[[i]]$value[, c(4)], tolerance = 0.1)
    } else {
      expect_equal(sortedList_s[[i]]$value, sortedList_w[[i]]$value)
    }
  }
})
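
# A minimal standalone sketch (not part of the original tests, added for illustration):
# the mixture item in the (mixture) test above pools per-model normal distributions
# N(coef_i, var_i) with weights w_i. The pooled mean is the weighted mean of the
# coefficients (checked in that test); a pooled variance would combine the within- and
# between-model parts as below. The vectors are made up for the illustration.
coefs0   <- c(0.4, 0.6, 0.9)
vars0    <- c(0.04, 0.02, 0.05)
weights0 <- c(0.2, 0.5, 0.3)
w0n <- weights0 / sum(weights0)
mix_mean <- sum(w0n * coefs0)                          # equals weighted.mean(coefs0, weights0)
mix_var  <- sum(w0n * (vars0 + coefs0^2)) - mix_mean^2 # E[X^2] - (E[X])^2 for the mixture
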