test_that('nfold must be valid', { set.seed(123) y <- rnorm(100) x <- rnorm(100) expect_error(cv.grpsel(x, y, nfold = 1)) expect_error(cv.grpsel(x, y, nfold = 101)) }) test_that('length of folds must match sample size', { set.seed(123) y <- rnorm(100) x <- rnorm(100) folds <- sample(2, 101, T) expect_error(cv.grpsel(x, y, folds = folds)) }) test_that('cross-validation leads to correct subset under square loss', { set.seed(123) group <- rep(1:5, each = 2) x <- matrix(rnorm(100 * 10), 100, 10) y <- rnorm(100, rowSums(x[, which(group %in% 1:2)])) fit <- cv.grpsel(x, y, group, eps = 1e-15) beta <- as.numeric(coef(fit)) beta.target <- rep(0, 11) beta.target[c(T, group %in% 1:2)] <- as.numeric(glm(y ~ x[, group %in% 1:2])$coef) expect_equal(beta, beta.target) }) test_that('cross-validation leads to correct subset under logistic loss', { set.seed(123) group <- rep(1:5, each = 2) x <- matrix(rnorm(100 * 10), 100, 10) y <- rbinom(100, 1, 1 / (1 + exp(- 2 * rowSums(x[, which(group %in% 1:2)])))) fit <- cv.grpsel(x, y, group, loss = 'logistic', eps = 1e-15) beta <- as.numeric(coef(fit)) beta.target <- rep(0, 11) beta.target[c(T, group %in% 1:2)] <- as.numeric(glm(y ~ x[, group %in% 1:2], 'binomial')$coef) expect_equal(beta, beta.target) }) test_that('cross-validation works when folds are manually supplied', { set.seed(123) group <- rep(1:5, each = 2) x <- matrix(rnorm(100 * 10), 100, 10) y <- rnorm(100, rowSums(x[, which(group %in% 1:2)])) folds <- sample(5, 100, T) fit <- cv.grpsel(x, y, group, eps = 1e-15, folds = folds) beta <- as.numeric(coef(fit)) beta.target <- rep(0, 11) beta.target[c(T, group %in% 1:2)] <- as.numeric(glm(y ~ x[, group %in% 1:2])$coef) expect_equal(beta, beta.target) }) test_that('coefficients are extracted correctly', { set.seed(123) x <- matrix(rnorm(100 * 10), 100, 10) y <- rnorm(100, rowSums(x)) fit <- cv.grpsel(x, y, eps = 1e-15) fit.target <- glm(y ~ x) beta <- coef(fit) beta.target <- as.matrix(as.numeric(coef(fit.target))) expect_equal(beta, beta.target) }) test_that('predictions are computed correctly', { set.seed(123) x <- matrix(rnorm(100 * 10), 100, 10) y <- rnorm(100, rowSums(x)) fit <- cv.grpsel(x, y, eps = 1e-15) fit.target <- glm(y ~ x) yhat <- predict(fit, x) yhat.target <- as.matrix(as.numeric(predict(fit.target, as.data.frame(x)))) expect_equal(yhat, yhat.target) }) test_that('plot function returns a plot', { set.seed(123) x <- matrix(rnorm(100 * 10), 100, 10) y <- rnorm(100) fit <- cv.grpsel(x, y) p <- plot(fit) expect_s3_class(p, 'ggplot') }) test_that('sequential and parallel cross-validation produce same output', { set.seed(123) x <- matrix(rnorm(100 * 10), 100, 10) y <- rnorm(100) folds <- rep(1:10, each = 10) fit.seq <- cv.grpsel(x, y, folds = folds) cl <- parallel::makeCluster(2) fit.par <- cv.grpsel(x, y, folds = folds, cluster = cl) parallel::stopCluster(cl) expect_equal(fit.seq, fit.par) })