compareImSy <- function(y, bandwidth) { n <- length(y) kernel <- function(x) ifelse(abs(x) <= 1 + 1e-15, 3 / 4 * (1 - x^2), 0) L <- max(0, min(as.integer(bandwidth * n + 1e-14), n - 1)) K <- kernel(-L:L / (bandwidth * n)) y - .kernelSmoothing(y, K) } compareImSX <- function(y, bandwidth) { n <- length(y) kernel <- function(x) ifelse(abs(x) <= 1 + 1e-15, 3 / 4 * (1 - x^2), 0) L <- max(0, min(as.integer(bandwidth * n + 1e-14), n - 1)) K <- kernel(-L:L / (bandwidth * n)) Matrix::bandSparse(n, n - 1, k = (-L + 1):L - 1, diagonals = .createImSX(n = n, K = rev(cumsum(K)))) } compareXty <- function(y, bandwidth, x = NULL) { if (is.null(x)) { x <- compareImSX(y, bandwidth) } Matrix::t(x) %*% y / length(y) } compareXtX <- function(y, bandwidth, x = NULL) { if (is.null(x)) { x <- compareImSX(y, bandwidth) } Matrix::t(x) %*% x / length(y) } # y must be of even length compareCv <- function(y, lambda, bandwidth, sd, thresh, maxit, loss = function(test, est) mean(abs(test - est))) { n <- length(y) if (n %% 2 == 1) { stop("n is odd") } V <- 2 cv <- numeric(length(bandwidth)) value <- list() for (indexBandwidth in seq(along = bandwidth)) { if (bandwidth[indexBandwidth] == Inf) { estRet <- numeric(n) for (i in 1:V) { J <- seq(i, n, V) mJ <- seq(along = y)[-J] pelt <- .pelt(obs = y[mJ], sd = sd)$est est <- numeric(n) est[1] <- pelt[1] est[mJ[-1]] <- diff(pelt) estRet[J] <- cumsum(est)[J] } cv[indexBandwidth] <- loss(test = y, est = estRet) value[[indexBandwidth]] <- NA } else { kernel <- function(x) ifelse(abs(x) <= 1 + 1e-15, 3 / 4 * (1 - x^2), 0) L <- max(0, min(as.integer(bandwidth[indexBandwidth] * n + 1e-14), n - 1)) K <- kernel(-L:L / (bandwidth[indexBandwidth] * n)) Kfold <- K Kfold[-L:L %% V == 0] <- 0 precomputed <- .prepareImSXcv(K = K, Vm1 = V - 1, modValues = -L:L %% V) precomputed$val <- precomputed$val[, 1:max(precomputed$lengths), drop = FALSE] maxRight <- max(precomputed$lengthsRight) maxLeft <- max(precomputed$lengths - 1 - precomputed$lengthsRight) vec <- numeric(length(lambda)) estRet <- matrix(0, n, length(lambda)) for (i in 1:V) { J <- seq(i, n, V) mJ <- seq(along = y)[-J] rows <- rep_len(c((V - 1):1, (V - 1):1)[1:(V - 1) + (V - i)], length(mJ)) - 1 ImSX <- Matrix::bandSparse(length(mJ), length(mJ), k = (-maxLeft + 1):maxRight, diagonals = .createImSXcv(n = length(mJ), precomputed = precomputed, maxLeft = maxLeft, maxRight = maxRight, rows = rows)) YmJ <- y YmJ[J] <- NA ImSy <- y[mJ] - .kernelSmoothingVfoldMJ(Y = YmJ, K = K, lengthMJ = length(mJ)) ret <- glmnet::glmnet(x = ImSX[, -1], y = ImSy, lambda = lambda, standardize = FALSE, intercept = FALSE) for (indexLambda in seq(along = lambda)) { fit <- numeric(n) fit[mJ] <- c(0, as.numeric(ret$beta[, indexLambda])) fit <- cumsum(fit) YmJ <- y - fit YmJ[J] <- NA smooth <- .kernelSmoothingVfoldMJ(Y = YmJ, K = K, lengthMJ = length(mJ)) # restimate cps by PELT pelt <- .pelt(obs = y[mJ] - smooth, sd = sd) # reestimate without penalty but condition on cps of PELT's estimate cps <- pelt$cps if (length(cps) > 0) { fitJ <- as.numeric(Matrix::solve(Matrix::t(ImSX[, cps, drop = FALSE]) %*% ImSX[, cps, drop = FALSE], as.numeric(Matrix::t(ImSX[, cps, drop = FALSE]) %*% ImSy))) fit <- numeric(n) fit[mJ][cps] <- fitJ fit <- cumsum(fit) estRet[J, indexLambda] <- fit[J] + .kernelSmoothingVfold(Y = y - fit, K = Kfold, V = V, fold = i) } else { estRet[J, indexLambda] <- .kernelSmoothingVfold(Y = y, K = Kfold, V = V, fold = i) } } } for (indexLambda in seq(along = lambda)) { vec[indexLambda] <- loss(test = y, est = estRet[, indexLambda]) } index <- which.min(vec) cv[indexBandwidth] <- vec[index] value[[indexBandwidth]] <- lambda[1:index] } } index <- which.min(cv) list(lambda = value[[index]], bandwidth = bandwidth[index], cv = cv[index], cvs = cv, bandwidth = bandwidth) } test_that("cv.pcplus is giving correct results I", { testy <- rnorm(100) testbandwidth <- c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30)), Inf) testlambda <- c(10:1 * 10, 10:1) / 100 testthresh <- 1e-7 testmaxit <- 1e5 testsd <- 1 object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, sd = testsd, thresh = testthresh, maxit = testmaxit) expected <- compareCv(testy, testlambda, testbandwidth, testsd, testthresh, testmaxit) expect_equal(object$cvs, expected$cvs, tolerance = 1e-1) }) test_that("cv.pcplus is giving correct results II", { testy <- rnorm(100) testy[40:100] <- testy[40:100] + 3 testy[70:100] <- testy[70:100] + 5 testbandwidth <- c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30)), Inf) testlambda <- c(10:1 * 10, 10:1) / 100 testthresh <- 1e-7 testmaxit <- 1e5 testsd <- 1 object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, sd = testsd, thresh = testthresh, maxit = testmaxit) expected <- compareCv(testy, testlambda, testbandwidth, testsd, testthresh, testmaxit) expect_equal(object$cvs, expected$cvs, tolerance = 1e-1) }) test_that("cv.pcplus is giving correct results III", { testy <- rnorm(100) testy[7:100] <- testy[7:100] + 50 testy[12:100] <- testy[12:100] + 20 testy[90:100] <- testy[90:100] + 20 testbandwidth <- c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30)), Inf) testlambda <- c(10:1 * 10, 10:1) / 100 testthresh <- 1e-7 testmaxit <- 1e5 testsd <- 0.5 object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, sd = testsd, thresh = testthresh, maxit = testmaxit) expected <- compareCv(testy, testlambda, testbandwidth, testsd, testthresh, testmaxit) expect_equal(object$cvs, expected$cvs, tolerance = 1e-1) }) test_that("argument y is checked", { testy <- rnorm(100) expect_error(cv.pcplus()) expect_error(cv.pcplus(y = NA)) expect_error(cv.pcplus(y = as.numeric(NA))) expect_error(cv.pcplus(y = c(testy, "s"))) expect_error(cv.pcplus(y = c(testy, Inf))) expect_error(cv.pcplus(y = c(-Inf, testy))) expect_error(cv.pcplus(y = c(NaN, testy))) }) test_that("argument bandwidth is checked", { testy <- rnorm(100) testy[7:100] <- testy[7:100] + 50 testy[12:100] <- testy[12:100] + 20 testy[90:100] <- testy[90:100] + 20 expect_identical(cv.pcplus(y = testy), cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf))) expect_error(cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf, "s"))) expect_error(cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf, NA))) expect_error(cv.pcplus(y = testy, bandwidth = c(-0.05, exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf))) expect_error(cv.pcplus(y = testy, bandwidth = c(as.numeric(NA), exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf))) expect_error(cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), NaN, Inf))) expect_warning(object <- cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), 0.5))) expect_equal(object, cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf))) expect_warning(object <- cv.pcplus(y = testy, bandwidth = c(exp(seq(log(1.99 / 50 / 2), log(0.25), length.out = 30L)), Inf))) expect_equal(object, cv.pcplus(y = testy, bandwidth = c(2.01 / 50 / 2, exp(seq(log(1.99 / 50 / 2), log(0.25), length.out = 30L))[-1], Inf))) }) test_that("argument lambda is checked", { testy <- rnorm(100) testy[7:100] <- testy[7:100] + 50 testy[12:100] <- testy[12:100] + 20 testy[90:100] <- testy[90:100] + 20 testbandwidth <- c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30)), Inf) testlambda <- c(10:1 * 10, 10:1) / 100 testthresh <- 1e-7 testmaxit <- 1e5 testsd <- 0.5 expect_error(cv.pcplus(y = testy, lambda = "s")) expect_error(cv.pcplus(y = testy, lambda = NA)) expect_error(cv.pcplus(y = testy, lambda = -0.5)) expect_error(cv.pcplus(y = testy, lambda = as.numeric(NA))) expect_error(cv.pcplus(y = testy, lambda = c(seq(1, 0.05, -0.05), "s"))) expect_error(cv.pcplus(y = testy, lambda = c(as.numeric(NA), seq(1, 0.05, -0.05)))) expect_error(cv.pcplus(y = testy, lambda = c(seq(1, 0.5, -0.05), -0.05, seq(0.45, 0.05, -0.05)))) expect_error(cv.pcplus(y = testy, lambda = c(seq(1, 0.5, -0.05), Inf, seq(0.45, 0.05, -0.05)))) object <- cv.pcplus(y = testy, lambda = 0.05, bandwidth = testbandwidth, sd = testsd, thresh = testthresh, maxit = testmaxit) expected <- compareCv(testy, 0.05, testbandwidth, testsd, testthresh, testmaxit) expect_equal(object$cvs, expected$cvs, tolerance = 1e-1) expect_warning(object <- cv.pcplus(y = testy, lambda = rev(seq(1, 0.05, -0.05)))) expect_identical(object, cv.pcplus(y = testy, lambda = seq(1, 0.05, -0.05))) expect_warning(object <- cv.pcplus(y = testy, lambda = c(seq(1, 0.2, -0.05), 0.05, 0.15, 0.1))) expect_identical(object, cv.pcplus(y = testy, lambda = seq(1, 0.05, -0.05))) }) test_that("argument nbandwidth is checked", { testy <- rnorm(100) testy[7:100] <- testy[7:100] + 50 testy[12:100] <- testy[12:100] + 20 testy[90:100] <- testy[90:100] + 20 expect_error(cv.pcplus(y = testy, nbandwidth = "s")) expect_error(cv.pcplus(y = testy, nbandwidth = NA)) expect_error(cv.pcplus(y = testy, nbandwidth = NaN)) expect_error(cv.pcplus(y = testy, nbandwidth = Inf)) expect_error(cv.pcplus(y = testy, nbandwidth = -10)) expect_error(cv.pcplus(y = testy, nbandwidth = as.numeric(NA))) expect_error(cv.pcplus(y = testy, nbandwidth = c(5, 10))) expect_identical(cv.pcplus(y = testy), cv.pcplus(y = testy, nbandwidth = 30L)) expect_identical(cv.pcplus(y = testy, nbandwidth = 10), cv.pcplus(y = testy, nbandwidth = 10L)) expect_identical(cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf), nbandwidth = 10L), cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf))) expect_identical(cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf), nbandwidth = "s"), cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf))) expect_identical(cv.pcplus(y = testy), cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 30L)), Inf))) expect_identical(cv.pcplus(y = testy, nbandwidth = 1L), cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 1L)), Inf))) expect_identical(cv.pcplus(y = testy, nbandwidth = 10L), cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 10L)), Inf))) expect_identical(cv.pcplus(y = testy, nbandwidth = 20L), cv.pcplus(y = testy, bandwidth = c(exp(seq(log(2.01 / 50 / 2), log(0.25), length.out = 20L)), Inf))) }) test_that("argument nlambda is checked", { testy <- rnorm(100) testy[7:100] <- testy[7:100] + 50 testy[12:100] <- testy[12:100] + 20 testy[90:100] <- testy[90:100] + 20 expect_error(cv.pcplus(y = testy, nlambda = "s")) expect_error(cv.pcplus(y = testy, nlambda = NA)) expect_error(cv.pcplus(y = testy, nlambda = NaN)) expect_error(cv.pcplus(y = testy, nlambda = Inf)) expect_error(cv.pcplus(y = testy, nlambda = -10)) expect_error(cv.pcplus(y = testy, nlambda = as.numeric(NA))) expect_error(cv.pcplus(y = testy, nlambda = c(5, 10))) expect_identical(cv.pcplus(y = testy), cv.pcplus(y = testy, nlambda = 30L)) expect_identical(cv.pcplus(y = testy, nlambda = 10), cv.pcplus(y = testy, nlambda = 10L)) expect_identical(cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100, nlambda = 10L), cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100)) expect_identical(cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100, nlambda = "s"), cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100)) }) test_that("argument lambda.min.ratio is checked", { testy <- rnorm(100) testy[7:100] <- testy[7:100] + 50 testy[12:100] <- testy[12:100] + 20 testy[90:100] <- testy[90:100] + 20 expect_error(cv.pcplus(y = testy, lambda.min.ratio = "s")) expect_error(cv.pcplus(y = testy, lambda.min.ratio = NA)) expect_error(cv.pcplus(y = testy, lambda.min.ratio = NaN)) expect_error(cv.pcplus(y = testy, lambda.min.ratio = Inf)) expect_error(cv.pcplus(y = testy, lambda.min.ratio = -1)) expect_error(cv.pcplus(y = testy, lambda.min.ratio = 0)) expect_error(cv.pcplus(y = testy, lambda.min.ratio = 1)) expect_error(cv.pcplus(y = testy, lambda.min.ratio = 2)) expect_error(cv.pcplus(y = testy, lambda.min.ratio = as.numeric(NA))) expect_error(cv.pcplus(y = testy, lambda.min.ratio = c(0.5, 0.1))) expect_identical(cv.pcplus(y = testy), cv.pcplus(y = testy, lambda.min.ratio = 0.01)) expect_identical(cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100, lambda.min.ratio = 0.1), cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100)) expect_identical(cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100, lambda.min.ratio = "s"), cv.pcplus(y = testy, lambda = c(10:1 * 10, 10:1) / 100)) }) test_that("argument sd is checked", { testy <- rnorm(100) testy[7:100] <- testy[7:100] + 50 testy[12:100] <- testy[12:100] + 20 testy[90:100] <- testy[90:100] + 20 testlambda <- seq(1, 0.05, -0.05) testbandwidth <- 0.05 testthresh <- 1e-7 testmaxit <- 1e5 expect_error(cv.pcplus(y = testy, sd = "s")) expect_error(cv.pcplus(y = testy, sd = NA)) expect_error(cv.pcplus(y = testy, sd = -0.05)) expect_error(cv.pcplus(y = testy, sd = NaN)) expect_error(cv.pcplus(y = testy, sd = as.numeric(NA))) expect_error(cv.pcplus(y = testy, sd = c(0.05, 0.1))) testsd <- as.numeric(diff(quantile(diff(testy), c(0.25, 0.75))) / diff(qnorm(c(0.25, 0.75))) / sqrt(2)) expect_identical(cv.pcplus(y = testy), cv.pcplus(y = testy, sd = testsd)) object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, sd = 0.5, thresh = testthresh, maxit = testmaxit) expected <- compareCv(testy, testlambda, testbandwidth, 0.5, testthresh, testmaxit) expect_equal(object$cvs, expected$cvs, tolerance = 1e-1) }) test_that("argument thresh is checked", { testy <- rnorm(100) testy[7:100] <- testy[7:100] + 50 testy[12:100] <- testy[12:100] + 20 testy[90:100] <- testy[90:100] + 20 testbandwidth <- 0.1 testlambda <- seq(1, 0.05, -0.05) testmaxit <- 1e5 testsd <- 0.5 expect_error(cv.pcplus(y = testy, thresh = "s")) expect_error(cv.pcplus(y = testy, thresh = NA)) expect_error(cv.pcplus(y = testy, thresh = NaN)) expect_error(cv.pcplus(y = testy, thresh = Inf)) expect_error(cv.pcplus(y = testy, thresh = -10)) expect_error(cv.pcplus(y = testy, thresh = 0)) expect_error(cv.pcplus(y = testy, thresh = as.numeric(NA))) expect_error(cv.pcplus(y = testy, thresh = c(0.001, 1e-7))) expect_identical(cv.pcplus(y = testy), cv.pcplus(y = testy, thresh = 1e-7)) object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, sd = testsd, thresh = 1e-5, maxit = testmaxit) expected <- compareCv(testy, testlambda, testbandwidth, testsd, 1e-5, testmaxit) expect_equal(object$cvs, expected$cvs, tolerance = 1e-1) object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, sd = testsd, thresh = 1e-3, maxit = testmaxit) expected <- compareCv(testy, testlambda, testbandwidth, testsd, 1e-3, testmaxit) expect_equal(object$cvs, expected$cvs, tolerance = 1e-1) }) test_that("argument maxit is checked", { testy <- rnorm(100) testy[7:100] <- testy[7:100] + 50 testy[12:100] <- testy[12:100] + 20 testy[90:100] <- testy[90:100] + 20 testbandwidth <- 0.1 testlambda <- c(0.5, 0.4) testthresh <- 1e7 testsd <- 0.5 expect_error(cv.pcplus(y = testy, maxit = "s")) expect_error(cv.pcplus(y = testy, maxit = NA)) expect_error(cv.pcplus(y = testy, maxit = NaN)) expect_error(cv.pcplus(y = testy, maxit = Inf)) expect_error(cv.pcplus(y = testy, maxit = -10)) expect_error(cv.pcplus(y = testy, maxit = 0)) expect_error(cv.pcplus(y = testy, maxit = as.numeric(NA))) expect_error(cv.pcplus(y = testy, maxit = c(100, 1e4))) expect_identical(cv.pcplus(y = testy), cv.pcplus(y = testy, maxit = 1e5L)) expect_identical(cv.pcplus(y = testy, maxit = 1e4), cv.pcplus(y = testy, maxit = 1e4L)) object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, sd = testsd, thresh = testthresh, maxit = 1e3) expected <- compareCv(testy, testlambda, testbandwidth, testsd, testthresh, 1e3) expect_equal(object$cvs, expected$cvs, tolerance = 1e-1) object <- cv.pcplus(y = testy, lambda = testlambda, bandwidth = testbandwidth, sd = testsd, thresh = testthresh, maxit = 5) expected <- compareCv(testy, testlambda, testbandwidth, testsd, testthresh, 5) expect_equal(object$cvs, expected$cvs, tolerance = 1e-1) })