# Author: Quentin Grimonprez context("Manage column fills with 0 in compute_optimal_encoding") oldcompute_optimal_encoding <- function(data, basisobj, nCores = max(1, ceiling(detectCores() / 2)), verbose = TRUE, ...) { t1 <- proc.time() ## check parameters checkData(data) checkDataBeginTime(data) checkDataEndTmax(data) if (!is.basis(basisobj)) { stop("basisobj is not a basis object.") } if (any(is.na(nCores)) || !is.whole.number(nCores) || (nCores < 1)) { stop("nCores must be an integer > 0.") } ## end check if (verbose) { cat("######### Compute encoding #########\n") } # change state as integer out <- stateToInteger(data$state) data$state <- out$state label <- out$label rm(out) # refactor labels as 1:nbId uniqueId <- unique(data$id) nId <- length(uniqueId) id2 <- refactorCategorical(data$id, uniqueId, seq_along(uniqueId)) uniqueId2 <- unique(id2) nCores <- min(max(1, nCores), detectCores() - 1) Tmax <- max(data$time) K <- length(label$label) nBasis <- basisobj$nbasis # nombre de fonctions de base if (verbose) { cat(paste0("Number of individuals: ", nId, "\n")) cat(paste0("Number of states: ", K, "\n")) cat(paste0("Basis type: ", basisobj$type, "\n")) cat(paste0("Number of basis functions: ", nBasis, "\n")) cat(paste0("Number of cores: ", nCores, "\n")) } I <- diag(rep(1, nBasis)) phi <- fd(I, basisobj) # les fonctions de base comme données fonctionnelles # declare parallelization cl <- makeCluster(nCores) parallel::clusterExport(cl, "eval.fd") if (verbose) { cat("---- Compute V matrix:\n") } t2 <- proc.time() # on construit les variables V_ij = int(0,T){phi_j(t)*1_X(t)=i} dt V <- do.call(rbind, pblapply(cl = cl, split(data, data$id), cfda:::compute_Vxi, phi = phi, K = K)) rownames(V) <- NULL G <- cov(V) * (nrow(V) - 1) / nrow(V) t3 <- proc.time() if (verbose) { cat(paste0("\nDONE in ", round((t3 - t2)[3], 2), "s\n---- Compute F matrix:\n")) } Fval <- do.call(rbind, pblapply(cl = cl, split(data, data$id), cfda:::compute_Uxij, phi = phi, K = K)) # stop parallelization stopCluster(cl) # create F matrix Fval <- colMeans(Fval) Fmat <- matrix(0, ncol = K * nBasis, nrow = K * nBasis) # matrice avec K blocs de taille nBasis*nBasis sur la diagonale for (i in 1:K) { Fmat[((i - 1) * nBasis + 1):(i * nBasis), ((i - 1) * nBasis + 1):(i * nBasis)] <- matrix(Fval[((i - 1) * nBasis * nBasis + 1):(i * nBasis * nBasis)], ncol = nBasis, byrow = TRUE) } t4 <- proc.time() if (verbose) { cat(paste0("\nDONE in ", round((t4 - t3)[3], 2), "s\n---- Compute encoding: ")) } # res = eigen(solve(F)%*%G) F05 <- t(mroot(Fmat)) # F = t(F05)%*%F05 if (any(dim(F05) != rep(K * nBasis, 2))) { cat("\n") if (any(colSums(Fmat) == 0)) { stop("F matrix is not invertible. In the support of each basis function, each state must be present at least once (p(x_t) != 0 for t in the support). You can try to change the basis.") } stop("F matrix is not invertible. You can try to change the basis.") } invF05 <- solve(F05) # res = eigen(F05%*%solve(F)%*%G%*%solve(F05)) res <- eigen(t(invF05) %*% G %*% invF05) # les vecteurs propres (qui donnent les coeffs des m=nBasis codages, pour chaque val propre) # je les mets sous la forme d'une liste de matrices de taille m x K. La premiere matrice # correspond au premier vecteur propre. ce vecteur (1ere colonne dans res$vectors) contient # les coefs du codage pour l'état 1 sur les premières m (=nBasis) positions, ensuite pour l'état 2 sur # m positions et enfin pour le k-eme état. Je mets cette première colonne sous forme de # matrice et les coefs sont sous forme de colonnes de taille m # met la matrice de vecteurs propres comme une liste # aux1 = split(res$vectors, rep(1:ncol(res$vectors), each = nrow(res$vectors))) aux1 <- split(invF05 %*% res$vectors, rep(1:ncol(res$vectors), each = nrow(res$vectors))) # on construit les matrices m x K pour chaque valeur propre : 1ere colonne les coefs pour etat 1, # 2eme col les coefs pour état 2, etc aux2 <- lapply(aux1, function(w) { return(matrix(w, ncol = K, dimnames = list(NULL, label$label))) }) pc <- V %*% (invF05 %*% res$vectors) rownames(pc) <- uniqueId if (verbose) { cat("DONE\n") } out <- list(eigenvalues = res$values, alpha = aux2, pc = pc, F = Fmat, G = G, V = V, basisobj = basisobj) class(out) <- "fmca" t6 <- proc.time() if (verbose) { cat(paste0("Run Time: ", round((t6 - t1)[3], 2), "s\n")) } return(out) } test_that("compute_optimal_encodings has the same results as before when there is no 0-column", { skip_on_cran() skip_on_ci() skip_on_covr() set.seed(42) n <- 25 Tmax <- 1 K <- 2 m <- 10 d <- generate_2State(n) dT <- cut_data(d, Tmax) row.names(dT) <- NULL b <- create.bspline.basis(c(0, Tmax), nbasis = m, norder = 4) expect_silent( fmcaNew <- compute_optimal_encoding(dT, b, computeCI = FALSE, method = "parallel", nCores = 1, verbose = FALSE) ) expect_silent(fmcaOld <- oldcompute_optimal_encoding(dT, b, nCores = 1, verbose = FALSE)) expect_equal(fmcaOld$eigenvalues, fmcaNew$eigenvalues) expect_equal(fmcaOld$alpha, fmcaNew$alpha) expect_equal(fmcaOld$pc, fmcaNew$pc) expect_equal(fmcaOld$F, fmcaNew$F) expect_equal(fmcaOld$G, fmcaNew$G) expect_equal(fmcaOld$V, fmcaNew$V) expect_false(any(is.na(fmcaNew$alpha[[1]]))) }) test_that("compute_optimal_encodings works when there is some 0-column", { skip_on_cran() data(biofam2) d <- biofam2[biofam2$id <= 100, ] nState <- length(unique(d$state)) nbasis <- 8 b <- create.bspline.basis(c(15, 30), nbasis = nbasis, norder = 4) expect_warning( { fmcaNew <- compute_optimal_encoding(d, b, computeCI = FALSE, method = "parallel", nCores = 1, verbose = FALSE) }, regexp = paste( "The F matrix contains at least one column of 0s.", "At least one state is not present in the support of one basis function.", "Corresponding coefficients in the alpha output will have a 0 value." ), fixed = TRUE ) expect_error( { fmcaOld <- oldcompute_optimal_encoding(d, b, method = "parallel", nCores = 1, verbose = FALSE) }, regexp = paste( "F matrix is not invertible. In the support of each basis function,", "each state must be present at least once (p(x_t) != 0 for t in the support). You can try to change the basis." ), fixed = TRUE ) expect_equal(dim(fmcaNew$F), c(nbasis * nState, nbasis * nState)) expect_equal(dim(fmcaNew$G), c(nbasis * nState, nbasis * nState)) expect_equal(dim(fmcaNew$V), c(length(unique(d$id)), nbasis * nState)) expect_true(any(is.na(fmcaNew$alpha[[1]]))) out <- get_encoding(fmcaNew, fdObject = FALSE) expect_true(any(is.na(out$y))) expect_warning( { fmcaNewBootstrap <- compute_optimal_encoding(d, b, computeCI = TRUE, method = "parallel", nCores = 1, verbose = FALSE) }, regexp = paste( "The F matrix contains at least one column of 0s.", "At least one state is not present in the support of one basis function.", "Corresponding coefficients in the alpha output will have a 0 value." ), fixed = TRUE ) expect_true("bootstrap" %in% names(fmcaNewBootstrap)) expect_true("varAlpha" %in% names(fmcaNewBootstrap)) }) # test_that("oldcompute_optimal_encoding throws an error when the basis is not well suited", { # # data_msm <- data.frame(id = rep(1:2, each = 3), time = c(0, 3, 5, 0, 4, 5), state = c(1, 2, 2, 1, 2, 2)) # b <- create.bspline.basis(c(0, 5), nbasis = 3, norder = 2) # # expect_error({fmca <- oldcompute_optimal_encoding(data_msm, b, nCores = 1)}, # regexp = "F matrix is not invertible. In the support of each basis function, each state must be present at least once (p(x_t) != 0 for t in the support). You can try to change the basis.", # fixed = TRUE) # }) test_that("removeTimeAssociatedWithNACoeff works", { fdmat <- matrix(rnorm(80), nrow = 20, ncol = 4, dimnames = list(NULL, c("b", "d", "a", "e"))) timeVal <- 1:20 pt <- list( t = seq(1, 20, length = 10), pt = matrix(runif(50, 0, 1), nrow = 5, ncol = 10, dimnames = list(c(letters[1:5]), NULL)) ) class(pt) <- "pt" pt$pt[1, 1:5] <- 0 pt$pt[4, 3:5] <- 0 pt$pt[4, 9:10] <- 0 pt$pt[5, 8:10] <- 0 out <- removeTimeAssociatedWithNACoeff(fdmat, timeVal, pt) expect_equal(dim(out), dim(fdmat)) expect_equal(colnames(out), colnames(fdmat)) expect_equal(out[!is.na(out)], fdmat[!is.na(out)]) expect_true(all(is.na(out[1:11, 3]))) expect_true(all(is.na(out[6:11, 2]))) expect_true(all(is.na(out[18:20, 2]))) expect_true(all(is.na(out[16:20, 4]))) }) test_that("predict works when there is some 0-column", { skip_on_cran() data(biofam2) d <- biofam2[biofam2$id <= 100, ] nState <- length(unique(d$state)) nbasis <- 8 b <- create.bspline.basis(c(15, 30), nbasis = nbasis, norder = 4) fmcaNewBootstrap <- compute_optimal_encoding(d, b, computeCI = TRUE, nCores = 1, verbose = FALSE) out <- predict(fmcaNewBootstrap, d, nCores = 1, verbose = FALSE) expect_equivalent(out, fmcaNewBootstrap$pc) })