R Under development (unstable) (2025-09-16 r88844 ucrt) -- "Unsuffered Consequences" Copyright (C) 2025 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # library(VIM) > # library(laeken) > # library(robustbase) > # library(mice) > # data("Animals_na") > # > # > # # debugonce(imputeRobust) > # imputeRobust(data = Animals_na, > # form = formula("lbrain ~ lbody"), > # method = "lm", uncert = "wresid", boot = FALSE) > # > # > # data(Animals, package = "MASS") > # ani_plot(Animals = Animals, method = "imputeRobust-lts-pmm", mainlab = NULL, > # seed = 1) > # ani_plot(Animals = Animals, method = "imputeRobust-lts-pmm-chain", mainlab = NULL, > # seed = 1) > # > # > # ani_plot(Animals = Animals, method = "imputeRobust-lts-normal", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-lts-resid", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-lts-wresid", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-lts-pmm-nboot", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-lts-normal-nboot", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-lts-resid-nboot", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-lts-wresid-nboot", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-lts-pmm-noboot", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-lts-normal-noboot", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-lts-resid-noboot", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-lts-wresid-noboot", mainlab = NULL, seed = 123) > # > # > # ani_plot(Animals = Animals, method = "imputeRobust-gam-pmm", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-gam-normal", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-gam-resid", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-gam-wresid", mainlab = NULL, seed = 123) > # > # ani_plot(Animals = Animals, method = "imputeRobust-lm-pmm", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-lm-normal", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-lm-resid", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-lm-wresid", mainlab = NULL, seed = 123) > # > # ani_plot(Animals = Animals, method = "imputeRobust-MM-pmm", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-MM-normal", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-MM-resid", mainlab = NULL, seed = 123) > # ani_plot(Animals = Animals, method = "imputeRobust-MM-wresid", mainlab = NULL, seed = 123) > # > # debugonce("imputeRobustChain") > # > # Animals_na$lbody[1:3] <- NA > # xi <- imputeRobustChain(formulas = list(formula("lbody ~ lbrain"), > # formula("lbrain ~ lbody")), > # data = Animals_na, > # boot = TRUE, > # robustboot = TRUE, > # method = "lts", > # takeAll = TRUE, > # eps = 0.5, > # maxit = 4, > # alpha = 0.5, > # uncert = "pmm", > # familiy = "Gaussian", > # value_back = "matrix", > # trace = TRUE) > # > # data(iris) > # iris[1:3, 1] <- iris[4:6, 2] <- iris[5:7, 3] <- iris[7:9, 4] <- NA > # iris[9:11, 5] <- NA > # debugonce("imputeRobustChain") > # xi <- imputeRobustChain(formulas = list(formula("Sepal.Length ~ ."), > # NULL, NULL, NULL, formula("Species ~ .")), > # data = iris, > # boot = TRUE, > # robustboot = TRUE, > # method = "lts", > # multinom.method = "multinom", > # takeAll = TRUE, > # eps = 0.5, > # maxit = 4, > # alpha = 0.5, > # uncert = "pmm", > # familiy = "Gaussian", > # value_back = "matrix", > # trace = TRUE) > # > # > # > # ## blasting > # > # library(stmo) > # data(blasting) > # head(blasting) > # > # lm_blasting1 <- lm(log(vibration) ~ log(distance) + log(load), > # data = blasting) > # summary(lm_blasting1) > # lm_blasting2 <- lm(log10(vibration) ~ log10(distance) * location + load, > # data = blasting) > # summary(lm_blasting2) > # > # blasting$vibration[sample(1:nrow(blasting), 7)] <- NA > # w <- complete.cases(blasting) > # > # forms <- list(NULL, NULL, NULL, formula("log10(vibration) ~ log10(distance) * location + load")) > # debugonce("imputeRobustChain") > # xi <- suppressWarnings(imputeRobustChain(formulas = forms, data = blasting, trace = TRUE, maxit = 1, uncert = "resid")) > # xi2 <- suppressWarnings(imputeRobustChain(data = blasting, trace = TRUE, maxit = 20)) > # > # plot(xi[, c("vibration", "distance")], col = w + 3) > # plot(xi2[, c("vibration", "distance")], col = w + 3) > # > # library(mice) > # x2 <- complete(mice(blasting, m = 1)) > # > # plot(x2[, c("vibration", "distance")], col = w + 3) > # > # > # f <- formula("log10(vibration) ~ log10(distance) * location + load") > # fn <- f[2] > # cn <- "vibration" > # > # data("cherry") > # cherry.lm <- lm(log(volume) ~ log(diameter) + log(height), data = cherry) > # > # data("leafburn", package = "faraway") > # pairs(leafburn) > # # log10(burntimei) = β0 + β1nitrogeni + β2log10(chlorine)i + β3potassiumi + > # > # data(wood, package = "robustbase") > # ?wood > # > # setMissOut <- function(x, vars = 1:ncol(x), r = rep(0.1, ncol(x))){ > # # outlier detection > # o <- robCompositions::outCoDa(x, coda = FALSE, quantile = 0.99) > # n_outliers <- sum(o$outlierIndex) > # n_nonoutliers <- sum(!o$outlierIndex) > # # sort data > # x_na <- x_orig <- rbind(x[!o$outlierIndex, ], x[o$outlierIndex, ]) > # # set missings in all variables > # # but not in outliers > # n <- nrow(x) > # n_good <- sum(!o$outlierIndex) > # n_out <- sum(o$outlierIndex) > # for(i in vars){ > # s <- sample(1:n_good, round(r[i] * n), replace = FALSE) > # x_na[s, i] <- NA > # } > # m1 <- is.na(x_na[, vars]) > # return(x_na) > # } > # > # wood_na <- setMissOut(wood) > # xi <- imputeRobustChain(data = wood_na) > # > # > # > # setMiss <- function(x, kind = "MCAR", rate = 0.3){ > # # simple function assuming x has variables Y and X > # SEQ <- 1:nrow(x) > # n <- nrow(x) > # p <- ncol(x) > # if(kind == "MCAR"){ > # s <- sample(SEQ, size = round(rate * n, 0)) > # } > # if(kind == "MAR"){ > # s <- sample(SEQ, size = round(rate * n, 0), prob = (x$X+abs(min(x$X))+1)/sum(x$X+abs(min(x$X))+1)) > # } > # if(kind == "MNAR"){ > # s <- sample(SEQ, size = round(rate * n, 0), prob = (x$Y+abs(min(x$Y))+1)/sum(x$Y+abs(min(x$Y))+1)) > # } > # x$Y[s] <- NA > # return(x) > # } > # > > # imputeRobust(x = Animals_na, > # formulas = list(formula("lbody ~ lbrain"), > # formula("lbrain ~ lbody"))) > # > > # head(Animals_na) > # aggr(Animals_na) > # First algorithm (fit robust and draw robust bootrap samples, > # then classical fit) > > # > # bootfunc.robust <- function (n) > # { > # random <- sample.int(n, replace = TRUE) > # as.numeric(table(factor(random, levels = seq_len(n)))) > # } > # > # > # imputeRobust <- function(x, > # formulas = NULL, > # eps = 0.1, > # maxit = 5, > # mi = 1){ > # > # # Missing index M > # M <- is.na(x) > # n <- nrow(x) > # p <- ncol(x) > # > # # colswithNA > # colswithNA <- apply(x, 2, function(x){any(is.na(x))}) > # > # # Initialize > # x <- kNN(x, imp_var = FALSE) > # > # > # > # for(i in 1:p){ > # if(colswithNA[i]){ > # if(!is.null(formulas[[i]])){ > # mf <- model.frame(formulas[[i]], data = x) > # y <- model.extract(mf, "response") > # y_name <- all.vars(formulas[[i]])[1] > # X_names <- all.vars(formulas[[i]])[-1] > # # robust fit > # mod <- ltsReg(formulas[[1]], data = x) > # # robust bootstrap sample > # bs <- sample(x = mod$best, size = n, replace = TRUE) > # # Fit a OLS regression > # mod2 <- lm(formulas[[i]], data = x[bs, ]) > # # Predict/Update missings including imputation uncertainty > # x[M[, i], y_name] <- predict(mod2, > # newdata = x[M[, i], X_names, drop = FALSE]) > # } > # > # } > # } > # return(x) > # } > # > # debugonce(imputeRobust) > # imputeRobust(x = Animals_na, > # formulas = list(formula("lbody ~ lbrain"), > # formula("lbrain ~ lbody"))) > > # > # > # # 1. calulate M > # # M <- is.na(data[, c(y_name, X_names)]) > # # calculate m > # m <- is.na(y) > # n <- nrow(Animals_na) > # > # # 2. initialize with knn > # x <- kNN(data[, c(y_name, X_names], imp_var = FALSE)]) > # > # # 3. LTS-regression > # mod <- ltsReg(lbrain ~ lbody, data = Animals_na) > # > # # 4. Draw a bootstrap sample of size n > # # from non-outliers > # bs <- sample(x = mod$best, size = n, replace = TRUE) > # > # # 5. Fit a OLS regression > # mod2 <- lm(lbrain ~ lbody, data = Animals[bs, ]) > # > # # 6. Predict/Update missings including imputation uncertainty > # Animals[M, "lbrain"] <- predict(mod2, > # newdata = data.frame("lbody" = 1)) > # > # } > # > # x <- Animals_na > # formulas <- list(formula("lbrain ~ lbody", data = x), > # formula("lbody ~ lbrain", data = x)) > # > # > # form <- formula(lbrain ~ lbody, data = Animals_na) > # mf <- model.frame(form, data = Animals_na) > # debugonce(imputeRobust) > # imputeRobust(lbrain ~ lbody, data = Animals_na) > # > # > # # 1. calulate M > # M <- is.na(Animals_na$lbrain) > # n <- nrow(Animals_na) > # > # # 2. initialize with knn > # Animals <- kNN(Animals_na, imp_var = FALSE) > # > # # 3. LTS-regression > # mod <- ltsReg(lbrain ~ lbody, data = Animals_na) > # > # # 4. Draw a bootstrap sample of size n > # # from non-outliers > # bs <- sample(x = mod$best, size = n, replace = TRUE) > # > # # 5. Fit a OLS regression > # mod2 <- lm(lbrain ~ lbody, data = Animals[bs, ]) > # > # # 6. Predict/Update missings including imputation uncertainty > # Animals[M, "lbrain"] <- predict(mod2, > # newdata = data.frame("lbody" = 1)) > # > # } > # > # # Second algorithm (draw calibrated bootstrap) > # > # > # # Third algorithm (draw bootrap sample and fit robust) > > proc.time() user system elapsed 0.20 0.03 0.21