if (isTRUE(as.logical(Sys.getenv("CI")))){ # If on CI env_test <- "CI" }else if (!identical(Sys.getenv("NOT_CRAN"), "true")){ # If on CRAN env_test <- "CRAN" set.seed(135) # CRAN SEED }else{ # If on local machine env_test <- 'local' } test_that("mfx with degenerate Mahalanobis", { N <- 100 x1 <- rnorm(N) x2 <- rbinom(N, size = 1, prob = .2) y <- x1^3 - 0.5 * x2 + rnorm(N, 0, 1) y <- y * 10 X <- cbind(x1, x2, x1 + x2 * 3) colnames(X) <- paste0("x", 1:ncol(X)) for (std in c('Mahalanobis', 'scaled')){ fit_gKRLS <- gam(y ~ 0 + s(x1, x2, x3, bs = "gKRLS", xt = gKRLS(standardize = std) ), family = gaussian(), method = "REML", data = data.frame(y, X) ) expect_equivalent(fit_gKRLS$smooth[[1]]$term, c("x1", "x2", "x3")) if (std != 'scaled'){ expect_equivalent(ncol(fit_gKRLS$smooth[[1]]$X_train), 2) } mfx_num <- calculate_effects(fit_gKRLS, data = data.frame(X), use_original = TRUE, continuous_type = "deriv") mfx_legacy <- legacy_marginal_effect(fit_gKRLS, data = data.frame(X), variables = c("x1", "x2", "x3") ) expect_equivalent(mfx_num$est, mfx_legacy$AME_pointwise, tol = 1e-3) expect_equivalent(mfx_num$se, sqrt(mfx_legacy$AME_pointwise_var), tol = 1e-3) } }) test_that("Test MFX", { N <- 100 x1 <- rnorm(N) x2 <- rbinom(N, size = 1, prob = .2) y <- x1^3 - 0.5 * x2 + rnorm(N, 0, 1) y <- y * 10 X <- cbind(x1, x2, x1 + x2 * 3) colnames(X) <- paste0("x", 1:ncol(X)) fit_gKRLS <- gam(y ~ 0 + s(x1, x2, x3, bs = "gKRLS", xt = gKRLS(standardize = "scaled", sketch_method = "gaussian") ), family = gaussian(), method = "REML", data = data.frame(y, X) ) expect_equivalent(fit_gKRLS$smooth[[1]]$term, c("x1", "x2", "x3")) expect_equivalent(ncol(fit_gKRLS$smooth[[1]]$X_train), 3) if (env_test != 'CRAN'){ mfx_num <- suppressWarnings(calculate_effects(fit_gKRLS, data = data.frame(X), continuous_type = "deriv")) mfx_legacy <- legacy_marginal_effect(fit_gKRLS, data = data.frame(X), variables = c("x1", "x2", "x3") ) expect_equivalent(mfx_num$est, mfx_legacy$AME_pointwise, tol = 1e-3) expect_equivalent(mfx_num$se, sqrt(mfx_legacy$AME_pointwise_var), tol = 1e-3) } }) test_that("test 'calculate_effects'", { N <- 100 x1 <- rnorm(N, sd = 1.6) x2 <- rbinom(N, size = 1, prob = .2) s <- sample(letters[1:5], N, replace = T) X <- data.frame(x1, x2, s = factor(s), l = sample(c(TRUE,FALSE), N, replace = T), stringsAsFactors = F) colnames(X) <- paste0("x", 1:ncol(X)) X$x4 <- factor(X$x4) y <- x1^3 - 0.5 * x2 + 1/5 * match(X$x3, letters) + rnorm(N, 0, 1) y <- y * 10 fit_gKRLS <- gam(y ~ s(x1, x2, x3, x4, bs = "gKRLS"), family = gaussian(), method = "REML", data = data.frame(y, X) ) factor_test <- calculate_effects(model = fit_gKRLS, variables = "x3") expect_null(factor_test$individual) factor_test <- calculate_effects( model = fit_gKRLS, variables = "x3", individual = TRUE, conditional = data.frame(x1 = c(-1, 1)) ) expect_false(is.null(get_individual_effects(factor_test))) expect_equal(nrow(factor_test), 8) logical_test <- calculate_effects(model = fit_gKRLS, variables = "x4") custom_cont_test <- calculate_effects( model = fit_gKRLS, variables = "x1", continuous_type = list('x1' = c(-1, 1))) # Test alignment with derivative and second derivative fit_first_deriv <- calculate_effects(model = fit_gKRLS, variables = 'x1', continuous_type = 'derivative', individual = TRUE) fit_second_deriv <- calculate_effects(model = fit_gKRLS, variables = 'x1', continuous_type = 'second_derivative', individual = TRUE) obj <- fit_gKRLS$smooth[[1]] S <- obj$sketch_matrix X_test <- create_data_gKRLS( term_levels = obj$term_levels, term_class = obj$term_class, data_term = data.frame(X)[obj$term], terms = obj$term, allow.missing.levels = TRUE) if (identical(obj$xt$sketch_method, 'gaussian')){ X_nystrom <- X_test }else{ X_nystrom <- X_test[obj$subsampling_id,, drop = F] } obj$xt$return_raw <- TRUE K <- create_sketched_kernel( X_test = Predict.matrix.gKRLS.smooth(object = obj, data = data.frame(X)), X_train = as.matrix(obj$X_train), S = diag(nrow(obj$X_train)), bandwidth = obj$bandwidth ) W <- obj$std_train$whiten W2 <- tcrossprod(W) man_first_deriv <- -2/obj$bandwidth * as.vector( (K * outer( (X_test %*% W2)[,1], (X_nystrom %*% W2)[,1], FUN=function(x,y){x-y})) %*% t(S) %*% coef(fit_gKRLS)[-1] ) man_second_deriv_a <- 4/obj$bandwidth^2 * as.vector( (K * outer( (X_test %*% W2)[,1], (X_nystrom %*% W2)[,1], FUN=function(x,y){x-y})^2 ) %*% t(S) %*% coef(fit_gKRLS)[-1] ) man_second_deriv_b <- -2/obj$bandwidth * as.vector( (K * diag(W2)[1]) %*% t(S) %*% coef(fit_gKRLS)[-1] ) man_second_deriv <- man_second_deriv_a + man_second_deriv_b expect_equivalent(get_individual_effects(fit_first_deriv)$est, man_first_deriv, tol = 1e-3) expect_equivalent(get_individual_effects(fit_second_deriv)$est, man_second_deriv, tol = 1e-3) SE_MAT <- -2/obj$bandwidth * ( (K * outer( (X_test %*% W2)[,1], (X_nystrom %*% W2)[,1], FUN=function(x,y){x-y})) %*% t(S)) man_first_deriv_se <- sqrt(rowSums( (SE_MAT %*% vcov(fit_gKRLS)[-1,-1]) * SE_MAT )) expect_equivalent(get_individual_effects(fit_first_deriv)$se, man_first_deriv_se, tol = 1e-3) SE_MAT2 <- 4/obj$bandwidth^2 * (K * outer( (X_test %*% W2)[,1], (X_nystrom %*% W2)[,1], FUN=function(x,y){x-y})^2 ) %*% t(S) + -2/obj$bandwidth * (K * diag(W2)[1]) %*% t(S) man_second_deriv_se <- sqrt(rowSums( (SE_MAT2 %*% vcov(fit_gKRLS)[-1,-1]) * SE_MAT2 )) expect_equivalent(get_individual_effects(fit_second_deriv)$se, man_second_deriv_se, tol = 1e-3) }) test_that("test logical and binary", { N <- 100 x1 <- rnorm(N, sd = 1.6) x2 <- rbinom(N, size = 1, prob = .2) s <- sample(letters[1:5], N, replace = T) X <- data.frame(x1, x2, s = factor(s), l = sample(c(TRUE,FALSE), N, replace = T), stringsAsFactors = F) colnames(X) <- paste0("x", 1:ncol(X)) y <- x1^3 - 0.5 * x2 + 1/5 * match(X$x3, letters) + rnorm(N, 0, 1) y <- y * 10 fit_gKRLS <- suppressWarnings( gam(list(y ~ x4 * x3 + s(x1, by = x3, bs = 'unregpoly'), ~ x4 * x3 + x1), family = gaulss(), method = "REML", data = data.frame(y, X) )) est_effects <- calculate_effects(fit_gKRLS, variables = list(c('x4', 'x3'))) copy_X <- data.frame(X) copy_X$x4 <- FALSE copy_X$x3 <- 'a' pred_base <- predict(fit_gKRLS, newdata = copy_X, type = 'response') pred_base <- colMeans(pred_base) for (v in c('b', 'c', 'd')){ copy_X$x3 <- v copy_X$x4 <- TRUE pred_alt <- colMeans(predict(fit_gKRLS, newdata = copy_X, type = 'response')) expect_equal(pred_alt - pred_base, subset(est_effects, grepl(variable, pattern=paste0('factor.x3.', v)))$est) } }) test_that("test mfx where conditional contains variable", { N <- 100 x1 <- rnorm(N) x2 <- rbinom(N, size = 1, prob = .2) y <- x1^3 - 0.5 * x2 + rnorm(N, 0, 1) X <- cbind(x1, x2, x1 + x2 * 3) fit <- gam(y ~ s(x1, x2, bs = 'gKRLS')) if (env_test == 'CRAN'){ grid_x <- seq(-2, 2, length.out=2) }else{ grid_x <- seq(-2, 2, length.out=10) } # Check warning occurs fit_IQR <- expect_warning(calculate_effects(fit, variables = 'x1', conditional = data.frame(x1 = grid_x)), regexp = 'should not usually contain') # Should *not* warn fit_pred <- calculate_effects(fit, variables = 'x1', continuous_type = 'predict', conditional = data.frame(x1 = grid_x)) fit_deriv <- expect_warning(calculate_effects(fit, variables = 'x1', continuous_type = 'derivative', conditional = data.frame(x1 = grid_x)), regexp = 'should not usually contain.*step') fit_second_deriv <- expect_warning(calculate_effects(fit, variables = 'x1', continuous_type = 'second_derivative', conditional = data.frame(x1 = grid_x)), regexp = 'should not usually contain.*step size') expect_true(identical(fit_IQR$est, rep(0, nrow(fit_IQR)))) copy_dat <- data.frame(x1 = x1, x2 = x2) step <- max(1, max(abs(x1))) * 1e-7 check_deriv <- sapply(grid_x, FUN=function(i){ copy_dat$x1 <- i - step p0 <- mean(predict(fit, newdata = copy_dat)) copy_dat$x1 <- i + step p1 <- mean(predict(fit, newdata = copy_dat)) return( (p1 - p0)/(2 * step)) }) expect_equal(fit_deriv$est, check_deriv) copy_dat <- data.frame(x1 = x1, x2 = x2) step <- sqrt(max(1, max(abs(x1))) * 1e-7) check_deriv <- sapply(grid_x, FUN=function(i){ copy_dat$x1 <- i - step p0 <- mean(predict(fit, newdata = copy_dat)) copy_dat$x1 <- i + step p2 <- mean(predict(fit, newdata = copy_dat)) copy_dat$x1 <- i p1 <- mean(predict(fit, newdata = copy_dat)) return( (p2 - 2 * p1 + p0)/(step^2)) }) expect_equal(fit_second_deriv$est, check_deriv) })