# Tests for the mean absolute scaled error metric: `mase()` / `mase_vec()`.

test_that("Calculations are correct", {
  ex_dat <- generate_numeric_test_data()
  truth <- ex_dat$obs
  pred <- ex_dat$pred

  # Hand-computed reference: absolute errors scaled by the mean absolute
  # one-step naive error. The first naive error is NA (no lagged value), so
  # it is dropped before averaging.
  naive_error <- truth - dplyr::lag(truth, 1L)
  mae_denom <- mean(abs(naive_error)[-1])
  expected <- mean(abs((truth - pred) / mae_denom))

  expect_equal(
    mase_vec(truth = ex_dat$obs, estimate = ex_dat$pred),
    expected
  )
})

test_that("both interfaces gives the same results", {
  ex_dat <- generate_numeric_test_data()

  # The vector interface and the data frame interface must agree exactly.
  expect_identical(
    mase_vec(ex_dat$obs, ex_dat$pred),
    mase(ex_dat, obs, pred)[[".estimate"]]
  )
})

test_that("Calculations handles NAs", {
  ex_dat <- generate_numeric_test_data()
  na_ind <- 1:10
  ex_dat$pred[na_ind] <- NA

  # Reference value computed on the rows that still have a prediction.
  truth <- ex_dat$obs[-na_ind]
  pred <- ex_dat$pred[-na_ind]
  naive_error <- truth - dplyr::lag(truth, 1L)
  mae_denom <- mean(abs(naive_error)[-1])
  expected <- mean(abs((truth - pred) / mae_denom))

  # Keeping the missing values must yield a missing result ...
  expect_identical(
    mase_vec(ex_dat$obs, ex_dat$pred, na_rm = FALSE),
    NA_real_
  )
  # ... while the default call is expected to match the complete-case value.
  expect_equal(
    mase_vec(truth = ex_dat$obs, estimate = ex_dat$pred),
    expected
  )
})

test_that("Case weights calculations are correct", {
  truth <- c(1, 2, 3)
  estimate <- c(2, 4, 3)
  weights <- c(1, 2, 1)

  # Absolute errors are 1, 2, 0 and both consecutive differences of `truth`
  # are 1, so the expected weighted mean is (1 * 1 + 2 * 2 + 0 * 1) / 4.
  expect_identical(
    mase_vec(truth, estimate, case_weights = weights),
    5 / 4
  )
})

test_that("works with hardhat case weights", {
  # Attach the weights to a local copy of the data.
  df <- solubility_test
  df$weights <- floor(read_weights_solubility_test())

  imp_wgt <- hardhat::importance_weights(df$weights)
  freq_wgt <- hardhat::frequency_weights(df$weights)

  expect_no_error(
    mase_vec(df$solubility, df$prediction, case_weights = imp_wgt)
  )
  expect_no_error(
    mase_vec(df$solubility, df$prediction, case_weights = freq_wgt)
  )
})

test_that("na_rm argument check", {
  expect_snapshot(
    error = TRUE,
    mase_vec(1, 1, na_rm = "yes")
  )
})

test_that("bad argument check", {
  expect_snapshot(
    error = TRUE,
    mase_vec(1, 1, m = "yes")
  )
})

test_that("mase() - errors if m is larger than number of observations", {
  expect_snapshot(
    error = TRUE,
    mase(mtcars, mpg, disp, m = 100)
  )
})

test_that("mase() - m argument works", {
  ex_dat <- generate_numeric_test_data()
  truth <- ex_dat$obs
  pred <- ex_dat$pred
  m <- 2

  # With a lag of `m`, the first `m` naive errors are NA and are dropped.
  naive_error <- truth - dplyr::lag(truth, m)
  mae_denom <- mean(abs(naive_error)[-seq_len(m)])
  expected <- mean(abs((truth - pred) / mae_denom))

  expect_equal(
    mase_vec(ex_dat$obs, ex_dat$pred, m = m),
    expected
  )
})

test_that("mase() - mae_train argument works", {
  ex_dat <- generate_numeric_test_data()
  truth <- ex_dat$obs
  pred <- ex_dat$pred
  mae_train <- 0.5

  # The reference value uses the supplied `mae_train` directly as the scale.
  expected <- mean(abs((truth - pred) / mae_train))

  expect_equal(
    mase_vec(ex_dat$obs, ex_dat$pred, mae_train = mae_train),
    expected
  )
})

test_that("range values are correct", {
  direction <- metric_direction(mase)
  rng <- metric_range(mase)

  # `direction` is a single string, so plain if/else is used to pick which
  # end of the range is the best and which is the worst possible value.
  if (direction == "minimize") {
    perfect <- rng[1]
    worst <- rng[2]
  } else {
    perfect <- rng[2]
    worst <- rng[1]
  }

  df <- tibble::tibble(
    truth = c(5, 6, 2, 6, 4, 1, 3)
  )
  df$estimate <- df$truth
  df$off <- df$truth + 1

  # Predictions equal to the truth must hit the perfect end of the range.
  expect_identical(
    mase_vec(df$truth, df$estimate),
    perfect
  )

  # Imperfect predictions must fall strictly between perfect and worst.
  if (direction == "minimize") {
    expect_gt(mase_vec(df$truth, df$off), perfect)
    expect_lt(mase_vec(df$truth, df$off), worst)
  }
  if (direction == "maximize") {
    expect_lt(mase_vec(df$truth, df$off), perfect)
    expect_gt(mase_vec(df$truth, df$off), worst)
  }
})