release_metric_exports <- hydroeval_release_metric_exports() release_metric_wrappers <- stats::setNames( lapply(release_metric_exports, hydroeval_release_get_export), release_metric_exports ) list2env(release_metric_wrappers, envir = environment()) test_that("release-hardening export surface stays locked to the 31 public wrappers", { namespace_exports <- .hydroeval_test_namespace_exports() expect_setequal(namespace_exports, hydroeval_release_expected_exports()) expect_setequal(names(release_metric_wrappers), release_metric_exports) }) test_that("all public wrappers behave like released scalar APIs on near-perfect data", { scenario <- hydroeval_release_scenario("near_perfect") for (metric_name in release_metric_exports) { hydroeval_release_expect_wrapper_contract(metric_name, scenario) } }) test_that("all public wrappers honor omit and fail NA forwarding through public entry points", { for (metric_name in release_metric_exports) { hydroeval_release_expect_na_policy_forwarding(metric_name) } }) test_that("perfect-fit invariants stay exact across all public wrappers", { scenario <- hydroeval_release_scenario("perfect_fit") perfect_targets <- c( me = 0, mae = 0, medae = 0, mse = 0, rmse = 0, sse = 0, sae = 0, max_error = 0, rae = 0, rrse = 0, nrmse = 0, pbias = 0, mape = 0, smape = 0, wape = 0, msle = 0, rmsle = 0, rsr = 0, ubrmse = 0, nse = 1, evs = 1, r = 1, rho = 1, r_squared = 1, ccc = 1, ve = 1, d = 1, d_r = 1, rsd = 1, kge_2009 = 1, kge_2012 = 1 ) for (metric_name in names(perfect_targets)) { expect_equal( hydroeval_release_call_wrapper( metric_name = metric_name, observed = scenario$observed, simulated = scenario$simulated ), unname(perfect_targets[[metric_name]]), tolerance = 1e-12 ) } }) test_that("high-risk scalar wrappers keep exact formula-level behavior on hand-checkable data", { scenario <- hydroeval_release_scenario("positive_reference") observed <- scenario$observed simulated <- scenario$simulated residuals <- simulated - observed correlation <- stats::cor(observed, simulated, method = "pearson") spearman <- stats::cor(observed, simulated, method = "spearman") alpha <- stats::sd(simulated) / stats::sd(observed) beta <- mean(simulated) / mean(observed) gamma <- (stats::sd(simulated) / mean(simulated)) / (stats::sd(observed) / mean(observed)) ccc_expected <- 2 * stats::cov(observed, simulated) / (stats::var(observed) + stats::var(simulated) + (mean(observed) - mean(simulated))^2) d_expected <- 1 - sum(residuals^2) / sum((abs(simulated - mean(observed)) + abs(observed - mean(observed)))^2) smape_expected <- 100 * mean( 2 * abs(residuals) / (abs(observed) + abs(simulated)) ) msle_expected <- mean((log1p(simulated) - log1p(observed))^2) expect_equal(pbias(observed, simulated), 2, tolerance = 1e-12) expect_equal(nrmse(observed, simulated), sqrt(4.5) / 30, tolerance = 1e-12) expect_equal(mape(observed, simulated), 10.625, tolerance = 1e-12) expect_equal(smape(observed, simulated), smape_expected, tolerance = 1e-12) expect_equal(wape(observed, simulated), 8, tolerance = 1e-12) expect_equal(msle(observed, simulated), msle_expected, tolerance = 1e-12) expect_equal(rmsle(observed, simulated), sqrt(msle_expected), tolerance = 1e-12) expect_equal(nse(observed, simulated), 0.964, tolerance = 1e-12) expect_equal(r(observed, simulated), correlation, tolerance = 1e-12) expect_equal(rho(observed, simulated), spearman, tolerance = 1e-12) expect_equal(ccc(observed, simulated), ccc_expected, tolerance = 1e-12) expect_equal( kge_2009(observed, simulated), 1 - sqrt((correlation - 1)^2 + (alpha - 1)^2 + (beta - 1)^2), tolerance = 1e-12 ) expect_equal( kge_2012(observed, simulated), 1 - sqrt((correlation - 1)^2 + (beta - 1)^2 + (gamma - 1)^2), tolerance = 1e-12 ) expect_equal(rsd(observed, simulated), alpha, tolerance = 1e-12) expect_equal(rsr(observed, simulated), sqrt(4.5) / stats::sd(observed), tolerance = 1e-12) expect_equal(ubrmse(observed, simulated), sqrt(4.25), tolerance = 1e-12) expect_equal(r_squared(observed, simulated), correlation^2, tolerance = 1e-12) expect_equal(rae(observed, simulated), 0.2, tolerance = 1e-12) expect_equal(rrse(observed, simulated), sqrt(18 / 500), tolerance = 1e-12) expect_equal(ve(observed, simulated), 0.92, tolerance = 1e-12) expect_equal(d(observed, simulated), d_expected, tolerance = 1e-12) expect_equal(d_r(observed, simulated), 0.9, tolerance = 1e-12) }) test_that("additive bias scenarios keep sign conventions and directional penalties", { over <- hydroeval_release_scenario("additive_overestimation") under <- hydroeval_release_scenario("additive_underestimation") expect_gt(me(over$observed, over$simulated), 0) expect_gt(pbias(over$observed, over$simulated), 0) expect_lt(ve(over$observed, over$simulated), 1) expect_lt(ccc(over$observed, over$simulated), 1) expect_lt(kge_2009(over$observed, over$simulated), 1) expect_lt(kge_2012(over$observed, over$simulated), 1) expect_lt(me(under$observed, under$simulated), 0) expect_lt(pbias(under$observed, under$simulated), 0) expect_lt(ve(under$observed, under$simulated), 1) expect_lt(ccc(under$observed, under$simulated), 1) expect_lt(kge_2009(under$observed, under$simulated), 1) expect_lt(kge_2012(under$observed, under$simulated), 1) }) test_that("scaling and variance scenarios separate variability penalties from pure mean bias", { scaling <- hydroeval_release_scenario("multiplicative_scaling_bias") variance <- hydroeval_release_scenario("variance_distortion_same_mean") expect_gt(rsd(scaling$observed, scaling$simulated), 1) expect_gt(nrmse(scaling$observed, scaling$simulated), 0) expect_gt(rsr(scaling$observed, scaling$simulated), 0) expect_gt(ubrmse(scaling$observed, scaling$simulated), 0) expect_lt(kge_2009(scaling$observed, scaling$simulated), 1) expect_lt(kge_2012(scaling$observed, scaling$simulated), 1) expect_equal(me(variance$observed, variance$simulated), 0, tolerance = 1e-12) expect_equal(pbias(variance$observed, variance$simulated), 0, tolerance = 1e-12) expect_false(isTRUE(all.equal(rsd(variance$observed, variance$simulated), 1, tolerance = 1e-12))) expect_lt(evs(variance$observed, variance$simulated), 1) expect_lt(ccc(variance$observed, variance$simulated), 1) expect_lt(kge_2009(variance$observed, variance$simulated), 1) expect_lt(kge_2012(variance$observed, variance$simulated), 1) }) test_that("high-correlation and ranking-degradation scenarios stay scientifically distinct", { high_corr <- hydroeval_release_scenario("high_correlation_imperfect_fit") ranking <- hydroeval_release_scenario("ranking_degradation") expect_gt(r(high_corr$observed, high_corr$simulated), 0.98) expect_gt(r_squared(high_corr$observed, high_corr$simulated), 0.95) expect_gt(mae(high_corr$observed, high_corr$simulated), 0) expect_lt(nse(high_corr$observed, high_corr$simulated), 1) expect_lt(d(high_corr$observed, high_corr$simulated), 1) expect_lt(rho(ranking$observed, ranking$simulated), 1) expect_lt(r(ranking$observed, ranking$simulated), 1) expect_true(rho(ranking$observed, ranking$simulated) < r(ranking$observed, ranking$simulated)) }) test_that("outlier contamination separates robust and outlier-sensitive error metrics", { scenario <- hydroeval_release_scenario("outlier_contamination") expect_gt(max_error(scenario$observed, scenario$simulated), mae(scenario$observed, scenario$simulated)) expect_gt(rmse(scenario$observed, scenario$simulated), mae(scenario$observed, scenario$simulated)) expect_true(medae(scenario$observed, scenario$simulated) <= mae(scenario$observed, scenario$simulated)) expect_gt(ubrmse(scenario$observed, scenario$simulated), 0) }) test_that("low-flow, zero-heavy, and negative-valued scenarios hit the right metric families", { low_flow <- hydroeval_release_scenario("low_flow_zero_heavy") negative_values_scenario <- hydroeval_release_scenario("negative_values") smape_expected <- 100 * mean(c( 0, 2 * 0.25 / 0.75, 2 * 0.5 / 2.5, 2 * 0.5 / 3.5 )) expect_equal(smape(low_flow$observed, low_flow$simulated), smape_expected, tolerance = 1e-12) expect_equal( pbias(low_flow$observed, low_flow$simulated), 100 * sum(low_flow$simulated - low_flow$observed) / sum(low_flow$observed), tolerance = 1e-12 ) expect_equal( wape(low_flow$observed, low_flow$simulated), 100 * sum(abs(low_flow$simulated - low_flow$observed)) / sum(abs(low_flow$observed)), tolerance = 1e-12 ) expect_equal( ve(low_flow$observed, low_flow$simulated), 1 - sum(abs(low_flow$simulated - low_flow$observed)) / sum(low_flow$observed), tolerance = 1e-12 ) expect_error( mape(low_flow$observed, low_flow$simulated), class = "hydroeval_metric_degeneracy", regexp = "zero_observed_values" ) expect_equal(mae(negative_values_scenario$observed, negative_values_scenario$simulated), 1, tolerance = 1e-12) expect_equal( r(negative_values_scenario$observed, negative_values_scenario$simulated), stats::cor(negative_values_scenario$observed, negative_values_scenario$simulated), tolerance = 1e-12 ) expect_equal(pbias(negative_values_scenario$observed, negative_values_scenario$simulated), 0, tolerance = 1e-12) expect_error( msle(negative_values_scenario$observed, negative_values_scenario$simulated), class = "hydroeval_metric_degeneracy", regexp = "negative_observed_values" ) expect_error( rmsle(negative_values_scenario$observed, negative_values_scenario$simulated), class = "hydroeval_metric_degeneracy", regexp = "negative_observed_values" ) }) test_that("degeneracy surfaces fire only for the relevant scalar metric families", { constant_observed <- hydroeval_release_scenario("constant_observed") constant_simulated <- hydroeval_release_scenario("constant_simulated") zero_observed_sum <- hydroeval_release_scenario("zero_observed_sum") zero_absolute_observed_sum <- hydroeval_release_scenario("zero_absolute_observed_sum") zero_observed_mean <- hydroeval_release_scenario("zero_observed_mean") zero_simulated_mean <- hydroeval_release_scenario("zero_simulated_mean") zero_simulated_mean_kge_2009_valid <- hydroeval_release_scenario("zero_simulated_mean_kge_2009_valid") for (metric_name in c("rae", "rrse", "nrmse", "evs", "rsr", "rsd", "nse")) { expect_error( hydroeval_release_call_wrapper(metric_name, constant_observed$observed, constant_observed$simulated), class = "hydroeval_metric_degeneracy", regexp = "constant_observed_series" ) } for (metric_name in c("r", "rho", "r_squared", "kge_2009", "kge_2012")) { expect_error( hydroeval_release_call_wrapper(metric_name, constant_simulated$observed, constant_simulated$simulated), class = "hydroeval_metric_degeneracy", regexp = "constant_simulated_series" ) } expect_true(is.finite(ccc(constant_simulated$observed, constant_simulated$simulated))) expect_error( pbias(zero_observed_sum$observed, zero_observed_sum$simulated), class = "hydroeval_metric_degeneracy", regexp = "zero_observed_sum" ) expect_error( ve(zero_observed_sum$observed, zero_observed_sum$simulated), class = "hydroeval_metric_degeneracy", regexp = "zero_observed_sum" ) expect_error( wape(zero_absolute_observed_sum$observed, zero_absolute_observed_sum$simulated), class = "hydroeval_metric_degeneracy", regexp = "zero_absolute_observed_sum" ) expect_error( kge_2012(zero_observed_mean$observed, zero_observed_mean$simulated), class = "hydroeval_metric_degeneracy", regexp = "zero_observed_mean" ) expect_error( kge_2012(zero_simulated_mean$observed, zero_simulated_mean$simulated), class = "hydroeval_metric_degeneracy", regexp = "zero_simulated_mean" ) expect_equal( kge_2009( zero_simulated_mean_kge_2009_valid$observed, zero_simulated_mean_kge_2009_valid$simulated ), 0, tolerance = 1e-12 ) }) test_that("single-point inputs fail clearly for released wrapper contracts", { scenario <- hydroeval_release_scenario("single_point") for (metric_name in c("mae", "nrmse", "rho", "kge_2012")) { expect_error( hydroeval_release_call_wrapper(metric_name, scenario$observed, scenario$simulated), class = "hydroeval_validation_error", regexp = "Need at least 2 complete paired observations" ) } }) test_that("large-magnitude and very-small-magnitude scaling stays numerically stable", { base <- hydroeval_release_scenario("positive_reference") large <- hydroeval_release_scenario("large_magnitude") small <- hydroeval_release_scenario("very_small_magnitude") expect_equal(nrmse(base$observed, base$simulated), nrmse(large$observed, large$simulated), tolerance = 1e-12) expect_equal(nrmse(base$observed, base$simulated), nrmse(small$observed, small$simulated), tolerance = 1e-12) expect_equal(r(base$observed, base$simulated), r(large$observed, large$simulated), tolerance = 1e-12) expect_equal(r(base$observed, base$simulated), r(small$observed, small$simulated), tolerance = 1e-12) expect_equal(pbias(base$observed, base$simulated), pbias(large$observed, large$simulated), tolerance = 1e-12) expect_equal(pbias(base$observed, base$simulated), pbias(small$observed, small$simulated), tolerance = 1e-12) expect_equal(rmse(large$observed, large$simulated), rmse(base$observed, base$simulated) * 1e7, tolerance = 1e-4) expect_true( isTRUE( all.equal( rmse(small$observed, small$simulated), rmse(base$observed, base$simulated) * 1e-9, tolerance = 1e-15 ) ) ) })