new_wave_public_metrics <- c( "mae", "rmse", "pbias", "nse", "kge_2009", "me", "medae", "mse", "sse", "sae", "max_error", "rae", "rrse", "nrmse", "r_squared", "mape", "smape", "wape", "msle", "rmsle", "evs", "r", "rho", "ccc", "rsr", "ubrmse", "ve", "d", "d_r", "rsd", "kge_2012" ) new_wave_display_labels <- c( mae = "MAE", rmse = "RMSE", pbias = "PBIAS (%)", nse = "NSE", kge_2009 = "KGE (2009)", me = "ME", medae = "MedAE", mse = "MSE", sse = "SSE", sae = "SAE", max_error = "Max Error", rae = "RAE", rrse = "RRSE", nrmse = "NRMSE", r_squared = "R²", mape = "MAPE", smape = "sMAPE", wape = "WAPE", msle = "MSLE", rmsle = "RMSLE", evs = "EVS", r = "r", rho = "ρ", ccc = "CCC", rsr = "RSR", ubrmse = "ubRMSE", ve = "VE", d = "d", d_r = "dr", rsd = "rSD", kge_2012 = "KGE (2012)" ) new_wave_metric_wrappers <- mget(new_wave_public_metrics, inherits = TRUE) test_that("public surface and exports match the approved live metric wave", { public_metrics <- .hydroeval_registry_view( include_non_public = FALSE, status = "planned" ) namespace_exports <- .hydroeval_test_namespace_exports() expect_setequal(public_metrics$canonical_id, new_wave_public_metrics) expect_setequal(namespace_exports, c(new_wave_public_metrics, "gof", "gof_compare")) expect_false("kge_2021" %in% namespace_exports) expect_false("kge" %in% namespace_exports) expect_false("bias" %in% namespace_exports) }) test_that("implemented public metrics keep locked scientific display labels", { public_metrics <- .hydroeval_registry_view( include_non_public = FALSE, status = "planned" ) label_index <- match(names(new_wave_display_labels), public_metrics$canonical_id) expect_false(anyNA(label_index)) expect_false(anyNA(public_metrics$display_label)) expect_true(all(nzchar(public_metrics$display_label))) expect_identical( stats::setNames( public_metrics$display_label[label_index], public_metrics$canonical_id[label_index] ), new_wave_display_labels ) expect_identical(anyDuplicated(public_metrics$display_label), 0L) }) test_that("high-risk labels remain exact and variants stay explicit", { public_metrics <- .hydroeval_registry_view( include_non_public = FALSE, status = "planned" ) label_lookup <- stats::setNames(public_metrics$display_label, public_metrics$canonical_id) expect_identical(label_lookup[["pbias"]], "PBIAS (%)") expect_identical(label_lookup[["nrmse"]], "NRMSE") expect_identical(label_lookup[["kge_2009"]], "KGE (2009)") expect_identical(label_lookup[["kge_2012"]], "KGE (2012)") expect_identical(label_lookup[["r_squared"]], "R²") expect_identical(label_lookup[["rho"]], "ρ") expect_identical(label_lookup[["ubrmse"]], "ubRMSE") expect_identical(label_lookup[["rsd"]], "rSD") expect_identical(label_lookup[["r"]], "r") expect_identical(label_lookup[["d"]], "d") expect_false(identical(label_lookup[["kge_2009"]], "KGE")) expect_false(identical(label_lookup[["kge_2012"]], "KGE")) expect_false(identical(label_lookup[["pbias"]], "Bias")) expect_false(identical(label_lookup[["kge_2009"]], label_lookup[["kge_2012"]])) }) test_that("live public metrics keep provenance closure and scalar reference roles", { public_metrics <- .hydroeval_registry_view( include_non_public = FALSE, status = "planned" ) bibliography <- readLines( .hydroeval_test_reference_path(), warn = FALSE ) bib_keys <- sub( "^@(?:article|book|manual|misc|incollection|inproceedings)\\{([^,]+),.*$", "\\1", grep( "^@(?:article|book|manual|misc|incollection|inproceedings)\\{", bibliography, value = TRUE ) ) expect_true(all(public_metrics$reference_role %in% .hydroeval_registry_allowed_reference_role)) expect_false(anyNA(public_metrics$reference_role)) for (index in seq_len(nrow(public_metrics))) { provenance <- .hydroeval_get_metric_provenance( public_metrics$provenance_key[[index]] ) expect_true(length(provenance$primary_refs) >= 1L) expect_true(all(provenance$primary_refs %in% bib_keys)) expect_true(all(provenance$interpretive_refs %in% bib_keys)) } }) test_that("batch plan keeps registry display labels for future presentation output", { plan <- .hydroeval_build_metric_batch_plan( observed = c(1, 2, 3, 4), simulated = c(2, 2, 4, 4), metrics = c("r_squared", "rho", "kge_2012") ) metric_labels <- stats::setNames(plan$metrics$display_label, plan$metrics$canonical_id) schema_index <- match("display_label", plan$result_schema$column) expect_false(anyNA(plan$metrics$display_label)) expect_identical(metric_labels[["r_squared"]], "R²") expect_identical(metric_labels[["rho"]], "ρ") expect_identical(metric_labels[["kge_2012"]], "KGE (2012)") expect_false(is.na(schema_index)) expect_identical(plan$result_schema$type[[schema_index]], "character") expect_match(plan$result_schema$meaning[[schema_index]], "label") }) test_that("new wave wrappers return unnamed scalar doubles on success", { observed <- c(1, 2, 3, 4) simulated <- c(2, 2, 4, 4) for (metric_name in setdiff(new_wave_public_metrics, c("mae", "rmse", "pbias", "nse", "kge_2009"))) { value <- new_wave_metric_wrappers[[metric_name]](observed, simulated) expect_type(value, "double") expect_length(value, 1L) expect_null(names(value)) expect_false(is.na(value)) } }) test_that("batch 1 and batch 2 metrics compute exact known values", { observed <- c(1, 2, 3, 4) simulated <- c(2, 2, 4, 4) expect_equal(me(observed, simulated), 0.5, tolerance = 1e-12) expect_equal(medae(observed, simulated), 0.5, tolerance = 1e-12) expect_equal(mse(observed, simulated), 0.5, tolerance = 1e-12) expect_equal(sse(observed, simulated), 2, tolerance = 1e-12) expect_equal(sae(observed, simulated), 2, tolerance = 1e-12) expect_equal(max_error(observed, simulated), 1, tolerance = 1e-12) expect_equal(rae(observed, simulated), 0.5, tolerance = 1e-12) expect_equal(rrse(observed, simulated), sqrt(2 / 5), tolerance = 1e-12) expect_equal(nrmse(observed, simulated), sqrt(0.5) / 3, tolerance = 1e-12) expect_equal(r_squared(observed, simulated), 0.8, tolerance = 1e-12) }) test_that("batch 3, batch 4, and batch 5 metrics compute exact known values", { observed <- c(1, 2, 3, 4) simulated <- c(2, 2, 4, 4) msle_expected <- mean((log1p(simulated) - log1p(observed))^2) rmsle_expected <- sqrt(msle_expected) r_expected <- stats::cor(observed, simulated, method = "pearson") rho_expected <- stats::cor(observed, simulated, method = "spearman") ccc_expected <- 32 / 39 kge_2012_expected <- 1 - sqrt( (stats::cor(observed, simulated) - 1)^2 + (mean(simulated) / mean(observed) - 1)^2 + (((stats::sd(simulated) / mean(simulated)) / (stats::sd(observed) / mean(observed))) - 1)^2 ) expect_equal(mape(observed, simulated), 100 / 3, tolerance = 1e-12) expect_equal(smape(observed, simulated), 500 / 21, tolerance = 1e-12) expect_equal(wape(observed, simulated), 20, tolerance = 1e-12) expect_equal(msle(observed, simulated), msle_expected, tolerance = 1e-12) expect_equal(rmsle(observed, simulated), rmsle_expected, tolerance = 1e-12) expect_equal(evs(observed, simulated), 0.8, tolerance = 1e-12) expect_equal(r(observed, simulated), r_expected, tolerance = 1e-12) expect_equal(rho(observed, simulated), rho_expected, tolerance = 1e-12) expect_equal(ccc(observed, simulated), ccc_expected, tolerance = 1e-12) expect_equal(rsr(observed, simulated), sqrt(3 / 10), tolerance = 1e-12) expect_equal(ubrmse(observed, simulated), 0.5, tolerance = 1e-12) expect_equal(ve(observed, simulated), 0.8, tolerance = 1e-12) expect_equal(d(observed, simulated), 8 / 9, tolerance = 1e-12) expect_equal(d_r(observed, simulated), 0.75, tolerance = 1e-12) expect_equal(rsd(observed, simulated), sqrt(4 / 5), tolerance = 1e-12) expect_equal(kge_2012(observed, simulated), kge_2012_expected, tolerance = 1e-12) }) test_that("new wave metrics return ideal values under perfect agreement", { observed <- c(1, 2, 3, 4) simulated <- c(1, 2, 3, 4) expect_equal(me(observed, simulated), 0, tolerance = 1e-12) expect_equal(medae(observed, simulated), 0, tolerance = 1e-12) expect_equal(mse(observed, simulated), 0, tolerance = 1e-12) expect_equal(sse(observed, simulated), 0, tolerance = 1e-12) expect_equal(sae(observed, simulated), 0, tolerance = 1e-12) expect_equal(max_error(observed, simulated), 0, tolerance = 1e-12) expect_equal(rae(observed, simulated), 0, tolerance = 1e-12) expect_equal(rrse(observed, simulated), 0, tolerance = 1e-12) expect_equal(nrmse(observed, simulated), 0, tolerance = 1e-12) expect_equal(r_squared(observed, simulated), 1, tolerance = 1e-12) expect_equal(mape(observed, simulated), 0, tolerance = 1e-12) expect_equal(smape(observed, simulated), 0, tolerance = 1e-12) expect_equal(wape(observed, simulated), 0, tolerance = 1e-12) expect_equal(msle(observed, simulated), 0, tolerance = 1e-12) expect_equal(rmsle(observed, simulated), 0, tolerance = 1e-12) expect_equal(evs(observed, simulated), 1, tolerance = 1e-12) expect_equal(r(observed, simulated), 1, tolerance = 1e-12) expect_equal(rho(observed, simulated), 1, tolerance = 1e-12) expect_equal(ccc(observed, simulated), 1, tolerance = 1e-12) expect_equal(rsr(observed, simulated), 0, tolerance = 1e-12) expect_equal(ubrmse(observed, simulated), 0, tolerance = 1e-12) expect_equal(ve(observed, simulated), 1, tolerance = 1e-12) expect_equal(d(observed, simulated), 1, tolerance = 1e-12) expect_equal(d_r(observed, simulated), 1, tolerance = 1e-12) expect_equal(rsd(observed, simulated), 1, tolerance = 1e-12) expect_equal(kge_2012(observed, simulated), 1, tolerance = 1e-12) }) test_that("new wave wrappers honor NA omit and fail policies", { observed <- c(1, NA, 2, 3, 4) simulated <- c(2, 99, 2, 4, 4) kept_observed <- c(1, 2, 3, 4) kept_simulated <- c(2, 2, 4, 4) for (metric_name in setdiff(new_wave_public_metrics, c("mae", "rmse", "pbias", "nse", "kge_2009"))) { expect_equal( new_wave_metric_wrappers[[metric_name]]( observed = observed, simulated = simulated, na_policy = "omit" ), new_wave_metric_wrappers[[metric_name]]( observed = kept_observed, simulated = kept_simulated, na_policy = "omit" ), tolerance = 1e-12 ) expect_error( new_wave_metric_wrappers[[metric_name]]( observed = observed, simulated = simulated, na_policy = "fail" ), class = "hydroeval_validation_error", regexp = "Missing `NA` values are not allowed" ) } }) test_that("constant observed degeneracy is explicit where required", { observed <- c(3, 3, 3, 3) simulated <- c(2, 3, 4, 5) for (metric_name in c("rae", "rrse", "nrmse", "evs", "rsr", "rsd")) { expect_error( new_wave_metric_wrappers[[metric_name]](observed, simulated), class = "hydroeval_metric_degeneracy", regexp = "constant_observed_series" ) } }) test_that("correlation-style degeneracy is explicit for constant series", { observed <- c(1, 2, 3, 4) constant_simulated <- c(2, 2, 2, 2) for (metric_name in c("r", "rho", "r_squared")) { expect_error( new_wave_metric_wrappers[[metric_name]](observed, constant_simulated), class = "hydroeval_metric_degeneracy", regexp = "constant_simulated_series" ) } }) test_that("mape, wape, msle, and rmsle reject approved degeneracy cases", { expect_error( mape(c(0, 1, 2), c(1, 1, 2)), class = "hydroeval_metric_degeneracy", regexp = "zero_observed_values" ) expect_error( wape(c(0, 0, 0), c(1, 2, 3)), class = "hydroeval_metric_degeneracy", regexp = "zero_absolute_observed_sum" ) expect_error( msle(c(-1, 1, 2), c(0, 1, 2)), class = "hydroeval_metric_degeneracy", regexp = "negative_observed_values" ) expect_error( rmsle(c(1, 2, 3), c(0, -1, 2)), class = "hydroeval_metric_degeneracy", regexp = "negative_simulated_values" ) }) test_that("ccc, d, and d_r reject denominator-collapse degeneracy", { constant_pair <- c(2, 2, 2) expect_error( ccc(constant_pair, constant_pair), class = "hydroeval_metric_degeneracy", regexp = "undefined_concordance_denominator" ) expect_error( d(constant_pair, constant_pair), class = "hydroeval_metric_degeneracy", regexp = "undefined_agreement_denominator" ) expect_error( d_r(constant_pair, constant_pair), class = "hydroeval_metric_degeneracy", regexp = "undefined_refined_agreement_denominator" ) }) test_that("kge_2012 rejects undefined component cases explicitly", { expect_error( kge_2012(c(-1, 0, 1), c(0, 1, 2)), class = "hydroeval_metric_degeneracy", regexp = "zero_observed_mean" ) expect_error( kge_2012(c(1, 2, 3), c(-1, 0, 1)), class = "hydroeval_metric_degeneracy", regexp = "zero_simulated_mean" ) expect_error( kge_2012(c(1, 2, 3), c(5, 5, 5)), class = "hydroeval_metric_degeneracy", regexp = "constant_simulated_series" ) }) test_that("scenario tests lock tricky semantic choices", { observed <- c(1, 2, 3, 4) simulated <- c(2, 2, 4, 4) smape_observed <- c(0, 2, 4) smape_simulated <- c(0, 1, 5) ccc_observed <- c(1, 2, 3) ccc_simulated <- c(2, 2, 2) d_r_branch_two_observed <- c(1, 2, 3, 4) d_r_branch_two_simulated <- c(10, 10, 10, 10) kge_observed <- c(1, 2, 3, 4) kge_simulated <- c(1, 3, 2, 5) expect_equal(r_squared(observed, simulated), 0.8, tolerance = 1e-12) expect_equal(nse(observed, simulated), 0.6, tolerance = 1e-12) expect_false(isTRUE(all.equal(r_squared(observed, simulated), nse(observed, simulated)))) expect_equal( nrmse(c(10, 20, 30, 40), c(10, 20, 30, 50)), 1 / 6, tolerance = 1e-12 ) expect_equal( smape(smape_observed, smape_simulated), 800 / 27, tolerance = 1e-12 ) expect_equal(ccc(ccc_observed, ccc_simulated), 0, tolerance = 1e-12) expect_equal(d_r(observed, simulated), 0.75, tolerance = 1e-12) expect_equal(d_r(d_r_branch_two_observed, d_r_branch_two_simulated), 11 / 15, tolerance = 1e-12) expect_false( isTRUE( all.equal( kge_2009(kge_observed, kge_simulated), kge_2012(kge_observed, kge_simulated), tolerance = 1e-12 ) ) ) }) test_that("high-risk metrics follow their locked component formulas", { observed <- c(10, 20, 30, 40) simulated <- c(12, 18, 33, 39) rmse_expected <- sqrt(mean((simulated - observed)^2)) rsr_expected <- rmse_expected / stats::sd(observed) d_expected <- 1 - sum((simulated - observed)^2) / sum((abs(simulated - mean(observed)) + abs(observed - mean(observed)))^2) kge_2012_expected <- 1 - sqrt( (stats::cor(observed, simulated) - 1)^2 + (mean(simulated) / mean(observed) - 1)^2 + (((stats::sd(simulated) / mean(simulated)) / (stats::sd(observed) / mean(observed))) - 1)^2 ) expect_equal(nrmse(observed, simulated), rmse_expected / 30, tolerance = 1e-12) expect_equal(rsr(observed, simulated), rsr_expected, tolerance = 1e-12) expect_equal(d(observed, simulated), d_expected, tolerance = 1e-12) expect_equal(kge_2012(observed, simulated), kge_2012_expected, tolerance = 1e-12) })