# Bind the two functions under test through the release helper so every test
# below calls them by their bare names.
gof <- hydroeval_release_get_export("gof")
gof_compare <- hydroeval_release_get_export("gof_compare")

test_that("gof_compare stays numerically aligned with standalone gof across researcher dataset collections", {
  requested_metrics <- c("mae", "nse", "kge_2009")
  calibration_names <- c("calibration", "validation", "test")
  stressed_names <- c("baseline", "calibrated", "stressed")

  calibration_sets <- hydroeval_release_dataset_collection("calibration_validation_test")
  stressed_sets <- hydroeval_release_dataset_collection("baseline_calibrated_stressed")

  calibration_compare <- gof_compare(calibration_sets, metrics = requested_metrics)
  stressed_compare <- gof_compare(stressed_sets)

  # Shape, labels and the recorded request of the explicit-metric comparison.
  expect_s3_class(calibration_compare, "hydroeval_gof_compare")
  expect_identical(colnames(calibration_compare), calibration_names)
  expect_identical(rownames(calibration_compare), c("MAE", "NSE", "KGE (2009)"))
  expect_identical(attr(calibration_compare, "request")$datasets, calibration_names)
  expect_identical(attr(calibration_compare, "request")$metrics, requested_metrics)

  # Each comparison column must equal the first column of the standalone
  # gof() result (coerced to a matrix) for the same observed/simulated pair.
  for (set_name in names(calibration_sets)) {
    current_set <- calibration_sets[[set_name]]
    standalone <- gof(
      observed = current_set$obs,
      simulated = current_set$sim,
      metrics = requested_metrics
    )
    standalone_values <- as.matrix(standalone)[, 1L]

    expect_equal(
      unname(calibration_compare[, set_name]),
      unname(standalone_values),
      tolerance = 1e-12
    )
  }

  # The default-metric comparison is only checked for class and dataset names.
  expect_s3_class(stressed_compare, "hydroeval_gof_compare")
  expect_identical(colnames(stressed_compare), stressed_names)
  expect_identical(attr(stressed_compare, "request")$datasets, stressed_names)

  hydroeval_release_expect_gof_compare_views_agree(calibration_compare)
  hydroeval_release_expect_gof_compare_views_agree(stressed_compare)
})

test_that("gof_compare supports default, all, explicit metrics and preserves dataset names in output", {
  explicit_ids <- c("pbias", "rho", "kge_2012")
  datasets <- hydroeval_release_dataset_collection("baseline_calibrated_stressed")

  default_compare <- gof_compare(datasets)
  all_compare <- gof_compare(datasets, metrics = "all")
  explicit_compare <- gof_compare(datasets, metrics = explicit_ids)
  explicit_data <- as.data.frame(explicit_compare)

  # The resolved metric ids are exposed through the "metric_ids" attribute.
  expect_identical(
    attr(default_compare, "metric_ids"),
    hydroeval_release_gof_default_metric_ids
  )
  expect_identical(
    attr(all_compare, "metric_ids"),
    hydroeval_release_gof_all_metric_ids
  )
  expect_identical(attr(explicit_compare, "metric_ids"), explicit_ids)

  # Rows carry display labels; columns carry the caller's dataset names.
  expect_identical(
    rownames(explicit_compare),
    c("PBIAS (%)", "\u03C1", "KGE (2012)")
  )
  expect_identical(colnames(explicit_compare), names(datasets))
  expect_identical(
    names(explicit_data),
    c("metric", "display_label", "baseline", "calibrated", "stressed")
  )
})

test_that("gof_compare print, matrix, and data-frame views agree without mutating stored values", {
  datasets <- hydroeval_release_dataset_collection("baseline_calibrated_stressed")
  comparison <- gof_compare(datasets, metrics = c("pbias", "nrmse", "kge_2009"))

  # Snapshot the underlying values before printing so we can prove afterwards
  # that print() left them untouched.
  snapshot <- unclass(comparison)
  printed_lines <- capture.output(print(comparison))
  printed <- paste(printed_lines, collapse = "\n")

  hydroeval_release_expect_gof_compare_views_agree(comparison)

  # Dataset names first, then metric display labels (regex-escaped).
  expected_patterns <- c(
    "baseline",
    "calibrated",
    "stressed",
    "PBIAS \\(%\\)",
    "NRMSE",
    "KGE \\(2009\\)"
  )
  for (pattern in expected_patterns) {
    expect_match(printed, pattern)
  }

  expect_identical(unclass(comparison), snapshot)
})

test_that("gof_compare forwards na_policy and degeneracy failures through gof without relaxing contracts", {
  requested_metrics <- c("mae", "nse", "kge_2009")

  missing <- hydroeval_release_scenario("missing_omit")
  raw_dataset <- list(obs = missing$observed, sim = missing$simulated)
  # NOTE(review): kept_observed / kept_simulated are presumably the pairs that
  # survive NA removal; confirm against the scenario helper.
  kept_dataset <- list(obs = missing$kept_observed, sim = missing$kept_simulated)

  comparison <- gof_compare(
    datasets = list(omitted = raw_dataset, kept = kept_dataset),
    metrics = requested_metrics,
    na_policy = "omit"
  )

  constant_observed <- hydroeval_release_scenario("constant_observed")
  single_point <- hydroeval_release_scenario("single_point")

  # With na_policy = "omit" the raw series must score the same as the
  # pre-filtered series.
  expect_equal(
    unname(comparison[, "omitted"]),
    unname(comparison[, "kept"]),
    tolerance = 1e-12
  )

  # na_policy = "fail" must raise the validation error for the same input.
  expect_error(
    gof_compare(
      datasets = list(omitted = raw_dataset),
      metrics = requested_metrics,
      na_policy = "fail"
    ),
    class = "hydroeval_validation_error",
    regexp = "Missing `NA` values are not allowed"
  )

  # A constant observed series must raise the degeneracy condition for NSE.
  expect_error(
    gof_compare(
      datasets = list(
        bad = list(
          obs = constant_observed$observed,
          sim = constant_observed$simulated
        )
      ),
      metrics = c("nse")
    ),
    class = "hydroeval_metric_degeneracy",
    regexp = "constant_observed_series"
  )

  # A single paired observation is rejected during validation.
  expect_error(
    gof_compare(
      datasets = list(
        bad = list(
          obs = single_point$observed,
          sim = single_point$simulated
        )
      )
    ),
    class = "hydroeval_validation_error",
    regexp = "Need at least 2 complete paired observations"
  )
})

test_that("gof_compare rejects malformed dataset structures and naming failures clearly", {
  valid_dataset <- hydroeval_release_dataset_collection("baseline_calibrated_stressed")$baseline
  # A well-formed obs/sim pair reused to build each malformed input below.
  valid_pair <- list(obs = valid_dataset$obs, sim = valid_dataset$sim)

  # Empty collection.
  expect_error(
    gof_compare(list()),
    class = "hydroeval_compare_error",
    regexp = "non-empty named list"
  )

  # Collection without names.
  expect_error(
    gof_compare(list(valid_pair)),
    class = "hydroeval_compare_error",
    regexp = "non-empty names"
  )

  # Two entries sharing one name.
  expect_error(
    gof_compare(
      structure(list(valid_pair, valid_pair), names = c("dup", "dup"))
    ),
    class = "hydroeval_compare_error",
    regexp = "Duplicate names"
  )

  # Components named something other than obs / sim.
  expect_error(
    gof_compare(
      list(wrong = structure(valid_pair, names = c("observed", "simulated")))
    ),
    class = "hydroeval_compare_error",
    regexp = "exactly the components `obs` and `sim`"
  )

  # An additional component beyond obs / sim.
  expect_error(
    gof_compare(list(extra = c(valid_pair, list(meta = "x")))),
    class = "hydroeval_compare_error",
    regexp = "exactly the components `obs` and `sim`"
  )

  # A dataset entry that is not a list at all.
  expect_error(
    gof_compare(list(bad = valid_dataset$obs)),
    class = "hydroeval_compare_error",
    regexp = "must be a list containing exactly `obs` and `sim`"
  )
})