# Canonical metric IDs, in order, that the tests in this file expect gof() to
# report when it is called without a `metrics` argument.
gof_default_metric_ids <- c(
  "me", "mae", "mse", "rmse", "nrmse",
  "pbias", "rsr", "rsd", "nse",
  "d", "d_r", "r", "r_squared",
  "ve", "kge_2009", "kge_2012"
)

# Canonical metric IDs, in order, that the tests in this file expect gof() to
# report for `metrics = "all"`.
gof_all_metric_ids <- c(
  "mae",
  "rmse",
  "pbias",
  "nse",
  "kge_2009",
  "me",
  "medae",
  "mse",
  "sse",
  "sae",
  "max_error",
  "rae",
  "rrse",
  "nrmse",
  "r_squared",
  "mape",
  "smape",
  "wape",
  "msle",
  "rmsle",
  "evs",
  "r",
  "rho",
  "ccc",
  "rsr",
  "ubrmse",
  "ve",
  "d",
  "d_r",
  "rsd",
  "kge_2012"
)

# Display labels, in order, that the tests in this file expect gof() to
# attach to its default metric selection.
gof_default_display_labels <- c(
  "ME", "MAE", "MSE", "RMSE", "NRMSE",
  "PBIAS (%)", "RSR", "rSD", "NSE",
  "d", "dr", "r", "R\u00B2",
  "VE", "KGE (2009)", "KGE (2012)"
)

# Calling gof() without `metrics` must yield the fixed default metric set,
# its display labels, and matching request metadata.
test_that("gof default metric selection uses the locked registry-driven set", {
  obs <- c(1, 2, 3, 4)
  sim <- c(2, 2, 4, 4)
  fit <- gof(observed = obs, simulated = sim)
  request <- fit$request

  expect_s3_class(fit, "hydroeval_gof")

  # Result surface.
  expect_identical(fit$metric_ids, gof_default_metric_ids)
  expect_identical(fit$display_labels, gof_default_display_labels)

  # Request metadata recorded alongside the result.
  expect_identical(request$metrics, "default")
  expect_identical(request$resolved_metric_ids, gof_default_metric_ids)
  expect_identical(request$default_metric_ids, gof_default_metric_ids)
  expect_identical(request$selection_mode, "gof_default")
})

# `metrics = "all"` must resolve to the complete public metric list, checked
# both against the hard-coded expectation and against the registry view.
test_that("gof all returns the full implemented public registry surface", {
  summary <- gof(
    observed = c(1, 2, 3, 4),
    simulated = c(2, 2, 4, 4),
    metrics = "all"
  )

  # This test is about the *implemented* public surface, so the registry
  # cross-check filters on the implemented status rather than "planned".
  # NOTE(review): "implemented" is assumed to be the registry's status label
  # for shipped metrics -- confirm against the registry definition.
  registry_ids <- .hydroeval_registry_view(
    include_non_public = FALSE,
    status = "implemented"
  )$canonical_id

  expect_s3_class(summary, "hydroeval_gof")
  # "all" must be a strict superset in size of the default selection.
  expect_gt(length(summary$metric_ids), length(gof_default_metric_ids))
  expect_identical(summary$metric_ids, gof_all_metric_ids)
  expect_identical(summary$metric_ids, registry_ids)

  expect_identical(summary$request$metrics, "all")
  expect_identical(summary$request$resolved_metric_ids, gof_all_metric_ids)
  expect_identical(summary$request$default_metric_ids, gof_default_metric_ids)
  expect_identical(summary$request$selection_mode, "gof_all")
})

# Two identical `metrics = "all"` calls must agree on ordering, must not
# expose "kge_2021", and must carry a non-empty label for every metric.
test_that("gof all ordering is stable and excludes deferred or non-public metrics", {
  run_all <- function() {
    gof(
      observed = c(1, 2, 3, 4),
      simulated = c(2, 2, 4, 4),
      metrics = "all"
    )
  }
  first_run <- run_all()
  second_run <- run_all()
  labels <- first_run$display_labels

  expect_identical(first_run$metric_ids, second_run$metric_ids)
  expect_false("kge_2021" %in% first_run$metric_ids)
  expect_false(anyNA(labels))
  expect_true(all(nzchar(labels)))
})

# An explicit vector of canonical IDs is honoured as given: same IDs, same
# order, matching labels, and the "gof_explicit" selection mode.
test_that("gof explicit metric subset selection works through canonical registry IDs", {
  requested <- c("pbias", "rho", "kge_2012")
  fit <- gof(
    observed = c(1, 2, 3, 4),
    simulated = c(2, 2, 4, 4),
    metrics = requested
  )

  expect_identical(fit$metric_ids, requested)
  expect_identical(
    fit$display_labels,
    c("PBIAS (%)", "\u03C1", "KGE (2012)")
  )
  expect_identical(fit$request$metrics, requested)
  expect_identical(fit$request$selection_mode, "gof_explicit")
})

# Pins the structure of the returned object: component names, a named double
# vector of values keyed by metric ID, labels, and the recorded NA policy.
test_that("gof output object is stable and programmatically usable", {
  obs <- c(1, 2, 3, 4)
  sim <- c(2, 2, 4, 4)
  fit <- gof(
    observed = obs,
    simulated = sim,
    metrics = c("mae", "nse", "kge_2009")
  )
  values <- fit$values

  expect_named(fit, c("values", "metric_ids", "display_labels", "request"))
  expect_type(values, "double")
  expect_identical(names(values), fit$metric_ids)
  expect_length(values, 3L)
  expect_identical(fit$display_labels, c("MAE", "NSE", "KGE (2009)"))
  expect_identical(fit$request$na_policy, "omit")
})

# Printed output must show display labels and must not leak canonical IDs.
test_that("gof print uses scientific display labels rather than canonical IDs", {
  fit <- gof(
    observed = c(1, 2, 3, 4),
    simulated = c(2, 2, 4, 4),
    metrics = c("pbias", "nrmse", "kge_2009", "kge_2012", "r_squared", "rho")
  )
  printed <- paste(capture.output(print(fit)), collapse = "\n")

  # Regexes for the labels that must be present in the printed text.
  label_patterns <- c(
    "PBIAS \\(%\\)",
    "NRMSE",
    "KGE \\(2009\\)",
    "KGE \\(2012\\)",
    "R\u00B2",
    "\u03C1"
  )
  for (pattern in label_patterns) {
    expect_match(printed, pattern)
  }

  # Canonical IDs that must be absent from the printed text.
  id_patterns <- c(
    "pbias",
    "nrmse",
    "kge_2009",
    "kge_2012",
    "r_squared",
    "rho"
  )
  for (pattern in id_patterns) {
    expect_no_match(printed, pattern)
  }
})

# The print method must emit a header line containing "value" followed by one
# line per requested metric, in request order.
test_that("gof print renders as a one-column matrix style comparison view", {
  summary <- gof(
    observed = c(1, 2, 3, 4),
    simulated = c(2, 2, 4, 4),
    metrics = c("mae", "rmse", "nse")
  )
  printed <- capture.output(print(summary))

  # expect_gte() reports the actual line count on failure, unlike
  # expect_true() on a comparison.
  expect_gte(length(printed), 4L)
  expect_match(printed[[1L]], "value")
  expect_match(printed[[2L]], "MAE")
  expect_match(printed[[3L]], "RMSE")
  expect_match(printed[[4L]], "NSE")
})

# Printing the `metrics = "all"` result must show display labels and must not
# mention "kge_2021".
test_that("gof all print uses full-surface display labels", {
  fit <- gof(
    observed = c(1, 2, 3, 4),
    simulated = c(2, 2, 4, 4),
    metrics = "all"
  )
  printed_lines <- capture.output(print(fit))
  printed <- paste(printed_lines, collapse = "\n")

  label_patterns <- c(
    "PBIAS \\(%\\)",
    "NRMSE",
    "KGE \\(2009\\)",
    "KGE \\(2012\\)",
    "R\u00B2",
    "\u03C1"
  )
  for (pattern in label_patterns) {
    expect_match(printed, pattern)
  }
  expect_no_match(printed, "kge_2021")
})

# With simulated equal to observed, the printed values should appear as bare
# 0 and 1 rather than zero-padded decimals.
test_that("gof perfect-fit printing avoids unnecessary decimal padding", {
  series <- c(1, 2, 3, 4)
  fit <- gof(
    observed = series,
    simulated = series,
    metrics = c("me", "mae", "nse", "kge_2009")
  )
  printed <- paste(capture.output(print(fit)), collapse = "\n")

  # Bare integers must be present as standalone tokens ...
  for (pattern in c("\\b0\\b", "\\b1\\b")) {
    expect_match(printed, pattern)
  }
  # ... and the padded forms must be absent.
  for (pattern in c("0\\.000000", "1\\.000000")) {
    expect_no_match(printed, pattern)
  }
})

# as.matrix() on a gof result: n x 1 matrix, column "value", rows named by
# display label, carrying the stored values.
test_that("as.matrix.hydroeval_gof returns one-column matrix with display labels", {
  fit <- gof(
    observed = c(1, 2, 3, 4),
    simulated = c(2, 2, 4, 4),
    metrics = c("pbias", "rho", "kge_2012")
  )
  mat <- as.matrix(fit)
  first_column <- unname(mat[, 1L])

  expect_true(is.matrix(mat))
  expect_identical(dim(mat), c(3L, 1L))
  expect_identical(colnames(mat), "value")
  expect_identical(rownames(mat), fit$display_labels)
  expect_equal(first_column, unname(fit$values), tolerance = 1e-12)
})

# as.data.frame() on a gof result: columns `metric` (canonical IDs) and
# `value` (doubles matching the stored values).
test_that("as.data.frame.hydroeval_gof returns programmatic metric and value columns", {
  obs <- c(1, 2, 3, 4)
  sim <- c(2, 2, 4, 4)
  fit <- gof(
    observed = obs,
    simulated = sim,
    metrics = c("mae", "nse", "kge_2009")
  )
  frame <- as.data.frame(fit)

  expect_identical(names(frame), c("metric", "value"))
  expect_identical(frame$metric, fit$metric_ids)
  expect_type(frame$value, "double")
  expect_equal(frame$value, unname(fit$values), tolerance = 1e-12)
})

# Printing and coercing must leave the stored values untouched, and the
# coerced views must carry those same values.
test_that("gof print and coercion helpers do not round or mutate stored values", {
  fit <- gof(
    observed = c(1, 2, 3, 4),
    simulated = c(2, 2, 4, 4),
    metrics = c("rmse", "nrmse", "r_squared")
  )
  snapshot <- fit$values
  bare_snapshot <- unname(snapshot)

  # Exercise every helper before re-inspecting the object; the printed text
  # itself is not needed here.
  invisible(capture.output(print(fit)))
  mat <- as.matrix(fit)
  frame <- as.data.frame(fit)

  expect_identical(fit$values, snapshot)
  expect_type(fit$values, "double")
  expect_equal(unname(mat[, 1L]), bare_snapshot, tolerance = 1e-12)
  expect_equal(frame$value, bare_snapshot, tolerance = 1e-12)
})

# Compares the default gof() output for a four-point series against
# reference values worked out for that series.
test_that("gof known-value scenario returns expected row values", {
  obs <- c(1, 2, 3, 4)
  sim <- c(2, 2, 4, 4)

  # Components used by the two KGE reference values below.
  r_xy <- stats::cor(obs, sim)
  sd_ratio <- stats::sd(sim) / stats::sd(obs)
  mean_ratio <- mean(sim) / mean(obs)
  cv_ratio <- (stats::sd(sim) / mean(sim)) /
    (stats::sd(obs) / mean(obs))

  expected <- c(
    me = 0.5,
    mae = 0.5,
    mse = 0.5,
    rmse = sqrt(0.5),
    nrmse = sqrt(0.5) / 3,
    pbias = 20,
    rsr = sqrt(3 / 10),
    rsd = sqrt(4 / 5),
    nse = 0.6,
    d = 8 / 9,
    d_r = 0.75,
    r = r_xy,
    r_squared = 0.8,
    ve = 0.8,
    kge_2009 = 1 - sqrt((r_xy - 1)^2 + (sd_ratio - 1)^2 + (mean_ratio - 1)^2),
    kge_2012 = 1 - sqrt((r_xy - 1)^2 + (mean_ratio - 1)^2 + (cv_ratio - 1)^2)
  )

  fit <- gof(observed = obs, simulated = sim)

  expect_equal(fit$values, expected, tolerance = 1e-12)
})

# An unknown metric ID must raise a classed planning error whose message
# names the offending ID.
test_that("gof invalid metric requests fail clearly", {
  obs <- c(1, 2, 3, 4)
  sim <- c(2, 2, 4, 4)

  expect_error(
    gof(observed = obs, simulated = sim, metrics = c("mae", "not_a_metric")),
    regexp = "not_a_metric",
    class = "hydroeval_metric_plan_error"
  )
})

# Combining a sentinel selector ("all" or "default") with an explicit metric
# ID must raise a classed planning error mentioning the mixture.
test_that("gof rejects mixed sentinel selectors clearly", {
  obs <- c(1, 2, 3, 4)
  sim <- c(2, 2, 4, 4)

  for (sentinel in c("all", "default")) {
    expect_error(
      gof(
        observed = obs,
        simulated = sim,
        metrics = c(sentinel, "mae")
      ),
      regexp = "mixture",
      class = "hydroeval_metric_plan_error"
    )
  }
})