# Checks that gof_compare() returns a "hydroeval_gof_compare" matrix whose
# values agree with the first column of each dataset's standalone gof()
# summary, and whose attributes record the metric ids, labels, and request.
test_that("gof_compare combines multiple dataset summaries into a matrix subclass", {
  metric_ids <- c("mae", "nse", "kge_2009")
  metric_labels <- c("MAE", "NSE", "KGE (2009)")
  observed <- c(1, 2, 3, 4)

  datasets <- list(
    baseline = list(obs = observed, sim = c(1.2, 1.9, 3.1, 3.8)),
    calibrated = list(obs = observed, sim = c(1.1, 2.0, 2.9, 4.1)),
    stressed = list(obs = observed, sim = c(2.0, 1.5, 4.0, 3.5))
  )

  comparison <- gof_compare(datasets = datasets, metrics = metric_ids)

  # One reference column per dataset, bound in the same order as `datasets`.
  reference_columns <- lapply(datasets, function(dataset) {
    as.matrix(gof(dataset$obs, dataset$sim, metrics = metric_ids))[, 1L]
  })
  expected <- do.call(cbind, reference_columns)

  # Shape and class of the result.
  expect_s3_class(comparison, "hydroeval_gof_compare")
  expect_true(is.matrix(comparison))
  expect_identical(dim(comparison), c(3L, 3L))
  expect_identical(colnames(comparison), names(datasets))
  expect_identical(rownames(comparison), metric_labels)

  # Values are compared without dimnames; those are asserted separately above.
  expect_equal(unname(comparison[, ]), unname(expected[, ]), tolerance = 1e-12)

  # Metadata carried as attributes.
  expect_identical(attr(comparison, "metric_ids"), metric_ids)
  expect_identical(attr(comparison, "display_labels"), metric_labels)
  request <- attr(comparison, "request")
  expect_identical(request$datasets, names(datasets))
  expect_identical(request$metrics, metric_ids)
  expect_identical(request$selection_mode, "gof_explicit")
})

# Checks that columns follow the order of the input list and that rows use the
# display labels. The dataset names here are not in alphabetical order, so any
# sorting of columns would be detected.
test_that("gof_compare preserves dataset order and scientific row labels", {
  observed <- c(1, 2, 3, 4)
  datasets <- list(
    zeta = list(obs = observed, sim = observed),
    alpha = list(obs = observed, sim = c(2, 2, 4, 4))
  )

  comparison <- gof_compare(
    datasets = datasets,
    metrics = c("pbias", "rho", "kge_2012")
  )

  expected_columns <- c("zeta", "alpha")
  expected_rows <- c("PBIAS (%)", "\u03C1", "KGE (2012)")

  expect_identical(colnames(comparison), expected_columns)
  expect_identical(rownames(comparison), expected_rows)
})

# Checks that as.matrix() on a comparison yields a matrix without the
# "hydroeval_gof_compare" class or its metadata attributes, while keeping the
# dimnames and the numeric values.
test_that("as.matrix.hydroeval_gof_compare returns plain numeric matrix view", {
  observed <- c(1, 2, 3, 4)
  comparison <- gof_compare(
    datasets = list(
      baseline = list(obs = observed, sim = c(1.2, 1.9, 3.1, 3.8)),
      calibrated = list(obs = observed, sim = c(1.1, 2.0, 2.9, 4.1))
    ),
    metrics = c("mae", "nse")
  )

  plain <- as.matrix(comparison)

  expect_true(is.matrix(plain))
  expect_false(inherits(plain, "hydroeval_gof_compare"))
  expect_identical(dimnames(plain), dimnames(comparison))

  # None of the comparison's metadata attributes may survive the coercion.
  for (dropped_attribute in c("metric_ids", "display_labels", "request")) {
    expect_null(attr(plain, dropped_attribute, exact = TRUE))
  }

  expect_equal(unname(plain[, ]), unname(comparison[, ]), tolerance = 1e-12)
})

# Checks that combining two summaries whose `metric_ids` disagree raises a
# "hydroeval_compare_error" whose message mentions "metric_ids".
test_that("gof_compare metric alignment validation fails clearly", {
  reference <- gof(
    observed = c(1, 2, 3, 4),
    simulated = c(1.2, 1.9, 3.1, 3.8),
    metrics = c("mae", "nse")
  )

  # Copy of the reference summary with its metric ids overwritten in swapped
  # order.
  reordered <- reference
  reordered$metric_ids <- c("nse", "mae")

  summaries <- list(first = reference, second = reordered)

  expect_error(
    .hydroeval_combine_gof_summaries(
      summaries = summaries,
      dataset_names = names(summaries)
    ),
    class = "hydroeval_compare_error",
    regexp = "metric_ids"
  )
})

# Checks that as.data.frame() on a comparison gives one row per metric with
# `metric` and `display_label` columns followed by one double column per
# dataset.
test_that("gof_compare as.data.frame returns wide programmatic view", {
  observed <- c(1, 2, 3, 4)
  comparison <- gof_compare(
    datasets = list(
      baseline = list(obs = observed, sim = c(1.2, 1.9, 3.1, 3.8)),
      calibrated = list(obs = observed, sim = c(1.1, 2.0, 2.9, 4.1))
    ),
    metrics = c("mae", "nse")
  )

  wide <- as.data.frame(comparison)

  expect_identical(
    names(wide),
    c("metric", "display_label", "baseline", "calibrated")
  )
  expect_identical(wide$metric, c("mae", "nse"))
  expect_identical(wide$display_label, c("MAE", "NSE"))

  # Each dataset column is a double vector.
  for (dataset_name in c("baseline", "calibrated")) {
    expect_type(wide[[dataset_name]], "double")
  }

  # Each dataset column matches the corresponding comparison column.
  for (dataset_name in c("baseline", "calibrated")) {
    expect_equal(
      wide[[dataset_name]],
      unname(comparison[, dataset_name]),
      tolerance = 1e-12
    )
  }
})

# Prints a comparison and checks that the output shows the dataset names and
# the metric row labels, contains no raw attribute listing, and that the
# object is unchanged afterwards.
test_that("gof_compare print renders dataset headers and row labels without mutation", {
  observed <- c(1, 2, 3, 4)
  comparison <- gof_compare(
    datasets = list(
      baseline = list(obs = observed, sim = c(2, 2, 4, 4)),
      calibrated = list(obs = observed, sim = observed)
    ),
    metrics = c("pbias", "nrmse", "kge_2009")
  )

  # Snapshot of the underlying values and attributes, taken before printing.
  values_before <- unclass(comparison)
  printed <- paste(capture.output(print(comparison)), collapse = "\n")

  # Column headers.
  expect_match(printed, "baseline")
  expect_match(printed, "calibrated")

  # Row labels (parentheses escaped because the pattern is a regex).
  expect_match(printed, "PBIAS \\(%\\)")
  expect_match(printed, "NRMSE")
  expect_match(printed, "KGE \\(2009\\)")

  # Base R's default printing lists extra attributes as `attr(,"name")`
  # lines; none of those should appear in the output.
  expect_no_match(printed, "attr\\(")

  # A no-mutation check must be exact: a tolerance would let small numeric
  # drift pass. This matches the exact comparison used by the non-integer
  # printing test in this file.
  expect_identical(unclass(comparison), values_before)
})

# With simulated equal to observed in both datasets, checks that the printed
# output contains bare "0" and "1" tokens and no zero-padded decimals.
test_that("gof_compare perfect-fit printing avoids unnecessary decimal padding", {
  observed <- c(1, 2, 3, 4)
  perfect_fit <- list(obs = observed, sim = observed)

  comparison <- gof_compare(
    datasets = list(baseline = perfect_fit, calibrated = perfect_fit),
    metrics = c("me", "mae", "nse", "kge_2009")
  )
  printed <- paste(capture.output(print(comparison)), collapse = "\n")

  # Standalone integer tokens must be present.
  for (required_pattern in c("\\b0\\b", "\\b1\\b")) {
    expect_match(printed, required_pattern)
  }

  # Neither long nor short zero padding may appear.
  padded_patterns <- c(
    "0\\.0000000",
    "1\\.0000000",
    "0\\.00\\b",
    "1\\.00\\b"
  )
  for (padded_pattern in padded_patterns) {
    expect_no_match(printed, padded_pattern)
  }
})

# Checks that fractional values print in their shortest form (0.15, 0.98,
# 0.1, 0.988) without trailing zeros, and that printing leaves the object and
# its matrix / data-frame views unchanged.
test_that("gof_compare non-integer printing stays compact without trailing zeros", {
  observed <- c(1, 2, 3, 4)
  comparison <- gof_compare(
    datasets = list(
      baseline = list(obs = observed, sim = c(1.2, 1.9, 3.1, 3.8)),
      calibrated = list(obs = observed, sim = c(1.0, 2.1, 2.9, 4.2))
    ),
    metrics = c("mae", "nse", "kge_2009")
  )

  # Snapshots taken before printing, compared again after printing.
  values_before <- unclass(comparison)
  matrix_before <- as.matrix(comparison)
  data_before <- as.data.frame(comparison)

  printed <- paste(capture.output(print(comparison)), collapse = "\n")

  # Compact renderings that must appear as whole tokens.
  compact_patterns <- c(
    "\\b0\\.15\\b",
    "\\b0\\.98\\b",
    "\\b0\\.1\\b",
    "\\b0\\.988\\b"
  )
  for (compact_pattern in compact_patterns) {
    expect_match(printed, compact_pattern)
  }

  # Zero-padded renderings that must not appear.
  for (padded_pattern in c("0\\.1500000", "0\\.9800000", "0\\.1000000")) {
    expect_no_match(printed, padded_pattern)
  }

  expect_identical(unclass(comparison), values_before)
  expect_equal(as.matrix(comparison), matrix_before, tolerance = 1e-12)
  expect_equal(as.data.frame(comparison), data_before, tolerance = 1e-12)
})

# Checks that as.data.frame() on a comparison errors when `row.names` is
# supplied, with a message matching the pattern "row.names".
test_that("gof_compare rejects non-NULL row.names for data-frame coercion", {
  observed <- c(1, 2, 3, 4)
  datasets <- list(
    baseline = list(obs = observed, sim = c(1.2, 1.9, 3.1, 3.8)),
    calibrated = list(obs = observed, sim = c(1.1, 2.0, 2.9, 4.1))
  )
  comparison <- gof_compare(datasets = datasets, metrics = c("mae", "nse"))

  # The pattern is a regular expression (fixed = FALSE), so "." matches any
  # single character.
  expect_error(
    as.data.frame(comparison, row.names = c("a", "b")),
    regexp = "row.names",
    fixed = FALSE
  )
})