# integration test for malaria_do_analysis function

if (!"package:climatehealth" %in% search()) {
  pkgload::load_all(".", export_all = TRUE, helpers = FALSE, quiet = TRUE)
}

if (!exists("suppress_plot")) {
  source("tests/testthat/helper-utils.R", local = FALSE)
}

# Create temp_dir to be used by all MH tests
temp_dir <- tempdir()
temp_dir <- file.path(temp_dir, "malaria_tests")
if (!file.exists(temp_dir)) dir.create(temp_dir)

# helpers to produce synthetic data
make_synthetic_map_m <- function() {
  regions   <- c("RegionA", "RegionA", "RegionB")
  districts <- c("D1",      "D2",      "D3")

  # Three touching squares
  polys <- list(
    list(matrix(c(0,0, 1,0, 1,1, 0,1, 0,0), ncol = 2, byrow = TRUE)),
    list(matrix(c(1,0, 2,0, 2,1, 1,1, 1,0), ncol = 2, byrow = TRUE)),
    list(matrix(c(2,0, 3,0, 3,1, 2,1, 2,0), ncol = 2, byrow = TRUE))
  )

  sf::st_sf(
    region   = regions,
    district = districts,
    geometry = sf::st_sfc(lapply(polys, sf::st_polygon), crs = 4326)
  )
}

make_health_fixture_m <- function() {
  rd <- tibble::tibble(
    region   = c("RegionA", "RegionA", "RegionB"),
    district = c("D1", "D2", "D3")
  )

  rd |>
    tidyr::crossing(
      year = 2020L,
      month = 1:12
    ) |>
    dplyr::arrange(region, district, month) |>
    dplyr::mutate(
      malaria = round(50 + 10*sin(month/12*2*pi) + as.numeric(factor(district))*3),
      tot_pop = 1000L
    )
}


make_climate_fixture_m <- function() {
  tibble::tibble(district = c("D1","D2","D3")) |>
    tidyr::crossing(year = 2020, month = 1:12) |>
    dplyr::mutate(
      # stable but non-collinear seasonal temperature curves
      tmin       = 20 + sin((month+0)*pi/6)  + as.numeric(factor(district))*0.1,
      tmean      = 22 + sin((month+1)*pi/6)  + as.numeric(factor(district))*0.2,
      tmax       = 25 + sin((month+2)*pi/6)  + as.numeric(factor(district))*0.3,

      rainfall   = 50 + 40*sin((month-2)*pi/6) + as.numeric(factor(district)),
      r_humidity = 60 + 5*cos(month*pi/6)      + as.numeric(factor(district))*0.5,
      runoff     = 5 + 2*sin((month+3)*pi/6)   + as.numeric(factor(district))*0.2
    )
}

mock_min_data <- tibble(
  region="A",
  district="D1",
  year=2020,
  month=1,
  time=1,
  district_code=1,
  region_code=1,
  malaria=5,
  tot_pop=100,
  tmin=20,
  tmean=21,
  tmax=22,
  rainfall=10,
  r_humidity=50,
  runoff=5,
  E=5
)

# tests/testthat/test_malaria.R
test_that("malaria_do_analysis runs end-to-end on synthetic data", {
  skip_if_not_installed("sf")
  skip_if_not_installed("INLA")  # optionally guard if your CI lacks INLA

  health  <- make_health_fixture_m()
  climate <- make_climate_fixture_m()

  # Write a tiny shapefile into a unique temp path
  map_stub <- tempfile("synthetic_map_malaria_")
  map_path <- paste0(map_stub, ".shp")
  on.exit(unlink(Sys.glob(paste0(map_stub, ".*"))), add = TRUE)
  map_m <- make_synthetic_map_m() |> sf::st_transform(3857)
  sf::st_write(map_m, map_path, quiet = TRUE, delete_dsn = TRUE)

  res <- suppress_plot(suppressWarnings(
    malaria_do_analysis(
      health_data_path  = health,        # pass data.frame directly
      climate_data_path = climate,       # pass data.frame directly
      map_path          = map_path,
      region_col        = "region",
      district_col      = "district",
      date_col          = NULL,
      year_col          = "year",
      month_col         = "month",
      case_col          = "malaria",
      tot_pop_col       = "tot_pop",
      tmin_col          = "tmin",
      tmean_col         = "tmean",
      tmax_col          = "tmax",
      rainfall_col      = "rainfall",
      r_humidity_col    = "r_humidity",
      runoff_col        = "runoff",
      geometry_col      = "geometry",
      spi_col           = NULL,
      ndvi_col          = NULL,
      max_lag           = 2,
      nk                = 1,
      basis_matrices_choices = "tmax",
      inla_param        = c("tmax"),
      param_term        = "tmax",
      level             = "district",
      param_threshold   = 1,
      filter_year       = NULL,
      family            = "nbinomial",
      group_by_year     = FALSE,
      cumulative        = FALSE,
      config            = FALSE,
      save_csv          = FALSE,
      save_model        = FALSE,
      save_fig          = FALSE,
      output_dir        = NULL
    )
  ))

  extract_rr <- function(rr_entry) {
    tibble::tibble(
      predvar  = rr_entry$predvar,
      RR       = rr_entry$allRRfit,
      RR_low   = rr_entry$allRRlow,
      RR_high  = rr_entry$allRRhigh
    )
  }

  rr_list <- res$rr_df[[1]]

  # Validate high‑level structure of rr_df
  expect_true(is.list(res$rr_df))
  expect_equal(length(res$rr_df), 1)
  expect_true("All Years" %in% names(res$rr_df))

  # Validate that rr_list is a list of crosspred objects keyed by district
  expect_true(is.list(rr_list))
  expect_true(all(names(rr_list) %in% c("D1","D2","D3")))
  expect_true(all(purrr::map_lgl(rr_list, ~ inherits(.x, "crosspred"))))

  # Build RR table for deeper testing
  rr_tables <- purrr::imap_dfr(
    rr_list,
    ~ extract_rr(.x) |> dplyr::mutate(district = .y),
    .id = "district_id"
  )

  # RR table should have expected columns
  expect_true(all(c("predvar","RR","RR_low","RR_high","district") %in% names(rr_tables)))

  # Predvar is numeric
  expect_type(rr_tables$predvar, "double")

  # RR values must be numeric and finite
  expect_type(rr_tables$RR, "double")
  expect_true(all(is.finite(rr_tables$RR)))

  # Confidence interval sanity
  expect_true(all(rr_tables$RR_low <= rr_tables$RR))
  expect_true(all(rr_tables$RR <= rr_tables$RR_high))

  # District identifiers present and correct
  expect_true(all(rr_tables$district %in% c("D1","D2","D3")))

  # RR table should have multiple rows
  expect_gt(nrow(rr_tables), 10)

  # Validate INLA output structure
  expect_true(is.list(res$inla_result))
  expect_true("model" %in% names(res$inla_result))
  expect_true(inherits(res$inla_result$model, "inla"))

  # Model should include fixed effects summary
  expect_true("summary.fixed" %in% names(res$inla_result$model))
  expect_true(is.data.frame(res$inla_result$model$summary.fixed))

  # Random effects should exist
  expect_true(any(grepl("random", names(res$inla_result$model))),
              info = "INLA model should contain some random effect components")

  # Spatial & seasonal components
  # These may be NULL if level != "country", but object names should still exist
  expect_true(all(c("reff_plot_monthly","reff_plot_yearly") %in% names(res)))

  # RR map objects exist
  expect_true("rr_map_plot" %in% names(res))

  # Attribution output
  attr_df <- res$attr_frac_num
  expect_true(is.data.frame(attr_df))

  # Must contain core ID variables
  expect_true(all(c("region","district","year","month") %in% names(attr_df)))

  # Attribution numeric columns (does not assert specific names)
  numeric_cols <- purrr::keep(attr_df, is.numeric)
  expect_true(length(numeric_cols) >= 1)

  # Ensure at least one numeric column has at least one finite value
  expect_true(any(is.finite(unlist(numeric_cols))))

  # Ensure at least one numeric column has non-NA values
  expect_true(any(purrr::map_lgl(numeric_cols, ~ any(!is.na(.x)))))

  # Ensure attribution table is not empty
  expect_gt(nrow(attr_df), 5)

  # Combined-data
  # ensure combined_data workflow didn't silently fail
  expect_true("attr_frac_num" %in% names(res))
  expect_gt(nrow(res$attr_frac_num), 5)

  # A simple check that the model produced more than trivial output
  expect_true(nrow(rr_tables) > 10)
})

test_that("malaria_do_analysis errors when save_fig=TRUE and output_dir=NULL", {
  skip_if_not_installed("INLA")

  expect_error(
    malaria_do_analysis(
      health_data_path = tibble::tibble(),
      climate_data_path = tibble::tibble(),
      map_path = system.file("shape/nc.shp", package = "sf"),
      region_col = "region",
      district_col = "district",
      year_col = "year",
      month_col = "month",
      case_col = "malaria",
      tot_pop_col = "tot_pop",
      tmin_col = "tmin",
      tmean_col = "tmean",
      tmax_col = "tmax",
      rainfall_col = "rainfall",
      r_humidity_col = "r_humidity",
      runoff_col = "runoff",
      geometry_col = "geometry",
      basis_matrices_choices = "tmax",
      inla_param = "tmax",
      param_term = "tmax",
      save_fig = TRUE,
      output_dir = NULL
    ),
    regexp = "output_dir"
  )
})