# Integration Tests for Zenodo Downloads
#
# These tests actually download data from Zenodo to verify:
# 1. URLs are accessible
# 2. MD5 checksums match
# 3. Data loads correctly
#
# IMPORTANT: These tests are SKIPPED by default because they:
# - Require network access
# - Download large files (>100 MB total)
# - Take several minutes to complete
#
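# To run these tests manually before a release, the environment variable
# EMBURDEN_RUN_INTEGRATION_TESTS must be set to 1 (every test below skips
# without it), for example from an R session:
#   Sys.setenv(EMBURDEN_RUN_INTEGRATION_TESTS = "1")
#   testthat::test_file("tests/testthat/test-zenodo-integration.R")
# Or from the shell:
#   EMBURDEN_RUN_INTEGRATION_TESTS=1 R CMD check
# Note: each test also calls skip_on_cran() and skip_on_ci(), so the tests
# stay skipped wherever those guards apply, even with the variable set.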

# Gate/banner test: skipped unless integration tests are explicitly enabled;
# when they are enabled it prints a banner announcing the run.
test_that("Zenodo integration tests are skipped unless explicitly enabled", {
  skip_on_cran()
  skip_on_ci()

  # Opt-in switch: anything other than exactly "1" (including unset, which
  # defaults to "0") skips the test.
  run_integration <- Sys.getenv("EMBURDEN_RUN_INTEGRATION_TESTS", "0")
  skip_if(
    run_integration != "1",
    "Integration tests disabled. Set EMBURDEN_RUN_INTEGRATION_TESTS=1 to enable."
  )

  # If we got here, integration tests are enabled
  cat("\n\n")
  cat("==========================================\n")
  cat("  ZENODO INTEGRATION TESTS\n")
  cat("  (Full downloads + validation)\n")
  cat("==========================================\n\n")

  # Record an explicit pass: a test_that() block with no expectation is
  # reported by testthat as a skipped "empty test", which would make the
  # enabled run look as if this gate had been skipped.
  succeed("Integration tests are enabled")
})


# Downloads the AMI 2022 dataset from Zenodo and checks its basic shape:
# a data frame with the expected columns and more than 100000 rows.
test_that("Zenodo AMI 2022 download works with correct checksum", {
  # Gate: honour the CRAN/CI skips and require the opt-in variable to be "1".
  skip_on_cran()
  skip_on_ci()
  skip_if_not(
    Sys.getenv("EMBURDEN_RUN_INTEGRATION_TESTS", "0") == "1",
    "Integration tests disabled"
  )

  cat("Downloading AMI 2022 from Zenodo...\n")

  # Clear the cached copy first so the download below is meant to be fresh,
  # then download with progress output enabled.
  clear_dataset_cache("ami", "2022", verbose = FALSE)
  ami_2022 <- download_from_zenodo("ami", "2022", verbose = TRUE)

  # A result must have come back, and it must be a data frame.
  expect_false(is.null(ami_2022))
  expect_s3_class(ami_2022, "data.frame")

  # Required columns.
  column_names <- names(ami_2022)
  expect_true("geoid" %in% column_names)
  expect_true("income_bracket" %in% column_names)
  expect_true("households" %in% column_names)

  # The dataset should be substantial.
  row_count <- nrow(ami_2022)
  expect_gt(row_count, 100000)

  cat("  SUCCESS: AMI 2022 downloaded and validated\n\n")
})


# Downloads the FPL 2022 dataset from Zenodo and checks that the result is a
# data frame with more than 100000 rows.
test_that("Zenodo FPL 2022 download works with correct checksum", {
  # Gate: honour the CRAN/CI skips and require the opt-in variable to be "1".
  skip_on_cran()
  skip_on_ci()
  skip_if_not(
    Sys.getenv("EMBURDEN_RUN_INTEGRATION_TESTS", "0") == "1",
    "Integration tests disabled"
  )

  cat("Downloading FPL 2022 from Zenodo...\n")

  # Clear the cached copy before downloading with progress output enabled.
  clear_dataset_cache("fpl", "2022", verbose = FALSE)
  fpl_2022 <- download_from_zenodo("fpl", "2022", verbose = TRUE)

  expect_false(is.null(fpl_2022))
  expect_s3_class(fpl_2022, "data.frame")

  row_count <- nrow(fpl_2022)
  expect_gt(row_count, 100000)

  cat("  SUCCESS: FPL 2022 downloaded and validated\n\n")
})


# Downloads the AMI 2018 dataset from Zenodo and checks that the result is a
# data frame with more than 100000 rows.
test_that("Zenodo AMI 2018 download works with correct checksum", {
  # Gate: honour the CRAN/CI skips and require the opt-in variable to be "1".
  skip_on_cran()
  skip_on_ci()
  skip_if_not(
    Sys.getenv("EMBURDEN_RUN_INTEGRATION_TESTS", "0") == "1",
    "Integration tests disabled"
  )

  cat("Downloading AMI 2018 from Zenodo...\n")

  # Clear the cached copy before downloading with progress output enabled.
  clear_dataset_cache("ami", "2018", verbose = FALSE)
  ami_2018 <- download_from_zenodo("ami", "2018", verbose = TRUE)

  expect_false(is.null(ami_2018))
  expect_s3_class(ami_2018, "data.frame")

  row_count <- nrow(ami_2018)
  expect_gt(row_count, 100000)

  cat("  SUCCESS: AMI 2018 downloaded and validated\n\n")
})


# Downloads the FPL 2018 dataset from Zenodo and checks that the result is a
# data frame with more than 100000 rows.
test_that("Zenodo FPL 2018 download works with correct checksum", {
  # Gate: honour the CRAN/CI skips and require the opt-in variable to be "1".
  skip_on_cran()
  skip_on_ci()
  skip_if_not(
    Sys.getenv("EMBURDEN_RUN_INTEGRATION_TESTS", "0") == "1",
    "Integration tests disabled"
  )

  cat("Downloading FPL 2018 from Zenodo...\n")

  # Clear the cached copy before downloading with progress output enabled.
  clear_dataset_cache("fpl", "2018", verbose = FALSE)
  fpl_2018 <- download_from_zenodo("fpl", "2018", verbose = TRUE)

  expect_false(is.null(fpl_2018))
  expect_s3_class(fpl_2018, "data.frame")

  row_count <- nrow(fpl_2018)
  expect_gt(row_count, 100000)

  cat("  SUCCESS: FPL 2018 downloaded and validated\n\n")
})


# Sanity check that the four datasets are not copies of each other:
# row counts must differ between years, and the AMI and FPL 2022 datasets
# must not share the same set of income brackets.
test_that("All Zenodo datasets have different data (no duplicates)", {
  # Gate: honour the CRAN/CI skips and require the opt-in variable to be "1".
  skip_on_cran()
  skip_on_ci()
  skip_if_not(
    Sys.getenv("EMBURDEN_RUN_INTEGRATION_TESTS", "0") == "1",
    "Integration tests disabled"
  )

  cat("Verifying all datasets are distinct...\n")

  # Fetch all four datasets quietly (same call order as before).
  ami_2022 <- download_from_zenodo("ami", "2022", verbose = FALSE)
  fpl_2022 <- download_from_zenodo("fpl", "2022", verbose = FALSE)
  ami_2018 <- download_from_zenodo("ami", "2018", verbose = FALSE)
  fpl_2018 <- download_from_zenodo("fpl", "2018", verbose = FALSE)

  # Identical row counts across years would suggest the same data was
  # returned (e.g. from a mixed-up cache) for both vintages.
  ami_same_size <- nrow(ami_2022) == nrow(ami_2018)
  fpl_same_size <- nrow(fpl_2022) == nrow(fpl_2018)
  expect_false(ami_same_size)
  expect_false(fpl_same_size)

  # The sorted sets of distinct income brackets must differ between the
  # AMI and FPL datasets.
  ami_brackets_sorted <- sort(unique(ami_2022$income_bracket))
  fpl_brackets_sorted <- sort(unique(fpl_2022$income_bracket))
  same_brackets <- identical(ami_brackets_sorted, fpl_brackets_sorted)
  expect_false(same_brackets)

  cat("  SUCCESS: All datasets are distinct\n\n")
})


# Cross-checks the downloaded AMI 2022 row count against the row count
# recorded in state-manifest.json. Skipped when the manifest (or jsonlite)
# is not available.
test_that("Downloaded data matches state-manifest.json metadata", {
  skip_on_cran()
  skip_on_ci()
  run_integration <- Sys.getenv("EMBURDEN_RUN_INTEGRATION_TESTS", "0")
  skip_if(run_integration != "1", "Integration tests disabled")
  # The manifest is parsed with jsonlite; skip cleanly instead of erroring
  # when that package is not installed.
  skip_if_not_installed("jsonlite")

  cat("Cross-checking with state-manifest.json...\n")

  # Locate the manifest. system.file() returns "" when nothing is found, so
  # both the empty-string and the missing-file case lead to a skip.
  # NOTE(review): the "../../" relative path presumably targets a directory
  # next to the package sources - confirm it resolves in the release workflow.
  manifest_path <- system.file(
    "../../zenodo-upload-nationwide/state-manifest.json",
    package = "emburden"
  )
  if (!nzchar(manifest_path) || !file.exists(manifest_path)) {
    skip("state-manifest.json not available in package")
  }

  manifest <- jsonlite::read_json(manifest_path)

  # Use [[ ]] rather than $ so list-name partial matching cannot silently
  # pick up a different manifest entry.
  expected_rows <- manifest[["nationwide"]][["ami_2022"]][["rows"]]

  # Check AMI 2022 row count matches
  ami_2022 <- download_from_zenodo("ami", "2022", verbose = FALSE)

  if (is.null(expected_rows)) {
    fail("state-manifest.json has no nationwide$ami_2022$rows entry")
  } else {
    # Allow a small variance of at most 100 rows. This is an explicit
    # absolute difference: expect_equal()'s `tolerance` is a *relative*
    # tolerance, so `tolerance = 100` would accept almost any row count.
    row_difference <- abs(nrow(ami_2022) - expected_rows)
    expect_lte(row_difference, 100)
  }

  cat("  SUCCESS: Data matches manifest metadata\n\n")
})


# Closing banner for the integration test run. Despite the test name, this
# block only prints a footer; it does not clear any dataset cache.
test_that("Cleanup after integration tests", {
  skip_on_cran()
  skip_on_ci()
  run_integration <- Sys.getenv("EMBURDEN_RUN_INTEGRATION_TESTS", "0")
  skip_if(run_integration != "1", "Integration tests disabled")

  cat("\n")
  cat("==========================================\n")
  cat("  INTEGRATION TESTS COMPLETE\n")
  cat("==========================================\n\n")
  # This block cannot see the outcome of the earlier tests, so it must not
  # claim success unconditionally; point the reader at the test summary.
  cat("All Zenodo integration tests have run.\n")
  cat("Safe to proceed with release only if every test above passed.\n\n")

  # Record an explicit pass: a test_that() block with no expectation is
  # reported by testthat as a skipped "empty test".
  succeed("Integration test run finished")
})