# Tests for the PNS (Pesquisa Nacional de Saude) module: survey metadata,
# dictionary/variables, validation, caching, SIDRA access and microdata.

# ============================================================================
# basic info functions
# ============================================================================

# pns_years() must report exactly two survey waves, as character strings.
test_that("pns_years returns expected years", {
  available <- pns_years()

  expect_type(available, "character")
  expect_length(available, 2L)
  expect_true(is.element("2013", available))
  expect_true(is.element("2019", available))
})

# Without arguments, pns_info() yields a list carrying the survey name, the
# available years, the SIDRA table count and a URL.
test_that("pns_info returns expected structure", {
  meta <- pns_info()
  fields <- names(meta)

  expect_type(meta, "list")
  for (field in c("name", "available_years", "sidra_tables", "url")) {
    expect_true(field %in% fields)
  }
  # the years listed in the metadata must agree with pns_years()
  expect_equal(meta$available_years, pns_years())
  expect_equal(meta$sidra_tables, 2222)
})

# Asking for a specific year adds a `year_details` entry with the sample size
# and reference period fields.
test_that("pns_info accepts year parameter", {
  meta <- pns_info(2019)

  expect_true("year_details" %in% names(meta))

  detail_fields <- names(meta$year_details)
  expect_true("sample_size" %in% detail_fields)
  expect_true("reference_period" %in% detail_fields)
})

# ============================================================================
# modules functions
# ============================================================================

# The module listing is a non-empty tibble with code and bilingual name columns.
test_that("pns_modules returns tibble with expected columns", {
  catalog <- pns_modules()
  required <- c("module", "name", "name_en")

  expect_s3_class(catalog, "tbl_df")
  # no required column may be missing
  expect_length(setdiff(required, names(catalog)), 0L)
  expect_gt(nrow(catalog), 0)
})

# Year filtering: each wave yields a non-empty module list, and the modules
# that exist only in the 2019 wave appear for 2019 but not for 2013.
test_that("pns_modules filters by year correctly", {
  modules_2013 <- pns_modules(year = 2013)
  modules_2019 <- pns_modules(year = 2019)

  expect_gt(nrow(modules_2013), 0)
  expect_gt(nrow(modules_2019), 0)

  # 2019 has Y and Z modules not in 2013
  new_in_2019 <- c("Y", "Z")
  expect_true(all(new_in_2019 %in% modules_2019$module))
  # Presence in 2019 alone would also hold for an unfiltered listing, so the
  # absence from 2013 is what actually demonstrates that filtering happened.
  expect_false(any(new_in_2019 %in% modules_2013$module))
})

# A year outside the survey waves must be rejected with an "Invalid year" error.
test_that("pns_modules validates year parameter", {
  expect_error(
    pns_modules(year = 1999),
    regexp = "Invalid year"
  )
})

# ============================================================================
# dictionary and variables functions
# ============================================================================

# The dictionary loader must refuse an unsupported year with "Invalid year".
test_that("pns_dictionary validates year parameter", {
  expect_error(pns_dictionary(year = 1999, cache_dir = tempdir()),
               regexp = "Invalid year")
})

# Integration test (skipped on CRAN and when integration tests are disabled):
# the 2019 dictionary comes back as a non-empty tibble with a `year` column.
test_that("pns_dictionary downloads and returns tibble", {
  skip_on_cran()
  skip_if_no_integration()

  dictionary <- pns_dictionary(year = 2019, cache_dir = tempdir())

  expect_s3_class(dictionary, "tbl_df")
  expect_gt(nrow(dictionary), 0)
  expect_true(is.element("year", names(dictionary)))
})

# Integration test: after a first call, a dictionary file must exist in the
# cache directory, and a repeated call must return the same number of rows.
test_that("pns_dictionary uses cache on second call", {
  skip_on_cran()
  skip_if_no_integration()

  # start from a clean, test-private cache folder and remove it afterwards
  cache_root <- file.path(tempdir(), "test_pns_dict_cache")
  unlink(cache_root, recursive = TRUE)
  on.exit(unlink(cache_root, recursive = TRUE), add = TRUE)

  fetch_dictionary <- function() {
    pns_dictionary(year = 2019, cache_dir = cache_root)
  }

  # first call populates the cache
  first <- fetch_dictionary()
  cached <- list.files(cache_root, pattern = "pns_dictionary_2019")
  expect_gt(length(cached), 0)

  # second call runs against the populated cache
  second <- fetch_dictionary()
  expect_equal(nrow(first), nrow(second))
})

# Integration test: the 2019 variable listing is a non-empty tibble that
# carries a `year` column.
test_that("pns_variables returns tibble", {
  skip_on_cran()
  skip_if_no_integration()

  variable_list <- pns_variables(year = 2019, cache_dir = tempdir())

  expect_s3_class(variable_list, "tbl_df")
  expect_gt(nrow(variable_list), 0)
  expect_true(is.element("year", names(variable_list)))
})

# ============================================================================
# validation functions
# ============================================================================

# validate_pns_year(): supported waves are accepted, NULL expands to every
# wave, and anything containing an unsupported year raises "Invalid year".
test_that("validate_pns_year validates correctly", {
  all_waves <- c(2013L, 2019L)

  # supported years, alone or combined
  expect_equal(validate_pns_year(2013), 2013L)
  expect_equal(validate_pns_year(2019), 2019L)
  expect_equal(validate_pns_year(c(2013, 2019)), all_waves)

  # NULL means "all waves"
  expect_equal(validate_pns_year(NULL), all_waves)

  # unsupported years, including a mix of valid and invalid
  for (bad_input in list(2015, 2020, c(2013, 2020))) {
    expect_error(validate_pns_year(bad_input), "Invalid year")
  }
})

# The microdata loader must reject unsupported years with "Invalid year".
test_that("pns_data validates year parameter", {
  for (bad_year in c(1999, 2020)) {
    expect_error(
      pns_data(year = bad_year, cache_dir = tempdir()),
      "Invalid year"
    )
  }
})

# ============================================================================
# cache functions
# ============================================================================

# Called without arguments, pns_cache_dir() returns a character path that
# exists on disk afterwards.
test_that("pns_cache_dir creates directory", {
  cache_path <- pns_cache_dir()

  expect_type(cache_path, "character")
  expect_true(dir.exists(cache_path))
})

# With a caller-supplied location, the returned directory must exist and its
# path must match "pns".
test_that("pns_cache_dir respects custom cache_dir", {
  requested <- file.path(tempdir(), "custom_pns_cache")
  on.exit(unlink(requested, recursive = TRUE), add = TRUE)

  resolved <- pns_cache_dir(requested)

  expect_true(dir.exists(resolved))
  # NOTE(review): the requested folder name itself contains "pns" -- confirm
  # this pattern check is as strict as intended.
  expect_match(resolved, "pns")
})

# The cache report is a tibble describing files by name, size and mtime.
test_that("pns_cache_status returns tibble", {
  report <- pns_cache_status()
  required <- c("file", "size_mb", "modified")

  expect_s3_class(report, "tbl_df")
  expect_length(setdiff(required, names(report)), 0L)
})

# Clearing a cache location this test never writes to must not raise an error.
test_that("pns_clear_cache handles empty cache", {
  unused_cache <- file.path(tempdir(), "empty_pns_test")
  on.exit(unlink(unused_cache, recursive = TRUE), add = TRUE)

  expect_no_error(
    pns_clear_cache(cache_dir = unused_cache)
  )
})

# ============================================================================
# SIDRA functions - catalog
# ============================================================================

# The SIDRA catalog is a non-empty tibble with table and theme columns.
test_that("pns_sidra_tables returns tibble with expected columns", {
  catalog <- pns_sidra_tables()
  required <- c("table_code", "table_name", "theme", "theme_label")

  expect_s3_class(catalog, "tbl_df")
  expect_length(setdiff(required, names(catalog)), 0L)
  expect_gt(nrow(catalog), 0)
})

# Filtering by theme must return rows, all of them tagged with that theme.
test_that("pns_sidra_tables filters by theme", {
  wanted_theme <- "chronic_diseases"
  filtered <- pns_sidra_tables(theme = wanted_theme)

  themes_found <- filtered$theme
  expect_true(all(themes_found == wanted_theme))
  expect_gt(nrow(filtered), 0)
})

# An unknown theme name must be rejected with an "Invalid theme" error.
test_that("pns_sidra_tables handles invalid theme", {
  expect_error(
    pns_sidra_tables(theme = "nonexistent"),
    regexp = "Invalid theme"
  )
})

# Each survey wave must have at least one SIDRA table in the catalog.
test_that("pns_sidra_tables filters by year", {
  tables_2019 <- pns_sidra_tables(year = 2019)
  tables_2013 <- pns_sidra_tables(year = 2013)

  for (tables in list(tables_2019, tables_2013)) {
    expect_gt(nrow(tables), 0)
  }
})

# The unfiltered catalog must span more than one distinct theme.
test_that("pns_sidra_tables returns multiple themes", {
  catalog <- pns_sidra_tables()
  distinct_themes <- unique(catalog$theme)

  expect_gt(length(distinct_themes), 1)
})

# ============================================================================
# SIDRA functions - search
# ============================================================================

# A keyword search for "diabetes" must return a tibble with at least one hit.
test_that("pns_sidra_search finds tables by keyword", {
  hits <- pns_sidra_search("diabetes")

  expect_s3_class(hits, "tbl_df")
  expect_gt(nrow(hits), 0)
})

# The same keyword in lower and upper case must produce equally many hits.
test_that("pns_sidra_search is case-insensitive", {
  keyword <- "diabetes"
  hits_lower <- pns_sidra_search(keyword)
  hits_upper <- pns_sidra_search("DIABETES")

  expect_equal(nrow(hits_lower), nrow(hits_upper))
})

# A keyword matching nothing must still return a tibble, with zero rows.
test_that("pns_sidra_search returns empty tibble for no match", {
  no_hits <- pns_sidra_search("xyznonexistent")

  expect_s3_class(no_hits, "tbl_df")
  expect_equal(nrow(no_hits), 0)
})

# A single-character keyword must be rejected with a minimum-length error.
test_that("pns_sidra_search requires minimum keyword length", {
  expect_error(
    pns_sidra_search("a"),
    regexp = "at least 2 characters"
  )
})

# Accent-insensitive search: the keyword must find the same number of tables
# whether or not it is typed with its diacritic.
test_that("pns_sidra_search handles accent-insensitive search", {
  result1 <- pns_sidra_search("hipertensao")
  result2 <- pns_sidra_search("hipertens")

  # at least one of the unaccented spellings must hit the catalog
  expect_true(nrow(result1) > 0 || nrow(result2) > 0)

  # The original keywords contained no accent at all, so accent handling was
  # never exercised. "\u00e3" is a-tilde, written as an escape so the file
  # stays ASCII-only.
  result_accented <- pns_sidra_search("hipertens\u00e3o")
  expect_equal(nrow(result_accented), nrow(result1))
})

# ============================================================================
# SIDRA functions - data retrieval
# ============================================================================

# An unknown territorial level must be rejected before any data is returned.
test_that("pns_sidra_data validates territorial_level", {
  expect_error(pns_sidra_data(table = 4751, territorial_level = "invalid"),
               regexp = "Invalid territorial_level")
})

# Integration test: a national-level request for table 4751 in 2019 yields a
# non-empty tibble.
test_that("pns_sidra_data returns tibble from API", {
  skip_on_cran()
  skip_if_no_integration()

  national <- pns_sidra_data(
    table = 4751, territorial_level = "brazil", year = 2019
  )

  expect_s3_class(national, "tbl_df")
  expect_gt(nrow(national), 0)
})

# Integration test: a state-level request must return a tibble with more than
# one row.
test_that("pns_sidra_data returns data by state", {
  skip_on_cran()
  skip_if_no_integration()

  by_state <- pns_sidra_data(
    table = 4751, territorial_level = "state", year = 2019
  )
  n_rows <- nrow(by_state)

  expect_s3_class(by_state, "tbl_df")
  expect_gt(n_rows, 0)
  # a state breakdown should produce several rows, not just one
  expect_gt(n_rows, 1)
})

# Integration test: the raw response must have at least as many rows as the
# cleaned one for the same request.
test_that("pns_sidra_data raw parameter works", {
  skip_on_cran()
  skip_if_no_integration()

  # same request each time; only the `raw` switch varies
  fetch_national <- function(raw) {
    pns_sidra_data(
      table = 4751,
      territorial_level = "brazil",
      year = 2019,
      raw = raw
    )
  }

  unprocessed <- fetch_national(TRUE)
  cleaned <- fetch_national(FALSE)

  # raw has header row, so >= clean
  expect_gte(nrow(unprocessed), nrow(cleaned))
})

# Integration test: requesting both waves at once yields a non-empty tibble.
test_that("pns_sidra_data handles multiple years", {
  skip_on_cran()
  skip_if_no_integration()

  both_waves <- c(2013, 2019)
  combined <- pns_sidra_data(
    table = 4751, territorial_level = "brazil", year = both_waves
  )

  expect_s3_class(combined, "tbl_df")
  expect_gt(nrow(combined), 0)
})

# Integration test: a table code absent from the catalog must trigger the
# "not found in internal catalog" warning.
test_that("pns_sidra_data warns for unknown table", {
  skip_on_cran()
  skip_if_no_integration()

  # The request for table 99999 is expected to warn about the catalog first
  # and may then fail at the API; any such error is discarded so that only
  # the warning is under test.
  discard_errors <- function(expr) {
    tryCatch(expr, error = function(e) NULL)
  }

  expect_warning(
    discard_errors(
      pns_sidra_data(table = 99999, territorial_level = "brazil", year = 2019)
    ),
    "not found in internal catalog"
  )
})

# ============================================================================
# microdata download - integration tests
# ============================================================================

# Integration test: the 2019 microdata load into a tibble that has rows,
# columns and a `year` column.
test_that("pns_data downloads and returns tibble", {
  skip_on_cran()
  skip_if_no_integration()

  # private cache folder, wiped before and after the test
  cache_root <- file.path(tempdir(), "test_pns_download")
  unlink(cache_root, recursive = TRUE)
  on.exit(unlink(cache_root, recursive = TRUE), add = TRUE)

  # test with 2019 (smaller file)
  microdata <- pns_data(year = 2019, cache_dir = cache_root)

  expect_s3_class(microdata, "tbl_df")
  expect_gt(nrow(microdata), 0)
  expect_gt(ncol(microdata), 0)
  expect_true(is.element("year", names(microdata)))
})

# Integration test: passing `vars` must return exactly the requested columns
# plus the `year` column.
test_that("pns_data handles variable selection", {
  skip_on_cran()
  skip_if_no_integration()

  cache_root <- file.path(tempdir(), "test_pns_vars")
  unlink(cache_root, recursive = TRUE)
  on.exit(unlink(cache_root, recursive = TRUE), add = TRUE)

  # full pull first, to learn which column names exist
  everything <- pns_data(year = 2019, cache_dir = cache_root)
  candidates <- setdiff(names(everything), "year")

  # ask for at most five columns, and always fewer than the total available
  n_wanted <- min(5, length(candidates) - 1)
  wanted <- head(candidates, n_wanted)

  selection <- pns_data(year = 2019, vars = wanted, cache_dir = cache_root)
  returned <- names(selection)

  expect_true("year" %in% returned)
  expect_true(all(toupper(wanted) %in% returned))
  # requested columns + 1 for the year column
  expect_equal(ncol(selection), length(wanted) + 1)
})

# Integration test: the first load must leave parquet files under
# <cache>/pns_data, and a repeated load must have the same dimensions.
test_that("pns_data uses cache on second call", {
  skip_on_cran()
  skip_if_no_integration()

  cache_root <- file.path(tempdir(), "test_pns_cache_reuse")
  unlink(cache_root, recursive = TRUE)
  on.exit(unlink(cache_root, recursive = TRUE), add = TRUE)

  load_2019 <- function() {
    pns_data(year = 2019, cache_dir = cache_root)
  }

  # first call populates the cache
  first <- load_2019()

  # the partitioned cache should now hold at least one parquet file
  parquet_files <- list.files(
    file.path(cache_root, "pns_data"),
    pattern = "\\.parquet$",
    recursive = TRUE
  )
  expect_gt(length(parquet_files), 0)

  # second call runs against the populated cache
  second <- load_2019()

  expect_equal(nrow(first), nrow(second))
  expect_equal(ncol(first), ncol(second))
})