## SETUP AND TEST FIXTURES -----------------------------------------------------

# Deliberately invalid inputs: none of these is an HTTP response or a file path
# to a JSON document.
null <- NULL
na <- NA
boolean <- TRUE
# A single integer drawn from 1..1000
number_random <- sample.int(1000L, size = 1L)
# Five distinct lowercase letters collapsed into one string
string_random <- paste(
  letters[sample.int(length(letters), size = 5L)],
  collapse = ""
)
vector_strings <- c("foo", "bar")
list_strings <- list("foo", "bar")
# NOTE: `df` and `matrix` are referenced by name in the helper functions below
df <- mtcars
matrix <- as.matrix(mtcars)

# A syntactically valid JSON file whose content is just two made-up keys, so it
# is not a Document AI response. Written to a temp file for the async tests.
fill <- list(a = 1, b = 2)
json <- jsonlite::toJSON(x = fill)
madeup_json_file <- tempfile(fileext = ".json")
write(x = json, file = madeup_json_file)

# Paths to the JSON files in the "examples" folder of the test directory
# (used by the tests below)
sample_json <- testthat::test_path(
  "examples", "output.json"
)
sample_json_tables <- testthat::test_path(
  "examples", "table_form_parsed.json"
)
sample_json_blank <- testthat::test_path(
  "examples", "output_blank.json"
)

## HELPER FUNCTIONS FOR TESTING ------------------------------------------------

# Check that `extract_function`, called with a single positional argument and
# no `type`, raises the "not a valid HTTP response" error for every invalid
# fixture object.
#   extract_function: the function under test
#   function_name:    label prepended to the test description
test_sync_invalid_inputs <- function(extract_function, function_name) {
  expected_msg <- "Invalid object: not a valid HTTP response"
  invalid_inputs <- list(
    null, na, boolean, number_random, string_random,
    vector_strings, list_strings, df, matrix
  )
  test_that(paste0(function_name, " errors with invalid object for sync type"), {
    for (candidate in invalid_inputs) {
      expect_error(extract_function(candidate), expected_msg)
    }
  })
}

# Check that `extract_function(..., type = "async")` raises the "not a .json
# file or does not exist" error for strings that are not usable JSON paths:
# a random string, a .txt name, and a .json name that is not on disk.
#   extract_function: the function under test
#   function_name:    label prepended to the test description
test_async_filepath_errors <- function(extract_function, function_name) {
  expected_msg <- "Invalid object: file is not a \\.json file or does not exist"
  bad_paths <- c(string_random, "wrong.txt", "fake.json")
  test_that(paste0(function_name, " errors with invalid filepaths for async type"), {
    for (bad_path in bad_paths) {
      expect_error(extract_function(bad_path, type = "async"), expected_msg)
    }
  })
}

# Check that `extract_function(..., type = "async")` raises the "must be a
# single character string filepath" error for objects that are not a single
# string: a number, a length-2 character vector, NULL and a logical.
#   extract_function: the function under test
#   function_name:    label prepended to the test description
test_async_object_type_errors <- function(extract_function, function_name) {
  expected_msg <- "Invalid object: must be a single character string filepath"
  bad_objects <- list(number_random, vector_strings, null, boolean)
  test_that(paste0(function_name, " errors with invalid object types for async type"), {
    for (bad_object in bad_objects) {
      expect_error(extract_function(bad_object, type = "async"), expected_msg)
    }
  })
}

# Check that `extract_function` rejects unusable `type` values: an unknown
# string, a length-2 vector, NULL and a number.
#   extract_function: the function under test
#   function_name:    label prepended to the test description
test_type_parameter <- function(extract_function, function_name) {
  expected_msg <- "Invalid type parameter."
  bad_types <- list("invalid", c("sync", "async"), NULL, 123)
  test_that(paste0(function_name, " validates type parameter"), {
    for (bad_type in bad_types) {
      expect_error(extract_function(sample_json, type = bad_type), expected_msg)
    }
  })
}

# Check that `extract_function(..., type = "async")` rejects a JSON file that
# does not have the Document AI structure (the made-up fixture file).
#   extract_function: the function under test
#   function_name:    label prepended to the test description
test_invalid_json_format <- function(extract_function, function_name) {
  test_that(paste0(function_name, " handles JSON not from DAI"), {
    # fixed = TRUE: the message contains "?" and ".", which would otherwise be
    # interpreted as regex metacharacters and loosen the match
    expect_error(
      extract_function(madeup_json_file, type = "async"),
      "JSON not in right format. Is it from DAI?",
      fixed = TRUE
    )
  })
}

## GET_TEXT --------------------------------------------------------------------

# Run every shared input-validation helper against get_text(), in order
for (run_checks in list(
  test_sync_invalid_inputs,
  test_async_filepath_errors,
  test_async_object_type_errors,
  test_type_parameter,
  test_invalid_json_format
)) {
  run_checks(get_text, "get_text()")
}

test_that("get_text() validates save_to_file parameter", {
  expected_msg <- "Invalid save_to_file argument. Must be either TRUE or FALSE."

  # Values that must all be rejected, in turn: a string, a number,
  # a length-2 logical vector, and NA
  bad_values <- list("yes", 1, c(TRUE, FALSE), NA)

  for (bad_value in bad_values) {
    expect_error(
      get_text(sample_json, type = "async", save_to_file = bad_value),
      expected_msg
    )
  }
})

test_that("get_text() validates dest_dir parameter", {
  expected_msg <- "Invalid dest_dir argument. Must be a valid folder path."

  # A number, a length-2 character vector and a logical are all invalid
  bad_dirs <- list(123, c("path1", "path2"), TRUE)

  for (bad_dir in bad_dirs) {
    expect_error(
      get_text(sample_json, type = "async", save_to_file = TRUE, dest_dir = bad_dir),
      expected_msg
    )
  }
})

test_that("get_text() validates outfile_stem parameter", {
  expected_msg <- "Invalid outfile_stem argument. Must be NULL or a string."

  # A length-2 character vector, a number and a logical are all invalid
  bad_stems <- list(c("name1", "name2"), 123, TRUE)

  for (bad_stem in bad_stems) {
    expect_error(
      get_text(sample_json, type = "async", save_to_file = TRUE, outfile_stem = bad_stem),
      expected_msg
    )
  }
})

test_that("get_text() gets text from DAI response from example file", {
  # Skipped on CRAN, on CI and when there is no internet connection
  skip_on_cran()
  skip_on_ci()
  skip_if_offline()

  image_path <- testthat::test_path("examples", "image.jpg")
  extracted <- get_text(dai_sync(image_path))

  expect_type(extracted, "character")
  expect_true(nchar(extracted) > 0)
})

test_that("get_text() gets text from example json file", {
  # The example JSON is expected to yield a non-empty character result
  extracted <- get_text(sample_json, type = "async")

  expect_type(extracted, "character")
  expect_true(nchar(extracted) > 0)
})

test_that("get_text() saves to file with custom outfile_stem", {
  out_dir <- tempdir()
  # Timestamped stem, e.g. "test_20240131_120000"
  stem <- paste0("test_", format(Sys.time(), "%Y%m%d_%H%M%S"))
  written_path <- file.path(out_dir, paste0(stem, ".txt"))

  extracted <- get_text(
    sample_json,
    type = "async",
    save_to_file = TRUE,
    dest_dir = out_dir,
    outfile_stem = stem
  )

  expect_true(file.exists(written_path))
  expect_type(extracted, "character")

  # Remove the file this test created
  unlink(written_path)
})

test_that("get_text() saves to file with default outfile_stem for async", {
  out_dir <- tempdir()
  # With no outfile_stem given, the file is expected to be named "output.txt"
  written_path <- file.path(out_dir, "output.txt")

  extracted <- get_text(
    sample_json,
    type = "async",
    save_to_file = TRUE,
    dest_dir = out_dir
  )

  expect_true(file.exists(written_path))

  # Remove the file this test created
  unlink(written_path)
})

test_that("get_text() saves to file with default outfile_stem for sync", {
  # Skipped on CRAN, on CI and when there is no internet connection
  skip_on_cran()
  skip_on_ci()
  skip_if_offline()

  image_path <- testthat::test_path("examples", "image.jpg")
  resp <- dai_sync(image_path)
  out_dir <- tempdir()
  # With no outfile_stem given, the file is expected to be named "output.txt"
  written_path <- file.path(out_dir, "output.txt")

  extracted <- get_text(resp, save_to_file = TRUE, dest_dir = out_dir)

  expect_true(file.exists(written_path))

  # Remove the file this test created
  unlink(written_path)
})

test_that("get_text() handles blank documents with warning", {
  blank_warning <- "DAI found no text. The document may be blank."

  # Assign inside expect_warning() so the returned value can be checked below
  expect_warning(
    extracted <- get_text(sample_json_blank, type = "async"),
    blank_warning
  )

  expect_equal(extracted, "")
})

test_that("get_text() returns text without saving when save_to_file = FALSE", {
  # Use a fresh, empty directory rather than the shared tempdir(): leftovers
  # from other tests (or any file whose name merely contains "output.txt")
  # would otherwise make this test fail for unrelated reasons.
  temp_dir <- tempfile("get_text_nosave_")
  dir.create(temp_dir)

  text <- get_text(sample_json, type = "async", save_to_file = FALSE, dest_dir = temp_dir)

  # Check that no file at all was created in the destination directory
  files <- list.files(temp_dir, all.files = TRUE, no.. = TRUE)
  expect_equal(length(files), 0)
  expect_type(text, "character")

  # Clean up
  unlink(temp_dir, recursive = TRUE)
})

test_that("get_text() handles dest_dir with trailing slash", {
  temp_dir <- paste0(tempdir(), "/")
  # Build the expected path from the slash-free directory to avoid a doubled
  # separator, and remove any stale copy so the existence check is meaningful
  expected_file <- file.path(tempdir(), "edge_case.txt")
  unlink(expected_file)

  text <- get_text(sample_json,
    type = "async", save_to_file = TRUE,
    dest_dir = temp_dir, outfile_stem = "edge_case"
  )

  # Should still work despite trailing slash: text is returned and the file
  # is actually written
  expect_type(text, "character")
  expect_true(file.exists(expected_file))

  # Clean up
  unlink(expected_file)
})

test_that("get_text() handles special characters in outfile_stem", {
  out_dir <- tempdir()
  # NOTE(review): this stem only contains letters, digits and underscores
  stem <- "test_file_2024"
  written_path <- file.path(out_dir, paste0(stem, ".txt"))

  extracted <- get_text(
    sample_json,
    type = "async",
    save_to_file = TRUE,
    dest_dir = out_dir,
    outfile_stem = stem
  )

  expect_true(file.exists(written_path))

  # Remove the file this test created
  unlink(written_path)
})

## GET_TABLES ------------------------------------------------------------------

# Run every shared input-validation helper against get_tables(), in order
for (run_checks in list(
  test_sync_invalid_inputs,
  test_async_filepath_errors,
  test_async_object_type_errors,
  test_type_parameter,
  test_invalid_json_format
)) {
  run_checks(get_tables, "get_tables()")
}

test_that("get_tables() warns of response not containing text", {
  # Skipped on CRAN, on CI and when there is no internet connection
  skip_on_cran()
  skip_on_ci()
  skip_if_offline()

  # fixed = TRUE throughout: the expected messages contain "." and "?", which
  # would otherwise act as regex metacharacters and loosen the match
  wrong <- dai_async("random.pdf")
  expect_error(
    get_tables(wrong),
    "The supplied object is not from a successful HTTP request.",
    fixed = TRUE
  )

  blank <- dai_sync(testthat::test_path("examples", "blank.tiff"))
  expect_error(
    get_tables(blank),
    "DAI found no text. Was the page blank?",
    fixed = TRUE
  )
})

test_that("get_tables() warns of file not containing text or proper format", {
  # fixed = TRUE: the message contains "." and "?", which would otherwise act
  # as regex metacharacters and loosen the match
  expect_error(
    get_tables(sample_json_blank, type = "async"),
    "DAI found no text. Was the document blank?",
    fixed = TRUE
  )
})

test_that("get_tables() gets tables from example json file", {
  tbls <- get_tables(sample_json_tables, type = "async")

  # A non-empty list whose first element is a data frame with columns
  expect_type(tbls, "list")
  expect_gt(length(tbls), 0)
  expect_s3_class(tbls[[1]], "data.frame")
  expect_gt(ncol(tbls[[1]]), 0)
  # No row-count assertion: `nrow(x) >= 0` holds for every data frame, so the
  # previous check could never fail once the class check above had passed.
})

test_that("get_tables() returns all tables from multi-table document", {
  tbls <- get_tables(sample_json_tables, type = "async")
  # Check that each element in the list is a data frame.
  # vapply() guarantees a logical vector whatever the input looks like.
  expect_true(all(vapply(tbls, is.data.frame, logical(1))))
})

test_that("get_tables() handles empty table cells", {
  # This tests the helper function's ability to handle NULL or empty cells
  tbls <- get_tables(sample_json_tables, type = "async")

  # Tables should still be valid data frames even with empty cells.
  # vapply() guarantees a logical vector whatever the input looks like.
  expect_true(all(vapply(tbls, is.data.frame, logical(1))))
})

## GET_ENTITIES ----------------------------------------------------------------

# Run every shared input-validation helper against get_entities(), in order
for (run_checks in list(
  test_sync_invalid_inputs,
  test_async_filepath_errors,
  test_async_object_type_errors,
  test_type_parameter,
  test_invalid_json_format
)) {
  run_checks(get_entities, "get_entities()")
}

test_that("get_entities() warns of response not containing text", {
  # Skipped on CRAN, on CI and when there is no internet connection
  skip_on_cran()
  skip_on_ci()
  skip_if_offline()

  # fixed = TRUE throughout: the expected messages contain "." and "?", which
  # would otherwise act as regex metacharacters and loosen the match
  wrong <- dai_async("random.pdf")
  expect_error(
    get_entities(wrong),
    "The supplied object is not from a successful HTTP request.",
    fixed = TRUE
  )

  blank <- dai_sync(testthat::test_path("examples", "blank.tiff"))
  expect_error(
    get_entities(blank),
    "DAI found no text. Was the page blank?",
    fixed = TRUE
  )
})

test_that("get_entities() warns of file not containing text", {
  # fixed = TRUE: the message contains "." and "?", which would otherwise act
  # as regex metacharacters and loosen the match
  expect_error(
    get_entities(sample_json_blank, type = "async"),
    "DAI found no text. Was the document blank?",
    fixed = TRUE
  )
})

test_that("get_entities() gets entities from example file", {
  entity_list <- get_entities(sample_json_tables, type = "async")
  expect_type(entity_list, "list")
  expect_true(length(entity_list) > 0)

  first_df <- entity_list[[1]]
  expect_true(is.data.frame(first_df))

  # Every one of these columns must be present in the first data frame
  required_cols <- c(
    "id", "mentionText", "type", "confidence",
    "start_ind", "end_ind", "left", "right", "top", "bottom"
  )
  absent_cols <- setdiff(required_cols, names(first_df))
  expect_true(length(absent_cols) == 0)
})

test_that("get_entities() handles multiple entity pages", {
  ents <- get_entities(sample_json_tables, type = "async")
  # Each element should be a data frame.
  # vapply() guarantees a logical vector whatever the input looks like.
  expect_true(all(vapply(ents, is.data.frame, logical(1))))
})

test_that("get_entities() entity data frames have correct structure", {
  ents <- get_entities(sample_json_tables, type = "async")

  # Guard against a vacuous pass: with an empty list the loop below would
  # run no expectations at all
  expect_gt(length(ents), 0)

  numeric_cols <- c(
    "id", "confidence", "start_ind", "end_ind",
    "left", "right", "top", "bottom"
  )
  character_cols <- c("mentionText", "type")

  for (ent_df in ents) {
    # Check column types. `[[` is used instead of `$` so that a missing column
    # cannot be silently satisfied by partial name matching; `info` names the
    # offending column in the failure message.
    for (col in numeric_cols) {
      expect_true(is.numeric(ent_df[[col]]), info = col)
    }
    for (col in character_cols) {
      expect_true(is.character(ent_df[[col]]), info = col)
    }

    # Check that confidence is between 0 and 1
    confidence <- ent_df[["confidence"]]
    expect_true(all(confidence >= 0 & confidence <= 1))

    # Check that coordinates make sense
    expect_true(all(ent_df[["left"]] <= ent_df[["right"]]))
    expect_true(all(ent_df[["top"]] <= ent_df[["bottom"]]))
  }
})

## INTEGRATION TESTS -----------------------------------------------------------

test_that("get_text() and get_tables() work together on same file", {
  # Both extractors are run against the same example JSON file
  extracted_text <- get_text(sample_json_tables, type = "async")
  extracted_tables <- get_tables(sample_json_tables, type = "async")

  expect_type(extracted_text, "character")
  expect_type(extracted_tables, "list")
  expect_true(length(extracted_tables) > 0)
})

test_that("get_text() and get_entities() work together on same file", {
  # Both extractors are run against the same example JSON file
  extracted_text <- get_text(sample_json_tables, type = "async")
  extracted_entities <- get_entities(sample_json_tables, type = "async")

  expect_type(extracted_text, "character")
  expect_type(extracted_entities, "list")
  expect_true(length(extracted_entities) > 0)
})

test_that("All three extraction functions work on same file", {
  # Text, tables and entities are all pulled from the same example JSON file
  extracted_text <- get_text(sample_json_tables, type = "async")
  extracted_tables <- get_tables(sample_json_tables, type = "async")
  extracted_entities <- get_entities(sample_json_tables, type = "async")

  expect_type(extracted_text, "character")
  expect_type(extracted_tables, "list")
  expect_type(extracted_entities, "list")
})

## CLEANUP ---------------------------------------------------------------------

# Delete the temporary made-up JSON file written during setup
unlink(x = madeup_json_file, force = TRUE)