test_that("counts are good", {
  # Each section screens `adr_` at one MedDRA term level, keeps a handful of
  # terms, and compares the rows against hand-written counts. Percentages are
  # computed as count / 750 * 100 (750 is presumably the number of cases in
  # `adr_` -- TODO confirm).

  # Preferred term level ("pt")
  pt_terms <- c("Pneumonitis", "Diarrhoea", "Colitis")
  pt_counts <- c(92, 69, 32)

  s1 <- dplyr::filter(
    screen_adr(.data = adr_, meddra = meddra_, term_level = "pt"),
    term %in% pt_terms
  )
  s1_true <- data.table(
    term = pt_terms,
    n = pt_counts,
    percentage = pt_counts / 750 * 100
  )

  expect_equal(s1, s1_true)

  # High level term level ("hlt")
  hlt_terms <- c(
    "Lower respiratory tract inflammatory and immunologic conditions",
    "Diarrhoea (excl infective)",
    "Colitis (excl infective)"
  )
  hlt_counts <- c(92, 69, 40)

  s2 <- dplyr::filter(
    screen_adr(.data = adr_, meddra = meddra_, term_level = "hlt"),
    term %in% hlt_terms
  )
  s2_true <- data.table(
    term = hlt_terms,
    n = hlt_counts,
    percentage = hlt_counts / 750 * 100
  )

  expect_equal(s2, s2_true)

  # System organ class level ("soc"); the vector is written in the row order
  # expected from the output, which does not matter for the `%in%` filter.
  soc_terms <- c(
    "Respiratory, thoracic and mediastinal disorders",
    "Gastrointestinal disorders"
  )
  soc_counts <- c(110, 104)

  s3 <- dplyr::filter(
    screen_adr(.data = adr_, meddra = meddra_, term_level = "soc"),
    term %in% soc_terms
  )
  s3_true <- data.table(
    term = soc_terms,
    n = soc_counts,
    percentage = soc_counts / 750 * 100
  )

  expect_equal(s3, s3_true)

  # High level group term level ("hlgt")
  hlgt_terms <- c(
    "Lower respiratory tract disorders (excl obstruction and infection)",
    "Gastrointestinal motility and defaecation conditions",
    "Gastrointestinal inflammatory conditions"
  )
  hlgt_counts <- c(103, 69, 46)

  s4 <- dplyr::filter(
    screen_adr(.data = adr_, meddra = meddra_, term_level = "hlgt"),
    term %in% hlgt_terms
  )
  s4_true <- data.table(
    term = hlgt_terms,
    n = hlgt_counts,
    percentage = hlgt_counts / 750 * 100
  )

  expect_equal(s4, s4_true)
})

test_that("screen_adr works with top_n", {
  # Screen at the preferred term level, restricted with top_n = 3.
  top_three <- screen_adr(
    .data = adr_,
    meddra = meddra_,
    term_level = "pt",
    top_n = 3
  )

  # Exactly three rows are returned.
  expect_equal(nrow(top_three), 3)

  # Those rows must include Pneumonitis, Diarrhoea and a missing (NA) term.
  expected_terms <- c("Pneumonitis", "Diarrhoea", NA)
  expect_true(all(expected_terms %in% top_three$term))
})

test_that("screen_adr works with freq_threshold", {
  result <- screen_adr(
    .data = adr_,
    meddra = meddra_,
    term_level = "pt",
    freq_threshold = 0.05
  )

  # `freq_threshold` is given as a proportion (0.05) while `percentage` is on
  # a 0-100 scale, hence the cutoff of 5 here.
  expect_true(all(result$percentage >= 5))

  # At the pt level Pneumonitis is 92/750 (~12.3%) and Diarrhoea is 69/750
  # (9.2%), both above the 5% cutoff, so BOTH must be retained. Requiring only
  # one of them (`||`) would let a wrongly dropped term go unnoticed.
  expected_terms <- c("Pneumonitis", "Diarrhoea")
  expect_true(all(expected_terms %in% result$term))
})

test_that("screen_adr handles both freq_threshold and top_n specified with a warning", {
  # Supplying both filters must raise the warning below; the value is captured
  # from inside expect_warning() so it can be inspected afterwards.
  expect_warning(
    both_filters <- screen_adr(
      .data = adr_,
      meddra = meddra_,
      term_level = "pt",
      top_n = 3,
      freq_threshold = 0.1
    ),
    "Both 'freq_threshold' and 'top_n' are specified. Only 'top_n' will be applied."
  )

  # Three rows come back, i.e. top_n = 3 is what was applied.
  expect_equal(nrow(both_filters), 3)

  # Pneumonitis and Diarrhoea must both be among the returned terms.
  expect_true(all(c("Pneumonitis", "Diarrhoea") %in% both_filters$term))
})

test_that("screen_adr returns correct columns", {
  screened <- screen_adr(
    .data = adr_,
    meddra = meddra_,
    term_level = "pt",
    top_n = 3
  )

  # The output must expose at least these three columns (extra columns would
  # not make this check fail).
  required_cols <- c("term", "n", "percentage")
  expect_true(all(required_cols %in% names(screened)))
})

test_that("screen_adr returns empty data table for high freq_threshold", {
  # freq_threshold = 1 is expected to exclude every term of `adr_`.
  screened <- screen_adr(
    .data = adr_,
    meddra = meddra_,
    term_level = "pt",
    freq_threshold = 1
  )

  # No row survives the threshold.
  n_rows <- nrow(screened)
  expect_equal(n_rows, 0)
})

test_that("screen_adr handles term_level mismatch", {
  # An unknown term level must be rejected with the message listing the
  # accepted values.
  bad_level <- "invalid_level"

  expect_error(
    screen_adr(.data = adr_, meddra = meddra_, term_level = bad_level),
    "Invalid 'term_level' specified. Choose from 'soc', 'hlgt', 'hlt', 'pt', 'llt'."
  )
})

test_that("result is exact with low frequency ae compounds", {
  # 50 reports: 49 carry code 1 and a single one carries code 2.
  reports <- data.frame(
    UMCReportId = seq_len(50),
    MedDRA_Id = rep(c(1, 2), times = c(49, 1))
  )

  # Both codes belong to the same hlt term, "ae".
  dictionary <- data.frame(
    hlt_name = rep("ae", 2),
    llt_code = c(1, 2)
  )

  observed <- screen_adr(
    .data = reports,
    meddra = dictionary,
    term_level = "hlt",
    freq_threshold = 0.05
  )

  # Code 2 alone is 1/50 = 2%, below the 0.05 threshold, yet the expected
  # count for "ae" is the full 50: the rare code must still contribute.
  expected <- data.table(term = "ae", n = 50, percentage = 100)

  expect_equal(observed, expected)
})



test_that("result is correct with double adr entries per case", {
  # Report 1 lists code 1 twice; report 2 lists code 2 once.
  reports <- data.frame(
    UMCReportId = c(1, 1, 2),
    MedDRA_Id = c(1, 1, 2)
  )

  dictionary <- data.frame(
    hlt_name = c("ae", "other"),
    llt_code = c(1, 2)
  )

  observed <- screen_adr(
    .data = reports,
    meddra = dictionary,
    term_level = "hlt",
    freq_threshold = 0.05
  )

  # The duplicated row of report 1 counts once: one case per term, each being
  # 50% of the two reports.
  per_term_n <- c(1, 1)
  expected <- data.table(
    term = c("ae", "other"),
    n = per_term_n,
    percentage = c(50, 50)
  )

  expect_equal(observed, expected)
})

test_that("multiple entries in meddra do not count several folds", {
  # Two reports, one code each.
  reports <- data.frame(UMCReportId = c(1, 2), MedDRA_Id = c(1, 2))

  # The dictionary holds the (ae1, code 1) pair twice.
  dictionary <- data.frame(
    hlt_name = c("ae1", "ae1", "ae2"),
    llt_code = c(1, 1, 2)
  )

  observed <- screen_adr(
    .data = reports,
    meddra = dictionary,
    term_level = "hlt",
    freq_threshold = 0.05
  )

  # The repeated dictionary row must not inflate the count of "ae1".
  expected <- data.table(
    term = c("ae1", "ae2"),
    n = c(1, 1),
    percentage = c(50, 50)
  )

  expect_equal(observed, expected)
})

test_that("unused llt codes don't show up in output", {
  # Only codes 1 and 2 are reported.
  reports <- data.frame(UMCReportId = c(1, 2), MedDRA_Id = c(1, 2))

  # The dictionary additionally knows code 3 ("ae3"), which no report uses.
  dictionary <- data.frame(
    hlt_name = c("ae1", "ae2", "ae3"),
    llt_code = c(1, 2, 3)
  )

  observed <- screen_adr(
    .data = reports,
    meddra = dictionary,
    term_level = "hlt"
  )

  # "ae3" must be absent from the output rather than listed with a zero count.
  expected <- data.table(
    term = c("ae1", "ae2"),
    n = c(1, 1),
    percentage = c(50, 50)
  )

  expect_equal(observed, expected)
})

test_that("an llt code used in two hlt codes is counted twice", {
  # Report 1 has code 1; report 2 has codes 1 and 2.
  adr_test <- data.frame(
    UMCReportId = c(1, 2, 2),
    MedDRA_Id = c(1, 1, 2)
  )

  # Code 1 is attached to two hlt terms ("ae1" and "ae2"); code 2 to "ae3".
  meddra_test <- data.frame(
    hlt_name = c("ae1", "ae2", "ae3"),
    llt_code = c(1, 1, 2)
  )

  s_test <- screen_adr(
    .data = adr_test,
    meddra = meddra_test,
    term_level = "hlt"
  )

  # Both reports count for "ae1" and for "ae2"; only report 2 counts for "ae3".
  s_true <- data.table(
    term = c("ae1", "ae2", "ae3"),
    n = c(2, 2, 1),
    percentage = c(100, 100, 50)
  )

  expect_equal(s_test, s_true)

  n_cases <- length(unique(adr_test$UMCReportId))

  # Consequence: the counts summed over terms exceed the number of cases...
  expect_true(sum(s_test$n) > n_cases)

  # ...but no single term counts more cases than exist.
  expect_true(all(s_test$n <= n_cases))
})

test_that("works with arrow Tables", {
  # Both inputs are built as data.tables and converted to arrow Tables before
  # being handed to screen_adr().
  reports <- arrow::as_arrow_table(
    data.table(
      UMCReportId = c(1, 2, 3, 4),
      MedDRA_Id = c(1, 1, 2, 3)
    )
  )

  dictionary <- arrow::as_arrow_table(
    data.table(
      hlt_name = c("ae1", "ae2", "ae3"),
      llt_code = c(1, 2, 3)
    )
  )

  observed <- screen_adr(
    .data = reports,
    meddra = dictionary,
    term_level = "hlt"
  )

  # Four reports: two with code 1 ("ae1"), one each for "ae2" and "ae3". The
  # result is compared against a plain data.table.
  expected <- data.table(
    term = c("ae1", "ae2", "ae3"),
    n = c(2, 1, 1),
    percentage = c(50, 25, 25)
  )

  expect_equal(observed, expected)
})