# The principles of the algorithm have been published and peer reviewed thus the
# statistical core principles are out of scope of the validation.
#
# Instead we will formulate expectations from the stakeholders' point of view,
# for which we will then write validation test cases:
#
# We expect that sites that report fewer event counts have negative reporting probabilities
# while sites with high counts have positive reporting probabilities. When outlying
# sites are plotted we expect them to stand out in respect to their reporting levels.
#
# - We generate two data sets, one including under-reporting sites and one
#   including over-reporting sites.
# - Each data set contains 4 studies, each with 500 patients across 50 sites
#   (10 patients per site), in which 2 sites under- or over-report events by
#   either 25%, 50%, 75% or 100%.
# - Test Simulation, count events at all sites, check that outlier sites
#   have fewer or more events as expected
# - Apply {simaerep}
# - Test reporting probability, which should be negative for sites with low event
#   counts and positive for sites with high event counts.
# - Generate Plots for Test Cases
# - Visually inspect plots and set them up as reference plots for unit testing.
#   Plot integrity will be tested, every change on the plots requires repeated
#   visual inspection.


# Simulation grid: one row per simulated study. The four outlier magnitudes
# (100%, 75%, 50%, 25%) are each paired with both directions, encoded in
# `type` as -1 (suffix "_ur") and 1 (suffix "_or"). Rows are ordered by
# magnitude first and direction second.
df_grid <- tibble(
    factor_event_rate = rep(c(1, 0.75, 0.5, 0.25), each = 2),
    study_id = rep(c("100", "075", "050", "025"), each = 2),
    type = rep(c(-1, 1), times = 4)
  ) %>%
  mutate(
    # e.g. "study_075_ur" / "study_075_or"
    study_id = paste0("study_", study_id, "_", ifelse(type == -1, "ur", "or")),
    # the sign of the factor carries the direction of the simulated deviation
    factor_event_rate = factor_event_rate * type
  ) %>%
  mutate(
    # simulate one study per grid row, in row order
    data = map(
      factor_event_rate,
      function(rate) {
        # visit-specific event rates following a scaled gamma density
        visit_event_rates <-
          (dgamma(seq(1, 20, 0.5), shape = 5, rate = 2) * 5) + 0.1

        sim_test_data_study(
          n_pat = 500,
          n_sites = 50,
          ratio_out = 0.04, # 0.04 * 50 sites = 2 outlier sites
          factor_event_rate = rate,
          event_rates = visit_event_rates,
          max_visit = 20,
          max_visit_sd = 10
        )
      }
    )
  ) %>%
  mutate(
    # stamp every simulated study with its study label
    data = map2(
      data,
      study_id,
      function(df_study, study_label_new) {
        mutate(df_study, study_id = study_label_new)
      }
    )
  )

# all simulated studies stacked into a single visit-level data frame
df_visit_val <- df_grid %>%
  pull(data) %>%
  bind_rows()

# Validation test: across the simulated outlier sites, the event reporting
# probability returned by simaerep() must be strongly and significantly
# correlated with the simulated average event rate per visit.
test_that(paste("validation requirement - Event reporting probability must correlate",
                "with Pearson coefficient > 0.9 and P value < 0.001 with the average event rate",
                "in test set with 25, 50, 75, 100% simulated event over and under-reporting."), {

  evrep <- simaerep(df_visit_val)

  # One row per site with its simulated mean event rate, joined to the
  # site-level evaluation results; only the simulated outlier sites are kept.
  df_comp <- df_visit_val %>%
    distinct(study_id, site_id, is_out, event_per_visit_mean) %>%
    left_join(evrep$df_eval, by = c("study_id", "site_id")) %>%
    filter(is_out)

  # cor.test() defaults to Pearson's product-moment correlation
  correl <- cor.test(df_comp$event_per_visit_mean, df_comp$event_prob)

  # expect_gt()/expect_lt() report the observed value when the check fails;
  # unname() drops the "cor" name carried by the estimate.
  expect_gt(unname(correl$estimate), 0.9)
  expect_lt(correl$p.value, 0.001)

})


# Validation test: the plot for every simulated study is compared against its
# visually inspected reference plot.
test_that(paste("validation requirement - visually inspect plotted event reporting probability",
                "outlier in a test set with 25, 50, 75, 100% over and under-reporting"), {

  # graphics engines on CRAN are not always compatible with the references
  skip_on_cran()

  evrep <- simaerep(df_visit_val)

  study_ids <- unique(df_visit_val$study_id)

  for (idx in seq_along(study_ids)) {
    current_study <- study_ids[[idx]]

    # vdiffr is used because expect_snapshot_file returns different results
    # on different systems; the study id doubles as the reference plot title
    vdiffr::expect_doppelganger(
      current_study,
      plot(evrep, study = current_study)
    )
  }

})