test_that("check summariseObservationPeriod works", {
  # helper function
  removeSettings <- function(x) {
    attr(x, "settings") <- NULL
    return(x)
  }
  nPoints <- 512

  # Load mock database
  cdm <- omopgenerics::cdmFromTables(
    tables = list(
      person = dplyr::tibble(
        person_id = as.integer(1:4),
        gender_concept_id = c(8507L, 8532L, 8532L, 8507L),
        year_of_birth = 2010L,
        month_of_birth = 1L,
        day_of_birth = 1L,
        race_concept_id = 0L,
        ethnicity_concept_id = 0L
      ),
      observation_period = dplyr::tibble(
        observation_period_id = as.integer(1:8),
        person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(),
        observation_period_start_date = as.Date(c(
          "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10", "2020-03-10",
          "2020-03-01", "2020-04-10", "2020-03-10"
        )),
        observation_period_end_date = as.Date(c(
          "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31", "2020-03-27",
          "2020-03-09", "2020-05-08", "2020-12-10"
        )),
        period_type_concept_id = 0L
      )
    ),
    cdmName = "mock data"
  )
  cdm <- CDMConnector::copyCdmTo(
    con = connection(), cdm = cdm, schema = schema())

  # simple run
  expect_no_error(resAll <- summariseObservationPeriod(cdm$observation_period))
  expect_no_error(
    resAllD <- summariseObservationPeriod(cdm$observation_period, estimates = "density"))
  expect_no_error(
    resAllN <- summariseObservationPeriod(cdm$observation_period,
                                          estimates = c(
                                            "mean", "sd", "min", "q05", "q25",
                                            "median", "q75", "q95", "max")))
  expect_equal(
    resAllD |> dplyr::filter(!is.na(variable_level)) |>
      dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> removeSettings(),
    resAll |> dplyr::filter(!is.na(variable_level)) |>
      dplyr::mutate(estimate_value = as.numeric(estimate_value)) |> removeSettings()
  )

  # test estimates
  expect_no_error(
    resEst <- cdm$observation_period |>
      summariseObservationPeriod(estimates = c("mean", "median")))
  expect_true(all(
    resEst |>
      dplyr::filter(!.data$variable_name %in% c("number records", "number subjects")) |>
      dplyr::pull("estimate_name") |>
      unique() %in% c("mean", "median")
  ))

  # counts
  expect_identical(resAll$estimate_value[resAll$variable_name == "number records"], "8")
  x <- dplyr::tibble(
    group_level = c("overall", "1st", "2nd", "3rd"),
    variable_name = "number subjects",
    estimate_value = c("4", "4", "3", "1"))
  expect_identical(nrow(x), resAll |> dplyr::inner_join(x, by = colnames(x)) |> nrow())

  # records per person
  expect_identical(
    resAll |>
      dplyr::filter(
        variable_name == "records per person", estimate_name == "mean") |>
      dplyr::pull("estimate_value"),
    "2"
  )

  # duration
  expect_identical(
    resAll |>
      dplyr::filter(variable_name == "duration in days", estimate_name == "mean") |>
      dplyr::pull("estimate_value"),
    as.character(c(
      mean(c(20, 6, 113, 144, 18, 9, 29, 276)), mean(c(20, 18, 9, 276)),
      mean(c(6, 29, 144)), 113
    ))
  )

  # days to next observation period
  expect_identical(
    resAll |>
      dplyr::filter(variable_name == "days to next observation period", estimate_name == "mean") |>
      dplyr::pull("estimate_value"),
    as.character(c(
      mean(c(5, 32, 136, 26)), mean(c(5, 32, 136)), 26, NA
    ))
  )

  # duration - density
  xx <- resAllD |>
    dplyr::filter(variable_name == "duration in days", !is.na(variable_level)) |>
    dplyr::group_by(group_level) |>
    dplyr::summarise(
      n = dplyr::n(),
      area = sum(as.numeric(estimate_value[estimate_name == "density_y"])) * (
        max(as.numeric(estimate_value[estimate_name == "density_x"])) -
          min(as.numeric(estimate_value[estimate_name == "density_x"]))
        )/(nPoints - 1)
    )
  expect_identical(xx$n |> unique() |> sort(decreasing = TRUE), c(as.integer(nPoints*2L),6L))
  expect_identical(xx$area |> round(2) |> unique() |> sort(decreasing = TRUE), c(1,0))

  # days to next observation period - density
  xx <- resAll |>
    dplyr::filter(variable_name == "days to next observation period",
                  !is.na(variable_level)) |>
    dplyr::group_by(group_level) |>
    dplyr::summarise(
      n = dplyr::n(),
      area = sum(as.numeric(estimate_value[estimate_name == "density_y"])) * (
        max(as.numeric(estimate_value[estimate_name == "density_x"])) -
          min(as.numeric(estimate_value[estimate_name == "density_x"]))
      )/(nPoints - 1)
    )
  expect_identical(xx$n |> unique() |> sort(decreasing = TRUE) , c(as.integer(nPoints*2L),6L))
  expect_identical(xx$area[xx$group_level != "2nd"] |> round(2) |> unique(), 1)

  # only one exposure per individual
  cdm$observation_period <- cdm$observation_period |>
    dplyr::group_by(person_id) |>
    dplyr::filter(observation_period_id == min(observation_period_id, na.rm = TRUE)) |>
    dplyr::ungroup() |>
    dplyr::compute(name = "observation_period", temporary = FALSE)

  expect_no_error(resOne <- summariseObservationPeriod(cdm$observation_period))

  # counts
  expect_identical(resOne$estimate_value[resOne$variable_name == "number records"], "4")
  x <- dplyr::tibble(
    group_level = c("overall", "1st"),
    variable_name = "number subjects",
    estimate_value = c("4", "4"))
  expect_identical(nrow(x), resOne |> dplyr::inner_join(x, by = colnames(x)) |> nrow())

  # Check result type
  checkResultType(resOne, "summarise_observation_period")

  # empty observation period
  cdm$observation_period <- cdm$observation_period |>
    dplyr::filter(person_id == 0) |>
    dplyr::compute(name = "observation_period", temporary = FALSE)

  expect_no_error(resEmpty <- summariseObservationPeriod(cdm$observation_period))
  expect_true(nrow(resEmpty) == 2)
  expect_identical(unique(resEmpty$estimate_value), "0")

  # table works
  expect_no_error(tableObservationPeriod(resAll))
  expect_no_error(tableObservationPeriod(resOne))
  expect_no_error(tableObservationPeriod(resEmpty))

  # plot works
  expect_no_error(plotObservationPeriod(resAll))
  expect_no_error(plotObservationPeriod(resOne))
  # expect_warning(plotObservationPeriod(resEmpty)) THIS TEST NEEDS DISCUSSION

  # check all plots combinations
  expect_no_error(
    resAll |>
      plotObservationPeriod(
        variableName = "number subjects", plotType = "barplot")
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "number subjects", plotType = "boxplot")
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "number subjects", plotType = "densityplot")
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "number subjects", plotType = "random")
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "duration in days", plotType = "barplot")
  )
  expect_no_error(
    resAll |>
      plotObservationPeriod(
        variableName = "duration in days", plotType = "boxplot")
  )
  expect_error(
    resAllN |>
      plotObservationPeriod(
        variableName = "duration in days", plotType = "densityplot")
  )
  expect_no_error(
    resAllD |>
      plotObservationPeriod(
        variableName = "duration in days", plotType = "densityplot")
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "duration in days", plotType = "random")
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "records per person", plotType = "barplot")
  )
  expect_no_error(
    resAll |>
      plotObservationPeriod(
        variableName = "records per person", plotType = "boxplot")
  )
  expect_error(
    resAllN |>
      plotObservationPeriod(
        variableName = "records per person", plotType = "densityplot")
  )
  expect_no_error(
    resAllD |>
      plotObservationPeriod(
        variableName = "records per person", plotType = "densityplot")
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "records per person", plotType = "random")
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "days to next observation period", plotType = "barplot")
  )
  expect_no_error(
    resAll |>
      plotObservationPeriod(
        variableName = "days to next observation period", plotType = "boxplot")
  )
  expect_error(
    resAllN |>
      plotObservationPeriod(
        variableName = "days to next observation period", plotType = "densityplot")
  )
  expect_no_error(
    resAllD |>
      plotObservationPeriod(
        variableName = "days to next observation period", plotType = "densityplot")
  )
  expect_error(
    resAll |>
      plotObservationPeriod(
        variableName = "days to next observation period", plotType = "random")
  )

  PatientProfiles::mockDisconnect(cdm = cdm)
})

test_that("check it works with mockOmopSketch", {
  cdm <- mockOmopSketch(numberIndividuals = 5, seed = 1)

  sop <- summariseObservationPeriod(cdm$observation_period)

  # counts
  expect_identical(sop$estimate_value[sop$variable_name == "number records"], "5")
  x <- dplyr::tibble(
    strata_level = c("overall", "1st"),
    variable_name = "number subjects",
    estimate_value = c("5","5"))
  expect_identical(nrow(x), sop |> dplyr::inner_join(x, by = colnames(x)) |> nrow())

  # records per person
  expect_identical(
    sop |>
      dplyr::filter(
        variable_name == "records per person", estimate_name != "sd", !grepl("density", estimate_name)) |>
      dplyr::pull("estimate_value"),
    c(rep("1",8))
  )

  # duration
  expect_identical(
    sop |>
      dplyr::filter(variable_name == "duration in days", estimate_name %in% c("min","q25","median","q75","max")) |>
      dplyr::pull("estimate_value") |>
      unique() |>
      sort(),
    as.character(
      cdm$observation_period |>
        dplyr::mutate(duration = observation_period_end_date - observation_period_start_date + 1) |>
        dplyr::pull(duration) |>
        as.character() |>
        sort()
    )
  )

  # days to next observation period
  expect_identical(
    sop |>
      dplyr::filter(variable_name == "days to next observation period", estimate_name == "mean") |>
      dplyr::pull("estimate_value"),
    as.character(c(NA,NA))
  )

  # Check result type
  omopgenerics::validateResultArgument(sop)

  # table works
  expect_no_error(tableObservationPeriod(sop))

  # plot works
  expect_no_error(plotObservationPeriod(sop))

  PatientProfiles::mockDisconnect(cdm = cdm)

})

test_that("check summariseObservationPeriod strata works", {
  # helper function
  removeSettings <- function(x) {
    attr(x, "settings") <- NULL
    return(x)
  }
  nPoints <- 512

  # Load mock database
  cdm <- omopgenerics::cdmFromTables(
    tables = list(
      person = dplyr::tibble(
        person_id = as.integer(1:4),
        gender_concept_id = c(8507L, 8532L, 8532L, 8507L),
        year_of_birth = c(2010L, 2010L, 2011L, 2012L),
        month_of_birth = 1L,
        day_of_birth = 1L,
        race_concept_id = 0L,
        ethnicity_concept_id = 0L
      ),
      observation_period = dplyr::tibble(
        observation_period_id = as.integer(1:8),
        person_id = c(1, 1, 1, 2, 2, 3, 3, 4) |> as.integer(),
        observation_period_start_date = as.Date(c(
          "2020-03-01", "2020-03-25", "2020-04-25", "2020-08-10", "2020-03-10",
          "2020-03-01", "2020-04-10", "2020-03-10"
        )),
        observation_period_end_date = as.Date(c(
          "2020-03-20", "2020-03-30", "2020-08-15", "2020-12-31", "2020-03-27",
          "2020-03-09", "2020-05-08", "2020-12-10"
        )),
        period_type_concept_id = 0L
      )
    ),
    cdmName = "mock data"
  )
  cdm <- CDMConnector::copyCdmTo(
    con = connection(), cdm = cdm, schema = schema())

  # simple run
  expect_no_error(resAll <- summariseObservationPeriod(cdm$observation_period,
                                                          estimates = c("mean", "sd", "min", "max", "median", "density")))
  expect_no_error(resStrata <- summariseObservationPeriod(cdm$observation_period,
                                                       estimates = c("mean", "sd", "min", "max", "median", "density"),
                                                       ageGroup = list("<10" = c(0,9), ">=10" = c(10, Inf)),
                                                       sex = TRUE))
  # test overall
  x <- resStrata |>
    dplyr::filter(strata_name == "overall", strata_level == "overall") |>
    dplyr::rename("strata" = "estimate_value") |>
    dplyr::inner_join(
      resAll |>
        dplyr::rename("all" = "estimate_value")
    )
  expect_identical(x$strata, x$all)

  # check strata groups have the expected value
  expect_identical(resStrata |>
    dplyr::filter(variable_name == "number subjects",
                  strata_level == "Female",
                  group_level == "2nd") |>
    dplyr::pull("estimate_value"),"2")

  expect_identical(resStrata |>
                     dplyr::filter(variable_name == "number subjects",
                                   strata_level == ">=10 &&& Male",
                                   group_level == "3rd") |>
                     dplyr::pull("estimate_value"),"1")

  # duration
  expect_identical(
    resStrata |>
      dplyr::filter(variable_name == "duration in days", estimate_name == "mean", strata_level == ">=10") |>
      dplyr::pull("estimate_value"),
    as.character(c(
      mean(c(20, 18)),
      mean(c(6, 144)),
      mean(113)))
    )

  expect_identical(
    resStrata |>
      dplyr::filter(variable_name == "duration in days", estimate_name == "mean", strata_level == "<10") |>
      dplyr::pull("estimate_value"),
    as.character(c(
      mean(c(9, 276)),
      mean(c(29))))
  )

  # days to next observation period
  expect_identical(
    resStrata |>
      dplyr::filter(variable_name == "days to next observation period", estimate_name == "mean",
                    strata_level == "<10 &&& Female", group_level == "1st") |>
      dplyr::pull("estimate_value"), "32"
  )

  PatientProfiles::mockDisconnect(cdm = cdm)
})