test_cohort_collapse <- function(con, write_schema) {

  # Nuria's examples
  cohort_input <- tibble::tribble(
  ~cohort_definition_id, ~subject_id, ~cohort_start_date, ~cohort_end_date,
                      1,           3,       "2000-01-11",     "2000-02-21",
                      1,           3,       "2000-01-11",     "2000-02-21",
                      1,           3,       "2000-02-21",     "2000-04-04",
                      1,           3,       "2000-04-04",     "2000-05-15",
                      1,           3,       "2000-05-16",     "2000-06-26",
                      1,           3,       "2000-07-18",     "2000-08-28",
                      1,           3,       "2000-08-23",     "2000-10-03",
                      1,           3,       "2000-10-05",     "2000-11-15",
                      1,           3,       "2000-11-10",     "2000-12-21",
                      1,           4,       "2000-01-11",     "2000-12-21",
                      1,           4,       "2000-01-11",     "2000-02-21",
                      1,           4,       "2000-02-21",     "2000-04-04",
                      1,           4,       "2000-04-04",     "2000-05-15",
                      1,           4,       "2000-05-16",     "2000-06-26",
                      1,           4,       "2000-07-18",     "2000-08-28",
                      1,           4,       "2000-08-23",     "2000-10-03",
                      1,           4,       "2000-10-05",     "2000-11-15",
                      1,           4,       "2000-11-10",     "2000-12-21",
                      2,           3,       "2001-08-30",     "2001-09-28",
                      2,           3,       "2001-09-27",     "2001-10-26",
                      2,           3,       "2001-10-22",     "2001-11-20",
                      2,           3,       "2001-11-21",     "2001-12-20",
                      2,           3,       "2001-12-12",     "2002-01-10",
                      2,           3,       "2002-08-27",     "2002-09-25",
                      2,           3,       "2002-09-23",     "2002-10-22",
                      2,           3,       "2002-10-16",     "2002-11-14",
                      2,           3,       "2002-11-19",     "2002-12-18"
  ) %>% dplyr::mutate(dplyr::across(dplyr::matches("_date"), as.Date))

  expected_output <- tibble::tribble(
    ~cohort_definition_id, ~subject_id, ~cohort_start_date, ~cohort_end_date,
                        1,           3,       "2000-01-11",     "2000-05-15",
                        1,           3,       "2000-05-16",     "2000-06-26",
                        1,           3,       "2000-07-18",     "2000-10-03",
                        1,           3,       "2000-10-05",     "2000-12-21",
                        1,           4,       "2000-01-11",     "2000-12-21",
                        2,           3,       "2001-08-30",     "2001-11-20",
                        2,           3,       "2001-11-21",     "2002-01-10",
                        2,           3,       "2002-08-27",     "2002-11-14",
                        2,           3,       "2002-11-19",     "2002-12-18"
    ) %>% dplyr::mutate(dplyr::across(dplyr::matches("_date"), as.Date)) %>%
    dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date)

  if (dbms(con) == "oracle") {

    # TODO there is an issue inserting dates into an Oracle database
    cohort_input_oracle <- cohort_input %>%
      dplyr::mutate(dplyr::across(dplyr::matches("date"), as.character))

    DBI::dbWriteTable(con,
                      inSchema(write_schema, "tmp_cohort_collapse_input0"),
                      cohort_input_oracle,
                      overwrite = TRUE)

    input_db <- dplyr::tbl(con, inSchema(write_schema, "tmp_cohort_collapse_input0", dbms = dbms(con))) %>%
      dplyr::mutate(cohort_start_date = TO_DATE(cohort_start_date, "YYYY-MM-DD"),
                    cohort_end_date = TO_DATE(cohort_end_date, "YYYY-MM-DD")) %>%
      compute_query(name = "tmp_cohort_collapse_input",
                    temporary = FALSE,
                    schema = write_schema,
                    overwrite = TRUE)

    DBI::dbRemoveTable(con, inSchema(write_schema, "tmp_cohort_collapse_input0", dbms = dbms(con)))
  } else {
    DBI::dbWriteTable(con, inSchema(write_schema, "tmp_cohort_collapse_input", dbms = dbms(con)), cohort_input, overwrite = TRUE)
    input_db <- dplyr::tbl(con, inSchema(write_schema, "tmp_cohort_collapse_input", dbms = dbms(con)))
  }

  actual_output <- input_db %>%
    cohort_collapse() %>%
    dplyr::collect() %>%
    dplyr::tibble() %>%
    dplyr::arrange(.data$cohort_definition_id, .data$subject_id, .data$cohort_start_date) %>%
    dplyr::mutate(dplyr::across(dplyr::matches("date"), as.Date)) %>% # oracle returns datetimes
    dplyr::mutate(dplyr::across(dplyr::matches("id"), as.double)) # bigquery returns integers

  expect_identical(actual_output, expected_output)

  DBI::dbRemoveTable(con, inSchema(write_schema, "tmp_cohort_collapse_input", dbms = dbms(con)))


  # test every case (Allen's interval algebra) for two intervals and two people
  require(dplyr)
  intervals <- tibble::tribble(
    ~relationship,  ~cohort_definition_id, ~subject_id, ~cohort_start_date, ~cohort_end_date,
    "reference",    1,                     1,           "2022-01-05",       "2022-01-10",
    "precedes",     1,                     1,           "2022-01-01",       "2022-01-03",
    "meets",        1,                     1,           "2022-01-01",       "2022-01-04",
    "overlaps",     1,                     1,           "2022-01-01",       "2022-01-05",
    "finished_by",  1,                     1,           "2022-01-01",       "2022-01-10",
    "contains",     1,                     1,           "2022-01-01",       "2022-01-15",
    "starts",       1,                     1,           "2022-01-05",       "2022-01-07",
    "equals",       1,                     1,           "2022-01-05",       "2022-01-10",
    "reference",    1,                     2,           "2022-01-05",       "2022-01-10",
    "precedes",     1,                     2,           "2022-01-01",       "2022-01-03",
    "meets",        1,                     2,           "2022-01-01",       "2022-01-04",
    "overlaps",     1,                     2,           "2022-01-01",       "2022-01-05",
    "finished_by",  1,                     2,           "2022-01-01",       "2022-01-10",
    "contains",     1,                     2,           "2022-01-01",       "2022-01-15",
    "starts",       1,                     2,           "2022-01-05",       "2022-01-07",
    "equals",       1,                     2,           "2022-01-05",       "2022-01-10") %>%
    dplyr::mutate(dplyr::across(dplyr::matches("date"), as.Date))

  if (dbms(con) == "oracle") {
    # TODO there is an issue inserting dates into an Oracle database
    df <- intervals %>%
      dplyr::mutate(dplyr::across(dplyr::matches("date"), as.character))

    nm <- unique_table_name()
    DBI::dbWriteTable(con, nm, df, temporary = TRUE)

    db <- tbl(con, nm) %>%
      mutate(cohort_start_date = TO_DATE(cohort_start_date, "YYYY-MM-DD")) %>%
      mutate(cohort_end_date   = TO_DATE(cohort_end_date, "YYYY-MM-DD")) %>%
      compute_query()
  } else {
    DBI::dbWriteTable(con, inSchema(write_schema, "tmp_intervals", dbms = dbms(con)), intervals, overwrite = TRUE)
    db <- dplyr::tbl(con, inSchema(write_schema, "tmp_intervals", dbms = dbms(con)))
  }

  # precedes
  x <- db %>%
    dplyr::filter(relationship %in% c("reference", "precedes")) %>%
    dplyr::select(-"relationship") %>%
    cohort_collapse() %>%
    dplyr::collect() %>%
    dplyr::arrange(.data$subject_id, .data$cohort_start_date, .data$cohort_end_date)

  expected <- db %>%
    dplyr::filter(relationship %in% c("reference", "precedes")) %>%
    dplyr::select(-"relationship") %>%
    dplyr::collect() %>%
    dplyr::select("cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date") %>%
    dplyr::arrange(.data$subject_id, .data$cohort_start_date, .data$cohort_end_date)


  expect_equal(colnames(x), c("cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date"))
  expect_equal(x, expected)

  # meets
  x <- db %>%
    dplyr::filter(.data$relationship %in% c("reference", "meets")) %>%
    dplyr::select(-"relationship") %>%
    cohort_collapse() %>%
    dplyr::collect() %>%
    dplyr::select("cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date") %>%
    dplyr::arrange(.data$subject_id, .data$cohort_start_date, .data$cohort_end_date)

  expected <- db %>%
    dplyr::filter(relationship %in% c("reference", "meets")) %>%
    dplyr::select(-"relationship") %>%
    dplyr::collect() %>%
    dplyr::select("cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date") %>%
    dplyr::arrange(.data$subject_id, .data$cohort_start_date, .data$cohort_end_date)

  expect_equal(x, expected)

  # overlaps
  x <- db %>%
    dplyr::filter(relationship %in% c("reference", "overlaps")) %>%
    dplyr::select(-"relationship") %>%
    cohort_collapse() %>%
    dplyr::collect() %>%
    dplyr::select("cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date") %>%
    dplyr::arrange(.data$subject_id, .data$cohort_start_date, .data$cohort_end_date)

  expected <- tibble::tribble(
    ~cohort_definition_id, ~subject_id, ~cohort_start_date, ~cohort_end_date,
    1,                     1,           "2022-01-01",       "2022-01-10",
    1,                     2,           "2022-01-01",       "2022-01-10") %>%
    dplyr::mutate(dplyr::across(dplyr::matches("date"), as.Date))

  expect_equal(x, expected)


  # finished_by
  x <- db %>%
    filter(relationship %in% c("reference", "finished_by")) %>%
    dplyr::select(-"relationship") %>%
    cohort_collapse() %>%
    collect() %>%
    dplyr::select("cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date") %>%
    dplyr::arrange(.data$subject_id, .data$cohort_start_date, .data$cohort_end_date)

  expected <- tibble::tribble(
    ~cohort_definition_id, ~subject_id, ~cohort_start_date, ~cohort_end_date,
    1,                     1,           "2022-01-01",       "2022-01-10",
    1,                     2,           "2022-01-01",       "2022-01-10") %>%
    dplyr::mutate(dplyr::across(dplyr::matches("date"), as.Date))

  expect_equal(x, expected)

  # contains
  x <- db %>%
    filter(relationship %in% c("reference", "contains")) %>%
    dplyr::select(-"relationship") %>%
    cohort_collapse() %>%
    collect() %>%
    dplyr::select("cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date") %>%
    dplyr::arrange(.data$subject_id, .data$cohort_start_date, .data$cohort_end_date)

  expected <- tibble::tribble(
    ~cohort_definition_id, ~subject_id, ~cohort_start_date, ~cohort_end_date,
    1,                     1,           "2022-01-01",       "2022-01-15",
    1,                     2,           "2022-01-01",       "2022-01-15") %>%
    dplyr::mutate(dplyr::across(dplyr::matches("date"), as.Date))

  expect_equal(x, expected)

  # starts
  x <- db %>%
    filter(relationship %in% c("reference", "starts")) %>%
    dplyr::select(-"relationship") %>%
    cohort_collapse() %>%
    collect() %>%
    dplyr::select("cohort_definition_id", "subject_id", "cohort_start_date", "cohort_end_date") %>%
    dplyr::arrange(.data$subject_id, .data$cohort_start_date, .data$cohort_end_date)

  expected <- tibble::tribble(
    ~cohort_definition_id, ~subject_id, ~cohort_start_date, ~cohort_end_date,
    1,                     1,           "2022-01-05",       "2022-01-10",
    1,                     2,           "2022-01-05",       "2022-01-10") %>%
    dplyr::mutate(dplyr::across(dplyr::matches("date"), as.Date))

  expect_equal(x, expected)

  # equals
  x <- db %>%
    filter(relationship %in% c("reference", "equals")) %>%
    dplyr::select(-"relationship") %>%
    cohort_collapse() %>%
    collect() %>%
    arrange(subject_id)

  expected <- tibble::tribble(
    ~cohort_definition_id, ~subject_id, ~cohort_start_date, ~cohort_end_date,
    1,                     1,           "2022-01-05",       "2022-01-10",
    1,                     2,           "2022-01-05",       "2022-01-10") %>%
    dplyr::mutate(dplyr::across(dplyr::matches("date"), as.Date))

  expect_equal(x, expected)

  DBI::dbRemoveTable(con, inSchema(write_schema, "tmp_intervals", dbms = dbms(con)))
}

dbToTest <- c(
  "duckdb"
  ,"postgres"
  ,"redshift"
  ,"sqlserver"
  # ,"oracle"
  ,"snowflake"
  ,"bigquery"
)

# dbtype = "duckdb"
for (dbtype in dbToTest) {
  test_that(glue::glue("{dbtype} - cohort_collapse"), {
    if (dbtype != "duckdb") skip_on_ci()
    write_schema <- get_write_schema(dbtype)
    con <- get_connection(dbtype)
    skip_if(any(write_schema == "") || is.null(con))
    test_cohort_collapse(con, write_schema)
    disconnect(con)
  })
}

# TODO pmin and pmax do not work on sqlserver - add issue to dbplyr