test_that("sort_to_uncross works with unsorted algorithm", {
    set.seed(42)
    # Generate raw data
    raw_df <- data.frame(
        method1 = sample(1:3, 100, TRUE),
        method2 = sample(1:3, 100, TRUE)
    )

    # Aggregate by combination
    data <- as.data.frame(dplyr::count(raw_df, method1, method2, name = "value"))
    cols = c("method1", "method2")

    unsorted_df <- sort_to_uncross(data, cols = cols, wt = "value", method = "none")
    # unsorted_df <- dplyr::ungroup()(unsorted_df)

    ground_truth_df_path <- normalizePath(testthat::test_path("ground_truth", "unsorted_df.rds"))

    if (!file.exists(ground_truth_df_path)) {
        saveRDS(unsorted_df, file = ground_truth_df_path)
    }

    ground_truth_df <- readRDS(ground_truth_df_path)
    ground_truth_df <- ground_truth_df[, c(cols, "value"), drop = FALSE]
    ground_truth_df <- ground_truth_df |> dplyr::ungroup()

    expect_equal(as.data.frame(unsorted_df), as.data.frame(ground_truth_df))
})

test_that("sort_to_uncross works with greedy_wolf algorithm", {
    set.seed(42)
    # Generate raw data
    raw_df <- data.frame(
        method1 = sample(1:3, 100, TRUE),
        method2 = sample(1:3, 100, TRUE)
    )

    # Aggregate by combination
    data <- as.data.frame(dplyr::count(raw_df, method1, method2, name = "value"))
    cols = c("method1", "method2")
    greedy_wolf_df <- sort_to_uncross(data, cols = cols, wt = "value", method = "greedy_wolf")

    ground_truth_df_path <- normalizePath(testthat::test_path("ground_truth", "greedy_wolf_df.rds"))

    if (!file.exists(ground_truth_df_path)) {
        saveRDS(greedy_wolf_df, file = ground_truth_df_path)
    }

    ground_truth_df <- readRDS(ground_truth_df_path)
    ground_truth_df <- ground_truth_df[, c(cols, "value"), drop = FALSE]
    ground_truth_df <- ground_truth_df |> dplyr::ungroup()

    expect_equal(as.data.frame(greedy_wolf_df), as.data.frame(ground_truth_df))
})

test_that("sort_to_uncross works with greedy_wblf algorithm", {
    set.seed(42)
    # Generate raw data
    raw_df <- data.frame(
        method1 = sample(1:3, 100, TRUE),
        method2 = sample(1:3, 100, TRUE)
    )

    # Aggregate by combination
    data <- as.data.frame(dplyr::count(raw_df, method1, method2, name = "value"))
    cols = c("method1", "method2")
    greedy_wblf_df <- sort_to_uncross(data, cols = cols, wt = "value", method = "greedy_wblf")

    ground_truth_df_path <- normalizePath(testthat::test_path("ground_truth", "greedy_wblf_df.rds"))

    if (!file.exists(ground_truth_df_path)) {
        saveRDS(greedy_wblf_df, file = ground_truth_df_path)
    }

    ground_truth_df <- readRDS(ground_truth_df_path)
    ground_truth_df <- ground_truth_df[, c(cols, "value"), drop = FALSE]
    ground_truth_df <- ground_truth_df |> dplyr::ungroup()

    expect_equal(as.data.frame(greedy_wblf_df), as.data.frame(ground_truth_df))
})


test_that("sort_to_uncross works with tsp algorithm", {
    set.seed(42)
    # Generate raw data
    raw_df <- data.frame(
        method1 = sample(1:3, 100, TRUE),
        method2 = sample(1:3, 100, TRUE)
    )

    # Aggregate by combination
    data <- as.data.frame(dplyr::count(raw_df, method1, method2, name = "value"))
    cols = c("method1", "method2")
    tsp_df <- sort_to_uncross(data, cols = cols, wt = "value", method = "tsp")

    ground_truth_df_path <- normalizePath(testthat::test_path("ground_truth", "tsp_df.rds"))

    if (!file.exists(ground_truth_df_path)) {
        saveRDS(tsp_df, file = ground_truth_df_path)
    }

    ground_truth_df <- readRDS(ground_truth_df_path)
    ground_truth_df <- ground_truth_df[, c(cols, "value"), drop = FALSE]
    ground_truth_df <- ground_truth_df |> dplyr::ungroup()

    expect_equal(as.data.frame(tsp_df), as.data.frame(ground_truth_df))
})


make_more_tsp_2_layer_df <- function() {
    data <- data.frame(
        tissue = c(
            1, 1, 1,
            2, 2, 2, 2, 2, 2,
            3, 3, 3, 3, 3, 3, 3,
            4, 4,
            5, 5, 5, 5, 5, 5, 5, 5, 5
        ),
        cluster = c(
            6, 6, 7,
            6, 7, 7, 7, 7, 7,
            6, 8, 8, 8, 8, 8, 8,
            8, 8,
            8, 8, 8, 8, 8, 8, 8, 8, 8
        )
    )
    column1 <- "tissue"
    column2 <- "cluster"
    cols <- c(column1, column2)

    list(
        data = data,
        cols = cols
    )
}

make_more_tsp_3_layer_df <- function() {
    data <- data.frame(
        tissue = c(
            "BRAIN", "BRAIN", "BRAIN",
            "STOMACH", "STOMACH", "STOMACH", "STOMACH", "STOMACH", "STOMACH",
            "HEART", "HEART", "HEART", "HEART", "HEART", "HEART", "HEART",
            "T CELL", "T CELL",
            "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL"
        ),
        cluster = c(
            1, 1, 2,
            1, 2, 2, 2, 2, 2,
            1, 3, 3, 3, 3, 3, 3,
            4, 4,
            4, 4, 4, 4, 4, 4, 4, 4, 4
        ),
        sex = c(
            "male", "female", "male",
            "female", "male", "female", "female", "male", "female",
            "male", "female", "male", "female", "male", "female", "male",
            "female", "male",
            "male", "male", "male", "male", "male", "male", "male", "male", "male"
        )
    )
    cols <- c("tissue", "cluster", "sex")

    list(
        data = data,
        cols = cols
    )
}

make_more_tsp_3_layer_df_with_2_identical_layers <- function() {
    data <- data.frame(
        tissue = c(
            "BRAIN", "BRAIN", "BRAIN",
            "STOMACH", "STOMACH", "STOMACH", "STOMACH", "STOMACH", "STOMACH",
            "HEART", "HEART", "HEART", "HEART", "HEART", "HEART", "HEART",
            "T CELL", "T CELL",
            "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL"
        ),
        sex = c(
            "male", "female", "male",
            "female", "male", "female", "female", "male", "female",
            "male", "female", "male", "female", "male", "female", "male",
            "female", "male",
            "female", "male", "female", "male", "female", "male", "female", "female", "male"
        ),
        cluster = c(
            "BRAIN", "BRAIN", "BRAIN",
            "STOMACH", "STOMACH", "STOMACH", "STOMACH", "STOMACH", "STOMACH",
            "HEART", "HEART", "HEART", "HEART", "HEART", "HEART", "HEART",
            "T CELL", "T CELL",
            "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL"
        )
    )
    cols <- c("tissue", "cluster", "sex")

    list(
        data = data,
        cols = cols
    )
}

test_that("Objective calculation, more_tsp.Rmd, 3 layers, unsorted", {
    set.seed(42)
    
    input <- make_more_tsp_3_layer_df()
    data <- input$data
    cols <- input$cols

    clus_df_gather <- prep_for_lodes(data = data, cols = cols)

    clus_df_gather_sorted <- sort_to_uncross(clus_df_gather, cols = cols, wt = "value", method = "none", column_method = "none")

    num <- compute_crossing_objective(clus_df_gather_sorted, cols = cols)$output_objective

    testthat::expect_equal(num, 225)
})

test_that("Objective calculation, more_tsp.Rmd, 3 layers, tsp, optimize_column_order FALSE", {
    set.seed(42)
    
    input <- make_more_tsp_3_layer_df()
    data <- input$data
    cols <- input$cols

    clus_df_gather <- prep_for_lodes(data = data, cols = cols)

    clus_df_gather_sorted <- sort_to_uncross(clus_df_gather, cols = cols, wt = "value", method = "tsp", column_method = "none", weight_scalar = 1)

    num <- compute_crossing_objective(clus_df_gather_sorted, cols = cols)$output_objective

    testthat::expect_equal(num, 57)
})

test_that("Objective calculation, more_tsp.Rmd, 3 layers, tsp, optimize_column_order TRUE", {
    set.seed(42)
    
    input <- make_more_tsp_3_layer_df()
    data <- input$data
    cols <- input$cols

    clus_df_gather <- prep_for_lodes(data = data, cols = cols)

    clus_df_gather_sorted <- sort_to_uncross(clus_df_gather, cols = cols, wt = "value", method = "tsp", column_method = "tsp", options = list(optimize_column_order_per_cycle = TRUE), weight_scalar = 1)

    num <- compute_crossing_objective(clus_df_gather_sorted, cols = cols)$output_objective

    testthat::expect_equal(num, 57)
})


test_that("Objective calculation, more_tsp.Rmd, 3 layers with 2 identical layers, unsorted", {
    input <- make_more_tsp_3_layer_df_with_2_identical_layers()
    data <- input$data
    cols <- input$cols

    clus_df_gather <- prep_for_lodes(data = data, cols = cols)

    clus_df_gather_sorted <- sort_to_uncross(clus_df_gather, cols = cols, wt = "value", method = "none", column_method = "none", weight_scalar = 1)

    num <- compute_crossing_objective(clus_df_gather_sorted, cols = cols)$output_objective

    testthat::expect_equal(num, 74)
})

test_that("Objective calculation, more_tsp.Rmd, 3 layers with 2 identical layers, tsp, optimize_column_order FALSE", {
    set.seed(42)
    
    input <- make_more_tsp_3_layer_df_with_2_identical_layers()
    data <- input$data
    cols <- input$cols

    clus_df_gather <- prep_for_lodes(data = data, cols = cols)

    clus_df_gather_sorted <- sort_to_uncross(clus_df_gather, cols = cols, wt = "value", method = "tsp", column_method = "none", weight_scalar = 1)

    num <- compute_crossing_objective(clus_df_gather_sorted, cols = cols)$output_objective

    testthat::expect_equal(num, 56)
})

test_that("Objective calculation, more_tsp.Rmd, 3 layers with 2 identical layers, tsp, optimize_column_order TRUE", {
    set.seed(42)
    
    input <- make_more_tsp_3_layer_df_with_2_identical_layers()
    data <- input$data
    cols <- input$cols

    clus_df_gather <- prep_for_lodes(data = data, cols = cols)

    clus_df_gather_sorted <- sort_to_uncross(clus_df_gather, cols = cols, wt = "value", method = "tsp", column_method = "tsp", options = list(optimize_column_order_per_cycle = TRUE), weight_scalar = 1)

    num <- compute_crossing_objective(clus_df_gather_sorted, cols = cols)$output_objective

    testthat::expect_equal(num, 56)
})

test_that("get_lode_clusters correctly handles multiple factor columns", {
    set.seed(429144)
    
    data <- data.frame(
        method1 = factor(LETTERS[sample(1:3, 100, TRUE)]),
        method2 = factor(LETTERS[27 - sample(1:3, 100, TRUE)])
    )
    
    # sanity check input
    expect_equal(lapply(data, levels), list(
        method1 = c("A", "B", "C"),
        method2 = c("X", "Y", "Z")
    ))
    
    cluster_mapping <- data |>
        get_lode_clusters(cols = c(method1, method2), method = "left")
    
    # ---- expectations (adjust to actual return type) ----
    
    expect_true(!is.null(cluster_mapping))
    expect_true(length(cluster_mapping) > 0)
    
    # If it's a data.frame
    if (is.data.frame(cluster_mapping)) {
        expect_true(all(c("method1", "method2") %in% names(cluster_mapping)))
    }
    
    # If it's a named list
    if (is.list(cluster_mapping)) {
        expect_true(all(c("method1", "method2") %in% names(cluster_mapping)))
    }
    
    expected <- list(
        method1 = list(
            A = 1L,
            B = 2L,
            C = 3L
        ),
        method2 = list(
            X = 4L,
            Y = 5L,
            Z = 6L
        )
    )
    
    expect_identical(cluster_mapping, expected)
})

test_that("get_lode_clusters correctly handles multiple factor columns with string column names", {
    set.seed(429144)
    
    data <- data.frame(
        method1 = factor(LETTERS[sample(1:3, 100, TRUE)]),
        method2 = factor(LETTERS[27 - sample(1:3, 100, TRUE)])
    )
    
    # sanity check input
    expect_equal(lapply(data, levels), list(
        method1 = c("A", "B", "C"),
        method2 = c("X", "Y", "Z")
    ))
    
    cluster_mapping <- data |>
        get_lode_clusters(cols = c("method1", "method2"), method = "left")
    
    # ---- expectations (adjust to actual return type) ----
    
    expect_true(!is.null(cluster_mapping))
    expect_true(length(cluster_mapping) > 0)
    
    # If it's a data.frame
    if (is.data.frame(cluster_mapping)) {
        expect_true(all(c("method1", "method2") %in% names(cluster_mapping)))
    }
    
    # If it's a named list
    if (is.list(cluster_mapping)) {
        expect_true(all(c("method1", "method2") %in% names(cluster_mapping)))
    }
    
    expected <- list(
        method1 = list(
            A = 1L,
            B = 2L,
            C = 3L
        ),
        method2 = list(
            X = 4L,
            Y = 5L,
            Z = 6L
        )
    )
    
    expect_identical(cluster_mapping, expected)
})

test_that("get_lode_clusters correctly handles multiple factor columns with method advanced", {
    set.seed(429144)
    
    data <- data.frame(
        method1 = factor(LETTERS[sample(1:3, 100, TRUE)]),
        method2 = factor(LETTERS[27 - sample(1:3, 100, TRUE)])
    )
    
    # sanity check input
    expect_equal(lapply(data, levels), list(
        method1 = c("A", "B", "C"),
        method2 = c("X", "Y", "Z")
    ))
    
    cluster_mapping <- data |>
        get_lode_clusters(cols = c("method1", "method2"), method = "advanced", resolution=10)
    
    # ---- expectations (adjust to actual return type) ----
    
    expect_true(!is.null(cluster_mapping))
    expect_true(length(cluster_mapping) > 0)
    
    # If it's a data.frame
    if (is.data.frame(cluster_mapping)) {
        expect_true(all(c("method1", "method2") %in% names(cluster_mapping)))
    }
    
    # If it's a named list
    if (is.list(cluster_mapping)) {
        expect_true(all(c("method1", "method2") %in% names(cluster_mapping)))
    }
    
    expected <- list(
        method1 = list(
            A = 1L,
            B = 1L,
            C = 2L
        ),
        method2 = list(
            X = 2L,
            Y = 1L,
            Z = 1L
        )
    )
    
    expect_identical(cluster_mapping, expected)
})