library(NAIR)


# Generate data -----------------------------------------------------------



# Shared fixtures for the tests below: simulated data, several network
# objects built from it, and per-sample files in a variety of formats
# written to tempdir().

# Fix the RNG seed before simulating the toy data.
set.seed(42)
dat <- simulateToyData()

# Overwrite the count and frequency of selected rows with missing and
# non-finite values (NA, NaN, -Inf, Inf); the cluster-statistics tests check
# how these values are aggregated.
dat[107, c("CloneCount", "CloneFrequency")] <- NA
dat[11, c("CloneCount", "CloneFrequency")] <- NaN
dat[5, c("CloneCount", "CloneFrequency")] <- -Inf
dat[103, c("CloneCount", "CloneFrequency")] <- Inf
dat[191, c("CloneCount", "CloneFrequency")] <- NA
dat[192, c("CloneCount", "CloneFrequency")] <- NaN
dat[193, c("CloneCount", "CloneFrequency")] <- -Inf
dat[198, c("CloneCount", "CloneFrequency")] <- Inf

# Main network object `net`: built without plots, then extended step by step
# with node statistics, cluster statistics, a second clustering and plots.
# NOTE(review): the suppressed warnings are presumably caused by the
# non-finite values injected above -- confirm.
suppressWarnings(
  net <- buildRepSeqNetwork(dat, "CloneSeq", plots = FALSE)
)
net <- addNodeStats(net)
# Cluster statistics are stored under the ID column "cluster_greedy", using
# CloneCount as the count column.
net <- addClusterStats(net, count_col = "CloneCount",
                       cluster_id_name = "cluster_greedy"
)
# Second clustering (leiden), stored under its own ID column.
net <- addClusterMembership(net = net, cluster_fun = "leiden",
                            cluster_id_name = "cluster_leiden"
)

# Add plots with a range of argument combinations; none are printed.
# Two colour variables sharing a single colour scheme.
suppressWarnings(
  net <- addPlots(net, print_plots = FALSE,
                  color_nodes_by = c("cluster_greedy", "cluster_leiden"),
                  color_scheme = "Viridis"
  )
)
# One colour scheme and title per colour variable, with node size mapped to
# a variable and explicit size limits.
suppressWarnings(
  net <- addPlots(net, print_plots = FALSE,
                  color_nodes_by = c("transitivity", "CloneCount"),
                  color_scheme = c("plasma-1", "default"),
                  size_nodes_by = "CloneCount",
                  node_size_limits = c(0.5, 2),
                  color_title = c("Transitivity", "Clone Count"),
                  size_title = "Clone Count"
  )
)
# Numeric node size instead of a variable name.
suppressWarnings(
  net <- addPlots(net, print_plots = FALSE,
                  size_nodes_by = 2
  )
)
# NULL plot title and subtitle; one custom and one "auto" colour title.
suppressWarnings(
  net <- addPlots(net, print_plots = FALSE,
                  color_nodes_by = c("eigen_centrality", "authority_score"),
                  size_nodes_by = "eigen_centrality",
                  color_title = c("Eigen Centrality", "auto"),
                  plot_title = NULL,
                  plot_subtitle = NULL
  )
)
# Colour legend switched off.
suppressWarnings(
  net <- addPlots(net, print_plots = FALSE,
                  color_nodes_by = "coreness",
                  size_nodes_by = "coreness",
                  color_legend = FALSE
  )
)
# NULL colour title.
suppressWarnings(
  net <- addPlots(net, print_plots = FALSE,
                  color_nodes_by = "page_rank",
                  size_nodes_by = "page_rank",
                  color_title = NULL
  )
)

# Further networks built in a single call with different options.
# net2: isolated nodes kept, cluster statistics computed with a count column.
suppressWarnings(
  net2 <- buildRepSeqNetwork(dat, "CloneSeq", print_plots = FALSE,
                             drop_isolated_nodes = FALSE,
                             cluster_stats = TRUE,
                             count_col = "CloneCount"
  )
)
# net3: cluster statistics without a count column.
suppressWarnings(
  net3 <- buildRepSeqNetwork(dat, "CloneSeq", print_plots = FALSE,
                             cluster_stats = TRUE
  )
)
# net4: node statistics only.
suppressWarnings(
  net4 <- buildRepSeqNetwork(dat, "CloneSeq", print_plots = FALSE,
                             node_stats = TRUE
  )
)
# net5: all other arguments left at their defaults.
suppressWarnings(
  net5 <- buildRepSeqNetwork(dat, "CloneSeq", print_plots = FALSE)
)

# Network built on two sequence columns at once.
# NOTE(review): simulateToyData(chains = 2) presumably yields paired-chain
# data with AlphaSeq, BetaSeq and UMIs columns -- confirm.
dat2 <- simulateToyData(chains = 2)
suppressWarnings(
  sc_net <- buildRepSeqNetwork(
    dat2,
    seq_col = c("AlphaSeq", "BetaSeq"),
    count_col = "UMIs",
    node_stats = TRUE,
    stats_to_include = "all",
    cluster_stats = TRUE,
    color_nodes_by = "SampleID",
    size_nodes_by = "UMIs",
    node_size_limits = c(0.5, 3),
    print_plots = FALSE
  )
)

# Small multi-sample data set used by the loading tests.
# NOTE(review): with output_dir set, simulateToyData() presumably writes
# Sample1.rds and Sample2.rds to tempdir(); the loading tests read those
# files -- confirm.
samples <- simulateToyData(sample_size = 10,
                           output_dir = tempdir()
)
sample_1 <- subset(samples, SampleID == "Sample1")
sample_2 <- subset(samples, SampleID == "Sample2")

# Write each sample to tempdir() in several file formats.
# Comma-separated, no row names.
utils::write.csv(sample_1, file.path(tempdir(), "Sample1.csv"),
                 row.names = FALSE
)
utils::write.csv(sample_2, file.path(tempdir(), "Sample2.csv"),
                 row.names = FALSE
)
# write.csv2 variant, no row names.
utils::write.csv2(sample_1, file.path(tempdir(), "Sample1b.csv"),
                  row.names = FALSE
)
utils::write.csv2(sample_2, file.path(tempdir(), "Sample2b.csv"),
                  row.names = FALSE
)
# Comma-separated with row names.
utils::write.csv(sample_1, file.path(tempdir(), "Sample1c.csv"),
                 row.names = TRUE
)
utils::write.csv(sample_2, file.path(tempdir(), "Sample2c.csv"),
                 row.names = TRUE
)
# write.table with its default separator, with row names.
utils::write.table(sample_1, file.path(tempdir(), "Sample1.txt"),
                   row.names = TRUE
)
utils::write.table(sample_2, file.path(tempdir(), "Sample2.txt"),
                   row.names = TRUE
)
# Tab-separated with row names.
utils::write.table(sample_1, file.path(tempdir(), "Sample1.tsv"), sep = "\t",
                   row.names = TRUE
)
utils::write.table(sample_2, file.path(tempdir(), "Sample2.tsv"), sep = "\t",
                   row.names = TRUE
)
# No file extension: tab-separated, comma as decimal mark, custom NA string.
utils::write.table(sample_1, file.path(tempdir(), "Sample1"),
                   sep = "\t", row.names = TRUE, dec = ",", na = "NA!"
)
utils::write.table(sample_2, file.path(tempdir(), "Sample2"),
                   sep = "\t", row.names = TRUE, dec = ",", na = "NA!"
)
# No file extension: "@" as separator, row names but no header line.
utils::write.table(sample_1, file.path(tempdir(), "Sample1b"),
                   sep = "@", row.names = TRUE, col.names = FALSE
)
utils::write.table(sample_2, file.path(tempdir(), "Sample2b"),
                   sep = "@", row.names = TRUE, col.names = FALSE
)
# .rda files in which each sample is saved under its own symbol.
save(sample_1, file = file.path(tempdir(), "Sample1.rda"))
save(sample_2, file = file.path(tempdir(), "Sample2.rda"))
# .rda files in which both samples are saved under the same symbol `x`.
x <- sample_1
save(x, file = file.path(tempdir(), "Sample1b.rda"))
x <- sample_2
save(x, file = file.path(tempdir(), "Sample2b.rda"))

# Reference result for the combineSamples() tests: the same filters applied
# directly to `samples`, with the sample IDs relabelled to the custom IDs
# those tests pass via `sample_ids`.
samples_filtered <- filterInputData(samples,
                                    seq_col = "CloneSeq",
                                    min_seq_length = 13,
                                    drop_matches = "GGG",
                                    subset_cols = c("CloneSeq", "SampleID")
)
samples_filtered$SampleID[samples_filtered$SampleID == "Sample1"] <- "id01"
samples_filtered$SampleID[samples_filtered$SampleID == "Sample2"] <- "id02"

# Distance Functions ------------------------------------------------------



test_that("hamDistBounded works as expected", {

  # Check one bounded Hamming distance; the label keeps failure messages
  # specific to the pair and bound being tested.
  expectHam <- function(a, b, k, expected) {
    expect_equal(
      hamDistBounded(a, b, k), expected,
      label = sprintf('hamDistBounded("%s", "%s", %s)', a, b, k)
    )
  }

  # Equal-length strings with a bound large enough to return the distance
  expectHam("foo", "foo", 3, 0)
  expectHam("foo", "fee", 3, 2)
  expectHam("foo", "fie", 3, 2)
  expectHam("foo", "foe", 3, 1)
  expectHam("foo", "fum", 3, 2)
  expectHam("foo", "bar", 3, 3)

  # Same pairs with bound 1: distances above the bound come back as -1
  expectHam("foo", "fee", 1, -1)
  expectHam("foo", "fie", 1, -1)
  expectHam("foo", "foe", 1, 1)
  expectHam("foo", "fum", 1, -1)
  expectHam("foo", "bar", 1, -1)

  # Strings of different lengths
  expectHam("foo", "fubar", 10, 4)
  expectHam("foo", "foobar", 10, 3)
  expectHam("foo", "barfoo", 10, 6)
  expectHam("1234567", "1.23457", 7, 5)
  expectHam("1234567", "1.23457", 3, -1)
  expectHam("1234567890", "123456789", 10, 1)
  expectHam("1234567890", "234567890", 10, 10)
  expectHam("foobar", "fubar", 6, 5)

  # All pairwise distances (bound 5) within a vector of sequences
  pairwiseHam <- function(seqs) {
    sapply(seqs, function(x) {
      sapply(seqs, function(y) {
        hamDistBounded(x, y, k = 5)
      })
    })
  }

  # Expected pairwise distances for c("A", "AA", "AB", "BB")
  truth4 <- matrix(
    c(0, 1, 1, 2,
      1, 0, 1, 2,
      1, 1, 0, 1,
      2, 2, 1, 0),
    nrow = 4, byrow = TRUE
  )
  expect_equal(
    pairwiseHam(c("A", "AA", "AB", "BB")), truth4,
    ignore_attr = TRUE
  )

  # Expected pairwise distances for c("A", "AA", "ACCC", "AB", "BB")
  truth5 <- matrix(
    c(0, 1, 3, 1, 2,
      1, 0, 3, 1, 2,
      3, 3, 0, 3, 4,
      1, 1, 3, 0, 1,
      2, 2, 4, 1, 0),
    nrow = 5, byrow = TRUE
  )
  expect_equal(
    pairwiseHam(c("A", "AA", "ACCC", "AB", "BB")), truth5,
    ignore_attr = TRUE
  )

})

test_that("levDistBounded works as expected", {

  # Check one bounded Levenshtein distance; the label keeps failure messages
  # specific to the pair and bound being tested.
  expectLev <- function(a, b, k, expected) {
    expect_equal(
      levDistBounded(a, b, k), expected,
      label = sprintf('levDistBounded("%s", "%s", %s)', a, b, k)
    )
  }

  # Negative and zero bounds
  expectLev("foo", "foo", -5, -1)
  expectLev("foo", "foo", 0, 0)
  expectLev("foo", "bar", 0, -1)

  # Distance above the bound gives -1; empty strings on either side
  expectLev("foo", "foobar", 1, -1)
  expectLev("", "bar", 2, -1)
  expectLev("", "bar", 10, 3)
  expectLev("foo", "", 2, -1)
  expectLev("foo", "", 10, 3)

  # Insertions, deletions and substitutions
  expectLev("foobar", "foo__bar", 10, 2)
  expectLev("foo__bar", "foobar", 10, 2)
  expectLev("foo", "bar", 3, 3)
  expectLev("1234567", "1.23457", 7, 2)
  expectLev("1234567", "1.23457", 3, 2)
  expectLev("1234567890", "123456789", 10, 1)
  expectLev("1234567890", "234567890", 10, 1)
  expectLev("foobar", "fubar", 6, 2)

  # All pairwise distances (bound 5) within a vector of sequences
  pairwiseLev <- function(seqs) {
    sapply(seqs, function(x) {
      sapply(seqs, function(y) {
        levDistBounded(x, y, k = 5)
      })
    })
  }

  # Expected pairwise distances for c("A", "AA", "AB", "BB")
  truth4 <- matrix(
    c(0, 1, 1, 2,
      1, 0, 1, 2,
      1, 1, 0, 1,
      2, 2, 1, 0),
    nrow = 4, byrow = TRUE
  )
  expect_equal(
    pairwiseLev(c("A", "AA", "AB", "BB")), truth4,
    ignore_attr = TRUE
  )

  # Expected pairwise distances for c("A", "AA", "ACCC", "AB", "BB")
  truth5 <- matrix(
    c(0, 1, 3, 1, 2,
      1, 0, 3, 1, 2,
      3, 3, 0, 3, 4,
      1, 1, 3, 0, 1,
      2, 2, 4, 1, 0),
    nrow = 5, byrow = TRUE
  )
  expect_equal(
    pairwiseLev(c("A", "AA", "ACCC", "AB", "BB")), truth5,
    ignore_attr = TRUE
  )

  # Each pair is checked in both argument orders
  expectLev("AA", "CACC", 10, 3)
  expectLev("CACC", "AA", 10, 3)
  expectLev("AA", "ACCC", 10, 3)
  expectLev("ACCC", "AA", 10, 3)
  expectLev("AA", "CCCA", 10, 3)
  expectLev("CCCA", "AA", 10, 3)
  expectLev("A", "CCC", 10, 3)
  expectLev("CCC", "A", 10, 3)
  expectLev("A", "CC", 10, 2)
  expectLev("CC", "A", 10, 2)

  # Long strings whose distance is exactly 10: returned with bound 10,
  # reported as -1 with bound 9
  alphabet <- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  starred <- "ABC*****IJKLMNOPQRST*****Z"
  expect_equal(10, levDistBounded(alphabet, starred, 10))
  expect_equal(-1, levDistBounded(alphabet, starred, 9))

  # Very short and empty inputs
  expectLev("", "A", 10, 1)
  expectLev("", "", 10, 0)
  expectLev("A", "", 10, 1)
  expectLev("A", "A", 10, 0)
  expectLev("AAAA", "AABA", 10, 1)
  expectLev("AAAA", "AABBA", 1, -1)
  expectLev("AAAA", "AABBA", 2, 2)
  expectLev("AAAA", "", 10, 4)
  expectLev("", "AAAA", 10, 4)
})



# Adjacency Matrix --------------------------------------------------------




test_that("generateAdjacencyMatrix behaves as expected", {

  # Method actually passed to generateAdjacencyMatrix(): "sort" is only
  # requested for cutoff 1 and "pattern" only for cutoffs up to 2; every
  # other combination falls back to "default".
  # NOTE(review): presumably these are the cutoffs each method supports --
  # confirm against the package documentation.
  methodCutoffLim <- function(method, cutoff) {
    # Scalar operands inside `if`, hence `&&` rather than elementwise `&`
    if ((method == "sort" && cutoff != 1) ||
        (method == "pattern" && cutoff > 2)) {
      return("default")
    }
    method
  }

  # Build an adjacency matrix, picking the method via methodCutoffLim().
  buildAdjMat <- function(seqs, cutoff, dist_type, method) {
    generateAdjacencyMatrix(
      seqs, dist_cutoff = cutoff, dist_type = dist_type,
      method = methodCutoffLim(method, cutoff)
    )
  }

  # Check the entries of an adjacency matrix and its dimnames: row names are
  # the indices of the retained sequences, column names the sequences
  # themselves.
  expectAdjMat <- function(adj, truth, kept, seqs) {
    expect_equal(as.matrix(adj), truth, ignore_attr = TRUE)
    expect_equal(kept, as.numeric(dimnames(adj)[[1]]))
    expect_equal(seqs[kept], dimnames(adj)[[2]])
  }

  # Input sequences; cloneSeqB is cloneSeq with "ACCC" inserted at position 3
  cloneSeq <- c("A", "AA", "AB", "BB")
  cloneSeqB <- c("A", "AA", "ACCC", "AB", "BB")

  # Expected adjacency matrices (the same for both distance types)
  adjMat_k2_truth <- matrix(1, nrow = 4, ncol = 4)
  adjMat_k1_truth <- adjMat_k2_truth
  adjMat_k1_truth[c(1, 2), 4] <- 0
  adjMat_k1_truth[4, c(1, 2)] <- 0
  # For cutoffs 1 and 2 the expected result for cloneSeqB covers only
  # sequences 1, 2, 4 and 5, so it equals the result for cloneSeq
  adjMatB_k1_truth <- adjMat_k1_truth
  adjMatB_k2_truth <- adjMat_k2_truth
  adjMatB_k4_truth <- matrix(1, nrow = 5, ncol = 5)
  adjMatB_k3_truth <- adjMatB_k4_truth
  adjMatB_k3_truth[3, 5] <- 0
  adjMatB_k3_truth[5, 3] <- 0

  for (method in c("default", "pattern", "sort")) {

    for (dist_type in c("levenshtein", "hamming")) {

      # With cutoff 0 the call is expected to warn and return an empty result
      expect_warning(
        adjMat_k0 <- buildAdjMat(cloneSeq, 0, dist_type, method)
      )
      expect_equal(0, length(adjMat_k0))
      expect_warning(
        adjMatB_k0 <- buildAdjMat(cloneSeqB, 0, dist_type, method)
      )
      expect_equal(0, length(adjMatB_k0))

      # Four-sequence input
      expectAdjMat(
        buildAdjMat(cloneSeq, 1, dist_type, method),
        adjMat_k1_truth, 1:4, cloneSeq
      )
      expectAdjMat(
        buildAdjMat(cloneSeq, 2, dist_type, method),
        adjMat_k2_truth, 1:4, cloneSeq
      )

      # Five-sequence input
      expectAdjMat(
        buildAdjMat(cloneSeqB, 1, dist_type, method),
        adjMatB_k1_truth, c(1, 2, 4, 5), cloneSeqB
      )
      expectAdjMat(
        buildAdjMat(cloneSeqB, 2, dist_type, method),
        adjMatB_k2_truth, c(1, 2, 4, 5), cloneSeqB
      )
      expectAdjMat(
        buildAdjMat(cloneSeqB, 3, dist_type, method),
        adjMatB_k3_truth, 1:5, cloneSeqB
      )
      expectAdjMat(
        buildAdjMat(cloneSeqB, 4, dist_type, method),
        adjMatB_k4_truth, 1:5, cloneSeqB
      )
    }

    # Remaining checks leave dist_type at its default unless stated otherwise

    # "fum" (position 4) is absent from the expected result
    mat <- matrix(1, nrow = 4, ncol = 4)
    mat[c(1, 2), 4] <- 0
    mat[4, c(1, 2)] <- 0
    rownames(mat) <- c(1, 2, 3, 5)
    colnames(mat) <- c("fee", "fie", "foe", "foo")
    mat <- Matrix::Matrix(mat, sparse = TRUE)
    mat2 <- generateAdjacencyMatrix(
      c("fee", "fie", "foe", "fum", "foo"),
      method = methodCutoffLim(method, 1)
    )
    expect_s4_class(mat2, "sparseMatrix")
    expect_equal(mat, mat2)
    expect_equal(colnames(mat), colnames(mat2))
    expect_equal(rownames(mat), rownames(mat2))

    # Default cutoff and cutoff 2: expect a warning and a 0 x 0 sparse matrix
    expect_warning(
      mat <- generateAdjacencyMatrix(
        c("foo", "foobar", "fubar", "bar"),
        method = methodCutoffLim(method, 1)
      )
    )
    expect_s4_class(mat, "sparseMatrix")
    expect_equal(dim(mat), c(0, 0))
    expect_warning(
      mat <- generateAdjacencyMatrix(
        c("foo", "foobar", "fubar", "bar"), dist_cutoff = 2,
        method = methodCutoffLim(method, 2)
      )
    )
    expect_s4_class(mat, "sparseMatrix")
    expect_equal(dim(mat), c(0, 0))

    # Keeping isolated nodes yields the 4 x 4 identity matrix
    mat <- as(diag(4), "dgCMatrix")
    mat2 <- generateAdjacencyMatrix(
      c("foo", "foobar", "fubar", "bar"),
      drop_isolated_nodes = FALSE,
      method = methodCutoffLim(method, 1)
    )
    expect_s4_class(mat2, "sparseMatrix")
    expect_equal(mat, mat2)

    # Levenshtein distance with cutoff 2
    mat <- matrix(1, nrow = 3, ncol = 3)
    mat[1, 3] <- mat[3, 1] <- 0
    rownames(mat) <- c(2, 3, 4)
    colnames(mat) <- c("foobar", "fubar", "bar")
    mat <- Matrix::Matrix(mat, sparse = TRUE)
    mat2 <- generateAdjacencyMatrix(
      c("foo", "foobar", "fubar", "bar"),
      dist_type = "levenshtein",
      dist_cutoff = 2,
      method = methodCutoffLim(method, 2)
    )
    expect_s4_class(mat2, "sparseMatrix")
    expect_equal(mat, mat2)
    expect_equal(colnames(mat), colnames(mat2))
    expect_equal(rownames(mat), rownames(mat2))

    # Cutoff 3
    mat <- matrix(1, nrow = 3, ncol = 3)
    mat[2, 3] <- mat[3, 2] <- 0
    rownames(mat) <- c(1, 2, 4)
    colnames(mat) <- c("foo", "foobar", "bar")
    mat <- Matrix::Matrix(mat, sparse = TRUE)
    mat2 <- generateAdjacencyMatrix(
      c("foo", "foobar", "fubar", "bar"),
      dist_cutoff = 3,
      method = methodCutoffLim(method, 3)
    )
    expect_s4_class(mat2, "sparseMatrix")
    expect_equal(mat, mat2)
    expect_equal(colnames(mat), colnames(mat2))
    expect_equal(rownames(mat), rownames(mat2))

    # No "col_ids.txt" file may be left behind in tempdir()
    expect_false(file.exists(file.path(tempdir(), "col_ids.txt")))
  }
})



# Network Building --------------------------------------------------------

test_that("generateNetworkObjects works", {
  # The object built from the toy data must pass the base-output check.
  base_output <- generateNetworkObjects(dat, "CloneSeq")
  expect_true(.isBaseNetworkOutput(base_output))
})

test_that("generateNetworkGraph works", {
  # Rebuild the graph from the stored adjacency matrix and check that it is
  # an igraph object consistent with the node data.
  rebuilt_graph <- generateNetworkGraph(net$adjacency_matrix)
  expect_true(.isIgraph(rebuilt_graph))
  expect_true(.doesIgraphMatchData(rebuilt_graph, net$node_data))
})



# Network Analysis --------------------------------------------------------

test_that("addNodeStats works", {
  # Every node statistic enabled by default must appear as a column of the
  # node data. names() has to be applied to chooseNodeStats() itself: applied
  # to the result of %in% it returns NULL, and all(NULL) is TRUE no matter
  # which columns exist.
  # NOTE(review): assumes chooseNodeStats() returns named logical flags, one
  # per statistic, and that enabled flags share their names with the node
  # data columns -- confirm.
  stat_flags <- chooseNodeStats()
  enabled_stats <- names(stat_flags)[as.logical(stat_flags)]
  expect_true(all(enabled_stats %in% names(net$node_data)))

  # Type checks on individual statistics
  expect_true(.isPosIntegerVector(net$node_data$degree))
  expect_true(.isNumericVector(net$node_data$transitivity))
  # Transitivity is expected to contain NA values for this network
  expect_true(.hasNAs(net$node_data$transitivity))
  expect_true(.isNumericVector(net$node_data$eigen_centrality))
  expect_true(.isNumericVector(net$node_data$authority_score))
})

test_that("addClusterMembership works", {
  # Requesting a cluster ID name that is already present triggers a message
  expect_message(
    object = addClusterMembership(net = net, cluster_id_name = "cluster_greedy"),
    regexp = "already contains a variable named"
  )

  # Both membership variables hold positive integers (factors allowed)
  node_data <- net$node_data
  expect_true(
    .isPosIntegerVector(node_data$cluster_greedy, factor_ok = TRUE)
  )
  expect_true(
    .isPosIntegerVector(node_data$cluster_leiden, factor_ok = TRUE)
  )

  # The details record which ID variable belongs to which algorithm
  expected_id_vars <- c(
    "fast_greedy" = "cluster_greedy",
    "leiden" = "cluster_leiden"
  )
  expect_equal(net$details$cluster_id_variable, expected_id_vars)
})

test_that("addClusterStats works", {
  expect_true(.hasClusterData(net))

  # The details record the cluster ID variable and count column that were used
  details <- net$details
  expect_equal(details$cluster_data_goes_with, "cluster_greedy")
  expect_equal(details$count_col_for_cluster_data, "CloneCount")

  # Required columns and their types
  cluster_data <- net$cluster_data
  required_cols <- c("cluster_id", "eigen_centrality_eigenvalue")
  expect_true(all(required_cols %in% names(cluster_data)))
  expect_true(.isPosIntegerVector(cluster_data$cluster_id, factor_ok = TRUE))
  expect_true(.isPosIntegerVector(cluster_data$node_count))
  expect_true(.isCharVector(cluster_data$seq_w_max_count))
  expect_true(.isNumericVector(cluster_data$diameter_length))
})

test_that("addClusterStats works with NA/Inf values in counts column", {

  # Check the aggregate and maximum count stored for one cluster.
  expectClusterCounts <- function(network, cluster, agg, max) {
    expect_equal(network$cluster_data$agg_count[[cluster]], agg)
    expect_equal(network$cluster_data$max_count[[cluster]], max)
  }

  # Network extended step by step
  expectClusterCounts(net, 3, agg = -Inf, max = 4422)
  expectClusterCounts(net, 6, agg = Inf, max = Inf)

  # Network built in one call with isolated nodes kept
  expect_true(.isBaseNetworkOutput(net2))
  expect_true(.hasClusterData(net2))
  expectClusterCounts(net2, 3, agg = -Inf, max = 4422)
  expectClusterCounts(net2, 6, agg = Inf, max = Inf)
  expectClusterCounts(net2, 95, agg = NA_real_, max = NA_real_)
  expectClusterCounts(net2, 96, agg = NA_real_, max = NA_real_)
  expectClusterCounts(net2, 97, agg = -Inf, max = -Inf)
  expectClusterCounts(net2, 98, agg = Inf, max = Inf)
})

# Loading -----------------------------------------------------------------

test_that("loadDataFromFileList works", {

  # Paths of the two per-sample files in tempdir() carrying the given suffix.
  sampleFiles <- function(suffix) {
    file.path(tempdir(), paste0("Sample", 1:2, suffix))
  }

  # The loaded data must match the combined `samples` data, attributes aside.
  expectSamples <- function(loaded) {
    expect_equal(loaded, samples, ignore_attr = TRUE)
  }

  # Nonexistent files are rejected
  missing_files <- replicate(5, tempfile())
  expect_error(
    loadDataFromFileList(missing_files, input_type = "rds"),
    "specifies one or more nonexistent files"
  )

  # RDS files
  expectSamples(
    loadDataFromFileList(sampleFiles(".rds"), input_type = "rds")
  )

  # RDA files, same symbol
  expectSamples(
    loadDataFromFileList(
      sampleFiles("b.rda"),
      input_type = "rda",
      data_symbols = "x"
    )
  )

  # RDA files, different symbols
  expectSamples(
    loadDataFromFileList(
      sampleFiles(".rda"),
      input_type = "rda",
      data_symbols = c("sample_1", "sample_2")
    )
  )

  # csv files
  expectSamples(
    loadDataFromFileList(sampleFiles(".csv"), input_type = "csv")
  )

  # semicolon-delimited with commas as decimals
  expectSamples(
    loadDataFromFileList(sampleFiles("b.csv"), input_type = "csv2")
  )

  # csv files with row names in first column
  expectSamples(
    loadDataFromFileList(
      sampleFiles("c.csv"),
      input_type = "csv",
      read.args = list(row.names = 1)
    )
  )

  # space-separated files
  expectSamples(
    loadDataFromFileList(
      sampleFiles(".txt"),
      input_type = "table",
      header = TRUE
    )
  )

  # tab-separated files
  expectSamples(
    loadDataFromFileList(
      sampleFiles(".tsv"),
      input_type = "tsv",
      header = TRUE
    )
  )

  # files requiring custom arguments to read.table()
  custom_read_args <- list(
    header = TRUE,
    sep = "\t",
    dec = ",",
    na.strings = "NA!",
    row.names = 1
  )
  expectSamples(
    loadDataFromFileList(
      sampleFiles(""),
      input_type = "table",
      read.args = custom_read_args
    )
  )

  # check that read.args values of header/sep override argument values
  expectSamples(
    loadDataFromFileList(
      sampleFiles(""),
      input_type = "table",
      header = FALSE, sep = "",
      read.args = custom_read_args
    )
  )

  # "@"-separated files without a header line: column names supplied
  # through read.args
  expectSamples(
    loadDataFromFileList(
      sampleFiles("b"),
      input_type = "table",
      sep = "@",
      read.args = list(
        row.names = 1,
        col.names = c("rownames",
                      "CloneSeq", "CloneFrequency",
                      "CloneCount", "SampleID"
        )
      )
    )
  )

})

test_that("combineSamples works", {

  # Paths of the two per-sample files in tempdir() carrying the given suffix.
  sampleFiles <- function(suffix) {
    file.path(tempdir(), paste0("Sample", 1:2, suffix))
  }

  # Combine both samples with the filter settings shared by the cases below
  # (the same filters used to build `samples_filtered`); arguments describing
  # the file format, and any extra ID vectors, are passed through `...`.
  combineFiltered <- function(suffix, ...) {
    combineSamples(
      sampleFiles(suffix),
      ...,
      seq_col = "CloneSeq",
      min_seq_length = 13,
      drop_matches = "GGG",
      subset_cols = "CloneSeq",
      sample_ids = c("id01", "id02")
    )
  }

  # The combined data must match the filtered reference, attributes aside.
  expectFiltered <- function(combined) {
    expect_equal(combined, samples_filtered, ignore_attr = TRUE)
  }

  # No filters
  all_samples <- combineSamples(sampleFiles(".rds"), input_type = "rds")
  expect_equal(all_samples, samples, ignore_attr = TRUE)

  # RDS files
  expectFiltered(combineFiltered(".rds", input_type = "rds"))

  # check subject and group IDs
  all_samples <- combineFiltered(
    ".rds",
    input_type = "rds",
    subject_ids = c("s01", "s02"),
    group_ids = c("g1", "g2")
  )
  expect_equal(names(all_samples),
               c(names(samples_filtered), "SubjectID", "GroupID"))
  expect_equal(unique(all_samples$SubjectID), c("s01", "s02"))
  expect_equal(unique(all_samples$GroupID), c("g1", "g2"))

  # RDA files, same symbol
  expectFiltered(
    combineFiltered("b.rda", input_type = "rda", data_symbols = "x")
  )

  # RDA files, different symbols
  expectFiltered(
    combineFiltered(
      ".rda",
      input_type = "rda",
      data_symbols = c("sample_1", "sample_2")
    )
  )

  # csv files
  expectFiltered(combineFiltered(".csv", input_type = "csv"))

  # semicolon-delimited with commas as decimals
  expectFiltered(combineFiltered("b.csv", input_type = "csv2"))

  # csv files with row names
  expectFiltered(
    combineFiltered(
      "c.csv",
      input_type = "csv",
      read.args = list(row.names = 1)
    )
  )

  # space-separated files
  expectFiltered(
    combineFiltered(".txt", input_type = "table", header = TRUE)
  )

  # tab-separated files, read as "tsv" and as "table" with explicit separator
  expectFiltered(combineFiltered(".tsv", input_type = "tsv"))
  expectFiltered(
    combineFiltered(
      ".tsv",
      input_type = "table",
      header = TRUE,
      sep = "\t"
    )
  )

  # custom files
  expectFiltered(
    combineFiltered(
      "",
      input_type = "table",
      read.args = list(
        header = TRUE, sep = "\t", dec = ",", na.strings = "NA!", row.names = 1
      )
    )
  )

})

# Saving ------------------------------------------------------------------

test_that("saveNetwork works", {

  # Path of a file written into the session temp directory
  saved_path <- function(file_name) file.path(tempdir(), file_name)

  # Type of every top-level element of a network list, as a named vector
  element_types <- function(network) vapply(network, typeof, character(1))

  # Individual: each component of the network is written to its own file
  suppressWarnings(saveNetwork(net3, tempdir(), "individual"))

  # Node metadata: the csv round trip is expected to produce exactly one
  # reported difference, about a column that is no longer a factor
  ndat <- read.csv(saved_path("MyRepSeqNetwork_NodeMetadata.csv"),
                   row.names = 1
  )
  foo <- all.equal(net3$node_data, ndat)
  expect_length(foo, 1)
  expect_match(foo, "is not a factor")
  expect_equal(rownames(net3$node_data), rownames(ndat))

  # Cluster metadata: same expectation as for the node metadata
  cdat <- read.csv(saved_path("MyRepSeqNetwork_ClusterMetadata.csv"))
  foo <- all.equal(net3$cluster_data, cdat)
  expect_length(foo, 1)
  expect_match(foo, "is not a factor")
  expect_equal(rownames(net3$cluster_data), rownames(cdat))

  # Remaining per-component files only need to exist
  other_files <- c(
    "MyRepSeqNetwork_Details.txt",
    "MyRepSeqNetwork_EdgeList.txt",
    "MyRepSeqNetwork_AdjacencyMatrix.mtx",
    "MyRepSeqNetwork_Plots.rda",
    "MyRepSeqNetwork.pdf",
    "MyRepSeqNetwork_GraphLayout.txt"
  )
  for (file_name in other_files) {
    # `info` identifies the missing file if the expectation fails
    expect_true(file.exists(saved_path(file_name)), info = file_name)
  }

  # rds (default output type)
  suppressWarnings(saveNetwork(net3, tempdir()))
  net3b <- readRDS(saved_path("MyRepSeqNetwork.rds"))
  expect_equal(element_types(net3), element_types(net3b))
  expect_true(.isBaseNetworkOutput(net3b))
  expect_true(.hasClusterData(net3b))
  expect_true(.hasPlots(net3b))

  # rda: load into an isolated environment so that nothing in the test frame
  # is overwritten, and so that a file lacking `net` fails loudly instead of
  # silently resolving `net` to the global fixture of the same name
  suppressWarnings(saveNetwork(net3, tempdir(), "rda"))
  loaded <- new.env()
  load(saved_path("MyRepSeqNetwork.rda"), envir = loaded)
  net3b <- get("net", envir = loaded, inherits = FALSE)
  expect_equal(element_types(net3), element_types(net3b))
  expect_true(.isBaseNetworkOutput(net3b))
  expect_true(.hasClusterData(net3b))
  expect_true(.hasPlots(net3b))
})


test_that("saveNetworkPlots works", {
  # Destination files for the pdf of plots and for the layout coordinates
  pdf_path <- file.path(tempdir(), "plots.pdf")
  layout_path <- file.path(tempdir(), "layout.txt")

  # Saving with verbose = TRUE must announce the written file
  suppressWarnings(
    expect_message(
      saveNetworkPlots(
        net3$plots,
        outfile = pdf_path,
        outfile_layout = layout_path,
        verbose = TRUE
      ),
      "saved to file"
    )
  )

  expect_true(file.exists(pdf_path))
  expect_true(file.exists(layout_path))

  # The layout read back as a two-column matrix must match the stored layout
  saved_layout <- matrix(scan(layout_path, quiet = TRUE), ncol = 2)
  expect_equal(saved_layout, net3$plots$graph_layout, tolerance = 1e-04)

})



# Filtering and Subsetting ------------------------------------------------

test_that("filterInputData works", {
  # Columns to retain in the filtered output
  kept_cols <- c("CloneSeq", "CloneFrequency", "SampleID")

  filtered_data <- filterInputData(
    dat,
    seq_col = "CloneSeq",
    min_seq_length = 13,
    drop_matches = "GGGG",
    subset_cols = kept_cols
  )

  expect_equal(nrow(filtered_data), 105)
})

test_that("getNeighborhood works", {
  # Seed fixed before simulating; the counts asserted below are hard-coded
  set.seed(42)
  toy_data <- simulateToyData(sample_size = 500)

  # Get neighborhood around a single target clone sequence
  target <- "GGGGGGGAATTGG"
  nbd <-
    getNeighborhood(
      toy_data,
      seq_col = "CloneSeq",
      target_seq = target
    )

  # expect_equal() reports the observed value on failure, unlike
  # expect_true(x == y)
  expect_equal(nrow(nbd), 67)
  expect_equal(ncol(nbd), 4)

  # Frequency of each distinct sequence in the neighborhood; the target
  # itself must be the most frequent one
  nbd_t <- table(nbd$CloneSeq)
  expect_equal(length(nbd_t), 22)
  expect_equal(max(nbd_t), 9)
  expect_equal(names(nbd_t)[which.max(nbd_t)], target)
})

test_that("aggregateIdenticalClones works", {
  # Seven clones over three distinct sequences, with two candidate grouping
  # variables (time point and subject)
  my_data <- data.frame(
    clone_seq = c("ATCG", rep("ACAC", 2), rep("GGGG", 4)),
    clone_count = rep(1, 7),
    clone_freq = rep(1/7, 7),
    time_point = c("t_0", rep(c("t_0", "t_1"), 3)),
    subject_id = c(rep(1, 5), rep(2, 2))
  )

  # no grouping (the stray trailing comma, which passed an empty argument,
  # has been removed from this call)
  data_agg <-
    aggregateIdenticalClones(
      my_data,
      "clone_seq",
      "clone_count",
      "clone_freq"
    )
  expect_equal(dim(data_agg), c(3, 4))

  # group clones by time point
  data_agg_time <-
    aggregateIdenticalClones(
      my_data,
      "clone_seq",
      "clone_count",
      "clone_freq",
      grouping_cols = "time_point"
    )
  expect_equal(dim(data_agg_time), c(5, 5))

  # group clones by subject ID
  data_agg_subject <-
    aggregateIdenticalClones(
      my_data,
      "clone_seq",
      "clone_count",
      "clone_freq",
      grouping_cols = "subject_id"
    )
  expect_equal(dim(data_agg_subject), c(4, 5))

  # group clones by time point and subject ID
  data_agg_time_subject <-
    aggregateIdenticalClones(
      my_data,
      "clone_seq",
      "clone_count",
      "clone_freq",
      grouping_cols =
        c("subject_id", "time_point")
    )
  expect_equal(dim(data_agg_time_subject), c(7, 6))
})



# Visualization -----------------------------------------------------------


test_that("addPlots works", {
  expect_true(.hasPlots(net))

  # Both cluster-colored plots must be present ...
  cluster_plots <- c("cluster_greedy", "cluster_leiden")
  for (plot_name in cluster_plots) {
    expect_true(.hasElem(net$plots, plot_name))
  }
  # ... and each must be a ggraph object
  for (plot_name in cluster_plots) {
    expect_true(.isGgraph(net$plots[[plot_name]]))
  }
})

test_that("extractLayout works", {
  # Layout recovered from the first plot must equal the stored layout
  first_plot <- net$plots[[1]]
  expect_equal(
    extractLayout(first_plot),
    net$plots$graph_layout,
    ignore_attr = TRUE
  )
})







test_that("plots legends behave correctly", {

  # Per-aesthetic guide entries of a plot. When `plot$guides` inherits from
  # "Guides" the entries live in its `guides` field; otherwise `plot$guides`
  # itself is used.
  guide_entries <- function(plot) {
    if (inherits(plot$guides, "Guides")) plot$guides$guides else plot$guides
  }

  # Guide entry for one aesthetic
  get_guide <- function(plot, aes) {
    guide_entries(plot)[[aes]]
  }

  # Guide settings (title, name, ...) for one aesthetic: the `params` field
  # of the entry for a "Guides" object, the entry itself otherwise
  get_guide_params <- function(plot, aes) {
    entry <- get_guide(plot, aes)
    if (inherits(plot$guides, "Guides")) entry$params else entry
  }

  # Names of the aesthetics that have a guide entry
  get_guide_names <- function(plot) {
    names(guide_entries(plot))
  }

  # Small expectation wrappers for the checks repeated below
  expect_guide_names <- function(plot, aes_names) {
    expect_equal(get_guide_names(plot), aes_names)
  }
  expect_guide_title <- function(plot, aes, title) {
    expect_equal(get_guide_params(plot, aes)$title, title)
  }
  expect_legend_guide <- function(plot, aes) {
    expect_equal(get_guide_params(plot, aes)$name, "legend")
  }
  expect_colourbar_guide <- function(plot) {
    expect_true(
      get_guide_params(plot, "colour")$name %in% c("colorbar", "colourbar")
    )
  }
  expect_no_guide <- function(plot, aes) {
    expect_equal(get_guide(plot, aes), "none")
  }

  both_aes <- c("colour", "size")


  # net: cluster_greedy
  expect_guide_names(net$plots$cluster_greedy, "colour")
  expect_guide_title(net$plots$cluster_greedy, "colour", "cluster_greedy")
  expect_legend_guide(net$plots$cluster_greedy, "colour")

  # net: cluster_leiden (no color legend; coloring described in the subtitle)
  expect_guide_names(net$plots$cluster_leiden, "colour")
  expect_no_guide(net$plots$cluster_leiden, "colour")
  expect_match(net$plots$cluster_leiden$labels$subtitle,
               "Nodes colored by cluster_leiden"
  )

  # net: transitivity
  expect_guide_names(net$plots$transitivity, both_aes)
  expect_guide_title(net$plots$transitivity, "colour", "Transitivity")
  expect_colourbar_guide(net$plots$transitivity)
  expect_guide_title(net$plots$transitivity, "size", "Clone Count")
  expect_legend_guide(net$plots$transitivity, "size")

  # net: CloneCount
  expect_guide_names(net$plots$CloneCount, both_aes)
  expect_guide_title(net$plots$CloneCount, "colour", "Clone Count")
  expect_legend_guide(net$plots$CloneCount, "colour")
  expect_guide_title(net$plots$CloneCount, "size", "Clone Count")
  expect_legend_guide(net$plots$CloneCount, "size")

  # net: eigen_centrality
  expect_guide_names(net$plots$eigen_centrality, both_aes)
  expect_guide_title(net$plots$eigen_centrality, "colour", "Eigen Centrality")
  expect_colourbar_guide(net$plots$eigen_centrality)
  expect_guide_title(net$plots$eigen_centrality, "size", "eigen_centrality")
  expect_legend_guide(net$plots$eigen_centrality, "size")

  # net: authority_score
  expect_guide_names(net$plots$authority_score, both_aes)
  expect_guide_title(net$plots$authority_score, "colour", "authority_score")
  expect_colourbar_guide(net$plots$authority_score)
  expect_guide_title(net$plots$authority_score, "size", "eigen_centrality")
  expect_legend_guide(net$plots$authority_score, "size")

  # net: coreness (no color legend)
  expect_guide_names(net$plots$coreness, both_aes)
  expect_no_guide(net$plots$coreness, "colour")
  expect_guide_title(net$plots$coreness, "size", "coreness")
  expect_legend_guide(net$plots$coreness, "size")

  # net: page_rank (color guide has no title)
  expect_guide_names(net$plots$page_rank, both_aes)
  expect_null(get_guide_params(net$plots$page_rank, "colour")$title)
  expect_colourbar_guide(net$plots$page_rank)
  expect_guide_title(net$plots$page_rank, "size", "page_rank")
  expect_legend_guide(net$plots$page_rank, "size")

  # net2: CloneCount
  expect_guide_names(net2$plots$CloneCount, "colour")
  expect_guide_title(net2$plots$CloneCount, "colour", "CloneCount")
  expect_colourbar_guide(net2$plots$CloneCount)

  # net3: a single plot, colored by cluster_id
  expect_equal(names(net3$plots), c("cluster_id", "graph_layout"))
  expect_guide_names(net3$plots$cluster_id, "colour")
  expect_guide_title(net3$plots$cluster_id, "colour", "cluster_id")
  expect_legend_guide(net3$plots$cluster_id, "colour")

  # net4: a single plot, colored by degree
  expect_equal(names(net4$plots), c("degree", "graph_layout"))
  expect_guide_names(net4$plots$degree, "colour")
  expect_guide_title(net4$plots$degree, "colour", "degree")
  expect_colourbar_guide(net4$plots$degree)

  # net5: uniform color, hence no guides at all
  expect_equal(names(net5$plots), c("uniform_color", "graph_layout"))
  expect_null(get_guide_names(net5$plots$uniform_color))

  # sc_net: SampleID
  expect_guide_names(sc_net$plots$SampleID, both_aes)
  expect_guide_title(sc_net$plots$SampleID, "colour", "SampleID")
  expect_legend_guide(sc_net$plots$SampleID, "colour")
  expect_guide_title(sc_net$plots$SampleID, "size", "UMIs")
  expect_legend_guide(sc_net$plots$SampleID, "size")

})


test_that("addPlots layout detection works", {

  # Every plot stored in `net` must use the layout recorded in
  # `net$plots$graph_layout`. Each plot is checked once (the original list
  # repeated the cluster_leiden check verbatim).
  net_plot_names <- c(
    "cluster_greedy", "cluster_leiden", "transitivity", "CloneCount",
    "eigen_centrality", "authority_score", "coreness", "page_rank"
  )
  for (plot_name in net_plot_names) {
    expect_equal(
      extractLayout(net$plots[[plot_name]]), net$plots$graph_layout,
      ignore_attr = TRUE, info = plot_name
    )
  }

  # The layout of net2 must differ from that of `net`; the all.equal()
  # report is expected to mention "400, 244"
  expect_match(
    all.equal(extractLayout(net2$plots$CloneCount), net$plots$graph_layout,
              check.attributes = FALSE
    ),
    "400, 244"
  )

  # The layouts of net3, net4 and net5 must differ numerically from that of
  # `net`
  other_nets <- list(net3 = net3, net4 = net4, net5 = net5)
  for (net_name in names(other_nets)) {
    expect_match(
      all.equal(extractLayout(other_nets[[net_name]]$plots[[1]]),
                net$plots$graph_layout,
                check.attributes = FALSE
      ),
      "Mean relative difference",
      info = net_name
    )
  }

  # Each of the other networks must agree with its own stored layout
  own_nets <- list(net2 = net2, net3 = net3, net4 = net4, net5 = net5)
  for (net_name in names(own_nets)) {
    own_plots <- own_nets[[net_name]]$plots
    expect_equal(extractLayout(own_plots[[1]]), own_plots$graph_layout,
                 ignore_attr = TRUE, info = net_name
    )
  }

})


# Dual Chain Analysis -----------------------------------------------------

test_that("buildRepSeqNetwork works with dual-chain", {
  # Structural checks on the dual-chain network, applied in a fixed order
  structure_checks <- list(
    .isBaseNetworkOutput,
    .hasPlots,
    .hasClusterData,
    .hasDetails
  )
  for (check in structure_checks) {
    expect_true(check(sc_net))
  }

  # Both chain columns must be recorded under their respective names
  expected_seq_cols <- c(a_col = "AlphaSeq", b_col = "BetaSeq")
  expect_equal(sc_net$details$seq_col, expected_seq_cols)

  # Cluster data must include the alpha-chain length summary
  cluster_vars <- names(sc_net$cluster_data)
  expect_true(is.element("mean_A_seq_length", cluster_vars))
})




# Associated Clusters -----------------------------------------------------



# Shared fixtures for the associated-cluster tests below. The seed is fixed
# before simulating; the tests that follow assert hard-coded row/column counts.
set.seed(42)

## Simulate 30 samples from two groups (treatment/control) ##
n_control <- n_treatment <- 15
n_samples <- n_control + n_treatment
sample_size <- 30 # (seqs per sample)
base_seqs <- # first five are associated with treatment
  c("CASSGAYEQYF", "CSVDLGKGNNEQFF", "CASSIEGQLSTDTQYF",
    "CASSEEGQLSTDTQYF", "CASSPEGQLSTDTQYF",
    "RASSLAGNTEAFF", "CASSHRGTDTQYF", "CASDAGVFQPQHF")
# Relative generation probabilities by control/treatment group
# (one row per sample, one column per entry of base_seqs)
# Control rows: weight 1 for the first five sequences, 30 for the last three
pgen_c <- matrix(rep(c(rep(1, 5), rep(30, 3)), times = n_control),
                 nrow = n_control, byrow = TRUE)
# Treatment rows: weights 1, 1, 1/3, 1/3, 1/3 for the first five sequences,
# 2 for the last three
pgen_t <- matrix(rep(c(1, 1, rep(1/3, 3), rep(2, 3)), times = n_treatment),
                 nrow = n_treatment, byrow = TRUE)
# Control rows first, then treatment rows (same order as group_labels below)
pgen <- rbind(pgen_c, pgen_t)
# Called with no_return = TRUE and output_dir = tempdir(): the samples are
# written to the session temp directory rather than returned
simulateToyData(
  samples = n_samples,
  sample_size = sample_size,
  prefix_length = 1,
  prefix_chars = c("", ""),
  prefix_probs = cbind(rep(1, n_samples), rep(0, n_samples)),
  affixes = base_seqs,
  affix_probs = pgen,
  num_edits = 0,
  output_dir = tempdir(),
  no_return = TRUE
)
# Paths of the simulated sample files
# NOTE(review): assumes simulateToyData() names its output Sample<i>.rds --
# confirm against the package documentation
sample_files <-
  file.path(tempdir(),
            paste0("Sample", 1:n_samples, ".rds")
  )
# Group label of each sample, in the same order as the rows of pgen
group_labels <- c(rep("reference", n_control),
                  rep("comparison", n_treatment))
# Sequences associated with group membership; reused by the tests and by the
# findAssociatedClones() calls below
associated_seqs <-
  findAssociatedSeqs(
    file_list = sample_files,
    input_type = "rds",
    group_ids = group_labels,
    seq_col = "CloneSeq",
    min_seq_length = NULL,
    drop_matches = NULL,
    min_sample_membership = 0,
    pval_cutoff = 0.1
  )

# Fresh temp paths, each used further down as the output_dir of one
# findAssociatedClones() call (default, rda, csv, tsv and table output)
dir2 <- tempfile()
dir2b <- tempfile()
dir2c <- tempfile()
dir2d <- tempfile()
dir2e <- tempfile()

test_that("findAssociatedSeqs works", {
  # Result computed once at file level, without subject IDs.
  # expect_s3_class()/expect_equal() report the observed class or value on
  # failure, unlike expect_true(x == y).
  expect_s3_class(associated_seqs, "data.frame")
  expect_equal(nrow(associated_seqs), 4)
  expect_equal(ncol(associated_seqs), 6)

  # Same search with subject IDs supplied: every two consecutive samples
  # share a subject
  associated_seqs2 <-
    findAssociatedSeqs(
      file_list = sample_files,
      input_type = "rds",
      group_ids = group_labels,
      subject_ids = rep(1:15, each = 2),
      seq_col = "CloneSeq",
      min_seq_length = NULL,
      drop_matches = NULL,
      min_sample_membership = 0,
      pval_cutoff = 0.1
    )
  expect_equal(nrow(associated_seqs2), 3)
  expect_equal(ncol(associated_seqs2), 9)

})

# Search all samples for clones related to the associated sequences, writing
# the results into dir2 using the default output type
findAssociatedClones(
  assoc_seqs = associated_seqs$ReceptorSeq,
  file_list = sample_files,
  input_type = "rds",
  seq_col = "CloneSeq",
  group_ids = group_labels,
  drop_matches = NULL,
  min_seq_length = NULL,
  output_dir = dir2
)

# First output file, inspected by the test that follows
tmp <- readRDS(
  list.files(dir2, full.names = TRUE)[[1]]
)
test_that("findAssociatedClones works", {

  # Four files are expected in the output directory; `tmp` holds the
  # contents of the first one. expect_length()/expect_equal() report the
  # observed value on failure, unlike expect_true(x == y).
  expect_length(list.files(dir2), 4)
  expect_equal(nrow(tmp), 54)
  expect_equal(ncol(tmp), 6)
})

test_that("buildAssociatedClusterNetwork works", {
  # Build the network from the files written into dir2 by
  # findAssociatedClones()
  neighborhood_files <- list.files(dir2, full.names = TRUE)
  associated_clusters <-
    buildAssociatedClusterNetwork(
      file_list = neighborhood_files,
      seq_col = "CloneSeq",
      size_nodes_by = 1.5,
      print_plots = FALSE
    )

  # expect_equal() reports the observed dimensions on failure, unlike
  # expect_true(x == y)
  expect_true(.isBaseNetworkOutput(associated_clusters))
  expect_equal(nrow(associated_clusters$node_data), 175)
  expect_equal(ncol(associated_clusters$node_data), 16)

})


# Repeat the associated-clone search once per non-default output type, each
# run writing into its own directory (rda, csv, tsv, table -- in that order)
invisible(
  Map(
    function(out_dir, out_type) {
      findAssociatedClones(
        file_list = sample_files,
        input_type = "rds",
        group_ids = group_labels,
        seq_col = "CloneSeq",
        assoc_seqs = associated_seqs$ReceptorSeq,
        min_seq_length = NULL,
        drop_matches = NULL,
        output_dir = out_dir,
        output_type = out_type
      )
    },
    c(dir2b, dir2c, dir2d, dir2e),
    c("rda", "csv", "tsv", "table")
  )
)

test_that("associated clusters functions exchange non-default intermediate data types correctly", {

  # Directory written by findAssociatedClones() for each non-default output
  # type; the name of each entry is the matching `input_type`
  intermediate_dirs <- c(rda = dir2b, csv = dir2c, tsv = dir2d, table = dir2e)

  for (file_type in names(intermediate_dirs)) {
    associated_clusters <-
      buildAssociatedClusterNetwork(
        file_list = list.files(intermediate_dirs[[file_type]],
                               full.names = TRUE
        ),
        input_type = file_type,
        seq_col = "CloneSeq",
        size_nodes_by = 1.5,
        print_plots = FALSE
      )

    # The network must be the same size whatever the intermediate file type;
    # `info` identifies the file type if an expectation fails
    expect_true(.isBaseNetworkOutput(associated_clusters), info = file_type)
    expect_equal(nrow(associated_clusters$node_data), 175, info = file_type)
    expect_equal(ncol(associated_clusters$node_data), 16, info = file_type)
  }

})



# Public Clusters ---------------------------------------------------------



# Seed fixed before simulating the samples used by the public-cluster tests
set.seed(42)

## Simulate 30 samples with a mix of public/private sequences ##
n_samples <- 30
sample_size <- 30 # (seqs per sample)
base_seqs <- c(
  "CASSIEGQLSTDTQYF", "CASSEEGQLSTDTQYF", "CASSSVETQYF",
  "CASSPEGQLSTDTQYF", "RASSLAGNTEAFF", "CASSHRGTDTQYF", "CASDAGVFQPQHF",
  "CASSLTSGYNEQFF", "CASSETGYNEQFF", "CASSLTGGNEQFF", "CASSYLTGYNEQFF",
  "CASSLTGNEQFF", "CASSLNGYNEQFF", "CASSFPWDGYGYTF", "CASTLARQGGELFF",
  "CASTLSRQGGELFF", "CSVELLPTGPLETSYNEQFF", "CSVELLPTGPSETSYNEQFF",
  "CVELLPTGPSETSYNEQFF", "CASLAGGRTQETQYF", "CASRLAGGRTQETQYF",
  "CASSLAGGRTETQYF", "CASSLAGGRTQETQYF", "CASSRLAGGRTQETQYF",
  "CASQYGGGNQPQHF", "CASSLGGGNQPQHF", "CASSNGGGNQPQHF", "CASSYGGGGNQPQHF",
  "CASSYGGGQPQHF", "CASSYKGGNQPQHF", "CASSYTGGGNQPQHF",
  "CAWSSQETQYF", "CASSSPETQYF", "CASSGAYEQYF", "CSVDLGKGNNEQFF")
# Relative generation probabilities: one row per sample, one column per entry
# of base_seqs. The first n_samples columns form a Toeplitz matrix with
# weights decaying by a factor of 0.6 per step away from the diagonal; the
# remaining columns have weight 1 for every sample.
# The Toeplitz block is sized by n_samples (not sample_size) so that its rows
# and columns line up with the second block, whose shape is
# n_samples x (length(base_seqs) - n_samples).
pgen <- cbind(
  stats::toeplitz(0.6^(0:(n_samples - 1))),
  matrix(1, nrow = n_samples, ncol = length(base_seqs) - n_samples)
)
# Called with no_return = TRUE and output_dir = tempdir(): the samples are
# written to the session temp directory rather than returned
simulateToyData(
  samples = n_samples,
  sample_size = sample_size,
  prefix_length = 1,
  prefix_chars = c("", ""),
  prefix_probs = cbind(rep(1, n_samples), rep(0, n_samples)),
  affixes = base_seqs,
  affix_probs = pgen,
  num_edits = 0,
  output_dir = tempdir(),
  no_return = TRUE
)

# Paths of the simulated sample files in the session temp directory
sample_files <- file.path(
  tempdir(),
  paste0("Sample", 1:n_samples, ".rds")
)

# Search every sample for public clusters; results go to dir1 using the
# default output type
dir1 <- tempfile()
findPublicClusters(
  file_list = sample_files,
  input_type = "rds",
  seq_col = "CloneSeq",
  count_col = "CloneCount",
  drop_matches = NULL,
  min_seq_length = NULL,
  min_clone_count = 15000,
  min_node_count = 5,
  top_n_clusters = 3,
  output_dir = dir1
)

# First node-level output file, inspected by a later test
tmp <- readRDS(
  list.files(
    file.path(dir1, "node_meta_data"),
    full.names = TRUE
  )[[1]]
)


test_that("buildPublicClusterNetwork works", {
  # Node-level files written by findPublicClusters() into dir1
  node_files <- list.files(
    file.path(dir1, "node_meta_data"),
    full.names = TRUE
  )
  public_clusters <-
    buildPublicClusterNetwork(
      file_list = node_files,
      seq_col = "CloneSeq",
      count_col = "CloneCount",
      print_plots = FALSE
    )

  # expect_equal() reports the observed dimensions on failure, unlike
  # expect_true(x == y)
  expect_true(.isBaseNetworkOutput(public_clusters))
  expect_equal(nrow(public_clusters$node_data), 517)
  expect_equal(ncol(public_clusters$node_data), 28)
})

test_that("buildPublicClusterNetworkByRepresentative works", {
  # Cluster-level files written by findPublicClusters() into dir1
  cluster_files <- list.files(
    file.path(dir1, "cluster_meta_data"),
    full.names = TRUE
  )
  public_clusters <-
    buildPublicClusterNetworkByRepresentative(
      file_list = cluster_files,
      print_plots = FALSE
    )

  # expect_equal() reports the observed dimensions on failure, unlike
  # expect_true(x == y)
  expect_true(.isBaseNetworkOutput(public_clusters))
  expect_equal(nrow(public_clusters$node_data), 101)
  expect_equal(ncol(public_clusters$node_data), 31)
})




# Same public-cluster search as for dir1, written as csv files into dir1b
dir1b <- tempfile()
findPublicClusters(
  file_list = sample_files,
  input_type = "rds",
  seq_col = "CloneSeq",
  count_col = "CloneCount",
  drop_matches = NULL,
  min_seq_length = NULL,
  min_clone_count = 15000,
  min_node_count = 5,
  top_n_clusters = 3,
  output_type = "csv",
  output_dir = dir1b
)

# ... and once more, written as rda files into dir1c
dir1c <- tempfile()
findPublicClusters(
  file_list = sample_files,
  input_type = "rds",
  seq_col = "CloneSeq",
  count_col = "CloneCount",
  drop_matches = NULL,
  min_seq_length = NULL,
  min_clone_count = 15000,
  min_node_count = 5,
  top_n_clusters = 3,
  output_type = "rda",
  output_dir = dir1c
)

test_that("public clusters functions exchange non-default intermediate data types correctly", {

  # Node-level files: csv (dir1b) first, then rda (dir1c). The name of each
  # entry is the matching `input_type`.
  node_dirs <- c(csv = dir1b, rda = dir1c)
  for (file_type in names(node_dirs)) {
    public_clusters <-
      buildPublicClusterNetwork(
        file_list =
          list.files(
            file.path(node_dirs[[file_type]], "node_meta_data"),
            full.names = TRUE
          ),
        input_type = file_type,
        seq_col = "CloneSeq",
        count_col = "CloneCount",
        print_plots = FALSE
      )
    # `info` identifies the file type if an expectation fails
    expect_true(.isBaseNetworkOutput(public_clusters), info = file_type)
    expect_equal(nrow(public_clusters$node_data), 517, info = file_type)
    expect_equal(ncol(public_clusters$node_data), 28, info = file_type)
  }

  # Cluster-level files: rda (dir1c) first, then csv (dir1b)
  cluster_dirs <- c(rda = dir1c, csv = dir1b)
  for (file_type in names(cluster_dirs)) {
    public_clusters <-
      buildPublicClusterNetworkByRepresentative(
        file_list =
          list.files(
            file.path(cluster_dirs[[file_type]], "cluster_meta_data"),
            full.names = TRUE
          ),
        input_type = file_type,
        size_nodes_by = 1,
        print_plots = FALSE
      )
    expect_true(.isBaseNetworkOutput(public_clusters), info = file_type)
    expect_equal(nrow(public_clusters$node_data), 101, info = file_type)
    expect_equal(ncol(public_clusters$node_data), 31, info = file_type)
  }
})


test_that("findPublicClusters works", {
  # One node-level and one cluster-level file is expected per sample (30).
  # expect_length() reports the observed count on failure, unlike
  # expect_true(length(x) == n).
  expect_length(list.files(file.path(dir1, "node_meta_data")), 30)
  expect_length(list.files(file.path(dir1, "cluster_meta_data")), 30)
  # expect_equal(nrow(tmp), 18)  # sometimes reads as 15 (not consistently)
  expect_equal(ncol(tmp), 16)

})

# Clean up temp files -----------------------------------------------------



# clean up temp files

# Files written by the saveNetwork() tests, plus the pdf and layout files
# written by the saveNetworkPlots() test
file.remove(
  file.path(
    tempdir(),
    c("MyRepSeqNetwork_NodeMetadata.csv",
      "MyRepSeqNetwork_ClusterMetadata.csv",
      "MyRepSeqNetwork_EdgeList.txt",
      "MyRepSeqNetwork_AdjacencyMatrix.mtx",
      "MyRepSeqNetwork_Details.txt",
      "MyRepSeqNetwork_Plots.rda",
      "MyRepSeqNetwork_GraphLayout.txt",
      "MyRepSeqNetwork.pdf",
      "MyRepSeqNetwork.rds",
      "MyRepSeqNetwork.rda",
      "plots.pdf",
      "layout.txt"
    )
  )
)
# Simulated sample files in their various formats
file.remove(
  file.path(tempdir(),
            c(paste0("Sample", 1:30, ".rds"),
              paste0("Sample", 1:2, ".rda"),
              paste0("Sample", 1:2, "b.rda"),
              paste0("Sample", 1:2, ".csv"),
              paste0("Sample", 1:2, "b.csv"),
              paste0("Sample", 1:2, "c.csv"),
              paste0("Sample", 1:2, ".tsv"),
              paste0("Sample", 1:2, ".txt"),
              paste0("Sample", 1:2),
              paste0("Sample", 1:2, "b")
            )
  )
)
# Output directories of the associated-cluster and public-cluster searches
unlink(
  c(dir2, dir2b, dir2c, dir2d, dir2e, dir1, dir1b, dir1c),
  recursive = TRUE
)