test_that("Euclidean distance works and returns symmetric matrix", { x <- matrix(c(1, 2, 3, 4, 2, 3, 4, 5), nrow = 2, byrow = TRUE) d <- calculate_distances(x, method = "euclidean", output_format = "matrix") expect_true(is.matrix(d)) expect_equal(nrow(d), nrow(x)) expect_equal(ncol(d), nrow(x)) expect_equal(unname(diag(d)), rep(0, nrow(x))) expect_equal(d, t(d)) # symmetry }) test_that("Manhattan distance matches manual calculation", { x <- matrix(c(1, 2, 3, 2, 3, 4), nrow = 2, byrow = TRUE) d <- calculate_distances(x, method = "manhattan", output_format = "matrix") manual <- abs(1-2) + abs(2-3) + abs(3-4) expect_equal(d[1, 2], manual) }) test_that("Minkowski requires parameter p", { x <- matrix(1:6, nrow = 2) expect_error(calculate_distances(x, method = "minkowski")) expect_error(calculate_distances(x, method = "manhattan", p = 3)) d <- calculate_distances(x, method = "minkowski", p = 3) expect_true(inherits(d, "dist") || is.matrix(d)) }) test_that("Categorical distance works with matching_coefficient", { x <- data.frame(a = c("a", "b", "a"), b = c("x", "x", "y")) d <- calculate_distances(x, method = "matching_coefficient", output_format = "matrix") expect_true(all(d >= 0 & d <= 1)) }) test_that("Binary Jaccard distance returns warning on NaN cases", { x <- data.frame(v1 = c(0, 0), v2 = c(0, 0)) expect_warning( d <- calculate_distances(x, method = "jaccard", output_format = "matrix"), "NaN" ) expect_true(any(is.nan(d))) }) test_that("Dice and Hamming distances produce valid outputs", { x <- data.frame(v1 = c(0, 1, 1), v2 = c(1, 0, 1)) d_dice <- calculate_distances(x, method = "dice", output_format = "matrix") d_hamming <- calculate_distances(x, method = "hamming", output_format = "matrix") expect_true(all(d_dice >= 0 & d_dice <= 1, na.rm = TRUE)) expect_true(all(d_hamming >= 0 & d_hamming <= 1, na.rm = TRUE)) }) test_that("Cosine and correlation return distances with expected warnings", { x <- matrix(rnorm(50), nrow = 5) expect_warning( d_cos <- calculate_distances(x, method = "cosine"), "does not guarantee Euclidean" ) expect_warning( d_corr <- calculate_distances(x, method = "correlation"), "does not guarantee Euclidean" ) expect_true(all(d_cos >= 0, na.rm = TRUE)) expect_true(all(d_corr >= 0, na.rm = TRUE)) }) test_that("Mahalanobis distance runs without error", { set.seed(123) x <- matrix(rnorm(20), ncol = 4) d <- calculate_distances(x, method = "mahalanobis", output_format = "matrix") expect_true(all(d >= 0)) }) test_that("Gower distance works with auto detection and expected warning", { data("Data_HC_contamination", package = "dbrobust") df <- Data_HC_contamination expect_warning( d <- calculate_distances(df, method = "gower", output_format = "matrix"), "may not satisfy the Euclidean property" ) expect_true(is.matrix(d)) expect_equal(nrow(d), nrow(df)) }) test_that("Gower distance works with manual variable selection and expected warning", { data("Data_HC_contamination", package = "dbrobust") df <- Data_HC_contamination cont <- 1:4 bin <- 8:9 cat <- 5:7 expect_warning( d_manual <- calculate_distances(df, method = "gower", continuous_cols = cont, binary_cols = bin, categorical_cols = cat, output_format = "matrix"), "may not satisfy the Euclidean property" ) expect_true(is.matrix(d_manual)) expect_equal(nrow(d_manual), nrow(df)) }) test_that("Invalid method throws error", { x <- matrix(1:6, nrow = 2) expect_error(calculate_distances(x, method = "invalid_method")) }) test_that("Similarity output works with transformation options", { x <- data.frame(v1 = c(0, 1), v2 = c(1, 1)) d1 <- calculate_distances(x, method = "jaccard", output_format = "similarity", similarity_transform = "linear") d2 <- calculate_distances(x, method = "jaccard", output_format = "similarity", similarity_transform = "sqrt") expect_true(all(d1 >= 0 & d1 <= 1, na.rm = TRUE)) expect_true(all(d2 >= 0 & d2 <= 1, na.rm = TRUE)) }) test_that("Squared option squares distances but not similarities", { x <- matrix(c(1,2,3, 2,3,4), nrow = 2, byrow = TRUE) d1 <- calculate_distances(x, method = "euclidean", squared = FALSE, output_format = "matrix") d2 <- calculate_distances(x, method = "euclidean", squared = TRUE, output_format = "matrix") expect_equal(d2[1,2], d1[1,2]^2) sim <- calculate_distances(data.frame(v1=c(0,1), v2=c(1,1)), method = "jaccard", output_format = "similarity", squared = TRUE) expect_false(any(sim > 1)) # squared not applied to similarities })