# Author: Xuye Luo # Date: Dec 11, 2025 library(testthat) # Assuming 'Upsilon' package is loaded test_that("Functional pattern test", { # Functional relationship: y is functionally dependent on x (strong association) # Expected result: High statistic value, low p-value F0 <- matrix( c(0, 0, 9, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), nrow = 6, ncol = 3, byrow = TRUE ) # Run tests upsilon_stat_F0 <- upsilon.statistic(F0) upsilon_test_R_F0 <- upsilon.test(F0) # Check Upsilon results expect_equal(as.numeric(upsilon_stat_F0), 88.209, tolerance = 1e-5) expect_equal(as.numeric(upsilon_test_R_F0$statistic), 88.209, tolerance = 1e-5) expect_equal(as.numeric(upsilon_test_R_F0$parameter), 10) expect_equal(as.numeric(upsilon_test_R_F0$p.value), 1.212397e-14, tolerance = 1e-7) expect_equal(as.numeric(upsilon_test_R_F0$estimate), 0.99, tolerance = 1e-5) # Run Modified Chi-sq tests mod_chisq_stat_F0 <- modified.chisq.statistic(F0) mod_chisq_test_R_F0 <- modified.chisq.test(F0) # Check Modified Chi-sq results expect_equal(as.numeric(mod_chisq_stat_F0), 20, tolerance = 1e-5) expect_equal(as.numeric(mod_chisq_test_R_F0$statistic), 20, tolerance = 1e-5) expect_equal(as.numeric(mod_chisq_test_R_F0$parameter), 10) expect_equal(as.numeric(mod_chisq_test_R_F0$p.value), 0.02925269, tolerance = 1e-7) expect_equal(as.numeric(mod_chisq_test_R_F0$estimate), 0.7071068, tolerance = 1e-6) # Run Modified G-test mod_gtest_F0 <- modified.gtest(F0) # Check Modified G-test results expect_equal(as.numeric(mod_gtest_F0$statistic), 27.52555, tolerance = 1e-5) expect_equal(as.numeric(mod_gtest_F0$parameter), 10) expect_equal(as.numeric(mod_gtest_F0$p.value), 0.002149252, tolerance = 1e-5) }) test_that("Independent pattern test", { # Independence case: variables are statistically independent # Expected result: Low statistic value, high p-value I0 <- matrix( c(3, 4, 0, 0, 0, 0, 2, 5, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0), nrow = 6, ncol = 3 ) # Upsilon Test upsilon_stat_I0 <- upsilon.statistic(I0) upsilon_test_R_I0 <- upsilon.test(I0) expect_equal(as.numeric(upsilon_stat_I0), 0.927, tolerance = 1e-5) expect_equal(as.numeric(upsilon_test_R_I0$statistic), 0.927, tolerance = 1e-5) expect_equal(as.numeric(upsilon_test_R_I0$parameter), 10) expect_equal(as.numeric(upsilon_test_R_I0$p.value), 0.9998786, tolerance = 1e-7) expect_equal(as.numeric(upsilon_test_R_I0$estimate), 0.1014889, tolerance = 1e-6) # Modified Chi-sq Test mod_chisq_stat_I0 <- modified.chisq.statistic(I0) mod_chisq_test_R_I0 <- modified.chisq.test(I0) expect_equal(as.numeric(mod_chisq_stat_I0), 0.3244375, tolerance = 1e-6) expect_equal(as.numeric(mod_chisq_test_R_I0$statistic), 0.3244375, tolerance = 1e-6) expect_equal(as.numeric(mod_chisq_test_R_I0$parameter), 10) expect_equal(as.numeric(mod_chisq_test_R_I0$p.value), 0.9999992, tolerance = 1e-7) expect_equal(as.numeric(mod_chisq_test_R_I0$estimate), 0.09006074, tolerance = 1e-6) # Modified G-test mod_gtest_I0 <- modified.gtest(I0) expect_equal(as.numeric(mod_gtest_I0$statistic), 0.3232078, tolerance = 1e-6) expect_equal(as.numeric(mod_gtest_I0$parameter), 10) expect_equal(as.numeric(mod_gtest_I0$p.value), 0.9999992, tolerance = 1e-7) }) test_that("Empty pattern test", { # Edge case: Matrix containing only zeros # Expected result: Zero statistics, p-value of 1 E0 <- matrix(0, nrow = 6, ncol = 3) # Upsilon Test upsilon_stat_E0 <- upsilon.statistic(E0) upsilon_test_R_E0 <- upsilon.test(E0) expect_equal(as.numeric(upsilon_stat_E0), 0) expect_equal(as.numeric(upsilon_test_R_E0$statistic), 0) expect_equal(as.numeric(upsilon_test_R_E0$parameter), 10) expect_equal(as.numeric(upsilon_test_R_E0$p.value), 1) expect_equal(as.numeric(upsilon_test_R_E0$estimate), 0) # Modified Chi-sq Test mod_chisq_stat_E0 <- modified.chisq.statistic(E0) mod_chisq_test_R_E0 <- modified.chisq.test(E0) expect_equal(as.numeric(mod_chisq_stat_E0), 0) expect_equal(as.numeric(mod_chisq_test_R_E0$statistic), 0) expect_equal(as.numeric(mod_chisq_test_R_E0$parameter), 10) expect_equal(as.numeric(mod_chisq_test_R_E0$p.value), 1) expect_equal(as.numeric(mod_chisq_test_R_E0$estimate), 0) # Modified G-test mod_gtest_E0 <- modified.gtest(E0) expect_equal(as.numeric(mod_gtest_E0$statistic), 0) expect_equal(as.numeric(mod_gtest_E0$parameter), 10) expect_equal(as.numeric(mod_gtest_E0$p.value), 1) }) test_that("Goodness-of-Fit test", { # Testing the Goodness-of-Fit implementation G0 <- matrix(c(5, 0, 5, 0)) upsilon_gof_stat_G0 <- upsilon.gof.statistic(G0) upsilon_gof_test_R_G0 <- upsilon.gof.test(G0) expect_equal(as.numeric(upsilon_gof_stat_G0), 10) expect_equal(as.numeric(upsilon_gof_test_R_G0$statistic), 10) expect_equal(as.numeric(upsilon_gof_test_R_G0$parameter), 3) expect_equal(as.numeric(upsilon_gof_test_R_G0$p.value), 0.0185661355, tolerance = 1e-7) }) # --- R vs C++ Consistency Tests --- # Use set.seed() to ensure reproducibility for random samples test_that("Upsilon result consistency between R and CPP", { set.seed(123) # set seed for reproducibility x <- sample(1:10, 10, replace = TRUE) y <- sample(1:10, 10, replace = TRUE) U0 <- as.data.frame.array(table(x, y)) upsilon_stat_R <- upsilon.statistic(U0) upsilon_test_R <- upsilon.test(U0) upsilon_test_cpp <- fast.upsilon.test(x, y) # Comparing numeric results from R implementation vs C++ implementation expect_equal(as.numeric(upsilon_stat_R), as.numeric(upsilon_test_cpp$statistic)) expect_equal(as.numeric(upsilon_test_R$statistic), as.numeric(upsilon_test_cpp$statistic)) expect_equal(as.numeric(upsilon_test_R$p.value), as.numeric(upsilon_test_cpp$p.value)) expect_equal(as.numeric(upsilon_test_R$parameter), as.numeric(upsilon_test_cpp$parameter)) expect_equal(as.numeric(upsilon_test_R$estimate), as.numeric(upsilon_test_cpp$estimate)) }) test_that("No-NA Pearson's Chi-squared Test consistency between R and CPP", { set.seed(123) x <- sample(1:10, 10, replace = TRUE) y <- sample(1:10, 10, replace = TRUE) U0 <- as.data.frame.array(table(x, y)) chisq_stat_R <- modified.chisq.statistic(U0) chisq_test_R <- modified.chisq.test(U0) chisq_test_cpp <- fast.chisq.test(x, y) expect_equal(as.numeric(chisq_stat_R), as.numeric(chisq_test_cpp$statistic)) expect_equal(as.numeric(chisq_test_R$statistic), as.numeric(chisq_test_cpp$statistic)) expect_equal(as.numeric(chisq_test_R$p.value), as.numeric(chisq_test_cpp$p.value)) expect_equal(as.numeric(chisq_test_R$parameter), as.numeric(chisq_test_cpp$parameter)) expect_equal(as.numeric(chisq_test_R$estimate), as.numeric(chisq_test_cpp$estimate)) }) test_that("No-NA G-Test consistency between R and CPP", { set.seed(123) x <- sample(1:10, 10, replace = TRUE) y <- sample(1:10, 10, replace = TRUE) U0 <- as.data.frame.array(table(x, y)) gtest_R <- modified.gtest(U0) gtest_cpp <- fast.gtest(x, y) expect_equal(as.numeric(gtest_R$statistic), as.numeric(gtest_cpp$statistic)) expect_equal(as.numeric(gtest_R$p.value), as.numeric(gtest_cpp$p.value)) expect_equal(as.numeric(gtest_R$parameter), as.numeric(gtest_cpp$parameter)) expect_equal(as.numeric(gtest_R$estimate), as.numeric(gtest_cpp$estimate)) })