# test-golden-exact.R # Exact-value tests for compute functions with hand-computed expected values. # Complements test-golden.R (pipeline integration) with numeric verification. # =========================================================================== # Cohen's d family # =========================================================================== test_that("hedges_J computes correct correction factor", { # J = 1 - 3/(4*df - 1) # df=10: J = 1 - 3/39 = 1 - 0.07692... = 0.92308 expect_equal(hedges_J(10), 1 - 3/39, tolerance = 1e-7) # df=30: J = 1 - 3/119 = 0.97479... expect_equal(hedges_J(30), 1 - 3/119, tolerance = 1e-7) # df=1: J = 1 - 3/3 = 0 expect_equal(hedges_J(1), 0, tolerance = 1e-7) # Edge: df=0 returns NA expect_true(is.na(hedges_J(0))) }) test_that("d_ind_from_t computes correct Cohen's d for independent samples", { # d = t * sqrt(1/n1 + 1/n2) # t=2.5, n1=30, n2=30: d = 2.5 * sqrt(1/30 + 1/30) = 2.5 * sqrt(2/30) = 2.5 * 0.25820 = 0.64550 expect_equal(d_ind_from_t(2.5, 30, 30), 2.5 * sqrt(2/30), tolerance = 1e-5) # t=3.0, n1=20, n2=40: d = 3.0 * sqrt(1/20 + 1/40) = 3.0 * sqrt(0.075) = 3.0 * 0.27386 = 0.82158 expect_equal(d_ind_from_t(3.0, 20, 40), 3.0 * sqrt(1/20 + 1/40), tolerance = 1e-5) # Negative t expect_equal(d_ind_from_t(-2.0, 15, 15), -2.0 * sqrt(2/15), tolerance = 1e-5) }) test_that("g_ind_from_t computes correct Hedges' g", { # g = d * J where J = 1 - 3/(4*df - 1), df = n1 + n2 - 2 # t=2.5, n1=30, n2=30: df=58, J = 1 - 3/231 = 0.98701 # d = 2.5*sqrt(2/30) = 0.64550, g = 0.64550 * 0.98701 = 0.63712 t_val <- 2.5; n1 <- 30; n2 <- 30 df <- n1 + n2 - 2 d_expected <- t_val * sqrt(1/n1 + 1/n2) J_expected <- 1 - 3/(4*df - 1) g_expected <- d_expected * J_expected expect_equal(g_ind_from_t(t_val, n1, n2), g_expected, tolerance = 1e-5) # Small sample: t=2.0, n1=5, n2=5, df=8, J = 1 - 3/31 = 0.90323 t_val2 <- 2.0; n1b <- 5; n2b <- 5 df2 <- n1b + n2b - 2 d2 <- t_val2 * sqrt(1/n1b + 1/n2b) J2 <- 1 - 3/(4*df2 - 1) expect_equal(g_ind_from_t(t_val2, n1b, n2b), d2 * J2, tolerance = 1e-5) }) test_that("dz_from_t computes correct paired Cohen's dz", { # dz = t / sqrt(n) # t=2.5, n=25: dz = 2.5 / 5 = 0.5 expect_equal(dz_from_t(2.5, 25), 0.5, tolerance = 1e-7) # t=3.0, n=100: dz = 3.0 / 10 = 0.3 expect_equal(dz_from_t(3.0, 100), 0.3, tolerance = 1e-7) # t=-1.5, n=36: dz = -1.5 / 6 = -0.25 expect_equal(dz_from_t(-1.5, 36), -0.25, tolerance = 1e-7) }) test_that("dav_from_dz computes correct conversion", { # dav = dz / sqrt(2*(1-r)) # dz=0.5, r=0.5: dav = 0.5 / sqrt(2*0.5) = 0.5 / 1 = 0.5 expect_equal(dav_from_dz(0.5, 0.5), 0.5, tolerance = 1e-7) # dz=0.6, r=0.7: dav = 0.6 / sqrt(2*0.3) = 0.6 / 0.77460 = 0.77460 expect_equal(dav_from_dz(0.6, 0.7), 0.6 / sqrt(2*0.3), tolerance = 1e-5) # r=0: dav = dz / sqrt(2) - r=0 is accepted (r >= 0 and r < 1) expect_equal(dav_from_dz(1.0, 0.0), 1.0 / sqrt(2), tolerance = 1e-5) # r negative returns NA expect_true(is.na(dav_from_dz(0.5, -0.1))) }) test_that("drm_from_dz computes correct conversion", { # drm = dz * sqrt(2*(1-r)) # dz=0.5, r=0.5: drm = 0.5 * sqrt(1) = 0.5 expect_equal(drm_from_dz(0.5, 0.5), 0.5, tolerance = 1e-7) # dz=0.6, r=0.3: drm = 0.6 * sqrt(2*0.7) = 0.6 * 1.18322 = 0.70993 expect_equal(drm_from_dz(0.6, 0.3), 0.6 * sqrt(2*0.7), tolerance = 1e-5) # Without r, returns NA expect_true(is.na(drm_from_dz(0.5))) }) # =========================================================================== # ANOVA effect sizes # =========================================================================== test_that("eta2_from_F computes correct eta-squared", { # eta2 = (F*df1) / (F*df1 + df2) # F=5.0, df1=2, df2=57: eta2 = 10 / (10 + 57) = 10/67 = 0.14925 expect_equal(eta2_from_F(5.0, 2, 57), 10/67, tolerance = 1e-5) # F=10.0, df1=1, df2=98: eta2 = 10 / (10 + 98) = 10/108 = 0.09259 expect_equal(eta2_from_F(10.0, 1, 98), 10/108, tolerance = 1e-5) # F=0: eta2 = 0 expect_equal(eta2_from_F(0, 1, 50), 0, tolerance = 1e-7) }) test_that("partial_eta2_from_F computes correct partial eta-squared", { # Same formula as eta2 for one-way: (F*df1) / (F*df1 + df2) # F=4.0, df1=3, df2=96: petap2 = 12 / (12 + 96) = 12/108 = 0.11111 expect_equal(partial_eta2_from_F(4.0, 3, 96), 12/108, tolerance = 1e-5) }) test_that("omega2_from_F computes correct omega-squared", { # omega2 = (F*df1 - df1) / (F*df1 + df2 + 1) # F=5.0, df1=2, df2=57: omega2 = (10 - 2) / (10 + 57 + 1) = 8 / 68 = 0.11765 expect_equal(omega2_from_F(5.0, 2, 57), 8/68, tolerance = 1e-5) # F=1.0, df1=1, df2=98: omega2 = (1 - 1) / (1 + 98 + 1) = 0/100 = 0 expect_equal(omega2_from_F(1.0, 1, 98), 0, tolerance = 1e-7) # F < 1 may give negative -> clamped to 0 # F=0.5, df1=1, df2=50: omega2 = (0.5 - 1) / (0.5 + 50 + 1) = -0.5/51.5 -> max(0, .) expect_equal(omega2_from_F(0.5, 1, 50), 0, tolerance = 1e-7) }) test_that("cohens_f_from_F computes correct Cohen's f", { # f = sqrt((F*df1)/df2) # F=5.0, df1=2, df2=57: f = sqrt(10/57) = sqrt(0.17544) = 0.41886 expect_equal(cohens_f_from_F(5.0, 2, 57), sqrt(10/57), tolerance = 1e-5) # F=10.0, df1=1, df2=98: f = sqrt(10/98) = sqrt(0.10204) = 0.31944 expect_equal(cohens_f_from_F(10.0, 1, 98), sqrt(10/98), tolerance = 1e-5) }) test_that("cohens_f_from_eta2 computes correct value", { # f = sqrt(eta2 / (1 - eta2)) # eta2=0.06: f = sqrt(0.06/0.94) = sqrt(0.06383) = 0.25265 expect_equal(cohens_f_from_eta2(0.06), sqrt(0.06/0.94), tolerance = 1e-5) # eta2=0.25: f = sqrt(0.25/0.75) = sqrt(1/3) = 0.57735 expect_equal(cohens_f_from_eta2(0.25), sqrt(1/3), tolerance = 1e-5) }) # =========================================================================== # Correlation # =========================================================================== test_that("r_from_t computes correct correlation", { # r = t / sqrt(t^2 + df) # t=2.5, df=28: r = 2.5 / sqrt(6.25 + 28) = 2.5 / sqrt(34.25) = 2.5 / 5.85235 = 0.42718 expect_equal(r_from_t(2.5, 28), 2.5 / sqrt(6.25 + 28), tolerance = 1e-5) # t=0, df=50: r = 0 expect_equal(r_from_t(0, 50), 0, tolerance = 1e-7) # t=-3.0, df=18: r = -3 / sqrt(9 + 18) = -3/sqrt(27) = -3/5.19615 = -0.57735 expect_equal(r_from_t(-3.0, 18), -3/sqrt(27), tolerance = 1e-5) }) test_that("fisher_ci_r computes correct Fisher z-transform CI", { # z = atanh(r), se = 1/sqrt(n-3), ci = tanh(z +/- z_crit * se) # r=0.5, n=50: z = atanh(0.5) = 0.54931, se = 1/sqrt(47) = 0.14586 # z_crit(0.975) = 1.95996 # lo = tanh(0.54931 - 1.95996*0.14586) = tanh(0.26340) = 0.25786 # hi = tanh(0.54931 + 1.95996*0.14586) = tanh(0.83522) = 0.68388 r_val <- 0.5; n_val <- 50 z_r <- atanh(r_val) se_r <- 1/sqrt(n_val - 3) z_crit <- qnorm(0.975) expected_lo <- tanh(z_r - z_crit * se_r) expected_hi <- tanh(z_r + z_crit * se_r) result <- fisher_ci_r(r_val, n_val, 0.95) expect_equal(result[1], expected_lo, tolerance = 1e-4) expect_equal(result[2], expected_hi, tolerance = 1e-4) }) test_that("fisher_ci_r returns NA for edge cases", { expect_true(all(is.na(fisher_ci_r(0.5, 3)))) # n <= 3 expect_true(all(is.na(fisher_ci_r(1.0, 50)))) # |r| >= 1 expect_true(all(is.na(fisher_ci_r(-1.0, 50)))) }) # =========================================================================== # Chi-square effect sizes # =========================================================================== test_that("phi_from_chisq computes correct phi coefficient", { # phi = sqrt(chisq / N) # chisq=10, N=100: phi = sqrt(0.1) = 0.31623 expect_equal(phi_from_chisq(10, 100), sqrt(0.1), tolerance = 1e-5) # chisq=0, N=50: phi = 0 expect_equal(phi_from_chisq(0, 50), 0, tolerance = 1e-7) }) test_that("V_from_chisq computes correct Cramer's V", { # V = sqrt(chisq / (N * m)) where m = min(r-1, c-1) # chisq=20, N=200, m=2: V = sqrt(20 / 400) = sqrt(0.05) = 0.22361 expect_equal(V_from_chisq(20, 200, 2), sqrt(20/400), tolerance = 1e-5) # For 2x2 table (m=1), V = phi expect_equal(V_from_chisq(10, 100, 1), phi_from_chisq(10, 100), tolerance = 1e-7) }) test_that("enumerate_m_from_df returns correct divisor minima", { # df = (r-1)(c-1). For df=1: only 1x1 -> m=1 (i.e., 2x2 table) expect_equal(enumerate_m_from_df(1), 1L) # df=6: (1,6),(2,3),(3,2),(6,1) -> m = min(1,6)=1, min(2,3)=2, min(3,2)=2, min(6,1)=1 -> unique sorted: 1,2 expect_equal(enumerate_m_from_df(6), c(1L, 2L)) # df=4: (1,4),(2,2),(4,1) -> m = 1, 2, 1 -> 1, 2 expect_equal(enumerate_m_from_df(4), c(1L, 2L)) }) # =========================================================================== # Regression effect sizes # =========================================================================== test_that("standardized_beta_from_t computes correct value", { # beta = t / sqrt(t^2 + df) # Same formula as r_from_t # t=3.75, df=198: beta = 3.75 / sqrt(14.0625 + 198) = 3.75 / sqrt(212.0625) = 3.75 / 14.56239 = 0.25750 expect_equal(standardized_beta_from_t(3.75, 198), 3.75 / sqrt(3.75^2 + 198), tolerance = 1e-5) }) test_that("partial_r_from_t computes correct value", { # Same formula as standardized_beta_from_t # t=2.0, df=50: pr = 2 / sqrt(4 + 50) = 2/sqrt(54) = 2/7.34847 = 0.27217 expect_equal(partial_r_from_t(2.0, 50), 2/sqrt(54), tolerance = 1e-5) }) test_that("cohens_f2_from_R2 computes correct value", { # f2 = R2 / (1 - R2) # R2=0.25: f2 = 0.25/0.75 = 1/3 = 0.33333 expect_equal(cohens_f2_from_R2(0.25), 1/3, tolerance = 1e-5) # R2=0.10: f2 = 0.10/0.90 = 1/9 = 0.11111 expect_equal(cohens_f2_from_R2(0.10), 1/9, tolerance = 1e-5) # R2=0: f2 = 0 expect_equal(cohens_f2_from_R2(0), 0, tolerance = 1e-7) # R2 >= 1: NA expect_true(is.na(cohens_f2_from_R2(1.0))) # R2 < 0: NA expect_true(is.na(cohens_f2_from_R2(-0.1))) }) test_that("R2_from_F computes correct value", { # R2 = (F*df1) / (F*df1 + df2) # Same as eta2 formula # F=8.0, df1=3, df2=96: R2 = 24 / (24 + 96) = 24/120 = 0.2 expect_equal(R2_from_F(8.0, 3, 96), 24/120, tolerance = 1e-5) }) test_that("cohens_f2_from_F computes correct value", { # f2 = (F*df1) / df2 # F=5.0, df1=2, df2=57: f2 = 10/57 = 0.17544 expect_equal(cohens_f2_from_F(5.0, 2, 57), 10/57, tolerance = 1e-5) }) # =========================================================================== # Nonparametric effect sizes # =========================================================================== test_that("rank_biserial_r_from_U computes correct value", { # r_rb = 1 - 2*U / (n1*n2) # U=245, n1=30, n2=40: r_rb = 1 - 490/1200 = 1 - 0.40833 = 0.59167 expect_equal(rank_biserial_r_from_U(245, 30, 40), 1 - 490/1200, tolerance = 1e-5) # U=0, n1=10, n2=10: r_rb = 1 (complete separation) expect_equal(rank_biserial_r_from_U(0, 10, 10), 1, tolerance = 1e-7) # U=n1*n2, n1=10, n2=10: r_rb = -1 (complete reverse) expect_equal(rank_biserial_r_from_U(100, 10, 10), -1, tolerance = 1e-7) # U = n1*n2/2 (no effect): r_rb = 0 expect_equal(rank_biserial_r_from_U(50, 10, 10), 0, tolerance = 1e-7) }) test_that("cliffs_delta_from_U computes correct value", { # delta = 2*U / (n1*n2) - 1 # U=245, n1=30, n2=40: delta = 490/1200 - 1 = -0.59167 expect_equal(cliffs_delta_from_U(245, 30, 40), 490/1200 - 1, tolerance = 1e-5) # Note: cliff's delta = -rank_biserial_r expect_equal(cliffs_delta_from_U(245, 30, 40), -rank_biserial_r_from_U(245, 30, 40), tolerance = 1e-7) }) test_that("epsilon_squared_from_H computes correct value", { # epsilon2 = (H - k + 1) / (N - k) # H=8.45, N=60, k=3: epsilon2 = (8.45 - 3 + 1) / (60 - 3) = 6.45 / 57 = 0.11316 expect_equal(epsilon_squared_from_H(8.45, 60, 3), 6.45/57, tolerance = 1e-5) # Small H -> max(0, result) # H=1.5, N=30, k=3: epsilon2 = (1.5 - 3 + 1) / (30 - 3) = -0.5/27 -> 0 expect_equal(epsilon_squared_from_H(1.5, 30, 3), 0, tolerance = 1e-7) }) test_that("kendalls_W_from_chisq computes correct value", { # W = chisq / (N * (k - 1)) # chisq=15, N=10, k=4: W = 15 / (10 * 3) = 15/30 = 0.5 expect_equal(kendalls_W_from_chisq(15, 10, 4), 0.5, tolerance = 1e-5) # W bounded [0,1] # chisq=0: W=0 expect_equal(kendalls_W_from_chisq(0, 10, 4), 0, tolerance = 1e-7) }) test_that("rank_biserial_r_from_z computes correct value", { # r = z / sqrt(N) # z=2.31, N=70: r = 2.31 / sqrt(70) = 2.31 / 8.36660 = 0.27609 expect_equal(rank_biserial_r_from_z(2.31, 70), 2.31 / sqrt(70), tolerance = 1e-5) }) # =========================================================================== # P-value recomputation (pipeline tests) # =========================================================================== test_that("p-value recomputed correctly for t-test", { # t(28) = 2.21, two-tailed p = 2 * pt(2.21, 28, lower.tail=FALSE) expected_p <- 2 * pt(2.21, 28, lower.tail = FALSE) res <- check_text("t(28) = 2.21, p = .035, d = 0.80") expect_equal(res$p_computed[1], expected_p, tolerance = 1e-4) }) test_that("p-value recomputed correctly for F-test", { # F(2, 57) = 5.00, p = pf(5, 2, 57, lower.tail=FALSE) expected_p <- pf(5.0, 2, 57, lower.tail = FALSE) res <- check_text("F(2, 57) = 5.00, p = .010, eta-squared = 0.15") expect_equal(res$p_computed[1], expected_p, tolerance = 1e-4) }) test_that("p-value recomputed correctly for r-test", { # r(48) = 0.30: convert to t = r*sqrt(df/(1-r^2)) r_val <- 0.30; df_val <- 48 t_from_r <- r_val * sqrt(df_val / (1 - r_val^2)) expected_p <- 2 * pt(abs(t_from_r), df_val, lower.tail = FALSE) res <- check_text("r(48) = .30, p = .037") expect_equal(res$p_computed[1], expected_p, tolerance = 1e-4) }) test_that("p-value recomputed correctly for chi-square", { # chi-square(1) = 5.02: p = pchisq(5.02, 1, lower.tail=FALSE) expected_p <- pchisq(5.02, 1, lower.tail = FALSE) res <- check_text("chi-square(1) = 5.02, p = .025, N = 100") expect_equal(res$p_computed[1], expected_p, tolerance = 1e-4) }) test_that("p-value recomputed correctly for z-test", { # z-tests don't carry df1, so Phase 9 p-value guard (stat && df1) skips them. # Verify the z p-value computation function directly instead. # z = 2.50, two-tailed p = 2 * pnorm(2.50, lower.tail=FALSE) expected_p <- 2 * pnorm(2.50, lower.tail = FALSE) computed_p <- 2 * pnorm(abs(2.50), lower.tail = FALSE) expect_equal(computed_p, expected_p, tolerance = 1e-7) # Pipeline: z-test is parsed but p_computed will be NA (known limitation) res <- check_text("z = 2.50, p = .012") expect_equal(res$test_type[1], "z") }) # =========================================================================== # Error detection # =========================================================================== test_that("grossly wrong effect size produces ERROR status", { # Report d = 5.00 for a modest t-test -> should flag as ERROR # t(28) = 2.21: real d ~ 0.83 (equal n), 5.00 is off by ~4.17 >> 5*tol res <- check_text("t(28) = 2.21, p = .035, d = 5.00, N = 30") expect_equal(res$status[1], "ERROR") }) # =========================================================================== # CI computations # =========================================================================== test_that("ci_d_ind_approx computes correct approximate CI", { # SE = sqrt(N/(n1*n2) + d^2/(2*(N-2))) # d=0.5, n1=30, n2=30, N=60 d <- 0.5; n1 <- 30; n2 <- 30; N <- 60 se_d <- sqrt(N/(n1*n2) + d^2/(2*(N-2))) z_crit <- qnorm(0.975) expected <- c(d - z_crit*se_d, d + z_crit*se_d) result <- ci_d_ind_approx(d, n1, n2, 0.95) expect_equal(result[1], expected[1], tolerance = 1e-4) expect_equal(result[2], expected[2], tolerance = 1e-4) }) test_that("ci_dz computes CI for paired dz", { # Large-sample fallback: SE = sqrt(1/n + dz^2/(2*n)) dz <- 0.4; n <- 50 result <- ci_dz(dz, n, 0.95) expect_true(result$success) expect_true(result$bounds[1] < dz) expect_true(result$bounds[2] > dz) }) test_that("ci_etap2 computes CI for partial eta-squared", { # NCP inversion method result <- ci_etap2(5.0, 2, 57, 0.95) expect_true(result$success) # Point estimate: eta_p2 = 10/67 = 0.14925 # CI should bracket the point estimate point <- 10/67 expect_true(result$bounds[1] <= point) expect_true(result$bounds[2] >= point) }) test_that("ci_cohens_f computes CI derived from eta-squared", { result <- ci_cohens_f(5.0, 2, 57, 0.95) expect_true(result$success) # f = sqrt(eta2/(1-eta2)), so CI bounds should be non-negative expect_true(result$bounds[1] >= 0) expect_true(result$bounds[2] > result$bounds[1]) })