test_that("adm_col_sq_sum_cpp agrees with colSums(m^2) for general matrix", { set.seed(20) m <- matrix(rnorm(60), 12, 5) expect_equal(admixr2:::adm_col_sq_sum_cpp(m), colSums(m^2), tolerance = 1e-12) }) test_that("adm_col_sq_sum_cpp: single column", { m <- matrix(c(1.0, 2.0, 3.0), 3, 1) expect_equal(admixr2:::adm_col_sq_sum_cpp(m), sum(m^2), tolerance = 1e-12) }) test_that("adm_col_sq_sum_cpp: single row", { m <- matrix(c(1.0, 2.0, 3.0, 4.0), 1, 4) expect_equal(admixr2:::adm_col_sq_sum_cpp(m), as.numeric(m^2), tolerance = 1e-12) }) # ---- Central regression test: z_diag_scale vs eta_mat factor-of-2 bug ------- # CLAUDE.md documents: passing raw eta_mat (= z * L_ii) as the eta_mat argument # gives a diagonal omega gradient that is 2x too large. # Correct call uses z_diag_scale = sweep(z, 2L, diag(L)/2, "*") = z * L_ii/2. # neta1/neta2 are integer vectors from eta_rows_df (all eta rows, incl. off-diag). test_that("adm_grad_eta_omega_cpp: z_diag_scale (correct) gives half the gradient of eta_mat (wrong)", { set.seed(30) n_sim <- 500L; n_t <- 2L pinfo <- admixr2:::.admParseIniDf(make_inidf_1eta()) L_ii <- sqrt(0.09) # = 0.3 L <- matrix(L_ii) z <- matrix(rnorm(n_sim), n_sim, 1) eta_mat <- z * L_ii # wrong: what the buggy code would pass cp_c <- matrix(rnorm(n_sim * n_t), n_sim, n_t) D_mat <- matrix(rnorm(n_sim * n_t), n_sim, n_t) dNLL_dV <- diag(c(0.5, 0.3)) dNLL_dmu <- c(0.1, -0.2) sigma_mu_scale <- c(0.0, 0.0) neta1 <- as.integer(pinfo$eta_rows_df$neta1) neta2 <- as.integer(pinfo$eta_rows_df$neta2) go_wrong <- admixr2:::adm_grad_eta_omega_cpp( cp_c, D_mat, eta_mat, z, dNLL_dV, dNLL_dmu, sigma_mu_scale, neta1, neta2, n_t, pinfo$n_eta ) z_diag_scale <- z * (L_ii / 2) go_correct <- admixr2:::adm_grad_eta_omega_cpp( cp_c, D_mat, z_diag_scale, z, dNLL_dV, dNLL_dmu, sigma_mu_scale, neta1, neta2, n_t, pinfo$n_eta ) # Diagonal omega gradient: wrong is exactly 2x correct expect_equal(go_wrong$omega_grad[1], 2 * go_correct$omega_grad[1], tolerance = 1e-10) }) test_that("adm_grad_eta_omega_cpp: 2-eta model returns 3 omega_grad entries", { set.seed(31) n_sim <- 200L; n_t <- 3L pinfo <- admixr2:::.admParseIniDf(make_inidf_2eta()) L <- t(chol(Omega_2x2)) z <- matrix(rnorm(n_sim * pinfo$n_eta), n_sim, pinfo$n_eta) z_diag_scale <- sweep(z, 2L, diag(L) / 2, "*") cp_c <- matrix(rnorm(n_sim * n_t), n_sim, n_t) n_col <- n_t * pinfo$n_eta D_mat <- matrix(rnorm(n_sim * n_col), n_sim, n_col) A <- diag(c(0.5, 0.3, 0.2)) dNLL_dV <- A dNLL_dmu <- rnorm(n_t) sigma_mu_scale <- rep(0.0, n_t) neta1 <- as.integer(pinfo$eta_rows_df$neta1) neta2 <- as.integer(pinfo$eta_rows_df$neta2) g <- admixr2:::adm_grad_eta_omega_cpp( cp_c, D_mat, z_diag_scale, z, dNLL_dV, dNLL_dmu, sigma_mu_scale, neta1, neta2, n_t, pinfo$n_eta ) expect_true(all(is.finite(g$eta_grad))) expect_true(all(is.finite(g$omega_grad))) # 2 diagonal rows + 1 off-diagonal row in eta_rows_df -> length 3 expect_equal(length(g$omega_grad), nrow(pinfo$eta_rows_df)) }) test_that("adm_grad_eta_omega_var_cpp: factor-of-2 bug same as cov version", { set.seed(32) n_sim <- 200L; n_t <- 3L pinfo <- admixr2:::.admParseIniDf(make_inidf_1eta()) L_ii <- sqrt(0.09) z <- matrix(rnorm(n_sim), n_sim, 1) eta_mat <- z * L_ii # wrong (factor-of-2) z_diag_scale <- z * (L_ii / 2) # correct cp_c <- matrix(rnorm(n_sim * n_t), n_sim, n_t) D_mat <- matrix(rnorm(n_sim * n_t), n_sim, n_t) dNLL_dV_diag <- c(0.5, 0.3, 0.2) dNLL_dmu <- c(0.1, -0.2, 0.05) sigma_mu_scale <- rep(0.0, n_t) neta1 <- as.integer(pinfo$eta_rows_df$neta1) neta2 <- as.integer(pinfo$eta_rows_df$neta2) gw <- admixr2:::adm_grad_eta_omega_var_cpp( cp_c, D_mat, eta_mat, z, dNLL_dV_diag, dNLL_dmu, sigma_mu_scale, neta1, neta2, n_t, pinfo$n_eta ) gc <- admixr2:::adm_grad_eta_omega_var_cpp( cp_c, D_mat, z_diag_scale, z, dNLL_dV_diag, dNLL_dmu, sigma_mu_scale, neta1, neta2, n_t, pinfo$n_eta ) expect_equal(gw$omega_grad[1], 2 * gc$omega_grad[1], tolerance = 1e-10) expect_true(all(is.finite(gc$eta_grad))) }) test_that("adm_grad_partial_cpp vs adm_grad_partial_var_cpp agree for diagonal dNLL_dV", { set.seed(33) n_sim <- 100L; n_t <- 2L cp_c <- matrix(rnorm(n_sim * n_t), n_sim, n_t) dpred <- matrix(rnorm(n_sim * n_t), n_sim, n_t) dV_diag <- c(0.5, 0.3) dV <- diag(dV_diag) eff_dmu <- c(0.1, -0.2) inv_nm1 <- 1.0 / (n_sim - 1L) g_cov <- admixr2:::adm_grad_partial_cpp(cp_c, dpred, dV, eff_dmu, inv_nm1) g_var <- admixr2:::adm_grad_partial_var_cpp(cp_c, dpred, dV_diag, eff_dmu, inv_nm1) expect_equal(g_cov, g_var, tolerance = 1e-12) }) test_that("irmc_grad_kernel_cpp: dNLL_dlw sums to zero (softmax Jacobian property)", { # d/dlw of (sum w_i f(x_i)) where sum w_i = 1: the row-sum of the Jacobian = 0. # This holds for any choice of F, mu, dNLL_dV, dNLL_dmu, d_mat, invO. set.seed(34) n_sim <- 50L; n_t <- 2L; n_eta <- 1L F_mat <- matrix(rnorm(n_sim * n_t), n_sim, n_t) lw <- rnorm(n_sim) w <- softmax_r(lw) mu <- colSums(w * F_mat) # d_mat: demeaned eta samples (bi - mean_new), here just zeros (mean_new=0, bi~N(0,1)) bi <- matrix(rnorm(n_sim * n_eta), n_sim, n_eta) d_mat <- bi # sweep(bi, 2, 0) = bi invO <- matrix(1 / 0.09) # inverse of 1x1 Omega = 0.09 dNLL_dmu <- c(0.1, -0.2) dNLL_dV <- diag(c(0.5, 0.3)) eff_dNLL_dmu <- dNLL_dmu gk <- admixr2:::irmc_grad_kernel_cpp(F_mat, w, mu, d_mat, invO, eff_dNLL_dmu, dNLL_dV) expect_equal(sum(gk$dNLL_dlw), 0, tolerance = 1e-10) expect_true(all(is.finite(gk$dNLL_dlw))) }) test_that("irmc_grad_kernel_cpp vs irmc_grad_kernel_var_cpp agree for diagonal dNLL_dV", { set.seed(35) n_sim <- 60L; n_t <- 3L; n_eta <- 1L F_mat <- matrix(rnorm(n_sim * n_t), n_sim, n_t) lw <- rnorm(n_sim) w <- softmax_r(lw) mu <- colSums(w * F_mat) bi <- matrix(rnorm(n_sim * n_eta), n_sim, n_eta) d_mat <- bi invO <- matrix(1 / 0.09) eff_dNLL_dmu <- rnorm(n_t) dNLL_dV_diag <- c(0.4, 0.2, 0.3) dNLL_dV <- diag(dNLL_dV_diag) gk_cov <- admixr2:::irmc_grad_kernel_cpp(F_mat, w, mu, d_mat, invO, eff_dNLL_dmu, dNLL_dV) gk_var <- admixr2:::irmc_grad_kernel_var_cpp(F_mat, w, mu, d_mat, invO, eff_dNLL_dmu, dNLL_dV_diag) expect_equal(gk_cov$dNLL_dlw, gk_var$dNLL_dlw, tolerance = 1e-10) expect_equal(gk_cov$dNLL_dmean_new, gk_var$dNLL_dmean_new, tolerance = 1e-10) })