context("Stan model parsing") test_that("Empty model handled correctly", { parsed = parse_model("") expect_equal(length(parsed$vars), 0) expect_equal(length(parsed$samps), 0) }) test_that("Correct parsed variables", { correct_vars = c(J= "data", y= "data", sigma="data", mu="parameters", tau="parameters", eta="parameters", theta="transformed parameters") code = eightschools_m@stanmodel@model_code parsed = parse_model(code) expect_equal(parsed$vars, correct_vars) }) test_that("Correct parsed sampling statements", { correct_samp = list(eta ~ std_normal(), y ~ normal(theta, sigma), mu ~ uniform(-1e+100, 1e+100), tau ~ uniform(-1e+100, 1e+100)) code = eightschools_m@stanmodel@model_code parsed = parse_model(code) expect_equal(parsed$samp, correct_samp) }) test_that("Provided sampling statements can be matched to model", { model_samp = list(eta ~ std_normal(), y ~ normal(theta, sigma)) prov_samp = list(eta ~ exponential(5)) matched = match_sampling_stmts(prov_samp, model_samp) expect_false(identical(matched, prov_samp)) expect_length(matched, 1) expect_equal(matched[[1]], model_samp[[1]]) }) test_that("Extra sampling statements not in model throw an error", { model_samp = list(eta ~ std_normal(), y ~ normal(theta, sigma)) prov_samp = list(eta ~ exponential(5), x ~ normal(theta, sigma)) expect_error(match_sampling_stmts(prov_samp, model_samp), "No matching sampling statement found for x ~ normal\\(theta, sigma\\)") }) test_that("Variables are correctly extracted from sampling statements", { expect_equal(get_stmt_vars(y ~ normal(theta %*% eta/2, 2%%4 + sigma)), c("y", "theta", "eta", "sigma")) expect_error(get_stmt_vars(y ~ .), "y ~ \\. does not") }) # --- Tests for new token-based parser --- test_that("New array syntax is parsed correctly", { code = " data { array[N] real y; array[N, K] int x; } parameters { array[3] vector[N] beta; } model { y ~ normal(0, 1); }" parsed = parse_model(code) expect_equal(parsed$vars[c("y", "x", "beta")], c(y = "data", x = "data", beta = "parameters")) }) test_that("Old-style array syntax is still parsed correctly", { code = " data { int N; real y[N]; real sigma[N]; } parameters { real mu; } model { y ~ normal(mu, sigma); }" parsed = parse_model(code) expect_equal(parsed$vars, c(N = "data", y = "data", sigma = "data", mu = "parameters")) }) test_that("target += with lpdf/lpmf is parsed correctly", { code = " data { int N; array[N] int Y; } parameters { real mu; } model { target += normal_lpdf(mu | 0, 10); target += poisson_lpmf(Y | mu); }" parsed = parse_model(code) expect_length(parsed$samp, 2) expect_equal(parsed$samp[[1]], mu ~ normal(0, 10)) expect_equal(parsed$samp[[2]], Y ~ poisson(mu)) }) test_that("Statements inside control flow are skipped", { code = " data { int N; array[N] real y; } parameters { real mu; real sigma; } model { mu ~ normal(0, 10); if (N > 0) { y ~ normal(mu, sigma); } }" parsed = parse_model(code) # Only the top-level statement should be found sampled = vapply(parsed$samp, function(s) deparse(rlang::f_lhs(s)), "") expect_true("mu" %in% sampled) expect_false("y" %in% sampled) }) test_that("Profile blocks are handled (statements inside skipped)", { code = ' data { int N; array[N] real y; } parameters { real mu; real sigma; } model { mu ~ normal(0, 10); profile("likelihood") { y ~ normal(mu, sigma); } }' parsed = parse_model(code) sampled = vapply(parsed$samp, function(s) deparse(rlang::f_lhs(s)), "") expect_true("mu" %in% sampled) # profile block increases brace depth, so y ~ ... is skipped expect_false("y" %in% sampled) }) test_that("Block comments and line comments are stripped", { code = " data { /* this is a block comment spanning multiple lines */ int N; // real ghost_var; } model { }" parsed = parse_model(code) expect_equal(names(parsed$vars), "N") expect_true(! "ghost_var" %in% names(parsed$vars)) }) test_that("New constrained types are parsed", { code = " parameters { sum_to_zero_vector[5] beta; simplex[K] theta; ordered[3] cutpoints; cholesky_factor_corr[K] L; } model { beta ~ normal(0, 1); theta ~ dirichlet(alpha); cutpoints ~ normal(0, 5); L ~ lkj_corr_cholesky(2); }" parsed = parse_model(code) expect_equal(parsed$vars[c("beta", "theta", "cutpoints", "L")], c(beta = "parameters", theta = "parameters", cutpoints = "parameters", L = "parameters")) }) test_that("Tokenizer handles Stan operators correctly", { tokens = tokenize_stan("target += normal_lpdf(y | mu, sigma);") expect_true("target" %in% tokens$value) expect_true("+=" %in% tokens$value) expect_true("|" %in% tokens$value) }) test_that("Functions block is parsed without errors", { code = ' functions { real my_fun(real x) { return x * 2; } } data { real y; } parameters { real mu; } model { mu ~ normal(0, 10); y ~ normal(mu, 1); }' parsed = parse_model(code) expect_equal(parsed$vars[c("y", "mu")], c(y = "data", mu = "parameters")) expect_length(parsed$samp, 2) expect_equal(parsed$samp[[2]], y ~ normal(mu, 1)) }) test_that("Truncated sampling statements are parsed (truncation stripped)", { code = " data { real y; } parameters { real mu; real sigma; } model { mu ~ normal(0, 10); sigma ~ exponential(1); y ~ normal(mu, sigma) T[0, ]; }" parsed = parse_model(code) sampled = vapply(parsed$samp, function(s) deparse(rlang::f_lhs(s)), "") expect_true("y" %in% sampled) y_samp = parsed$samp[[which(sampled == "y")]] expect_equal(y_samp, y ~ normal(mu, sigma)) }) test_that("Multiple variables declared on separate lines in same block", { code = " parameters { real alpha; real beta; real sigma; vector[10] gamma; } model { alpha ~ normal(0, 1); beta ~ normal(0, 1); sigma ~ exponential(1); gamma ~ normal(0, 1); }" parsed = parse_model(code) expect_equal(sort(names(parsed$vars)), sort(c("alpha", "beta", "sigma", "gamma"))) expect_true(all(parsed$vars == "parameters")) }) test_that("Transformed data and generated quantities variables are found", { code = " data { int N; } transformed data { real log_N = log(N); } parameters { real mu; } generated quantities { real y_rep = normal_rng(mu, 1); } model { mu ~ normal(0, 1); }" parsed = parse_model(code) expect_equal(parsed$vars["log_N"], c(log_N = "transformed data")) expect_equal(parsed$vars["y_rep"], c(y_rep = "generated quantities")) }) test_that("Variable with assignment in declaration is parsed", { code = " transformed parameters { real disc = 1; vector[3] theta = mu + tau * eta; } parameters { real mu; real tau; vector[3] eta; } model { mu ~ normal(0, 1); tau ~ exponential(1); eta ~ std_normal(); }" parsed = parse_model(code) expect_equal(parsed$vars["disc"], c(disc = "transformed parameters")) expect_equal(parsed$vars["theta"], c(theta = "transformed parameters")) }) test_that("target += with _lupdf and _lupmf suffixes are parsed", { code = " data { real y; } parameters { real mu; } model { target += normal_lupdf(y | mu, 1); }" parsed = parse_model(code) expect_length(parsed$samp, 2) # 1 explicit + 1 implicit uniform for mu expect_equal(parsed$samp[[1]], y ~ normal(mu, 1)) }) test_that("Mixed tilde and target += in same model", { code = " data { int N; array[N] real y; } parameters { real mu; real sigma; } model { mu ~ normal(0, 10); sigma ~ exponential(1); target += normal_lpdf(y | mu, sigma); }" parsed = parse_model(code) # 3 explicit statements, no implicit uniforms (all params have priors) expect_length(parsed$samp, 3) expect_equal(parsed$samp[[1]], mu ~ normal(0, 10)) expect_equal(parsed$samp[[2]], sigma ~ exponential(1)) expect_equal(parsed$samp[[3]], y ~ normal(mu, sigma)) })