# Full-line `*` comments (also when indented) and trailing `//` comments are
# expected to leave only whitespace behind; code lines must survive intact.
test_that("strip_stata_comments removes line comments", {
  stata_src <- c(
    "* this is a comment",
    "gen x = 1",
    "gen y = 2 // inline comment",
    "  * indented comment"
  )
  cleaned <- trimws(strip_stata_comments(stata_src))
  expect_equal(cleaned[1], "")
  expect_equal(cleaned[2], "gen x = 1")
  expect_equal(cleaned[3], "gen y = 2")
  expect_equal(cleaned[4], "")
})

# Block comments may span several lines or sit in the middle of a statement;
# in both cases only the commented text is expected to disappear.
test_that("strip_stata_comments removes block comments", {
  stata_src <- c(
    "gen x = 1",
    "/* this is",
    "a block comment */",
    "gen y = 2",
    "gen z = /* inline block */ 3"
  )
  cleaned <- trimws(strip_stata_comments(stata_src))
  expect_equal(cleaned[1], "gen x = 1")
  expect_equal(cleaned[2], "")
  expect_equal(cleaned[3], "")
  expect_equal(cleaned[4], "gen y = 2")
  # The spaces on both sides of the removed inline block are kept.
  expect_equal(cleaned[5], "gen z =  3")
})

# A trailing `///` marks a continuation: the next physical line is expected
# to be folded into it, so three physical lines become two logical ones.
test_that("join_continuation_lines handles ///", {
  physical_lines <- c(
    "gen x = a + ///",
    "  b + c",
    "gen y = 1"
  )
  logical_lines <- join_continuation_lines(physical_lines)
  expect_length(logical_lines, 2)
  expect_match(logical_lines[1], "gen x = a \\+\\s+b \\+ c")
  expect_equal(trimws(logical_lines[2]), "gen y = 1")
})

# `foreach ... in` over three names: the one-line body is expected to be
# emitted once per name with the loop macro substituted.
test_that("expand_stata_loops expands foreach in", {
  loop_src <- c(
    "foreach var in x y z {",
    "recode `var' .=0",
    "}"
  )
  unrolled <- trimws(expand_stata_loops(loop_src))
  expect_length(unrolled, 3)
  expect_equal(unrolled[1], "recode x .=0")
  expect_equal(unrolled[2], "recode y .=0")
  expect_equal(unrolled[3], "recode z .=0")
})

# `foreach ... of numlist 1/3`: the range is expected to be expanded and the
# body repeated for 1, 2 and 3.
test_that("expand_stata_loops expands foreach of numlist", {
  loop_src <- c(
    "foreach i of numlist 1/3 {",
    "gen aux`i'=0",
    "}"
  )
  unrolled <- trimws(expand_stata_loops(loop_src))
  expect_length(unrolled, 3)
  expect_equal(unrolled[1], "gen aux1=0")
  expect_equal(unrolled[2], "gen aux2=0")
  expect_equal(unrolled[3], "gen aux3=0")
})

# Both the `a/b` range form and a plain space-separated list are expected to
# come back as character vectors.
test_that("expand_numlist handles range and values", {
  from_range <- expand_numlist("1/5")
  expect_equal(from_range, c("1", "2", "3", "4", "5"))

  from_list <- expand_numlist("10 20 30")
  expect_equal(from_list, c("10", "20", "30"))
})

# A plain `gen` statement: command and arguments are split, with no `if`
# clause and no capture prefix reported.
test_that("parse_stata_command tokenizes gen", {
  parsed <- parse_stata_command("gen bc_pe2=e26")
  expect_equal(parsed$cmd, "gen")
  expect_equal(parsed$args, "bc_pe2=e26")
  expect_null(parsed$if_clause)
  expect_false(parsed$capture)
})

# The one-letter abbreviation `g` is expected to be normalised to "gen".
test_that("parse_stata_command handles abbreviation g", {
  parsed <- parse_stata_command("g bc_pe3=e27")
  expect_equal(parsed$cmd, "gen")
})

# The trailing `if` condition is expected to be separated from the
# assignment and reported in `if_clause`.
test_that("parse_stata_command handles replace with if", {
  parsed <- parse_stata_command("replace bc_pe4=1 if e30==1")
  expect_equal(parsed$cmd, "replace")
  expect_equal(parsed$args, "bc_pe4=1")
  expect_equal(parsed$if_clause, "e30==1")
})

# Both spellings of the prefix (`cap` and `capture`) are expected to set the
# capture flag while the wrapped command is still identified.
test_that("parse_stata_command handles capture prefix", {
  short_form <- parse_stata_command("cap g bc_anio=2021")
  expect_equal(short_form$cmd, "gen")
  expect_true(short_form$capture)

  long_form <- parse_stata_command("capture rename old new")
  expect_equal(long_form$cmd, "rename")
  expect_true(long_form$capture)
})

# A `bysort <var>:` prefix is expected to end up in `by_group`, leaving the
# wrapped command and its arguments intact.
test_that("parse_stata_command handles bysort prefix", {
  parsed <- parse_stata_command("bysort bc_correlat: egen total = sum(income)")
  expect_equal(parsed$cmd, "egen")
  expect_equal(parsed$by_group, "bc_correlat")
  expect_match(parsed$args, "total = sum\\(income\\)")
})

# `rename old new`: both names are expected to stay together in `args`.
test_that("parse_stata_command handles rename", {
  parsed <- parse_stata_command("rename id bc_correlat")
  expect_equal(parsed$cmd, "rename")
  expect_equal(parsed$args, "id bc_correlat")
})

# `drop` with several variables: the variable list is expected verbatim in
# `args`.
test_that("parse_stata_command handles drop", {
  parsed <- parse_stata_command("drop aux1 aux2 aux3")
  expect_equal(parsed$cmd, "drop")
  expect_equal(parsed$args, "aux1 aux2 aux3")
})

# `lab var <name> "<text>"` lines are expected to populate `var_labels`,
# keyed by variable name.
test_that("parse_stata_labels extracts var labels", {
  label_src <- c(
    'lab var bc_pe2 "Sexo"',
    'lab var bc_pe3 "Edad"'
  )
  labels <- parse_stata_labels(label_src)
  expect_equal(labels$var_labels$bc_pe2, "Sexo")
  expect_equal(labels$var_labels$bc_pe3, "Edad")
})

# A `lab def` followed by a `lab val` that attaches it: the value labels are
# expected under the variable name, keyed by the code as a string.
test_that("parse_stata_labels extracts val labels", {
  label_src <- c(
    'lab def pe2l 1 "Hombre" 2 "Mujer"',
    "lab val bc_pe2 pe2l"
  )
  labels <- parse_stata_labels(label_src)
  sex_labels <- labels$val_labels$bc_pe2
  expect_equal(sex_labels[["1"]], "Hombre")
  expect_equal(sex_labels[["2"]], "Mujer")
})

# A three-entry definition attached to a variable: all three entries are
# expected to be resolved for that variable.
test_that("parse_stata_labels handles define + values together", {
  label_src <- c(
    'lab def pe4l 1 "Jefe" 2 "Conyuge" 3 "Hijo"',
    "lab val bc_pe4 pe4l"
  )
  labels <- parse_stata_labels(label_src)
  kinship_labels <- labels$val_labels$bc_pe4
  expect_length(kinship_labels, 3)
  expect_equal(kinship_labels[["1"]], "Jefe")
})

# End-to-end parse of the packaged `gen_replace.do` fixture. The test is
# skipped when the fixture cannot be located (system.file() returns "" in
# that case, for which file.exists() is FALSE).
test_that("parse_do_file works on gen_replace fixture", {
  fixture <- system.file("stata-test-cases/gen_replace.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(fixture))
  commands <- parse_do_file(fixture)
  # expect_gt() reports the actual length on failure, unlike expect_true().
  expect_gt(length(commands), 0)
  # First command should be gen
  expect_equal(commands[[1]]$cmd, "gen")
})

# End-to-end parse of the packaged `foreach.do` fixture; skipped when the
# fixture cannot be located.
test_that("parse_do_file works on foreach fixture", {
  fixture <- system.file("stata-test-cases/foreach.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(fixture))
  commands <- parse_do_file(fixture)
  # After loop expansion the loops should be replaced by individual commands.
  # NOTE(review): the fixture is described as expanding to 10 commands
  # (4 vars * 1 body line + 3 nums * 2 body lines) -- confirm against the
  # fixture. Only a conservative lower bound of 4 is enforced here.
  expect_gte(length(commands), 4)
})

# Label extraction on the packaged `labels.do` fixture, read as raw lines;
# skipped when the fixture cannot be located.
test_that("parse_do_file works on labels fixture", {
  fixture <- system.file("stata-test-cases/labels.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(fixture))
  raw_lines <- readLines(fixture, warn = FALSE)
  result <- parse_stata_labels(raw_lines)
  # expect_gte() reports the observed counts on failure, unlike expect_true().
  expect_gte(length(result$var_labels), 3)
  expect_gte(length(result$val_labels), 1)
})

# A block comment opened at the end of one line and closed at the start of
# the next acts as a line continuation: exactly one non-blank line,
# carrying both halves of the statement, is expected to remain.
test_that("strip_stata_comments joins /* */ continuation lines", {
  split_statement <- c(
    "mvencode a b c /*",
    "*/d e f, mv(0)"
  )
  cleaned <- strip_stata_comments(split_statement)
  # Expected shape: "mvencode a b c  d e f, mv(0)"
  has_content <- nchar(trimws(cleaned)) > 0
  joined <- cleaned[has_content]
  expect_length(joined, 1)
  expect_match(joined, "mvencode")
  expect_match(joined, "d e f")
})

# Two nested loops (2 outer values x 2 inner values): four lines are
# expected, with the inner loop varying fastest.
test_that("expand_stata_loops handles nested foreach", {
  nested_src <- c(
    "foreach i in 1 2 {",
    "foreach j in a b {",
    "gen x`i'`j' = 0",
    "}",
    "}"
  )
  unrolled <- expand_stata_loops(nested_src)
  expect_length(unrolled, 4)
  expect_match(unrolled[1], "gen x1a")
  expect_match(unrolled[2], "gen x1b")
  expect_match(unrolled[3], "gen x2a")
  expect_match(unrolled[4], "gen x2b")
})

# Two ranges in one numlist are expected to be expanded in the order given,
# not sorted.
test_that("expand_numlist handles compound ranges like '81/99 0/17'", {
  expanded <- expand_numlist("81/83 0/2")
  expect_equal(expanded, as.character(c(81:83, 0:2)))
})

# Several grouping variables before the colon are expected to be kept
# together, space-separated, in `by_group`.
test_that("parse_stata_command handles bysort with multiple vars", {
  parsed <- parse_stata_command("bysort g1 g2: egen tot = sum(x)")
  expect_equal(parsed$cmd, "egen")
  expect_equal(parsed$by_group, "g1 g2")
})

# ── Batch 9: stata_parser.R + stata_mappings.R edge cases ─────────────────────

# A path that does not exist is expected to raise an error whose message
# matches "File not found".
test_that("parse_do_file errors on missing file", {
  missing_path <- "/nonexistent/path.do"
  expect_error(parse_do_file(missing_path), "File not found")
})

# A zero-line do-file is expected to parse to an empty result.
test_that("parse_do_file handles empty file", {
  tmp <- tempfile(fileext = ".do")
  # add = TRUE so this cleanup never replaces another registered exit handler.
  on.exit(unlink(tmp), add = TRUE)
  writeLines(character(0), tmp)
  result <- parse_do_file(tmp)
  expect_length(result, 0)
})

# A do-file containing only comments (`*`, `//`, `/* */`, indented `*`) is
# expected to yield no commands.
test_that("parse_do_file handles comment-only file", {
  tmp <- tempfile(fileext = ".do")
  # add = TRUE so this cleanup never replaces another registered exit handler.
  on.exit(unlink(tmp), add = TRUE)
  writeLines(c(
    "* This is a comment",
    "// Another comment",
    "/* block comment */",
    "  * indented comment"
  ), tmp)
  result <- parse_do_file(tmp)
  expect_length(result, 0)
})

# Empty and whitespace-only input is expected to produce NULL.
test_that("parse_stata_command returns NULL for empty line", {
  for (blank in c("", "   ")) {
    expect_null(parse_stata_command(blank), info = sprintf("input: '%s'", blank))
  }
})

# Lines that begin with an operator or a parenthesis are leftovers of a
# split expression, not commands; NULL is expected for each of them.
test_that("parse_stata_command returns NULL for orphan expression fragments", {
  fragments <- c(
    "& other_condition",
    "| another_clause",
    ") trailing_paren",
    "(bare_paren_expr)"
  )
  for (fragment in fragments) {
    expect_null(parse_stata_command(fragment), info = fragment)
  }
})

# Table of abbreviated Stata statements (names) and the canonical command
# each one is expected to be normalised to (values).
test_that("parse_stata_command normalizes all abbreviated commands", {
  canonical <- c(
    "ge x = 1" = "gen",
    "ren old new" = "rename",
    "u mydata" = "use",
    "sa mydata" = "save",
    "su age" = "summarize",
    "sum age" = "summarize",
    "summ age" = "summarize",
    "summa age" = "summarize",
    "br" = "browse",
    "tab status" = "tabulate",
    "lab var x 'test'" = "label",
    "mat A = 1" = "matrix",
    "matr A = 1" = "matrix",
    "matri A = 1" = "matrix"
  )
  for (stata_line in names(canonical)) {
    expect_equal(
      parse_stata_command(stata_line)$cmd,
      canonical[[stata_line]],
      info = stata_line
    )
  }
})

# Everything after the comma is expected to be available in `options`.
test_that("parse_stata_command extracts options", {
  parsed <- parse_stata_command("destring var1, replace force")
  expect_equal(parsed$cmd, "destring")
  opts <- parsed$options
  expect_match(opts, "replace")
  expect_match(opts, "force")
})

# `///` must not be removed as if it were a `//` comment: it has to survive
# so that join_continuation_lines can act on it afterwards.
test_that("strip_stata_comments preserves /// line continuation", {
  continued <- c(
    "gen x = a + ///",
    "  b"
  )
  cleaned <- strip_stata_comments(continued)
  expect_match(cleaned[1], "///")
})

# A statement split after a trailing operator is expected to be re-joined
# into one line. Blank placeholder lines, if any are returned, are ignored.
# The previous assertion also accepted `length(non_empty) <= length(lines)`,
# which holds whenever no lines are added, so it could never fail.
test_that("join_broken_expressions re-joins operator-split lines", {
  lines <- c(
    "gen x = a +",
    "b + c"
  )
  result <- metasurvey:::join_broken_expressions(lines)
  non_empty <- result[nchar(trimws(result)) > 0]
  expect_length(non_empty, 1)
  # Whitespace at the join point is not pinned down.
  expect_match(non_empty, "gen x = a \\+\\s*b \\+ c")
})

# A single complete statement is expected to be returned unchanged.
test_that("join_broken_expressions handles single line", {
  lone_line <- "gen x = 1"
  expect_equal(metasurvey:::join_broken_expressions(lone_line), lone_line)
})

# A lone number is expected back as a one-element character vector, with
# surrounding whitespace ignored.
test_that("expand_numlist handles single values", {
  plain <- expand_numlist("42")
  expect_equal(plain, "42")

  padded <- expand_numlist("  7  ")
  expect_equal(padded, "7")
})

# The closing brace shares a line with the last body statement: that
# statement is still expected in the body, and `end_idx` is expected to
# point at that line (3), not at the line after the loop.
test_that("collect_loop_body handles content before closing brace", {
  loop_src <- c(
    "foreach i in 1 2 {",
    "gen x`i' = 0",
    "gen y`i' = 1 }",
    "gen z = 3"
  )
  collected <- metasurvey:::collect_loop_body(loop_src, 1)
  expect_length(collected$body, 2)
  expect_match(collected$body[2], "gen y")
  expect_equal(collected$end_idx, 3)
})

# `forvalues i = 1/3`: every occurrence of the macro in the body is expected
# to be substituted, on both sides of the assignment.
test_that("expand_stata_loops handles forvalues", {
  loop_src <- c(
    "forvalues i = 1/3 {",
    "gen v`i' = `i'",
    "}"
  )
  unrolled <- expand_stata_loops(loop_src)
  expect_length(unrolled, 3)
  expect_match(unrolled[1], "gen v1 = 1")
  expect_match(unrolled[2], "gen v2 = 2")
  expect_match(unrolled[3], "gen v3 = 3")
})

# A `cap` prefix in front of `foreach` must not prevent the loop from being
# recognised and expanded.
test_that("expand_stata_loops handles cap foreach", {
  loop_src <- c(
    "cap foreach v in a b {",
    "gen `v'_new = `v'",
    "}"
  )
  unrolled <- expand_stata_loops(loop_src)
  expect_length(unrolled, 2)
  expect_match(unrolled[1], "gen a_new = a")
  expect_match(unrolled[2], "gen b_new = b")
})

# A definition carrying the `, add` option but never attached with `lab val`:
# no resolved value labels are expected.
# NOTE(review): the definition itself is presumably retained internally
# (val_defs), but that is not asserted here.
test_that("parse_stata_labels handles define with add option", {
  definition_only <- 'lab def status 1 "Active" 2 "Inactive", add'
  labels <- parse_stata_labels(c(definition_only))
  expect_equal(labels$val_labels, list())
})

# No input lines: both label collections are expected to be empty lists.
test_that("parse_stata_labels handles empty input", {
  no_lines <- character(0)
  labels <- parse_stata_labels(no_lines)
  expect_equal(labels$var_labels, list())
  expect_equal(labels$val_labels, list())
})

# ── stata_mappings.R tests ───────────────────────────────────────────────────

# Degenerate input is expected to pass through untouched: NULL stays NULL,
# and empty or blank strings are returned as they are.
test_that("translate_stata_expr handles NULL and empty", {
  expect_null(translate_stata_expr(NULL))

  empty <- ""
  expect_equal(translate_stata_expr(empty), empty)

  blank <- "  "
  expect_equal(translate_stata_expr(blank), blank)
})

# inrange(x, lo, hi) is expected to become a pair of inclusive comparisons.
test_that("translate_stata_expr converts inrange", {
  translated <- translate_stata_expr("inrange(age, 18, 65)")
  expect_match(translated, "age >= 18")
  expect_match(translated, "age <= 65")
})

# inlist(x, ...) is expected to become an `%in% c(...)` membership test.
test_that("translate_stata_expr converts inlist", {
  translated <- translate_stata_expr("inlist(status, 1, 2, 3)")
  expect_match(translated, "status %in% c\\(1, 2, 3\\)")
})

# Comparisons against Stata's missing value `.` are expected to become
# is.na() tests, negated for `!=`.
test_that("translate_stata_expr converts missing value comparisons", {
  is_missing <- translate_stata_expr("age==.")
  expect_match(is_missing, "is\\.na\\(age\\)")

  not_missing <- translate_stata_expr("income!=.")
  expect_match(not_missing, "!is\\.na\\(income\\)")
})

# Stata's string() is expected to be rewritten as as.character().
test_that("translate_stata_expr converts string() to as.character()", {
  translated <- translate_stata_expr("string(x)")
  expect_match(translated, "as\\.character\\(x\\)")
})

# Row-offset subscripts are expected to become shift() calls: `_n-k` a lag
# of k rows and `_n+k` a lead of k rows.
test_that("translate_stata_expr converts lag/lead _n notation", {
  lag_one <- translate_stata_expr("income[_n-1]")
  expect_match(lag_one, 'shift\\(income, 1, type = "lag"\\)')

  lead_one <- translate_stata_expr("income[_n+1]")
  expect_match(lead_one, 'shift\\(income, 1, type = "lead"\\)')

  lag_three <- translate_stata_expr("x[_n-3]")
  expect_match(lag_three, 'shift\\(x, 3, type = "lag"\\)')
})

# A standalone `_N` is expected to become `.N`; an identifier that merely
# starts with `_N` must be left alone.
test_that("translate_stata_expr converts _N to .N", {
  standalone <- translate_stata_expr("gen x = _N")
  expect_match(standalone, "\\.N")

  embedded <- translate_stata_expr("_N_obs")
  contains_dot_n <- grepl("\\.N", embedded)
  expect_false(contains_dot_n)
})

# `prefix1-prefix4` is expected to expand to every variable in between.
test_that("expand_var_range expands simple numeric range", {
  expanded <- expand_var_range("suma1-suma4")
  expect_equal(expanded, paste0("suma", 1:4))
})

# Only the trailing number is expected to vary when the shared prefix
# itself contains digits and underscores.
test_that("expand_var_range expands underscore range", {
  expanded <- expand_var_range("e51_2_1-e51_2_5")
  expect_equal(
    expanded,
    c("e51_2_1", "e51_2_2", "e51_2_3", "e51_2_4", "e51_2_5")
  )
})

# Input without a range is expected back as-is, apart from surrounding
# whitespace, which is trimmed.
test_that("expand_var_range returns single var unchanged", {
  bare <- expand_var_range("myvar")
  expect_equal(bare, "myvar")

  padded <- expand_var_range("  spaced  ")
  expect_equal(padded, "spaced")
})

# Numeric literals (including negative and decimal), quoted strings and the
# missing marker `.` are expected to count as constants; expressions that
# involve variables or function calls are not.
test_that("is_constant_rhs detects constants correctly", {
  constants <- c("42", "-9", "2.5", '"text"', ".")
  for (rhs in constants) {
    expect_true(is_constant_rhs(rhs), info = rhs)
  }

  expressions <- c("x + 1", "log(y)")
  for (rhs in expressions) {
    expect_false(is_constant_rhs(rhs), info = rhs)
  }
})

# `name = expression` is expected to be split at the `=` into `var_name`
# and `expr`.
test_that("parse_gen_args parses basic gen", {
  parsed <- parse_gen_args("y = x + 1")
  expect_equal(parsed$var_name, "y")
  expect_equal(parsed$expr, "x + 1")
})

# A storage type in front of the variable name (`byte`, `int`) is expected
# to be dropped from `var_name`.
test_that("parse_gen_args strips type prefix", {
  byte_decl <- parse_gen_args("byte flag = 1")
  expect_equal(byte_decl$var_name, "flag")
  expect_equal(byte_decl$expr, "1")

  int_decl <- parse_gen_args("int count = n + 1")
  expect_equal(int_decl$var_name, "count")
})

# Without an `=` there is no assignment to parse; NULL is expected.
test_that("parse_gen_args returns NULL without =", {
  parsed <- parse_gen_args("just a name")
  expect_null(parsed)
})

# One pair of parentheses enclosing the whole expression is expected to be
# removed. When the first "(" and the last ")" belong to different groups,
# the expression must be left untouched.
test_that("parse_gen_args strips balanced outer parentheses", {
  fully_wrapped <- parse_gen_args("y = (a + b)")
  expect_equal(fully_wrapped$expr, "a + b")

  two_groups <- parse_gen_args("y = (a + b) * (c + d)")
  expect_equal(two_groups$expr, "(a + b) * (c + d)")
})

# Each `(from=to)` group is expected to become one mapping with `from` and
# `to` held as strings.
test_that("parse_recode_args parses parenthesized mappings", {
  parsed <- parse_recode_args("status (1=10) (2=20) (3=30)")
  expect_equal(parsed$var_name, "status")
  expect_length(parsed$mappings, 3)
  first_rule <- parsed$mappings[[1]]
  expect_equal(first_rule$from, "1")
  expect_equal(first_rule$to, "10")
})

# The target variable named in a `gen(...)` option is expected in `gen_var`.
test_that("parse_recode_args parses gen() option", {
  parsed <- parse_recode_args("old_var (1=10)", options = "gen(new_var)")
  expect_equal(parsed$gen_var, "new_var")
})

# Mappings written without parentheses: a `lo/hi=value` range followed by a
# rule for the missing value `.`.
test_that("parse_recode_args handles inline range format", {
  result <- parse_recode_args("x 23/38=22 .=0")
  expect_equal(result$var_name, "x")
  # expect_length() matches the other recode tests in this file.
  expect_length(result$mappings, 2)
  # First mapping should have from_range
  expect_false(is.null(result$mappings[[1]]$from_range))
  expect_equal(result$mappings[[1]]$to, "22")
  # Second mapping is .=0
  expect_equal(result$mappings[[2]]$from, ".")
  expect_equal(result$mappings[[2]]$to, "0")
})

# Several source values inside one group are expected to share a single
# target; `from` then holds all of them, and a negative target is kept.
test_that("parse_recode_args handles multi-value parenthesized groups", {
  parsed <- parse_recode_args("var (0 3 4=-15) (1 2=10)")
  expect_length(parsed$mappings, 2)
  first_rule <- parsed$mappings[[1]]
  expect_equal(first_rule$from, c("0", "3", "4"))
  expect_equal(first_rule$to, "-15")
})

# `name = func(arg)` is expected to be decomposed into its three parts; a
# `by_group` passed in is expected back unchanged.
test_that("parse_egen_args parses basic egen", {
  parsed <- parse_egen_args("total_inc = sum(income)", by_group = "region")
  expect_equal(parsed$var_name, "total_inc")
  expect_equal(parsed$func, "sum")
  expect_equal(parsed$func_arg, "income")
  expect_equal(parsed$by_group, "region")
})

# The grouping variable may also arrive as a `by(...)` option; it is
# expected to be reported in `by_group` in that case too.
test_that("parse_egen_args extracts by() from options", {
  parsed <- parse_egen_args("mean_age = mean(age)", options = "by(region)")
  expect_equal(parsed$by_group, "region")
})

# Without an `=` there is nothing to parse; NULL is expected.
test_that("parse_egen_args returns NULL without =", {
  parsed <- parse_egen_args("just_a_name")
  expect_null(parsed)
})

# The variable list is expected to be split into individual names and the
# `mv(...)` replacement value returned as a string.
test_that("parse_mvencode_args parses variables and mv option", {
  parsed <- parse_mvencode_args("x y z", options = "mv(-99)")
  expect_equal(parsed$var_names, c("x", "y", "z"))
  expect_equal(parsed$mv_value, "-99")
})

# With no options supplied the replacement value is expected to be "0".
test_that("parse_mvencode_args defaults to mv=0", {
  parsed <- parse_mvencode_args("a b")
  expect_equal(parsed$mv_value, "0")
})

# `replace force`: both flags are expected to be TRUE, and with no gen()
# option there is no generated variable.
test_that("parse_destring_args with replace and force", {
  parsed <- parse_destring_args("myvar", options = "replace force")
  expect_equal(parsed$var_name, "myvar")
  expect_true(parsed$replace)
  expect_true(parsed$force)
  expect_null(parsed$gen_var)
})

# `gen(new_var)`: the target name is expected in `gen_var` and the replace
# flag is expected to be FALSE.
test_that("parse_destring_args with gen() option", {
  parsed <- parse_destring_args("old_var", options = "gen(new_var)")
  expect_equal(parsed$gen_var, "new_var")
  expect_false(parsed$replace)
})