test_that("translate_stata_expr handles inrange", {
  # inrange(v, lo, hi) must surface both the lower and the upper bound check.
  translated <- translate_stata_expr("inrange(e30,3,5)")
  for (bound in c("e30 >= 3", "e30 <= 5")) {
    expect_match(translated, bound)
  }
})

test_that("translate_stata_expr handles inlist", {
  # inlist(x, ...) is expected to become a membership test against c(...).
  expect_match(translate_stata_expr("inlist(x, 1, 2, 3)"), "x %in% c\\(")
})

test_that("translate_stata_expr handles missing value comparison", {
  # `var==.` compares against the missing value and must become is.na(var).
  # The dot is escaped so only a literal "is.na(" satisfies the pattern.
  is_missing <- translate_stata_expr("var==.")
  expect_match(is_missing, "is\\.na\\(var\\)")
  # The positive pattern alone would also accept "!is.na(var)", so rule out
  # an accidentally negated translation explicitly.
  expect_false(grepl("!is.na(var)", is_missing, fixed = TRUE))

  # `var!=.` must become the negated check.
  expect_match(translate_stata_expr("var!=."), "!is\\.na\\(var\\)")
})

test_that("translate_stata_expr handles string()", {
  # string(x) should map onto as.character(x).
  expect_match(translate_stata_expr("string(x)"), "as.character\\(x\\)")
})

test_that("is_constant_rhs detects constants", {
  # Literal right-hand sides: integers, negatives, decimals, quoted text and
  # the bare dot.
  constant_rhs <- c("0", "-9", "2.5", '"text"', ".")
  for (rhs in constant_rhs) {
    expect_true(is_constant_rhs(rhs))
  }

  # Variable references and arithmetic expressions are not constants.
  expression_rhs <- c("e51_2", "a + b", "6+e51_4_a")
  for (rhs in expression_rhs) {
    expect_false(is_constant_rhs(rhs))
  }
})

test_that("transpile_stata works on gen_replace fixture", {
  do_file <- system.file("stata-test-cases/gen_replace.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(do_file))
  steps <- transpile_stata(do_file)$steps
  expect_gt(length(steps), 0)
  # The constant-only gen+replace chain for bc_pe4 should yield a step_recode.
  expect_true(any(grepl("step_recode", steps)))
  # The plain gen of bc_pe2 should yield a step_compute.
  expect_true(any(grepl("step_compute.*bc_pe2", steps)))
})

test_that("transpile_stata works on gen_replace_expr fixture", {
  do_file <- system.file("stata-test-cases/gen_replace_expr.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(do_file))
  steps <- transpile_stata(do_file)$steps
  # An expression on the right-hand side should give a step_compute for
  # bc_edu rather than a step_recode.
  expect_true(any(grepl("step_compute.*bc_edu", steps)))
  # Conditional updates are expected to be expressed through fifelse.
  expect_true(any(grepl("fifelse", steps)))
})

test_that("transpile_stata works on recode fixture", {
  do_file <- system.file("stata-test-cases/recode_gen.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(do_file))
  steps <- transpile_stata(do_file)$steps
  expect_gt(length(steps), 0)
  # `recode .=0` should show up as a fifelse(is.na(...)) style step.
  expect_true(any(grepl("is.na", steps)))
})

test_that("transpile_stata works on mvencode fixture", {
  do_file <- system.file("stata-test-cases/mvencode.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(do_file))
  steps <- transpile_stata(do_file)$steps
  expect_gte(length(steps), 1)
  # Every emitted step must be a step_compute built around is.na.
  for (one_step in steps) {
    expect_match(one_step, "step_compute")
    expect_match(one_step, "is.na")
  }
})

test_that("transpile_stata works on egen_by fixture", {
  do_file <- system.file("stata-test-cases/egen_by.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(do_file))
  steps <- transpile_stata(do_file)$steps
  expect_gte(length(steps), 1)
  # At least one step has to carry a `.by =` argument.
  expect_true(any(grepl("\\.by\\s*=", steps)))
})

test_that("transpile_stata works on destring fixture", {
  do_file <- system.file("stata-test-cases/destring.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(do_file))
  steps <- transpile_stata(do_file)$steps
  expect_gte(length(steps), 1)
  # destring is expected to surface as an as.numeric conversion.
  expect_true(any(grepl("as.numeric", steps)))
})

test_that("transpile_stata returns labels from labels fixture", {
  do_file <- system.file("stata-test-cases/labels.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(do_file))
  labels <- transpile_stata(do_file)$labels
  # The fixture should give at least three variable labels and one value label.
  expect_gte(length(labels$var_labels), 3)
  expect_gte(length(labels$val_labels), 1)
})

test_that("optimize_steps collapses consecutive renames", {
  # Two adjacent renames followed by an unrelated compute step.
  optimized <- optimize_steps(c(
    'step_rename(svy, edad = "age")',
    'step_rename(svy, sexo = "sex")',
    "step_compute(svy, x = 1)"
  ))
  expect_length(optimized, 2)
  # Both renames end up, in order, inside the first step.
  expect_match(optimized[1], "edad.*sexo")
})

test_that("optimize_steps collapses consecutive removes", {
  # Two adjacent removes followed by an unrelated compute step.
  optimized <- optimize_steps(c(
    "step_remove(svy, a, b)",
    "step_remove(svy, c, d)",
    "step_compute(svy, x = 1)"
  ))
  expect_length(optimized, 2)
  # All four variables end up, in order, inside the first step.
  expect_match(optimized[1], "a.*b.*c.*d")
})

# ── optimize_steps: step_compute collapsing ──────────────────────────────────

test_that("optimize_steps collapses independent consecutive step_compute", {
  # Neither assignment reads the other's output, so one step should remain.
  optimized <- optimize_steps(c(
    "step_compute(svy, a = x + 1)",
    "step_compute(svy, b = y * 2)"
  ))
  expect_length(optimized, 1)
  merged <- optimized[1]
  expect_match(merged, "a = x \\+ 1")
  expect_match(merged, "b = y \\* 2")
})

test_that("optimize_steps preserves dependent step_compute calls", {
  # `b` reads `a`, so the two computes must stay separate.
  optimized <- optimize_steps(c(
    "step_compute(svy, a = x + 1)",
    "step_compute(svy, b = a * 2)"
  ))
  expect_length(optimized, 2)
})

test_that("optimize_steps splits chain at dependency boundary", {
  optimized <- optimize_steps(c(
    "step_compute(svy, a = x + 1)",
    "step_compute(svy, b = y + 1)",
    "step_compute(svy, c = a + b)"
  ))
  expect_length(optimized, 2)
  # `a` and `b` are independent and share the first step.
  independent <- optimized[1]
  expect_match(independent, "a = x \\+ 1")
  expect_match(independent, "b = y \\+ 1")
  # `c` reads both of them and therefore lands in the second step.
  expect_match(optimized[2], "c = a \\+ b")
})

test_that("optimize_steps collapses step_compute with same .by", {
  # Both computes group by "g", so they may be merged into one call.
  optimized <- optimize_steps(c(
    'step_compute(svy, a = mean(x), .by = "g")',
    'step_compute(svy, b = sum(y), .by = "g")'
  ))
  expect_length(optimized, 1)
  merged <- optimized[1]
  expect_match(merged, "a = mean\\(x\\)")
  expect_match(merged, "b = sum\\(y\\)")
  expect_match(merged, '\\.by = "g"')
})

test_that("optimize_steps does not collapse step_compute with different .by", {
  # The grouping differs ("g1" vs "g2"), so both steps must survive.
  optimized <- optimize_steps(c(
    'step_compute(svy, a = mean(x), .by = "g1")',
    'step_compute(svy, b = sum(y), .by = "g2")'
  ))
  expect_length(optimized, 2)
})

test_that("optimize_steps does not collapse step_compute with/without .by", {
  # One ungrouped and one grouped compute must stay apart.
  optimized <- optimize_steps(c(
    "step_compute(svy, a = x + 1)",
    'step_compute(svy, b = sum(y), .by = "g")'
  ))
  expect_length(optimized, 2)
})

test_that("optimize_steps single step_compute unchanged", {
  # A lone step has nothing to merge with and must come back verbatim.
  only_step <- "step_compute(svy, a = x + 1)"
  optimized <- optimize_steps(only_step)
  expect_length(optimized, 1)
  expect_equal(optimized[1], only_step)
})

test_that("optimize_steps does not collapse step_compute across other step types", {
  # The rename sits between the two computes, so nothing is adjacent.
  optimized <- optimize_steps(c(
    "step_compute(svy, a = 1)",
    'step_rename(svy, edad = "age")',
    "step_compute(svy, b = 2)"
  ))
  expect_length(optimized, 3)
})

test_that("optimize_steps handles complex step_compute expressions", {
  # Nested calls and commas inside the expressions must survive the merge.
  optimized <- optimize_steps(c(
    "step_compute(svy, x = data.table::fifelse(age > 18, income / 1000, 0))",
    "step_compute(svy, y = as.integer(status %in% c(1, 2, 3)))"
  ))
  expect_length(optimized, 1)
  merged <- optimized[1]
  expect_match(merged, "fifelse")
  expect_match(merged, "as\\.integer")
})

test_that("extract_output_vars finds created variables", {
  # x and y come from step_compute, z from step_recode.
  created <- extract_output_vars(c(
    "step_compute(svy, x = 1)",
    "step_compute(svy, y = x + 2)",
    "step_recode(svy, z, x == 1 ~ 'a', .default = 'b')"
  ))
  for (var_name in c("x", "y", "z")) {
    expect_true(var_name %in% created)
  }
})

# ── Task 7: _n-1/_n+1 lag/lead translation ───────────────────────────────────

test_that("translate_stata_expr handles var[_n-1] lag subscript", {
  # var[_n-1] should turn into a one-period shift() of type "lag".
  lagged <- translate_stata_expr("nucleo[_n-1]")
  expect_match(lagged, "shift\\(nucleo,\\s*1")
  expect_match(lagged, 'type\\s*=\\s*"lag"')
})

test_that("translate_stata_expr handles var[_n+1] lead subscript", {
  # var[_n+1] should turn into a one-period shift() of type "lead".
  led <- translate_stata_expr("x[_n+1]")
  expect_match(led, "shift\\(x,\\s*1")
  expect_match(led, 'type\\s*=\\s*"lead"')
})

test_that("translate_stata_expr handles _n subscript in condition", {
  # Inside an if-clause, e30[_n-1]==3 should keep the comparison and swap
  # the subscript for a shift() call.
  condition <- translate_stata_expr("e30[_n-1]==3")
  expect_match(condition, "shift\\(e30")
  expect_match(condition, "==3")
})

test_that("transpile_stata handles lag_lead fixture", {
  do_file <- system.file("stata-test-cases/lag_lead.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(do_file))
  steps <- transpile_stata(do_file)$steps
  # The var[_n-1] patterns should have produced shift() calls.
  expect_true(any(grepl("shift\\(", steps)))
  # None of the _n / _N patterns may be left for manual review.
  flagged <- grepl("MANUAL_REVIEW.*_n|MANUAL_REVIEW.*_N", steps)
  expect_false(any(flagged))
})

# ── Task 8: _N total count translation ──────────────────────────────────────

test_that("translate_stata_expr handles standalone _N", {
  # A bare _N must translate to exactly .N.
  expect_equal(translate_stata_expr("_N"), ".N")
})

test_that("translate_stata_expr handles _N in expression", {
  # _N embedded in arithmetic is replaced in place.
  expect_match(translate_stata_expr("income / _N"), "income / \\.N")
})

test_that("transpile produces .N for bysort gen _N pattern", {
  do_file <- system.file("stata-test-cases/lag_lead.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(do_file))
  steps <- transpile_stata(do_file)$steps
  # "bysort bc_correlat: gen max_nper = _N" should give a step using .N ...
  expect_true(any(grepl("\\.N", steps)))
  # ... and the bysort prefix should become a .by on bc_correlat.
  expect_true(any(grepl('\\.by\\s*=.*"bc_correlat"', steps)))
})

# ── Task 9: Variable range expansion (suma1-suma4) ──────────────────────────

test_that("expand_var_range expands var1-var4 to individual vars", {
  # A numeric-suffix range is enumerated in order.
  expect_equal(
    expand_var_range("suma1-suma4"),
    c("suma1", "suma2", "suma3", "suma4")
  )
})

test_that("expand_var_range passes through single variable", {
  # A name without a range is returned as is.
  single <- "income"
  expect_equal(expand_var_range(single), "income")
})

test_that("expand_var_range handles alpha range", {
  # Names with embedded underscores and digits, e.g. e51_2_1-e51_2_5
  # (common in ECH), only vary in the trailing number.
  expanded <- expand_var_range("e51_2_1-e51_2_5")
  expect_equal(expanded, paste0("e51_2_", 1:5))
})

test_that("recode with variable range expands to multiple variables", {
  do_file <- system.file("stata-test-cases/var_range.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(do_file))
  transpiled <- transpile_stata(do_file)
  # `recode suma1-suma4 (.=0)` should mention every one of the four variables
  # somewhere in the generated steps.
  joined_steps <- paste(transpiled$steps, collapse = " ")
  for (var_name in paste0("suma", 1:4)) {
    expect_match(joined_steps, var_name)
  }
})

test_that("mvencode with variable range expands to individual steps", {
  do_file <- system.file("stata-test-cases/var_range.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(do_file))
  transpiled <- transpile_stata(do_file)
  # `mvencode suma1-suma4, mv(0)` should give an is.na() check per variable.
  joined_steps <- paste(transpiled$steps, collapse = " ")
  for (var_name in paste0("suma", 1:4)) {
    na_check <- paste0("is.na\\(", var_name, "\\)")
    expect_match(joined_steps, na_check)
  }
})

# ── Task 10: fifelse type coercion ──────────────────────────────────────────

test_that("translate_gen_block wraps fifelse init with matching type", {
  # Background: with init = -9 and constant RHS values, a mixed call such as
  # fifelse(cond, "1", -9) would fail data.table's type check; the transpiled
  # step_recode avoids that by producing string output.
  #
  # What is asserted here is only the building block: a negative numeric
  # literal goes through expression translation unchanged.
  expect_equal(translate_stata_expr("-9"), "-9")
})

test_that("fifelse with numeric default and numeric RHS produces valid R", {
  # `gen v = -9` followed by `replace v = 1 if cond`: both values are numeric,
  # so whatever chain is generated must at least be parseable R.
  init_cmd <- list(
    cmd = "gen", args = "v = -9",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "gen v = -9", line_num = 1L, capture = FALSE
  )
  replace_cmd <- list(
    cmd = "replace", args = "v = 1",
    if_clause = "x==1", options = NULL, by_group = NULL,
    raw_line = "replace v = 1 if x==1", line_num = 2L, capture = FALSE
  )
  block <- translate_gen_block(list(init_cmd, replace_cmd), 1)
  expect_gte(length(block$steps), 1)
  for (step_text in block$steps) {
    expect_no_error(parse(text = step_text))
  }
})

# ── Task 11: gen with bysort prefix ─────────────────────────────────────────

test_that("gen with bysort prefix produces .by in step_compute", {
  # by_group carries the bysort variable; _N is the right-hand side.
  bysort_gen <- list(
    cmd = "gen", args = "max_nper = _N",
    if_clause = NULL, options = NULL, by_group = "bc_correlat",
    raw_line = "gen max_nper = _N", line_num = 1L, capture = FALSE
  )
  block <- translate_gen_block(list(bysort_gen), 1)
  expect_gte(length(block$steps), 1)
  first_step <- block$steps[[1]]
  expect_match(first_step, "\\.by")
  expect_match(first_step, "\\.N")
})

test_that("bare gen without = produces step_compute with NA", {
  # `gen ine_ht13` has no assignment, so the variable is initialised to NA.
  bare_gen <- list(
    cmd = "gen", args = "ine_ht13",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "gen ine_ht13", line_num = 1L, capture = TRUE
  )
  block <- translate_gen_block(list(bare_gen), 1)
  expect_gte(length(block$steps), 1)
  expect_match(block$steps[[1]], "ine_ht13 = NA")
})

test_that("gen with multi-var bysort produces c() in .by", {
  # Two space-separated bysort variables should be emitted as a c("..", "..").
  multi_by_gen <- list(
    cmd = "gen", args = "tot = sum(x)",
    if_clause = NULL, options = NULL, by_group = "g1 g2",
    raw_line = "gen tot = sum(x)", line_num = 1L, capture = FALSE
  )
  block <- translate_gen_block(list(multi_by_gen), 1)
  expect_gte(length(block$steps), 1)
  expect_match(block$steps[[1]], 'c\\("g1", "g2"\\)')
})

# ── Multi-variable destring ──────────────────────────────────────────────────

test_that("destring with multiple variables produces one step per var", {
  destring_cmd <- list(
    cmd = "destring", args = "g144_1 g261 g261_1",
    if_clause = NULL, options = "replace force", by_group = NULL,
    raw_line = "destring g144_1 g261 g261_1, replace force",
    line_num = 1L, capture = FALSE
  )
  steps <- translate_destring(destring_cmd)$steps
  expect_length(steps, 3)
  # Steps come back in the order the variables were listed.
  listed_vars <- c("g144_1", "g261", "g261_1")
  for (i in seq_along(listed_vars)) {
    expect_match(steps[[i]], listed_vars[i])
  }
  # Each step converts with as.numeric and is parseable R.
  for (step_text in steps) {
    expect_match(step_text, "as.numeric")
    expect_no_error(parse(text = step_text))
  }
})

# ── Drop with STATA variable range ──────────────────────────────────────────

test_that("drop with STATA variable range expands to individual vars", {
  drop_cmd <- list(
    cmd = "drop", args = "aux1-aux14_max",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "drop aux1-aux14_max", line_num = 1L, capture = FALSE
  )
  translated <- translate_drop(drop_cmd)
  expect_gte(length(translated$steps), 1)
  remove_step <- translated$steps[[1]]
  # The step should list aux1 .. aux14 as well as aux1_max .. aux14_max;
  # the endpoints of both series are spot-checked.
  for (needle in c("aux1,", "aux14", "aux1_max", "aux14_max")) {
    expect_match(remove_step, needle)
  }
})

test_that("expand_var_range handles suffixed ranges like aux1-aux14_max", {
  expanded <- expand_var_range("aux1-aux14_max")
  # Expected expansion: aux1..aux14 plus aux1_max..aux14_max (14 + 14 names).
  for (endpoint in c("aux1", "aux14", "aux1_max", "aux14_max")) {
    expect_true(endpoint %in% expanded)
  }
  expect_length(expanded, 28)
})

# ── Multi-variable recode ────────────────────────────────────────────────────

test_that("recode with multiple space-separated variables expands", {
  recode_cmd <- list(
    cmd = "recode", args = "g144_1 g261 g261_1 (.=0)",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "recode g144_1 g261 g261_1 (.=0)",
    line_num = 1L, capture = FALSE
  )
  steps <- translate_recode(recode_cmd)$steps
  # One step per listed variable, in listing order. The middle pattern
  # excludes a trailing underscore so it cannot be satisfied by g261_1.
  expect_length(steps, 3)
  per_step_pattern <- c("g144_1", "g261[^_]", "g261_1")
  for (i in seq_along(per_step_pattern)) {
    expect_match(steps[[i]], per_step_pattern[i])
  }
})

test_that("transpile_stata_module groups files by module", {
  skip_on_cran()
  relative_dir <- file.path("do_files_iecon", "2022")
  # First candidate: three levels above the installed package directory.
  candidate_dir <- file.path(
    system.file(package = "metasurvey"),
    "..", "..", "..", relative_dir
  )
  # Otherwise fall back to the path relative to the working directory.
  module_dir <- if (dir.exists(candidate_dir)) candidate_dir else relative_dir
  skip_if_not(dir.exists(module_dir),
    message = "do_files_iecon/2022 not available"
  )

  recipes <- transpile_stata_module(module_dir, year = 2022)
  expect_true(is.list(recipes))
  expect_gte(length(recipes), 1)
  # Every element must be a non-empty Recipe.
  for (recipe in recipes) {
    expect_true(inherits(recipe, "Recipe"))
    expect_gt(length(recipe$steps), 0)
  }
})


# --- Merged from test-transpile-e2e.R ---

# End-to-end integration test: transpile do-files -> load ECH -> bake recipes
# Requires: do_files_iecon/ directory and example-data/ech/ech_2022.csv
# Skipped on CRAN and when data is not available

# End-to-end check: transpile the 2022 do-files into per-module recipes, load
# a 500-row sample of the ECH CSV as a survey, then evaluate every generated
# step one at a time. Errors matching a list of expected message patterns are
# tolerated; any other error is reported through fail(). The test passes when
# at least 2 modules and at least 50 steps evaluate successfully.
test_that("transpile 2022 do-files and bake sequentially on ECH data", {
  skip_on_cran()
  # Locate the package root two levels above the test directory; both data
  # locations below are optional and the test is skipped without them.
  pkg_root <- normalizePath(test_path("..", ".."), mustWork = FALSE)
  year_dir <- file.path(pkg_root, "do_files_iecon", "2022")
  skip_if_not(dir.exists(year_dir), "do_files_iecon/2022 not available")

  ech_path <- file.path(pkg_root, "example-data", "ech", "ech_2022.csv")
  skip_if_not(file.exists(ech_path), "ECH 2022 CSV not available")

  # Step 1: Transpile do-files into recipes by module
  recipes <- transpile_stata_module(year_dir, year = 2022)
  expect_true(is.list(recipes))
  expect_true(length(recipes) >= 1)

  # Step 2: Load a SAMPLE of ECH data (first 500 rows for speed/memory)
  ech_data <- data.table::fread(ech_path, nrows = 500)
  # Lower-case the column names before writing the sample back out.
  data.table::setnames(ech_data, tolower(names(ech_data)))
  # The sample is round-tripped through a temp CSV because load_survey() is
  # called with a path; the file is removed when the test exits.
  tmp_path <- tempfile(fileext = ".csv")
  on.exit(unlink(tmp_path), add = TRUE)
  data.table::fwrite(ech_data, tmp_path)
  # Force the data.table engine for this test and restore the previous option
  # value (possibly NULL, which unsets it) on exit.
  old_engine <- getOption("metasurvey.engine")
  options(metasurvey.engine = "data.table")
  on.exit(options(metasurvey.engine = old_engine), add = TRUE)
  svy <- load_survey(
    path = tmp_path,
    svy_type = "ech",
    svy_edition = "2022",
    svy_weight = list(annual = "w_ano")
  )
  expect_true(nrow(svy$data) > 0)

  # Step 3: Apply recipes SEQUENTIALLY in module order
  module_order <- c(
    "data_prep", "demographics", "income_detail",
    "income_aggregate", "cleanup"
  )

  # Names of modules with at least one successful step, and the running total
  # of successful steps across all modules.
  baked_modules <- character(0)
  total_steps_ok <- 0
  for (module_name in module_order) {
    # Modules missing from the transpiled output are simply skipped.
    if (!module_name %in% names(recipes)) next
    rec <- recipes[[module_name]]

    # Drop comment-only steps (lines starting with "#").
    executable_steps <- rec$steps[!grepl("^#", rec$steps)]
    if (length(executable_steps) == 0) next

    # Bake steps one by one, skipping expected missing-variable errors
    # NOTE(review): parent.frame() inside a test_that() body resolves to a
    # frame chosen by testthat's evaluation machinery; environment() may be
    # the intended parent here — confirm before changing.
    eval_env <- new.env(parent = parent.frame())
    eval_env$svy <- svy
    steps_ok <- 0
    for (step_str in executable_steps) {
      tryCatch(
        {
          step_call <- parse(text = step_str)[[1]]
          # If the step's first argument is the placeholder symbol `.`,
          # substitute `svy` so the call reads the survey held in eval_env.
          if (is.call(step_call) && length(step_call) >= 2 &&
            is.name(step_call[[2]]) &&
            as.character(step_call[[2]]) == ".") {
            step_call[[2]] <- as.name("svy")
          }
          # The result of each step becomes the input of the next one.
          eval_env$svy <- eval(step_call, envir = eval_env)
          steps_ok <- steps_ok + 1
        },
        error = function(e) {
          msg <- conditionMessage(e)
          # Skip expected errors:
          # - missing variables (orchestrator creates them)
          # - parse errors (untranslated STATA syntax like _n, _N)
          # - undefined columns
          # The pattern below also tolerates type-mismatch messages
          # ("type ... but", "same type").
          is_expected <- grepl(
            paste0(
              "not found|not exist|Variables to rename|",
              "undefined columns|cannot remove|object.*not found|",
              "unexpected symbol|unexpected '='|parse error|",
              "type logical but|type character but|type double but|",
              "type integer but|same type"
            ),
            msg
          )
          if (!is_expected) {
            # Report the module, the message and the first 120 characters of
            # the offending step.
            fail(sprintf(
              "Module '%s' unexpected error: %s\nStep: %s",
              module_name, msg, substr(step_str, 1, 120)
            ))
          }
        }
      )
    }
    # Carry the survey state forward into the next module.
    svy <- eval_env$svy

    if (steps_ok > 0) {
      baked_modules <- c(baked_modules, module_name)
      total_steps_ok <- total_steps_ok + steps_ok
    }
  }

  # At least some modules and steps should have baked successfully
  expect_true(
    length(baked_modules) >= 2,
    info = paste("Baked modules:", paste(baked_modules, collapse = ", "))
  )
  expect_true(total_steps_ok >= 50,
    info = paste("Total steps baked:", total_steps_ok)
  )
})

test_that("transpile single do-file produces valid steps for bake", {
  skip_on_cran()
  do_file <- system.file("stata-test-cases/gen_replace.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(do_file))

  transpiled <- transpile_stata(do_file)
  expect_gt(length(transpiled$steps), 0)

  # Four-row survey extended with the source variables the steps need.
  svy <- make_test_survey(n = 4)
  dt <- svy$data
  dt[, e26 := c(1, 2, 1, 2)]
  dt[, e27 := c(25, 30, 45, 50)]
  dt[, e30 := c(1, 2, 3, 7)]
  dt[, region_4 := c(1, 2, 3, 4)]
  svy <- set_data(svy, dt)

  # Wrap the transpiled steps in a recipe, attach it without baking, and
  # then bake explicitly.
  fixture_recipe <- Recipe$new(
    id = "test_gen_replace",
    name = "Test gen_replace",
    user = "test",
    edition = "2022",
    survey_type = "ech",
    default_engine = "data.table",
    depends_on = list(),
    description = "Test fixture",
    steps = transpiled$steps,
    topic = "test"
  )
  svy <- add_recipe(svy, fixture_recipe, bake = FALSE)
  baked <- bake_recipes(svy)

  expect_true(inherits(baked, "Survey"))
  baked_columns <- names(baked$data)
  for (created in c("bc_pe4", "bc_pe2", "bc_pe3")) {
    expect_true(created %in% baked_columns)
  }
  # step_recode yields character/factor output, hence the as.character().
  # With e30 = c(1, 2, 3, 7) the last matching condition wins, because the
  # replaces overwrite sequentially.
  expect_equal(as.character(baked$data$bc_pe4), c("1", "2", "3", "5"))
  expect_equal(baked$data$bc_pe2, c(1, 2, 1, 2))
})

# --- translate_commands, translate_replace, translate_keep, etc. ---

test_that("translate_commands skips label and sort commands", {
  # sort and label are expected to be skipped; only gen is translated.
  sort_cmd <- list(
    cmd = "sort", args = "id",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "sort id", line_num = 1L, capture = FALSE
  )
  label_cmd <- list(
    cmd = "label", args = "variable x 'test'",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "label variable x 'test'", line_num = 2L, capture = FALSE
  )
  gen_cmd <- list(
    cmd = "gen", args = "y = 1",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "gen y = 1", line_num = 3L, capture = FALSE
  )
  translated <- translate_commands(list(sort_cmd, label_cmd, gen_cmd))
  expect_equal(translated$stats$skipped, 2L)
  expect_equal(translated$stats$translated, 1L)
  expect_length(translated$steps, 1)
})

test_that("translate_commands emits MANUAL_REVIEW for unhandled commands", {
  # In non-strict mode an unsupported command is counted and warned about.
  regress_cmd <- list(
    cmd = "regress", args = "y x1 x2",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "regress y x1 x2", line_num = 1L, capture = FALSE
  )
  translated <- translate_commands(list(regress_cmd), strict = FALSE)
  expect_equal(translated$stats$manual_review, 1L)
  expect_length(translated$warnings, 1)
  expect_match(translated$warnings[1], "MANUAL_REVIEW")
})

test_that("translate_commands strict mode errors on unhandled command", {
  # In strict mode the same unsupported command must raise an error.
  regress_cmd <- list(
    cmd = "regress", args = "y x1 x2",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "regress y x1 x2", line_num = 1L, capture = FALSE
  )
  unhandled <- list(regress_cmd)
  expect_error(translate_commands(unhandled, strict = TRUE), "MANUAL_REVIEW")
})

test_that("translate_replace standalone without if clause", {
  # An unconditional replace becomes a single, parseable step_compute.
  replace_cmd <- list(
    cmd = "replace", args = "x = 0",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "replace x = 0", line_num = 1L, capture = FALSE
  )
  steps <- translate_replace(replace_cmd)$steps
  expect_length(steps, 1)
  only_step <- steps[[1]]
  expect_match(only_step, "step_compute.*x = 0")
  expect_no_error(parse(text = only_step))
})

test_that("translate_replace with if clause uses fifelse", {
  # The if-clause must reappear as the condition of a fifelse.
  replace_cmd <- list(
    cmd = "replace", args = "x = 1",
    if_clause = "age > 30", options = NULL, by_group = NULL,
    raw_line = "replace x = 1 if age > 30", line_num = 1L, capture = FALSE
  )
  steps <- translate_replace(replace_cmd)$steps
  expect_length(steps, 1)
  only_step <- steps[[1]]
  expect_match(only_step, "fifelse")
  expect_match(only_step, "age > 30")
})

test_that("translate_replace returns NULL for unparseable args", {
  # Empty args give nothing to assign, so no translation is produced.
  empty_replace <- list(
    cmd = "replace", args = "",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "replace", line_num = 1L, capture = FALSE
  )
  expect_null(translate_replace(empty_replace))
})

test_that("translate_keep emits MANUAL_REVIEW", {
  # keep is not translated; it is surfaced as a manual-review marker.
  keep_cmd <- list(
    cmd = "keep", args = "id age income",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "keep id age income", line_num = 1L, capture = FALSE
  )
  steps <- translate_keep(keep_cmd)$steps
  expect_length(steps, 1)
  for (needle in c("MANUAL_REVIEW", "keep")) {
    expect_match(steps[[1]], needle)
  }
})

test_that("translate_tostring produces as.character step", {
  # tostring on one variable gives a single parseable as.character() step.
  tostring_cmd <- list(
    cmd = "tostring", args = "edad",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "tostring edad", line_num = 1L, capture = FALSE
  )
  steps <- translate_tostring(tostring_cmd)$steps
  expect_length(steps, 1)
  only_step <- steps[[1]]
  expect_match(only_step, "as.character\\(edad\\)")
  expect_no_error(parse(text = only_step))
})

test_that("translate_tostring strips options", {
  # Options trailing the variable in `args` must not leak into the step.
  tostring_cmd <- list(
    cmd = "tostring", args = "x, replace force",
    if_clause = NULL, options = "replace force", by_group = NULL,
    raw_line = "tostring x, replace force", line_num = 1L, capture = FALSE
  )
  first_step <- translate_tostring(tostring_cmd)$steps[[1]]
  expect_match(first_step, "step_compute.*x = as.character\\(x\\)")
})

test_that("translate_merge emits MANUAL_REVIEW", {
  # merge is not translated; it is surfaced as a manual-review marker.
  merge_cmd <- list(
    cmd = "merge", args = "1:1 id using data2.dta",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "merge 1:1 id using data2.dta", line_num = 1L, capture = FALSE
  )
  steps <- translate_merge(merge_cmd)$steps
  expect_length(steps, 1)
  for (needle in c("MANUAL_REVIEW", "merge")) {
    expect_match(steps[[1]], needle)
  }
})

test_that("translate_rename returns NULL for single token", {
  # A rename needs an old and a new name; one token cannot be translated.
  incomplete_rename <- list(
    cmd = "rename", args = "only_one",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "rename only_one", line_num = 1L, capture = FALSE
  )
  expect_null(translate_rename(incomplete_rename))
})

test_that("translate_rename produces step_rename", {
  # The step is written as new_name = "old_name".
  rename_cmd <- list(
    cmd = "rename", args = "old_name new_name",
    if_clause = NULL, options = NULL, by_group = NULL,
    raw_line = "rename old_name new_name", line_num = 1L, capture = FALSE
  )
  steps <- translate_rename(rename_cmd)$steps
  expect_length(steps, 1)
  expect_match(steps[[1]], 'step_rename.*new_name.*=.*"old_name"')
})

# --- extract_input_vars, build_doc_from_steps, transpile_coverage, filter_labels ---

test_that("extract_input_vars finds referenced variables excluding outputs", {
  inputs <- extract_input_vars(c(
    "step_compute(svy, z = age + income)",
    "step_compute(svy, w = z * 2)"
  ))
  # age and income are read but never created, so they count as inputs.
  for (source_var in c("age", "income")) {
    expect_true(source_var %in% inputs)
  }
  # z is produced by the first step and svy is the survey argument;
  # neither may be reported as an input.
  for (non_input in c("z", "svy")) {
    expect_false(non_input %in% inputs)
  }
})

test_that("extract_input_vars skips comment lines", {
  # A leading comment line must not stop the real step from being scanned.
  inputs <- extract_input_vars(c(
    "# MANUAL_REVIEW: some comment",
    "step_compute(svy, x = age + 1)"
  ))
  expect_true("age" %in% inputs)
})

test_that("build_doc_from_steps produces pipeline with compute steps", {
  doc <- build_doc_from_steps(c(
    "step_compute(svy, x = age + 1)",
    "step_compute(svy, y = x * 2)"
  ))
  # The doc is a list exposing inputs, outputs and the pipeline.
  expect_true(is.list(doc))
  for (field in c("input_variables", "output_variables", "pipeline")) {
    expect_true(field %in% names(doc))
  }
  # x and y are created; age is only read.
  for (created in c("x", "y")) {
    expect_true(created %in% doc$output_variables)
  }
  expect_true("age" %in% doc$input_variables)
  # One pipeline entry per step, typed "compute".
  expect_length(doc$pipeline, 2)
  expect_equal(doc$pipeline[[1]]$type, "compute")
})

test_that("build_doc_from_steps handles recode steps", {
  # A step_recode is typed "recode" and its target variable is an output.
  recode_step <-
    "step_recode(svy, age_cat, age < 30 ~ 'young', .default = 'old')"
  doc <- build_doc_from_steps(c(recode_step))
  expect_equal(doc$pipeline[[1]]$type, "recode")
  expect_true("age_cat" %in% doc$output_variables)
})

test_that("build_doc_from_steps handles rename steps", {
  # A step_rename is typed "rename"; the new names are reported as outputs.
  rename_step <- 'step_rename(svy, edad = "age", sexo = "sex")'
  doc <- build_doc_from_steps(c(rename_step))
  expect_equal(doc$pipeline[[1]]$type, "rename")
  for (new_name in c("edad", "sexo")) {
    expect_true(new_name %in% doc$output_variables)
  }
})

test_that("build_doc_from_steps handles remove steps", {
  # A step_remove is typed "remove"; removed names appear among the outputs.
  remove_step <- "step_remove(svy, tmp1, tmp2)"
  doc <- build_doc_from_steps(c(remove_step))
  expect_equal(doc$pipeline[[1]]$type, "remove")
  expect_true("tmp1" %in% doc$output_variables)
})

test_that("build_doc_from_steps skips comment lines", {
  # Only the real step may end up in the pipeline.
  doc <- build_doc_from_steps(c(
    "# MANUAL_REVIEW: skip this",
    "step_compute(svy, x = 1)"
  ))
  expect_length(doc$pipeline, 1)
})

test_that("transpile_coverage reports on a single .do file", {
  do_file <- system.file("stata-test-cases/gen_replace.do",
    package = "metasurvey"
  )
  skip_if_not(file.exists(do_file))
  coverage <- transpile_coverage(do_file)
  expect_true(is.data.frame(coverage))
  expect_true("coverage_pct" %in% names(coverage))
  # One row for the file itself plus the TOTAL summary row.
  expect_true("TOTAL" %in% coverage$file)
  expect_gte(nrow(coverage), 2)
  expect_true(coverage$translated[1] >= 1)
})

test_that("transpile_coverage reports on a directory", {
  cases_dir <- system.file("stata-test-cases", package = "metasurvey")
  skip_if_not(dir.exists(cases_dir))
  coverage <- transpile_coverage(cases_dir)
  expect_true(is.data.frame(coverage))
  # Expect one row per .do file in the directory, plus the TOTAL row.
  do_files <- list.files(cases_dir, pattern = "\\.do$")
  expect_equal(nrow(coverage), length(do_files) + 1)
})

test_that("transpile_coverage errors on non-existent path", {
  # A path that does not exist must be rejected with a clear message.
  missing_path <- "/nonexistent/path"
  expect_error(transpile_coverage(missing_path), "Path not found")
})

test_that("filter_labels filters to specified variables", {
  var_labels <- list(age = "Age", sex = "Sex", income = "Income")
  val_labels <- list(
    sex = list("1" = "M", "2" = "F"),
    region = list("1" = "N")
  )
  all_labels <- list(var_labels = var_labels, val_labels = val_labels)

  kept <- filter_labels(all_labels, c("age", "sex"))
  # Only requested variables remain: income and region are dropped, and age
  # has no value labels to keep.
  expect_equal(names(kept$var_labels), c("age", "sex"))
  expect_equal(names(kept$val_labels), "sex")
})

# --- translate_gen_block edge cases, recode range ---

test_that("translate_gen_block with if clause (no replace) uses fifelse", {
  # A conditional gen with no following replace still needs a fifelse.
  conditional_gen <- list(
    cmd = "gen", args = "eligible = 1",
    if_clause = "age >= 18", options = NULL, by_group = NULL,
    raw_line = "gen eligible = 1 if age >= 18",
    line_num = 1L, capture = FALSE
  )
  block <- translate_gen_block(list(conditional_gen), 1)
  expect_gte(length(block$steps), 1)
  first_step <- block$steps[[1]]
  for (needle in c("fifelse", "age >= 18", "eligible")) {
    expect_match(first_step, needle)
  }
  expect_no_error(parse(text = first_step))
})

test_that("translate_gen_block gen+replace all constants produces step_recode", {
  # A gen with a constant value followed by two conditional replaces whose
  # right-hand sides are also constants.
  cmds <- list(
    list(
      cmd = "gen", args = "cat = 0", if_clause = NULL,
      options = NULL, by_group = NULL, raw_line = "gen cat = 0",
      line_num = 1L, capture = FALSE
    ),
    list(
      cmd = "replace", args = "cat = 1", if_clause = "age < 30",
      options = NULL, by_group = NULL, raw_line = "replace cat = 1 if age < 30",
      line_num = 2L, capture = FALSE
    ),
    list(
      cmd = "replace", args = "cat = 2", if_clause = "age >= 30 & age < 65",
      options = NULL, by_group = NULL,
      raw_line = "replace cat = 2 if age >= 30 & age < 65",
      line_num = 3L, capture = FALSE
    )
  )
  result <- translate_gen_block(cmds, 1)
  expect_true(length(result$steps) >= 1)
  expect_match(result$steps[[1]], "step_recode")
  # Match ".default" literally: as a regular expression the leading "." would
  # accept any character, making the assertion weaker than intended.
  expect_match(result$steps[[1]], ".default", fixed = TRUE)
  # All three commands (gen + two replaces) are consumed as one block.
  expect_equal(result$advance, 3)
})

test_that("translate_gen_block gen+replace mixed (expression RHS) uses fifelse chain", {
  # Constant initialisation followed by a replace whose RHS is an expression.
  init_cmd <- list(
    cmd = "gen",
    args = "ratio = 0",
    if_clause = NULL,
    options = NULL,
    by_group = NULL,
    raw_line = "gen ratio = 0",
    line_num = 1L,
    capture = FALSE
  )
  replace_cmd <- list(
    cmd = "replace",
    args = "ratio = income / 1000",
    if_clause = "age > 18",
    options = NULL,
    by_group = NULL,
    raw_line = "replace ratio = income / 1000 if age > 18",
    line_num = 2L,
    capture = FALSE
  )
  block <- translate_gen_block(list(init_cmd, replace_cmd), 1)

  # Expect an initialisation step followed by the fifelse step.
  expect_gte(length(block$steps), 2)
  expect_match(block$steps[[1]], "step_compute.*ratio = 0")
  expect_match(block$steps[[2]], "fifelse")
  # Every generated step must be syntactically valid R.
  for (step_code in block$steps) {
    expect_no_error(parse(text = step_code))
  }
})

test_that("translate_gen_block bare gen with byte type prefix", {
  # Declaration without an assignment, prefixed with the "byte" type.
  bare_gen <- list(
    cmd = "gen",
    args = "byte flag",
    if_clause = NULL,
    options = NULL,
    by_group = NULL,
    raw_line = "gen byte flag",
    line_num = 1L,
    capture = TRUE
  )
  block <- translate_gen_block(list(bare_gen), 1)
  expect_gte(length(block$steps), 1)
  expect_match(block$steps[[1]], "flag = NA")
})

test_that("translate_recode_single handles range mapping (23/38=22)", {
  range_rule <- list(from_range = c(23, 38), to = "22")
  missing_rule <- list(from = ".", to = "0")
  recode_spec <- list(
    var_name = "edad",
    gen_var = NULL,
    mappings = list(range_rule, missing_rule)
  )

  recode_steps <- translate_recode_single(recode_spec)

  expect_gte(length(recode_steps), 2)
  # Range rule: both bounds appear in the first step.
  expect_match(recode_steps[1], "edad >= 23")
  expect_match(recode_steps[1], "edad <= 38")
  # "." rule: an is.na test appears in the second step.
  expect_match(recode_steps[2], "is.na")
})

test_that("translate_recode_single handles multi-value from", {
  # A single rule mapping several source values to one target value.
  multi_rule <- list(from = c("1", "2", "3"), to = "99")
  recode_spec <- list(
    var_name = "x",
    gen_var = NULL,
    mappings = list(multi_rule)
  )

  recode_steps <- translate_recode_single(recode_spec)

  expect_gte(length(recode_steps), 1)
  expect_match(recode_steps[1], "%in%")
  expect_match(recode_steps[1], "c\\(1, 2, 3\\)")
})

test_that("translate_recode_single with gen() copies variable first", {
  recode_spec <- list(
    var_name = "source",
    gen_var = "target",
    mappings = list(list(from = c("1"), to = "99"))
  )

  recode_steps <- translate_recode_single(recode_spec)

  # Step 1 copies the source into the new variable ...
  expect_match(recode_steps[1], "target = source")
  # ... and step 2 applies the recode to that new variable.
  expect_match(recode_steps[2], "target.*fifelse")
})

test_that("translate_drop with if clause emits MANUAL_REVIEW", {
  conditional_drop <- list(
    cmd = "drop",
    args = "x",
    if_clause = "age < 18",
    options = NULL,
    by_group = NULL,
    raw_line = "drop if age < 18",
    line_num = 1L,
    capture = FALSE
  )
  out <- translate_drop(conditional_drop)

  # The first emitted step is expected to be a review note, not a translation.
  review_note <- out$steps[[1]]
  expect_match(review_note, "MANUAL_REVIEW")
  expect_match(review_note, "observation deletion")
})

# ── Additional transpile coverage push ────────────────────────────────────────

test_that("translate_egen without by-group produces simple step_compute", {
  egen_cmd <- list(
    cmd = "egen",
    args = "total_inc = sum(income)",
    if_clause = NULL,
    options = NULL,
    by_group = NULL,
    raw_line = "egen total_inc = sum(income)",
    line_num = 1L,
    capture = FALSE
  )
  generated <- translate_egen(egen_cmd)$steps

  expect_match(generated, "step_compute")
  expect_match(generated, "total_inc")
  expect_match(generated, "sum.*income.*na.rm = TRUE")
  # With by_group = NULL the step must not carry a .by argument.
  expect_false(grepl("\\.by", generated))
})

test_that("translate_egen with multiple by-group vars produces c() for .by", {
  # Two grouping variables supplied as one space-separated string.
  grouped_egen <- list(
    cmd = "egen",
    args = "mean_age = mean(age)",
    if_clause = NULL,
    options = NULL,
    by_group = "region status",
    raw_line = "bysort region status: egen mean_age = mean(age)",
    line_num = 1L,
    capture = FALSE
  )
  generated <- translate_egen(grouped_egen)$steps

  expect_match(generated, "\\.by = c\\(")
  expect_match(generated, '"region"')
  expect_match(generated, '"status"')
})

test_that("translate_egen with unknown function passes through", {
  rowtotal_cmd <- list(
    cmd = "egen",
    args = "val = rowtotal(a b c)",
    if_clause = NULL,
    options = NULL,
    by_group = NULL,
    raw_line = "egen val = rowtotal(a b c)",
    line_num = 1L,
    capture = FALSE
  )
  generated <- translate_egen(rowtotal_cmd)$steps
  # The original call text is expected to appear unchanged in the output.
  expect_match(generated, "rowtotal\\(a b c\\)")
})

test_that("translate_egen returns NULL for invalid egen syntax", {
  # The args string contains no "=", so there is no assignment to translate.
  malformed_egen <- list(
    cmd = "egen",
    args = "no_equals_sign",
    if_clause = NULL,
    options = NULL,
    by_group = NULL,
    raw_line = "egen no_equals_sign",
    line_num = 1L,
    capture = FALSE
  )
  out <- translate_egen(malformed_egen)
  expect_null(out)
})

test_that("translate_destring produces as.numeric conversion", {
  destring_cmd <- list(
    cmd = "destring",
    args = "income_str",
    if_clause = NULL,
    options = "replace",
    by_group = NULL,
    raw_line = "destring income_str, replace",
    line_num = 1L,
    capture = FALSE
  )
  generated <- translate_destring(destring_cmd)$steps

  expect_match(generated, "step_compute")
  # Conversion is expected to go through as.character before as.numeric.
  expect_match(generated, "as\\.numeric.*as\\.character.*income_str")
})

test_that("translate_destring with force uses suppressWarnings", {
  # Same command shape as a plain destring, but with "force" in the options.
  forced_destring <- list(
    cmd = "destring",
    args = "x",
    if_clause = NULL,
    options = "replace force",
    by_group = NULL,
    raw_line = "destring x, replace force",
    line_num = 1L,
    capture = FALSE
  )
  generated <- translate_destring(forced_destring)$steps
  expect_match(generated, "suppressWarnings")
})

test_that("translate_mvencode handles multiple variables with range", {
  # Three variables share one missing-value code, so one step per variable
  # is expected.
  cmd <- list(
    cmd = "mvencode", args = "a b c", if_clause = NULL,
    options = "mv(-99)", by_group = NULL,
    raw_line = "mvencode a b c, mv(-99)", line_num = 1L, capture = FALSE
  )
  result <- translate_mvencode(cmd)
  expect_length(result$steps, 3)
  expect_match(result$steps[1], "fifelse.*is\\.na.*a.*-99")
  expect_match(result$steps[2], "fifelse.*is\\.na.*b.*-99")
  # The third variable was previously only counted, never content-checked.
  expect_match(result$steps[3], "fifelse.*is\\.na.*c.*-99")
})

test_that("translate_drop with empty args returns NULL", {
  # A bare "drop" with nothing to drop.
  empty_drop <- list(
    cmd = "drop",
    args = "",
    if_clause = NULL,
    options = NULL,
    by_group = NULL,
    raw_line = "drop",
    line_num = 1L,
    capture = FALSE
  )
  expect_null(translate_drop(empty_drop))
})

test_that("translate_drop with variable range expands correctly", {
  range_drop <- list(
    cmd = "drop",
    args = "aux1-aux3",
    if_clause = NULL,
    options = NULL,
    by_group = NULL,
    raw_line = "drop aux1-aux3",
    line_num = 1L,
    capture = FALSE
  )
  generated <- translate_drop(range_drop)$steps

  expect_match(generated, "step_remove")
  # The aux1-aux3 range should be spelled out, including the middle variable.
  expect_match(generated, "aux1.*aux2.*aux3")
})

test_that("translate_gen_block with gen followed by non-replace breaks lookahead", {
  gen_cmd <- list(
    cmd = "gen",
    args = "x = 1",
    if_clause = NULL,
    options = NULL,
    by_group = NULL,
    raw_line = "gen x = 1",
    line_num = 1L,
    capture = FALSE
  )
  # The command after the gen is a drop, not a replace.
  drop_cmd <- list(
    cmd = "drop",
    args = "y",
    if_clause = NULL,
    options = NULL,
    by_group = NULL,
    raw_line = "drop y",
    line_num = 2L,
    capture = FALSE
  )
  block <- translate_gen_block(list(gen_cmd, drop_cmd), 1)

  # Only the gen is consumed; the drop is left for the caller.
  expect_equal(block$advance, 1)
  expect_match(block$steps, "step_compute.*x = 1")
})

test_that("translate_gen_block with invalid gen returns NULL", {
  # Empty args: there is neither an "=" assignment nor a bare variable name.
  # (A malformed string that still contains "=" would not exercise this path,
  # per the note in the original test.)
  cmds <- list(
    list(
      cmd = "gen", args = "", if_clause = NULL, options = NULL,
      by_group = NULL, raw_line = "gen", line_num = 1L, capture = FALSE
    )
  )
  result <- translate_gen_block(cmds, 1)
  expect_null(result)
})

test_that("translate_gen_block recode path with quoted string constants", {
  default_cmd <- list(
    cmd = "gen",
    args = 'status = "unknown"',
    if_clause = NULL,
    options = NULL,
    by_group = NULL,
    raw_line = 'gen status = "unknown"',
    line_num = 1L,
    capture = FALSE
  )
  active_cmd <- list(
    cmd = "replace",
    args = 'status = "active"',
    if_clause = "flag == 1",
    options = NULL,
    by_group = NULL,
    raw_line = 'replace status = "active" if flag == 1',
    line_num = 2L,
    capture = FALSE
  )
  inactive_cmd <- list(
    cmd = "replace",
    args = 'status = "inactive"',
    if_clause = "flag == 0",
    options = NULL,
    by_group = NULL,
    raw_line = 'replace status = "inactive" if flag == 0',
    line_num = 3L,
    capture = FALSE
  )
  block <- translate_gen_block(list(default_cmd, active_cmd, inactive_cmd), 1)

  # Every RHS is a quoted string, i.e. a constant, so step_recode is expected.
  expect_match(block$steps, "step_recode")
  expect_equal(block$advance, 3)
})

test_that("translate_recode with multiple variables", {
  # One recode rule applied to three variables at once.
  recode_cmd <- list(
    cmd = "recode",
    args = "v1 v2 v3 (.=0)",
    if_clause = NULL,
    options = NULL,
    by_group = NULL,
    raw_line = "recode v1 v2 v3 (.=0)",
    line_num = 1L,
    capture = FALSE
  )
  out <- translate_recode(recode_cmd)
  expect_length(out$steps, 3)
})

test_that("build_doc_from_steps handles rename and remove step types", {
  rename_step <- 'step_rename(svy, new_name = "old_name")'
  remove_step <- "step_remove(svy, drop_var1, drop_var2)"

  built <- build_doc_from_steps(c(rename_step, remove_step))

  # Variables from both step types are reported as outputs.
  expect_true("new_name" %in% built$output_variables)
  expect_true("drop_var1" %in% built$output_variables)
  expect_length(built$pipeline, 2)
})

# ── transpile_stata_module with temp .do files ────────────────────────────────

test_that("transpile_stata_module errors on missing directory", {
  missing_dir <- "/nonexistent/dir"
  expect_error(
    transpile_stata_module(missing_dir, 2023),
    regexp = "Directory not found"
  )
})

test_that("transpile_stata_module processes a year directory with .do files", {
  year_dir <- file.path(tempdir(), "test_module_2023")
  dir.create(year_dir, showWarnings = FALSE)
  on.exit(unlink(year_dir, recursive = TRUE))

  # File contents keyed by file name, written in this order:
  #   2_correcciones.do  - data_prep module file (matches "2_correc")
  #   3_compatibiliza.do - demographics module file (matches "3_compat")
  #   label_pe2.do       - a label file
  do_sources <- list(
    "2_correcciones.do" = c(
      "gen bc_anio = 2023",
      "gen bc_mes = 1",
      "rename id bc_correlat"
    ),
    "3_compatibiliza.do" = c(
      "gen bc_pe2 = e26",
      "gen bc_pe3 = e27",
      "replace bc_pe3 = 1 if e30 == 1",
      "replace bc_pe3 = 2 if e30 == 2"
    ),
    "label_pe2.do" = c(
      'label variable bc_pe2 "Sexo"',
      'label define pe2l 1 "Hombre" 2 "Mujer"',
      "label values bc_pe2 pe2l"
    )
  )
  for (file_name in names(do_sources)) {
    writeLines(do_sources[[file_name]], file.path(year_dir, file_name))
  }

  modules <- transpile_stata_module(year_dir, 2023)
  expect_true(is.list(modules))
  expect_true("data_prep" %in% names(modules))
  expect_true("demographics" %in% names(modules))

  # Both modules come back as Recipe objects ...
  expect_true(inherits(modules$data_prep, "Recipe"))
  expect_true(inherits(modules$demographics, "Recipe"))

  # ... each with at least one step.
  expect_gt(length(modules$data_prep$steps), 0)
  expect_gt(length(modules$demographics$steps), 0)

  # The demographics recipe should list data_prep as a dependency once the
  # "ech_2023_" prefix is stripped from the dependency names.
  dependency_names <- sub(
    "^ech_2023_", "", modules$demographics$depends_on_recipes
  )
  expect_true("data_prep" %in% dependency_names)
})

test_that("transpile_stata_module with output_dir writes JSON files", {
  tmp_root <- tempdir()
  year_dir <- file.path(tmp_root, "test_module_out")
  out_dir <- file.path(tmp_root, "test_recipes_out")
  dir.create(year_dir, showWarnings = FALSE)
  # Remove both the input and the output directory when the test finishes.
  on.exit(unlink(c(year_dir, out_dir), recursive = TRUE))

  do_lines <- c("gen x = 1", "gen y = x + 1")
  writeLines(do_lines, file.path(year_dir, "2_correcciones.do"))

  recipes <- transpile_stata_module(year_dir, 2023, output_dir = out_dir)

  # The output directory must now exist and hold at least one JSON file.
  expect_true(dir.exists(out_dir))
  written_json <- list.files(out_dir, pattern = "\\.json$")
  expect_gt(length(written_json), 0)
})

test_that("transpile_stata works on temp .do file with mixed commands", {
  # A script mixing many command types, ending in a "tab" that should be
  # skipped rather than translated.
  do_lines <- c(
    "gen x = 1",
    "replace x = 2 if y == 1",
    "replace x = 3 if y == 2",
    "recode z (1=10) (2=20)",
    "drop aux1 aux2",
    "rename old_name new_name",
    "destring income_str, replace force",
    "tostring age",
    "mvencode a b, mv(0)",
    "bysort region: egen mean_inc = mean(income)",
    "keep important_var",
    "merge 1:1 id using other_data",
    "tab status"
  )
  do_path <- tempfile(fileext = ".do")
  on.exit(unlink(do_path))
  writeLines(do_lines, do_path)

  transpiled <- transpile_stata(do_path)

  expect_gt(length(transpiled$steps), 0)
  expect_gt(length(transpiled$stats), 0)
  expect_true(transpiled$stats$translated > 0)
  expect_true(transpiled$stats$skipped > 0) # tab is skipped
})

test_that("transpile_stata with labels extracts label info", {
  # One generated variable plus its variable label and value labels.
  do_lines <- c(
    "gen x = 1",
    'label variable x "My variable"',
    'label define xl 1 "One" 2 "Two"',
    "label values x xl"
  )
  do_path <- tempfile(fileext = ".do")
  on.exit(unlink(do_path))
  writeLines(do_lines, do_path)

  transpiled <- transpile_stata(do_path)

  expect_gt(length(transpiled$steps), 0)
  extracted <- transpiled$labels
  expect_equal(extracted$var_labels$x, "My variable")
  expect_equal(extracted$val_labels$x[["1"]], "One")
})