# Integration tests - end-to-end pipelines using in-memory data
#
# Conventions used throughout this file:
# * Tests that force a copy mode with set_use_copy() save the previous value
#   and restore it on exit, so the global setting never leaks into other
#   tests.
# * "Direct" baselines are always computed on data.table::copy() of the survey
#   data, because `:=` modifies a data.table by reference and would otherwise
#   alter the survey object under test.

test_that("Pipeline: step_compute -> bake -> step_recode", {
  s <- make_test_survey(n = 50)

  # Compute and bake first
  s2 <- step_compute(s, income_double = income * 2)
  s2 <- bake_steps(s2)
  expect_true("income_double" %in% names(s2$data))
  expect_equal(s2$data$income_double, s2$data$income * 2)

  # Then recode (recode applies immediately with use_copy=TRUE)
  s3 <- step_recode(s2, income_cat,
    income > 3000 ~ "high",
    income > 1500 ~ "medium",
    .default = "low"
  )
  expect_true("income_cat" %in% names(s3$data))
  expect_true(all(s3$data$income_cat %in% c("high", "medium", "low")))
})

test_that("Pipeline: step_remove + step_rename chain", {
  s <- make_test_survey()

  s2 <- step_remove(s, status)
  s2 <- step_rename(s2, edad = age)
  s2 <- bake_steps(s2)

  expect_false("status" %in% names(s2$data))
  expect_true("edad" %in% names(s2$data))
  expect_false("age" %in% names(s2$data))
})

test_that("Pipeline: step_join with external data", {
  s <- make_test_survey()
  lookup <- data.frame(
    region = 1:4,
    region_name = c("North", "South", "East", "West")
  )

  s2 <- step_join(s, lookup, by = "region", type = "left")
  s2 <- bake_steps(s2)

  expect_true("region_name" %in% names(s2$data))
  # A left join against a unique-key lookup must not change the row count
  # (assumes make_test_survey() defaults to 10 rows - TODO confirm).
  expect_equal(nrow(s2$data), 10)
})

test_that("Recipe creation and application end-to-end", {
  s <- make_test_survey()

  r <- recipe(
    name = "Integration test recipe",
    user = "test_user",
    svy = s,
    description = "Test recipe for integration",
    step_remove(s, status)
  )

  expect_s3_class(r, "Recipe")
  expect_equal(r$name, "Integration test recipe")

  # Save to a temporary file; only the message and the file's existence are
  # checked here (the recipe is not read back).
  tmp <- tempfile(fileext = ".json")
  on.exit(unlink(tmp), add = TRUE)
  expect_message(save_recipe(r, tmp), "saved")
  expect_true(file.exists(tmp))
})

test_that("Full pipeline with simulated survey data", {
  svy <- make_test_survey()
  svy <- step_compute(svy, z = x + y, comment = "sum")
  svy <- bake_steps(svy)
  expect_true("z" %in% names(get_data(svy)))

  r <- recipe(
    name = "integration",
    user = "tester",
    svy = svy,
    description = "Integration test"
  )
  expect_s3_class(r, "Recipe")

  result <- workflow(
    list(svy),
    survey::svymean(~x, na.rm = TRUE),
    estimation_type = "annual"
  )
  expect_true(nrow(result) > 0)
  expect_true("value" %in% names(result))
})

# --- Long pipeline integration tests (memory fix validation) ---

# NOTE(review): the title below says "25 steps", but the pipeline records
# 17 steps (10 compute + 5 recode + 2 rename), which is what is asserted.
test_that("Long pipeline: 25 steps -> bake -> workflow end-to-end", {
  svy <- make_test_survey(n = 100)

  # 10 compute steps
  svy <- step_compute(svy, inc2 = income * 2)
  svy <- step_compute(svy, inc3 = income * 3)
  svy <- step_compute(svy, inc_log = log(income + 1))
  svy <- step_compute(svy, age_sq = age^2)
  svy <- step_compute(svy, ratio_xy = x / (y + 1))
  svy <- step_compute(svy, combined = x + y + age)
  svy <- step_compute(svy, flag_high = as.integer(income > 3000))
  svy <- step_compute(svy, age_norm = age / max(age))
  svy <- step_compute(svy, inc_diff = inc2 - income)
  svy <- step_compute(svy, score = x * 10 + y)

  # 5 recode steps
  svy <- step_recode(svy, inc_cat,
    income < 2000 ~ "low",
    income < 4000 ~ "mid",
    .default = "high"
  )
  svy <- step_recode(svy, age_grp,
    age < 25 ~ "young",
    age < 45 ~ "adult",
    .default = "senior"
  )
  svy <- step_recode(svy, region_lbl,
    region == 1 ~ "A",
    region == 2 ~ "B",
    region == 3 ~ "C",
    .default = "D"
  )
  svy <- step_recode(svy, status_lbl,
    status == 1 ~ "active",
    status == 2 ~ "inactive",
    .default = "other"
  )
  svy <- step_recode(svy, flag_lbl,
    flag_high == 1 ~ "yes",
    .default = "no"
  )

  # 2 rename (avoid renaming weight column 'w' which breaks design)
  svy <- step_rename(svy, identifier = id)
  svy <- step_rename(svy, years_old = age)

  # Verify 17 lazy steps recorded
  expect_length(get_steps(svy), 17)

  # Bake all
  baked <- bake_steps(svy)
  dt <- get_data(baked)

  # Verify computed columns exist and are correct
  expect_equal(dt$inc2, dt$income * 2)
  expect_equal(dt$inc3, dt$income * 3)
  # age was renamed to years_old, so use years_old for comparison
  expect_equal(dt$age_sq, dt$years_old^2)
  expect_equal(dt$inc_diff, dt$inc2 - dt$income)

  # Verify recoded columns exist
  expect_true("inc_cat" %in% names(dt))
  expect_true("age_grp" %in% names(dt))
  expect_true("region_lbl" %in% names(dt))

  # Verify renames applied
  expect_true("identifier" %in% names(dt))
  expect_true("years_old" %in% names(dt))
  expect_false("id" %in% names(dt))
  expect_false("age" %in% names(dt))

  # All steps baked
  for (step in get_steps(baked)) {
    expect_true(step$bake)
  }

  # Design should be valid after bake
  expect_true(baked$design_initialized)

  # Workflow should still work on the baked survey; the weight column 'w'
  # was deliberately left un-renamed above.
  result <- workflow(
    list(baked),
    survey::svymean(~income, na.rm = TRUE),
    estimation_type = "annual"
  )
  expect_true(nrow(result) > 0)
  expect_true("value" %in% names(result))
})

test_that("RotativePanelSurvey: steps on implantation + follow-ups", {
  # Build panel with follow-ups.
  # `:=` adds the period columns by reference; presumably get_data() returns
  # the survey's own data.table so each survey is updated in place - verify.
  impl <- make_test_survey(n = 30)
  impl_data <- get_data(impl)
  impl_data[, `:=`(mes = 1, anio = 2023, numero = id)]
  impl$periodicity <- "monthly"

  fu1 <- make_test_survey(n = 30)
  fu1_data <- get_data(fu1)
  fu1_data[, `:=`(mes = 2, anio = 2023, numero = id)]

  fu2 <- make_test_survey(n = 30)
  fu2_data <- get_data(fu2)
  fu2_data[, `:=`(mes = 3, anio = 2023, numero = id)]

  panel <- RotativePanelSurvey$new(
    implantation = impl,
    follow_up = list(fu1, fu2),
    type = "ech",
    default_engine = "data.table",
    steps = list(),
    recipes = list(),
    workflows = list(),
    design = NULL
  )

  # Apply steps to all levels
  panel <- step_compute(panel, z = x + y, .level = "auto")
  panel <- step_compute(panel, z2 = z * 2, .level = "auto")
  panel <- step_recode(panel, age_cat,
    age < 30 ~ "young",
    .default = "old",
    .level = "auto"
  )

  # Bake
  baked <- bake_steps(panel)

  # Verify implantation
  impl_dt <- get_data(baked$implantation)
  expect_true("z" %in% names(impl_dt))
  expect_true("z2" %in% names(impl_dt))
  expect_true("age_cat" %in% names(impl_dt))
  expect_equal(impl_dt$z, impl_dt$x + impl_dt$y)

  # Verify both follow-ups
  for (i in seq_along(baked$follow_up)) {
    fu_dt <- get_data(baked$follow_up[[i]])
    expect_true("z" %in% names(fu_dt),
      info = paste("follow_up", i, "should have z")
    )
    expect_true("z2" %in% names(fu_dt),
      info = paste("follow_up", i, "should have z2")
    )
    expect_equal(fu_dt$z, fu_dt$x + fu_dt$y,
      info = paste("follow_up", i, "z values should be correct")
    )
  }
})

test_that("use_copy=TRUE vs FALSE produce identical numeric results", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)

  # Build identical pipelines under both modes
  run_pipeline <- function() {
    svy <- make_test_survey(n = 50)
    svy <- step_compute(svy, z = x + y)
    svy <- step_compute(svy, z2 = z * income)
    svy <- step_compute(svy, log_inc = log(income + 1))
    svy <- step_recode(svy, age_cat,
      age < 30 ~ "young",
      age >= 30 & age < 50 ~ "adult",
      .default = "senior"
    )
    svy <- step_recode(svy, inc_cat,
      income < 2500 ~ "low",
      .default = "high"
    )
    baked <- bake_steps(svy)
    get_data(baked)
  }

  set_use_copy(TRUE)
  dt_copy <- run_pipeline()

  set_use_copy(FALSE)
  dt_inplace <- run_pipeline()

  # Computed (numeric) and recoded columns must match between both modes
  expect_equal(dt_copy$z, dt_inplace$z)
  expect_equal(dt_copy$z2, dt_inplace$z2)
  expect_equal(dt_copy$log_inc, dt_inplace$log_inc)
  expect_equal(dt_copy$age_cat, dt_inplace$age_cat)
  expect_equal(dt_copy$inc_cat, dt_inplace$inc_cat)
})

# --- Merged from test-integration-comparison.R ---

test_that("step_compute produces same results as base R transformation", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  # Create test survey; copy the data so the direct `:=` below cannot modify
  # the survey's own table by reference.
  s <- make_test_survey(n = 100)
  original_data <- data.table::copy(get_data(s))

  # Apply transformation with metasurvey
  s_transformed <- s %>%
    step_compute(age_squared = age^2)
  ms_data <- get_data(s_transformed)

  # Apply same transformation directly
  direct_result <- original_data[, age_squared := age^2]

  # Compare results
  expect_equal(ms_data$age_squared, direct_result$age_squared)
})

test_that("data transformations can be applied and verified", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  s <- make_test_survey(n = 100)
  original_data <- data.table::copy(get_data(s))

  # Apply compute transformation
  s_transformed <- s %>%
    step_compute(age_group = ifelse(age < 30, 1, 0))
  ms_data <- get_data(s_transformed)

  # Apply same transformation directly
  direct_result <- original_data[, age_group := ifelse(age < 30, 1, 0)]

  # Compare results
  expect_equal(ms_data$age_group, direct_result$age_group)
})

test_that("svymean produces consistent results", {
  s <- make_test_survey(n = 200)

  # Create design directly with survey package
  des_survey <- survey::svydesign(ids = ~1, data = get_data(s), weights = ~w)

  # Estimate with survey
  result_survey <- survey::svymean(~age, des_survey)

  # Verify result structure
  expect_true(!is.null(coef(result_survey)))
  expect_true(!is.null(survey::SE(result_survey)))
  expect_equal(length(coef(result_survey)), 1)
})

test_that("svytotal produces consistent results", {
  s <- make_test_survey(n = 200)

  des_survey <- survey::svydesign(ids = ~1, data = get_data(s), weights = ~w)
  result_survey <- survey::svytotal(~income, des_survey)

  expect_true(!is.null(coef(result_survey)))
  expect_true(!is.null(survey::SE(result_survey)))
})

test_that("svyratio produces consistent results", {
  s <- make_test_survey(n = 200)

  des_survey <- survey::svydesign(ids = ~1, data = get_data(s), weights = ~w)
  result_survey <- survey::svyratio(~income, ~age, des_survey)

  expect_true(!is.null(coef(result_survey)))
  expect_true(!is.null(survey::SE(result_survey)))
})

test_that("multiple transformations preserve data integrity", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  s <- make_test_survey(n = 100)
  original_data <- data.table::copy(get_data(s))

  # Apply multiple steps
  s_multi <- s %>%
    step_compute(income_thousands = income / 1000) %>%
    step_compute(income_log = log(income_thousands + 1))
  ms_data <- get_data(s_multi)

  # Apply same transformations directly
  direct_result <- original_data[, `:=`(
    income_thousands = income / 1000
  )][, `:=`(
    income_log = log(income_thousands + 1)
  )]

  # Compare all new columns
  expect_equal(ms_data$income_thousands, direct_result$income_thousands)
  expect_equal(ms_data$income_log, direct_result$income_log)
})

test_that("multiple survey statistics are consistent", {
  s <- make_test_survey(n = 200)
  des <- survey::svydesign(ids = ~1, data = get_data(s), weights = ~w)

  # Multiple estimations directly
  mean_age <- survey::svymean(~age, des)
  total_income <- survey::svytotal(~income, des)
  mean_region <- survey::svymean(~region, des)

  # Verify all results are valid
  expect_true(!is.null(coef(mean_age)))
  expect_true(!is.null(coef(total_income)))
  expect_true(!is.null(coef(mean_region)))

  # Verify standard errors are positive
  expect_true(survey::SE(mean_age) > 0)
  expect_true(survey::SE(total_income) > 0)
  expect_true(survey::SE(mean_region) > 0)
})

test_that("transformed data can be used in survey estimation", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  s <- make_test_survey(n = 200)

  # Transform data with metasurvey
  s_transformed <- s %>%
    step_compute(age_decade = floor(age / 10))

  # Verify transformation worked
  transformed_data <- get_data(s_transformed)
  expect_true("age_decade" %in% names(transformed_data))

  # Create design and estimate with transformed data
  des <- survey::svydesign(ids = ~1, data = transformed_data, weights = ~w)
  result <- survey::svymean(~age_decade, des)

  # Same transformation and estimation directly, on a copy so that `:=` does
  # not add the column to the survey's own data by reference.
  original_data <- data.table::copy(get_data(s))
  direct_data <- original_data[, age_decade := floor(age / 10)]
  des_direct <- survey::svydesign(ids = ~1, data = direct_data, weights = ~w)
  result_direct <- survey::svymean(~age_decade, des_direct)

  # Compare
  expect_equal(
    as.numeric(coef(result_direct)),
    as.numeric(coef(result)),
    tolerance = 1e-6
  )
})

test_that("step_join adds columns correctly", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  s <- make_test_survey(n = 100)

  # Create auxiliary data
  aux_data <- data.frame(
    region = 1:4,
    region_name = c("North", "South", "East", "West")
  )

  # Join with metasurvey
  s_joined <- s %>%
    step_join(aux_data, by = "region")
  joined_data <- get_data(s_joined)

  # Verify join worked
  expect_true("region_name" %in% names(joined_data))
  expect_equal(nrow(joined_data), 100)

  # Verify we can still do survey estimation
  des <- survey::svydesign(ids = ~1, data = joined_data, weights = ~w)
  result <- survey::svymean(~age, des)
  expect_true(!is.null(result))
})

test_that("categorical variables work in survey estimation", {
  s <- make_test_survey(n = 200)
  des <- survey::svydesign(ids = ~1, data = get_data(s), weights = ~w)

  # Estimate proportions
  result_survey <- survey::svymean(~ as.factor(status), des)

  # Verify result structure
  expect_true(!is.null(coef(result_survey)))
  expect_equal(length(coef(result_survey)), 3) # 3 levels of status

  # All proportions should sum to 1
  expect_equal(sum(coef(result_survey)), 1, tolerance = 1e-6)
})

test_that("variance estimation is consistent", {
  s <- make_test_survey(n = 200)
  des <- survey::svydesign(ids = ~1, data = get_data(s), weights = ~w)

  result <- survey::svymean(~income, des)

  # Standard errors must exist and be positive
  expect_true(!is.null(survey::SE(result)))
  expect_true(survey::SE(result) > 0)

  # Coefficient of variation should be reasonable
  cv <- survey::SE(result) / abs(coef(result))
  expect_true(cv < 1) # CV typically less than 100%
})

test_that("multiple steps in sequence produce correct results", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  s <- make_test_survey(n = 100)
  original_data <- data.table::copy(get_data(s))

  # Apply steps in sequence
  s_step1 <- s %>%
    step_compute(income_log = log(income))
  s_step2 <- s_step1 %>%
    step_compute(age_cat = ifelse(age < 40, 1, 0))
  result_data <- get_data(s_step2)

  # Apply same transformations manually
  manual_result <- original_data[, `:=`(
    income_log = log(income),
    age_cat = ifelse(age < 40, 1, 0)
  )]

  # Compare
  expect_equal(result_data$income_log, manual_result$income_log)
  expect_equal(result_data$age_cat, manual_result$age_cat)
})

# --- Merged from test-metasurvey-vs-direct.R ---

test_that("step_compute produces identical data.frame as direct transformation", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  s <- make_test_survey(n = 100)
  original_data <- data.table::copy(get_data(s))

  # Method 1: metasurvey step_compute
  s_metasurvey <- s %>%
    step_compute(age_squared = age^2) %>%
    step_compute(income_log = log(income))
  result_metasurvey <- get_data(s_metasurvey)

  # Method 2: direct data.table transformation
  result_direct <- data.table::copy(original_data)[, `:=`(
    age_squared = age^2,
    income_log = log(income)
  )]

  # EXACT COMPARISON: the new columns must be identical
  expect_identical(result_metasurvey$age_squared, result_direct$age_squared)
  expect_identical(result_metasurvey$income_log, result_direct$income_log)

  # COMPARISON: both tables must have the same set of columns
  expect_setequal(names(result_metasurvey), names(result_direct))

  # COMPARISON: same number of rows
  expect_identical(nrow(result_metasurvey), nrow(result_direct))
})

test_that("multiple step_compute produce identical results as chained transformations", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  s <- make_test_survey(n = 150)
  original_data <- data.table::copy(get_data(s))

  # With metasurvey
  s_ms <- s %>%
    step_compute(income_thousands = income / 1000) %>%
    step_compute(income_log = log(income_thousands + 1)) %>%
    step_compute(age_decade = floor(age / 10)) %>%
    step_compute(high_income = as.integer(income > 3000))
  df_metasurvey <- get_data(s_ms)

  # Directly
  df_direct <- data.table::copy(original_data)
  df_direct[, income_thousands := income / 1000]
  df_direct[, income_log := log(income_thousands + 1)]
  df_direct[, age_decade := floor(age / 10)]
  df_direct[, high_income := as.integer(income > 3000)]

  # COLUMN-BY-COLUMN COMPARISON
  expect_identical(df_metasurvey$income_thousands, df_direct$income_thousands)
  expect_identical(df_metasurvey$income_log, df_direct$income_log)
  expect_identical(df_metasurvey$age_decade, df_direct$age_decade)
  expect_identical(df_metasurvey$high_income, df_direct$high_income)
})

test_that("step_join produces identical data.frame as merge", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  s <- make_test_survey(n = 100)
  original_data <- data.table::copy(get_data(s))

  # Auxiliary data
  region_info <- data.frame(
    region = 1:4,
    region_name = c("North", "South", "East", "West"),
    population = c(1000000, 800000, 1200000, 900000)
  )

  # With metasurvey
  s_joined <- s %>%
    step_join(region_info, by = "region")
  df_metasurvey <- get_data(s_joined)

  # With a direct merge
  df_direct <- merge(original_data, region_info, by = "region", all.x = TRUE)
  data.table::setDT(df_direct)

  # COMPARISON: same columns
  expect_setequal(names(df_metasurvey), names(df_direct))

  # COMPARISON: same number of rows
  expect_identical(nrow(df_metasurvey), nrow(df_direct))

  # COMPARISON: joined columns must be identical (after ordering by id,
  # since the two join paths may return rows in different orders)
  data.table::setorder(df_metasurvey, id)
  data.table::setorder(df_direct, id)
  expect_identical(df_metasurvey$region_name, df_direct$region_name)
  expect_identical(df_metasurvey$population, df_direct$population)
})

test_that("survey design with metasurvey data produces same estimates as direct data", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  s <- make_test_survey(n = 200)

  # Transform with metasurvey
  s_transformed <- s %>%
    step_compute(age_cat = as.integer(age >= 40))
  df_metasurvey <- get_data(s_transformed)

  # Transform directly
  df_direct <- data.table::copy(get_data(s))
  df_direct[, age_cat := as.integer(age >= 40)]

  # Build survey designs from both
  des_metasurvey <- survey::svydesign(
    ids = ~1, data = df_metasurvey, weights = ~w
  )
  des_direct <- survey::svydesign(ids = ~1, data = df_direct, weights = ~w)

  # Estimate with both
  est_metasurvey <- survey::svymean(~age_cat, des_metasurvey)
  est_direct <- survey::svymean(~age_cat, des_direct)

  # EXACT COMPARISON: coefficients must be identical
  expect_identical(
    as.numeric(coef(est_metasurvey)),
    as.numeric(coef(est_direct))
  )

  # EXACT COMPARISON: standard errors must be identical
  expect_identical(
    as.numeric(survey::SE(est_metasurvey)),
    as.numeric(survey::SE(est_direct))
  )
})

test_that("svymean on transformed data matches direct computation exactly", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  s <- make_test_survey(n = 250)

  # metasurvey pipeline
  s_pipeline <- s %>%
    step_compute(log_income = log(income)) %>%
    step_compute(
      age_group = cut(age, breaks = c(0, 30, 50, 100), labels = FALSE)
    )
  df_pipeline <- get_data(s_pipeline)

  # Direct computation
  df_baseline <- data.table::copy(get_data(s))
  df_baseline[, log_income := log(income)]
  df_baseline[
    ,
    age_group := cut(age, breaks = c(0, 30, 50, 100), labels = FALSE)
  ]

  # Designs
  des_pipeline <- survey::svydesign(ids = ~1, data = df_pipeline, weights = ~w)
  des_baseline <- survey::svydesign(ids = ~1, data = df_baseline, weights = ~w)

  # Estimates
  mean_pipeline <- survey::svymean(~log_income, des_pipeline)
  mean_baseline <- survey::svymean(~log_income, des_baseline)

  total_pipeline <- survey::svytotal(~income, des_pipeline)
  total_baseline <- survey::svytotal(~income, des_baseline)

  # Near-exact comparisons (tolerance 1e-10)
  expect_equal(coef(mean_pipeline), coef(mean_baseline), tolerance = 1e-10)
  expect_equal(
    survey::SE(mean_pipeline), survey::SE(mean_baseline),
    tolerance = 1e-10
  )
  expect_equal(coef(total_pipeline), coef(total_baseline), tolerance = 1e-10)
  expect_equal(
    survey::SE(total_pipeline), survey::SE(total_baseline),
    tolerance = 1e-10
  )
})

test_that("svytotal produces identical results on metasurvey vs direct data", {
  s <- make_test_survey(n = 200)

  # With metasurvey (no transformation, comparison only)
  df_metasurvey <- get_data(s)

  # Direct (same data, independent copy)
  df_direct <- data.table::copy(df_metasurvey)

  # Designs
  des_ms <- survey::svydesign(ids = ~1, data = df_metasurvey, weights = ~w)
  des_direct <- survey::svydesign(ids = ~1, data = df_direct, weights = ~w)

  # Estimates
  total_ms <- survey::svytotal(~income, des_ms)
  total_direct <- survey::svytotal(~income, des_direct)

  # EXACT COMPARISON
  expect_identical(coef(total_ms), coef(total_direct))
  expect_identical(survey::SE(total_ms), survey::SE(total_direct))
})

test_that("svyratio produces identical results with metasurvey transformed data", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  s <- make_test_survey(n = 200)

  # Transform age with metasurvey
  s_transformed <- s %>%
    step_compute(age_adjusted = age + 0.5)
  df_metasurvey <- get_data(s_transformed)

  # Transform directly
  df_direct <- data.table::copy(get_data(s))
  df_direct[, age_adjusted := age + 0.5]

  # Designs
  des_ms <- survey::svydesign(ids = ~1, data = df_metasurvey, weights = ~w)
  des_direct <- survey::svydesign(ids = ~1, data = df_direct, weights = ~w)

  # Ratio
  ratio_ms <- survey::svyratio(~income, ~age_adjusted, des_ms)
  ratio_direct <- survey::svyratio(~income, ~age_adjusted, des_direct)

  # Near-exact comparison (tolerance 1e-10)
  expect_equal(coef(ratio_ms), coef(ratio_direct), tolerance = 1e-10)
  expect_equal(
    survey::SE(ratio_ms), survey::SE(ratio_direct),
    tolerance = 1e-10
  )
})

test_that("complex pipeline: transformations + join produce identical survey results", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  s <- make_test_survey(n = 150)
  original <- data.table::copy(get_data(s))

  # Additional lookup information
  status_info <- data.frame(
    status = 1:3,
    status_label = c("Active", "Inactive", "Pending")
  )

  # metasurvey pipeline
  s_pipeline <- s %>%
    step_compute(income_normalized = income / 1000) %>%
    step_join(status_info, by = "status") %>%
    step_compute(combined_score = income_normalized * age / 100)
  df_pipeline <- get_data(s_pipeline)

  # Direct pipeline
  df_baseline <- data.table::copy(original)
  df_baseline[, income_normalized := income / 1000]
  df_baseline <- merge(df_baseline, status_info, by = "status", all.x = TRUE)
  data.table::setDT(df_baseline)
  df_baseline[, combined_score := income_normalized * age / 100]

  # Order by id so rows line up for comparison
  data.table::setorder(df_pipeline, id)
  data.table::setorder(df_baseline, id)

  # DATA COMPARISON
  expect_identical(df_pipeline$income_normalized, df_baseline$income_normalized)
  expect_identical(df_pipeline$status_label, df_baseline$status_label)
  expect_identical(df_pipeline$combined_score, df_baseline$combined_score)

  # ESTIMATE COMPARISON
  des_pipeline <- survey::svydesign(ids = ~1, data = df_pipeline, weights = ~w)
  des_baseline <- survey::svydesign(ids = ~1, data = df_baseline, weights = ~w)

  mean_pipeline <- survey::svymean(~combined_score, des_pipeline)
  mean_baseline <- survey::svymean(~combined_score, des_baseline)

  expect_equal(coef(mean_pipeline), coef(mean_baseline), tolerance = 1e-10)
  expect_equal(
    survey::SE(mean_pipeline), survey::SE(mean_baseline),
    tolerance = 1e-10
  )
})

test_that("categorical variables produce identical proportions", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  s <- make_test_survey(n = 300)

  # Categorize with metasurvey
  s_cat <- s %>%
    step_compute(
      income_bracket = cut(income, breaks = 3, labels = c("Low", "Med", "High"))
    )
  df_metasurvey <- get_data(s_cat)

  # Categorize directly
  df_direct <- data.table::copy(get_data(s))
  df_direct[
    ,
    income_bracket := cut(income, breaks = 3, labels = c("Low", "Med", "High"))
  ]

  # Designs
  des_ms <- survey::svydesign(ids = ~1, data = df_metasurvey, weights = ~w)
  des_direct <- survey::svydesign(ids = ~1, data = df_direct, weights = ~w)

  # Proportions
  prop_ms <- survey::svymean(~income_bracket, des_ms)
  prop_direct <- survey::svymean(~income_bracket, des_direct)

  # EXACT COMPARISON
  expect_identical(as.numeric(coef(prop_ms)), as.numeric(coef(prop_direct)))
  expect_identical(
    as.numeric(survey::SE(prop_ms)),
    as.numeric(survey::SE(prop_direct))
  )
})

test_that("variance estimates are identical between methods", {
  s <- make_test_survey(n = 200)

  df1 <- get_data(s)
  df2 <- data.table::copy(df1)

  des1 <- survey::svydesign(ids = ~1, data = df1, weights = ~w)
  des2 <- survey::svydesign(ids = ~1, data = df2, weights = ~w)

  var1 <- survey::svyvar(~age, des1)
  var2 <- survey::svyvar(~age, des2)

  # EXACT COMPARISON of the estimated variance
  expect_identical(as.numeric(coef(var1)), as.numeric(coef(var2)))
  expect_identical(as.numeric(survey::SE(var1)), as.numeric(survey::SE(var2)))
})

test_that("quantiles are identical with transformed data", {
  old_copy <- use_copy_default()
  on.exit(set_use_copy(old_copy), add = TRUE)
  set_use_copy(TRUE)

  s <- make_test_survey(n = 200)

  # Transform
  s_t <- s %>%
    step_compute(income_sqrt = sqrt(income))
  df_ms <- get_data(s_t)

  df_direct <- data.table::copy(get_data(s))
  df_direct[, income_sqrt := sqrt(income)]

  # Designs
  des_ms <- survey::svydesign(ids = ~1, data = df_ms, weights = ~w)
  des_direct <- survey::svydesign(ids = ~1, data = df_direct, weights = ~w)

  # Quantiles
  q_ms <- survey::svyquantile(
    ~income_sqrt, des_ms,
    quantiles = c(0.25, 0.5, 0.75)
  )
  q_direct <- survey::svyquantile(
    ~income_sqrt, des_direct,
    quantiles = c(0.25, 0.5, 0.75)
  )

  # COMPARISON (quantiles may show tiny numerical differences)
  expect_equal(
    as.numeric(coef(q_ms)), as.numeric(coef(q_direct)),
    tolerance = 1e-8
  )
})