# Unit tests for the panel-data preparation helpers and the panel model
# fitting wrappers. Every test builds its own small synthetic fixture so the
# tests stay independent of each other.

# prepare_panel_data() should reshape two wide tables (one column per sector
# plus a Year column) into a long panel with one row per year/sector pair.
test_that("prepare_panel_data creates correct structure", {
  set.seed(123)

  years <- 2000:2005
  sectors <- c("A", "B", "C")

  direct <- data.frame(
    Year = years,
    A = runif(6, 100, 200),
    B = runif(6, 100, 200),
    C = runif(6, 100, 200)
  )

  production <- data.frame(
    Year = years,
    A = runif(6, 100, 200),
    B = runif(6, 100, 200),
    C = runif(6, 100, 200)
  )

  panel <- prepare_panel_data(direct, production)

  expect_s3_class(panel, "data.frame")
  expect_equal(nrow(panel), length(years) * length(sectors))
  expect_true("year" %in% names(panel))
  expect_true("sector" %in% names(panel))
  expect_true("log_direct" %in% names(panel))
  expect_true("log_production" %in% names(panel))
})

# Inputs without a Year column must be rejected with an error mentioning
# "Year".
test_that("prepare_panel_data handles missing Year column", {
  direct <- data.frame(A = 1:5, B = 1:5)
  production <- data.frame(A = 1:5, B = 1:5)

  expect_error(
    prepare_panel_data(direct, production),
    "Year"
  )
})

# A panel carrying year, sector and both log columns should validate.
test_that("validate_panel_data accepts valid data", {
  # Seeded like the other randomized tests so a failure is reproducible.
  set.seed(123)

  panel <- data.frame(
    year = rep(2000:2002, 3),
    sector = rep(c("A", "B", "C"), each = 3),
    log_direct = rnorm(9),
    log_production = rnorm(9)
  )

  expect_true(validate_panel_data(panel))
})

# With require_log = TRUE a panel lacking the log columns must raise an error
# whose message matches "Missing".
test_that("validate_panel_data rejects invalid data", {
  panel <- data.frame(
    year = 1:5,
    sector = letters[1:5]
  )

  expect_error(
    validate_panel_data(panel, require_log = TRUE),
    "Missing"
  )
})

# create_mundlak_data() should add the sector-mean and within-deviation
# columns, and x_mean_sector must equal the per-sector mean of log_direct.
test_that("create_mundlak_data adds correct columns", {
  set.seed(123)

  panel <- data.frame(
    year = rep(2000:2002, 3),
    sector = rep(c("A", "B", "C"), each = 3),
    log_direct = rnorm(9, mean = 5),
    log_production = rnorm(9, mean = 5)
  )

  panel_m <- create_mundlak_data(panel)

  expect_true("x_mean_sector" %in% names(panel_m))
  expect_true("x_within" %in% names(panel_m))

  for (s in unique(panel_m$sector)) {
    # The expected value comes from the *input* panel using the input's own
    # sector column, while the actual value is selected with the output's
    # sector column. Each data frame is indexed by its own mask, so the check
    # stays valid even if create_mundlak_data() reorders rows.
    expected_mean <- mean(panel$log_direct[panel$sector == s])
    rows_out <- panel_m$sector == s

    expect_equal(
      unique(panel_m$x_mean_sector[rows_out]),
      expected_mean,
      tolerance = 1e-10
    )
  }
})

# aggregate_to_timeseries() should collapse the panel to one row per year
# (6 distinct years here) with cross-sector mean columns.
test_that("aggregate_to_timeseries works correctly", {
  set.seed(123)

  panel <- data.frame(
    year = rep(2000:2005, 3),
    sector = rep(c("A", "B", "C"), each = 6),
    log_direct = rnorm(18, mean = 5),
    log_production = rnorm(18, mean = 5)
  )

  ts_agg <- aggregate_to_timeseries(panel)

  expect_equal(nrow(ts_agg), 6)
  expect_true("year" %in% names(ts_agg))
  expect_true("log_direct_mean" %in% names(ts_agg))
  expect_true("log_production_mean" %in% names(ts_agg))
})

# prepare_log_matrices() should return a list holding the log-transformed and
# cleaned matrices under the names checked below.
test_that("prepare_log_matrices returns correct structure", {
  set.seed(123)

  direct <- data.frame(
    Year = 2000:2005,
    A = runif(6, 100, 200),
    B = runif(6, 100, 200)
  )

  production <- data.frame(
    Year = 2000:2005,
    A = runif(6, 100, 200),
    B = runif(6, 100, 200)
  )

  matrices <- prepare_log_matrices(direct, production)

  expect_type(matrices, "list")
  expect_true("X_log" %in% names(matrices))
  expect_true("Y_log" %in% names(matrices))
  expect_true("X_clean" %in% names(matrices))
  expect_true("Y_clean" %in% names(matrices))
  expect_true(is.matrix(matrices$X_log))
  expect_true(is.matrix(matrices$Y_log))
})

# fit_twoway_fe() on a panel with a strong linear relationship should return
# a list with the model, metrics and a within R-squared above 0.5.
test_that("fit_twoway_fe works with valid data", {
  skip_if_not_installed("plm")

  set.seed(123)

  panel <- data.frame(
    year = rep(2000:2009, 5),
    sector = rep(LETTERS[1:5], each = 10),
    log_direct = rnorm(50, 5, 0.5),
    # Placeholder draw: overwritten just below. It is kept so the random
    # stream, and therefore the fitted values, stay exactly as before.
    log_production = rnorm(50, 5, 0.5)
  )

  # Make the response a noisy linear function of the regressor.
  panel$log_production <- panel$log_direct * 0.95 + rnorm(50, 0, 0.1)

  result <- fit_twoway_fe(panel)

  expect_type(result, "list")
  expect_true("model" %in% names(result))
  expect_true("r2_within" %in% names(result))
  expect_true("metrics" %in% names(result))
  expect_true(result$r2_within > 0.5)
})

# fit_mundlak_cre() should return a list exposing the model, the variance
# components and the augmented panel data.
test_that("fit_mundlak_cre works with valid data", {
  skip_if_not_installed("plm")

  set.seed(123)

  panel <- data.frame(
    year = rep(2000:2009, 5),
    sector = rep(LETTERS[1:5], each = 10),
    log_direct = rnorm(50, 5, 0.5),
    # Placeholder draw: overwritten just below. It is kept so the random
    # stream, and therefore the fitted values, stay exactly as before.
    log_production = rnorm(50, 5, 0.5)
  )

  # Make the response a noisy linear function of the regressor.
  panel$log_production <- panel$log_direct * 0.95 + rnorm(50, 0, 0.1)

  result <- fit_mundlak_cre(panel)

  expect_type(result, "list")
  expect_true("model" %in% names(result))
  expect_true("variance_components" %in% names(result))
  expect_true("panel_data_augmented" %in% names(result))
})