# Unit tests for the hedonic helper functions: prepare_hedonic_data(),
# format_index_output(), fit_hedonic_model() and predict_hedonic().
# Each statement sits on its own line because `#` comments out the rest of
# a physical line in R; sharing a line with a comment would disable the code.

test_that("Test prepare_hedonic_data", {
  # Fixture: row 3 has an NA in `area`, row 4 has an empty-string `period`,
  # and `unused_column` is never passed to the function under test.
  # stringsAsFactors = FALSE keeps the inputs as character on R < 4.0, so any
  # factor seen in the output was produced by prepare_hedonic_data() itself.
  dummy_data <- data.frame(
    period = c("2020Q1", "2020Q1", "2020Q2", ""),
    price = c(100000, 200000, 150000, 300000),
    area = c(50, 100, NA, 80),
    condition = c("Good", "Bad", "Good", "Good"),
    unused_column = c(1, 2, 3, 4),
    stringsAsFactors = FALSE
  )

  # Test 1: standard cleaning (log_dependent = FALSE)
  clean_df <- prepare_hedonic_data(
    dataset = dummy_data,
    period_variable = "period",
    dependent_variable = "price",
    numerical_variables = c("area"),
    categorical_variables = c("condition"),
    log_dependent = FALSE
  )

  # Columns that were not requested must be dropped
  expect_false("unused_column" %in% names(clean_df))

  # The NA row (3) and the empty-string row (4) must be dropped
  expect_equal(nrow(clean_df), 2)

  # Variable coercion: categorical -> factor, period -> character
  expect_s3_class(clean_df$condition, "factor")
  expect_type(clean_df$period, "character")

  # Test 2: log transformation adds a `log_price` column
  clean_df_log <- prepare_hedonic_data(
    dataset = dummy_data,
    period_variable = "period",
    dependent_variable = "price",
    numerical_variables = c("area"),
    categorical_variables = c("condition"),
    log_dependent = TRUE
  )

  expect_true("log_price" %in% names(clean_df_log))
  expect_equal(clean_df_log$log_price[1], log(100000))

  # Test 3: logging a non-positive dependent value must stop with an error
  dummy_data$price[1] <- 0
  expect_error(
    prepare_hedonic_data(
      dummy_data, "period", "price", c("area"), c("condition"),
      log_dependent = TRUE
    ),
    "non-positive values"
  )
})

test_that("Test format_index_output", {
  periods <- c("2020Q1", "2020Q2", "2020Q3")
  values <- c(100, 105, 110)
  obs <- c(500, 520, 510)

  # Test 1: base output (no observation counts, no rebasing)
  res1 <- format_index_output(periods, values)
  expect_named(res1, c("period", "Index"))
  expect_equal(res1$Index, c(100, 105, 110))

  # Test 2: with observation counts (expect_named also pins column order)
  res2 <- format_index_output(periods, values, observation_counts = obs)
  expect_named(res2, c("period", "number_of_observations", "Index"))
  expect_equal(res2$number_of_observations, obs)

  # Test 3: with rebasing, the reference period's index becomes 100
  res3 <- format_index_output(periods, values, reference_period = "2020Q2")
  expect_equal(round(res3$Index[2], 0), 100)
})

test_that("Test fit_hedonic_model and predict_hedonic", {
  # Fixture: log_price rises by 0.5 for every 10 units of area
  dummy_data <- data.frame(
    log_price = c(10, 10.5, 11, 11.5, 12),
    area = c(50, 60, 70, 80, 90),
    city = factor(c("A", "A", "B", "B", "C"))
  )

  # Test 1: fit a model with independent variables
  model <- fit_hedonic_model(dummy_data, "log_price", c("area", "city"))
  expect_s3_class(model, "lm")
  # Intercept + area + city dummies
  expect_gt(length(coef(model)), 2)

  # Test 2: no independent variables -> intercept-only model
  model_null <- fit_hedonic_model(dummy_data, "log_price", character(0))
  expect_s3_class(model_null, "lm")
  expect_length(coef(model_null), 1)

  # Test 3: predictions, one numeric value per input row
  preds <- predict_hedonic(model, newdata = dummy_data)
  expect_length(preds, 5)
  expect_true(is.numeric(preds))

  # For an lm object, predictions must match stats::predict()
  expect_equal(
    as.numeric(preds),
    as.numeric(stats::predict(model, dummy_data))
  )
})