make_benchmark_panel_fixture <- function() { timestamps <- seq( as.POSIXct("2020-01-01 00:00:00", tz = "UTC"), by = "hour", length.out = 96L ) time_index <- rep(seq_len(24L), 4L) day_index <- rep(0:3, each = 24L) raw <- data.frame( id = rep(c("home_a", "home_b"), each = length(timestamps)), timestamp = rep(timestamps, 2L), load = c( 1 + 0.2 * sin(2 * pi * time_index / 24) + 0.1 * day_index + time_index / 100, 1.4 + 0.25 * cos(2 * pi * time_index / 24) + 0.08 * day_index + time_index / 120 ), temp = c( rep(seq(5, 10, length.out = 24L), 4L), rep(seq(2, 7, length.out = 24L), 4L) ), stringsAsFactors = FALSE ) elcf4r_normalize_panel( data = raw, id_col = "id", timestamp_col = "timestamp", load_col = "load", temp_col = "temp", dataset = "fixture", resolution_minutes = 60L, tz = "UTC" ) } test_that("elcf4r_build_benchmark_index returns a day-level index", { panel <- make_benchmark_panel_fixture() index <- elcf4r_build_benchmark_index(panel, carry_cols = "dataset") expect_true(all(c( "day_key", "entity_id", "date", "dow", "month", "temp_mean", "temp_min", "temp_max", "dataset", "n_points" ) %in% names(index))) expect_equal(nrow(index), 8L) expect_equal(index$n_points[[1]], 24L) expect_identical(unique(index$dataset), "fixture") }) test_that("benchmark index slicing supports first and last windows", { panel <- make_benchmark_panel_fixture() index <- elcf4r_build_benchmark_index(panel, carry_cols = "dataset") first_window <- .elcf4r_slice_benchmark_index( index, required_days = 3L, entity_ids = "home_a", anchor = "first" ) last_window <- .elcf4r_slice_benchmark_index( index, required_days = 3L, entity_ids = "home_a", anchor = "last" ) expect_equal(nrow(first_window), 3L) expect_equal(nrow(last_window), 3L) expect_equal(first_window$date, sort(first_window$date)) expect_equal(last_window$date, sort(last_window$date)) expect_lt(max(first_window$date), max(last_window$date)) }) test_that("elcf4r_benchmark runs built-in methods on a normalized panel", { panel <- make_benchmark_panel_fixture() index <- elcf4r_build_benchmark_index(panel, carry_cols = "dataset") out <- elcf4r_benchmark( panel = panel, benchmark_index = index, methods = c("gam", "mars", "kwf", "kwf_clustered"), train_days = 3L, test_days = 1L, cohort_size = 2L, benchmark_name = "fixture_hourly_2_ids_3_train_1_test_4_methods", include_predictions = TRUE, seed = 17L ) expect_s3_class(out, "elcf4r_benchmark") expect_true(all(c("results", "predictions", "cohort_index", "spec", "backend") %in% names(out))) expect_equal(nrow(out$results), 8L) expect_equal(sort(unique(out$results$method)), c("gam", "kwf", "kwf_clustered", "mars")) expect_true(all(out$results$status == "ok")) expect_true(all(!is.na(out$results$mase))) expect_equal(nrow(out$predictions), 8L * 24L) expect_true(all(out$predictions$benchmark_name == "fixture_hourly_2_ids_3_train_1_test_4_methods")) expect_true(all(out$predictions$dataset == "fixture")) expect_equal(length(out$spec$entity_ids), 2L) }) test_that("shipped benchmark datasets have populated metrics", { shipped <- list( iflex = elcf4r_iflex_benchmark_results, storenet = elcf4r_storenet_benchmark_results, lcl = elcf4r_lcl_benchmark_results, refit = elcf4r_refit_benchmark_results ) for (nm in names(shipped)) { x <- shipped[[nm]] expect_true(all(x$status == "ok"), info = nm) expect_true(all(!is.na(x$nmae)), info = nm) expect_true(all(!is.na(x$nrmse)), info = nm) expect_true(all(!is.na(x$smape)), info = nm) expect_true(all(!is.na(x$mase)), info = nm) } })