# Fixture builders ----
#
# Each builder returns a small synthetic data.frame that is written to disk
# by the tests below and then read back through the matching reader.

# Two-day hourly fixture with two participant IDs ("Exp_1", "Exp_2"),
# 48 rows each. The first `Price_signal` value is an empty string.
make_iflex_fixture <- function() {
  timestamps <- seq(
    as.POSIXct("2020-02-01 00:00:00", tz = "UTC"),
    by = "hour",
    length.out = 48L
  )
  n <- length(timestamps)

  data.frame(
    ID = rep(c("Exp_1", "Exp_2"), each = n),
    From = format(rep(timestamps, 2L), "%Y-%m-%dT%H:%M:%SZ", tz = "UTC"),
    Participation_Phase = rep(c("Phase_1", "Phase_2"), each = n),
    Demand_kWh = c(seq_along(timestamps), seq_along(timestamps) + 100) / 10,
    Price_signal = c(
      "",
      rep("A_10", 23L),
      rep("B_2", 24L),
      rep("C_15", 48L)
    ),
    Experiment_price_NOK_kWh = c(rep(1.0, 24L), rep(2.0, 24L), rep(3.0, 48L)),
    Temperature = rep(seq(1, 48), 2L),
    Temperature24 = rep(seq(0, 47), 2L),
    Temperature48 = rep(seq(-1, 46), 2L),
    Temperature72 = rep(seq(-2, 45), 2L),
    stringsAsFactors = FALSE
  )
}

# One day of minute-resolution rows (1440). Column names carry a leading
# space on purpose; `check.names = FALSE` keeps them verbatim.
make_storenet_fixture <- function() {
  timestamps <- seq(
    as.POSIXct("2020-01-01 00:00:00", tz = "UTC"),
    by = "min",
    length.out = 1440L
  )
  n <- length(timestamps)

  data.frame(
    date = format(timestamps, "%Y-%m-%d %H:%M:%S", tz = "UTC"),
    " Discharge(W)" = rep(0, n),
    " Charge(W)" = rep(5, n),
    " Production(W)" = rep(1, n),
    " Consumption(W)" = seq_along(timestamps) + 100,
    " State of Charge(%)" = rep(50, n),
    H6_W = rep(1, n),
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
}

# One day of half-hourly rows (48) in wide layout: one column per meter.
# Readings are space-padded strings.
make_lcl_fixture <- function() {
  timestamps <- seq(
    as.POSIXct("2013-01-01 00:00:00", tz = "UTC"),
    by = "30 min",
    length.out = 48L
  )

  data.frame(
    DateTime = format(timestamps, "%Y-%m-%d %H:%M:%OS", tz = "UTC"),
    MAC000002 = sprintf(" %.3f ", seq_len(48L) / 10),
    MAC000003 = sprintf(" %.3f ", (seq_len(48L) + 100) / 10),
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
}

# One day of 30-second rows (2880), i.e. two samples per minute.
make_refit_fixture <- function() {
  timestamps <- seq(
    as.POSIXct("2013-10-09 00:00:00", tz = "UTC"),
    by = "30 sec",
    length.out = 2880L
  )
  # Minute number (1..1440) of each sample; every minute appears twice.
  minute_index <- rep(seq_len(1440L), each = 2L)
  # The length-2 offsets recycle, so the two samples within a minute differ.
  aggregate <- minute_index + c(0, 2)
  appliance1 <- minute_index / 10 + c(0, 0.4)

  data.frame(
    Time = format(timestamps, "%Y-%m-%d %H:%M:%S", tz = "UTC"),
    Unix = as.integer(as.numeric(timestamps)),
    Aggregate = aggregate,
    Appliance1 = appliance1,
    Issues = rep(0, length(timestamps)),
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
}

# Two days of hourly rows for two homes, timestamps in Europe/London.
make_ideal_fixture <- function() {
  timestamps <- seq(
    as.POSIXct("2017-01-01 00:00:00", tz = "Europe/London"),
    by = "hour",
    length.out = 48L
  )

  data.frame(
    home_id = rep(c("Home_1", "Home_2"), each = length(timestamps)),
    timestamp = format(
      rep(timestamps, 2L),
      "%Y-%m-%d %H:%M:%S",
      tz = "Europe/London"
    ),
    aggregate_electricity = c(seq_len(48L), seq_len(48L) + 100) / 10,
    stringsAsFactors = FALSE
  )
}

# Two days of hourly rows for two communities, timestamps in Asia/Shanghai.
# `holiday` is constant per community; `extreme_weather` alternates by row.
make_gx_fixture <- function() {
  timestamps <- seq(
    as.POSIXct("2020-07-01 00:00:00", tz = "Asia/Shanghai"),
    by = "hour",
    length.out = 48L
  )

  data.frame(
    community_id = rep(c("GX_A", "GX_B"), each = length(timestamps)),
    timestamp = format(
      rep(timestamps, 2L),
      "%Y-%m-%d %H:%M:%S",
      tz = "Asia/Shanghai"
    ),
    load = c(seq_len(48L), seq_len(48L) + 200) / 10,
    temperature = rep(seq(25, 72), 2L),
    humidity = rep(seq(60, 107), 2L),
    holiday = rep(c(0, 1), each = 48L),
    extreme_weather = rep(c(0, 1), times = 48L),
    stringsAsFactors = FALSE
  )
}

# Tests ----

test_that("elcf4r_read_iflex returns normalized hourly data", {
  tmp_csv <- tempfile(fileext = ".csv")
  on.exit(unlink(tmp_csv), add = TRUE)
  utils::write.csv(make_iflex_fixture(), tmp_csv, row.names = FALSE, na = "")

  dat_full <- elcf4r_read_iflex(path = tmp_csv, ids = "Exp_1")
  dat <- elcf4r_read_iflex(
    path = tmp_csv,
    ids = "Exp_1",
    start = "2020-02-01 12:00:00",
    end = "2020-02-02 11:00:00"
  )

  expect_true(all(c(
    "dataset", "entity_id", "timestamp", "date", "time_index", "y", "temp",
    "dow", "month", "participation_phase", "price_signal", "price_nok_kwh",
    "temp24", "temp48", "temp72", "resolution_minutes"
  ) %in% names(dat)))
  expect_identical(unique(dat$dataset), "iflex")
  expect_identical(unique(dat$entity_id), "Exp_1")
  expect_equal(nrow(dat), 24L)
  expect_equal(
    min(dat$timestamp),
    as.POSIXct("2020-02-01 12:00:00", tz = "UTC")
  )
  expect_equal(
    max(dat$timestamp),
    as.POSIXct("2020-02-02 11:00:00", tz = "UTC")
  )
  expect_equal(unique(dat$resolution_minutes), 60L)
  expect_equal(dat$time_index[1], 13L)
  # The fixture's first price signal is an empty string.
  expect_true(is.na(dat_full$price_signal[1]))
})

test_that("elcf4r_build_daily_segments drops incomplete days and returns matrices", {
  tmp_csv <- tempfile(fileext = ".csv")
  on.exit(unlink(tmp_csv), add = TRUE)

  fixture <- make_iflex_fixture()
  fixture <- fixture[fixture$ID == "Exp_1", ]
  # Remove the last hour so the second day has only 23 of 24 points.
  fixture <- fixture[-48L, ]
  utils::write.csv(fixture, tmp_csv, row.names = FALSE, na = "")

  dat <- elcf4r_read_iflex(path = tmp_csv)
  seg <- elcf4r_build_daily_segments(
    data = dat,
    carry_cols = c("dataset", "participation_phase", "price_signal")
  )

  expect_true(is.matrix(seg$segments))
  expect_equal(dim(seg$segments), c(1L, 24L))
  expect_equal(seg$points_per_day, 24L)
  expect_equal(seg$resolution_minutes, 60L)
  expect_equal(rownames(seg$segments), "Exp_1__2020-02-01")
  expect_equal(seg$covariates$dataset[[1]], "iflex")
  expect_equal(seg$covariates$participation_phase[[1]], "Phase_1")
  expect_equal(seg$covariates$price_signal[[1]], "A_10")
  expect_equal(seg$covariates$temp_mean[[1]], mean(seq_len(24L)))
  expect_equal(seg$segments[1, 1], 0.1)
  expect_equal(seg$segments[1, 24], 2.4)
})

test_that("elcf4r_read_storenet returns normalized minute data", {
  tmp_dir <- tempfile("storenet")
  dir.create(tmp_dir)
  on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE)
  tmp_csv <- file.path(tmp_dir, "H6_W.csv")
  utils::write.csv(
    make_storenet_fixture(),
    tmp_csv,
    row.names = FALSE,
    na = ""
  )

  dat <- elcf4r_read_storenet(
    path = tmp_dir,
    ids = "H6_W",
    start = "2020-01-01 00:00:00",
    end = "2020-01-01 23:59:00"
  )
  seg <- elcf4r_build_daily_segments(
    data = dat,
    carry_cols = c("dataset", "source_file")
  )

  expect_true(all(c(
    "dataset", "entity_id", "timestamp", "date", "time_index", "y",
    "resolution_minutes", "discharge_w", "charge_w", "production_w",
    "state_of_charge_pct", "source_file"
  ) %in% names(dat)))
  expect_identical(unique(dat$dataset), "storenet")
  expect_identical(unique(dat$entity_id), "H6_W")
  expect_equal(nrow(dat), 1440L)
  expect_equal(unique(dat$resolution_minutes), 1)
  expect_equal(dim(seg$segments), c(1L, 1440L))
  expect_equal(seg$segments[1, 1], 101)
  expect_equal(seg$segments[1, 1440], 1540)
})

test_that("elcf4r_read_lcl reshapes wide household data to the common schema", {
  tmp_csv <- tempfile(fileext = ".csv")
  on.exit(unlink(tmp_csv), add = TRUE)
  # Written unquoted so the space-padded readings reach the reader as-is.
  utils::write.table(
    make_lcl_fixture(),
    file = tmp_csv,
    sep = ",",
    row.names = FALSE,
    col.names = TRUE,
    quote = FALSE
  )

  dat <- elcf4r_read_lcl(path = tmp_csv, ids = "MAC000002")
  seg <- elcf4r_build_daily_segments(data = dat, carry_cols = "dataset")

  expect_true(all(c(
    "dataset", "entity_id", "timestamp", "date", "time_index", "y",
    "resolution_minutes"
  ) %in% names(dat)))
  expect_identical(unique(dat$dataset), "lcl")
  expect_identical(unique(dat$entity_id), "MAC000002")
  expect_equal(nrow(dat), 48L)
  expect_equal(unique(dat$resolution_minutes), 30)
  expect_equal(dim(seg$segments), c(1L, 48L))
  expect_equal(seg$segments[1, 1], 0.1)
  expect_equal(seg$segments[1, 48], 4.8)
})

test_that("elcf4r_read_refit resamples channels onto the common schema", {
  tmp_dir <- tempfile("refit")
  dir.create(tmp_dir)
  on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE)
  tmp_csv <- file.path(tmp_dir, "CLEAN_House1.csv")
  utils::write.csv(make_refit_fixture(), tmp_csv, row.names = FALSE, na = "")

  dat_aggregate <- elcf4r_read_refit(path = tmp_dir)
  dat_multi <- elcf4r_read_refit(
    path = tmp_dir,
    channels = c("Aggregate", "Appliance1")
  )
  seg <- elcf4r_build_daily_segments(
    data = dat_aggregate,
    carry_cols = c("dataset", "house_id", "channel")
  )

  expect_true(all(c(
    "dataset", "entity_id", "timestamp", "date", "time_index", "y",
    "resolution_minutes", "house_id", "channel", "unix", "issues"
  ) %in% names(dat_aggregate)))
  expect_identical(unique(dat_aggregate$dataset), "refit")
  expect_identical(unique(dat_aggregate$entity_id), "CLEAN_House1")
  expect_identical(unique(dat_aggregate$house_id), "CLEAN_House1")
  expect_identical(unique(dat_aggregate$channel), "Aggregate")
  expect_equal(nrow(dat_aggregate), 1440L)
  expect_equal(unique(dat_aggregate$resolution_minutes), 1)
  expect_equal(dim(seg$segments), c(1L, 1440L))
  # The fixture's two Aggregate samples in the first minute are 1 and 3.
  expect_equal(seg$segments[1, 1], 2)
  expect_true(all(
    c("CLEAN_House1::Aggregate", "CLEAN_House1::Appliance1") %in%
      unique(dat_multi$entity_id)
  ))
})

test_that("elcf4r_read_ideal resolves and normalizes an hourly aggregate scaffold", {
  tmp_dir <- tempfile("ideal")
  dir.create(tmp_dir)
  on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE)
  tmp_csv <- file.path(tmp_dir, "ideal_auxiliary_hourly.csv")
  utils::write.csv(make_ideal_fixture(), tmp_csv, row.names = FALSE, na = "")

  dat <- elcf4r_read_ideal(
    path = tmp_dir,
    ids = "Home_1",
    start = "2017-01-01 00:00:00",
    end = "2017-01-01 23:00:00"
  )
  seg <- elcf4r_build_daily_segments(
    data = dat,
    carry_cols = c("dataset", "home_id", "source_file")
  )

  expect_true(all(c(
    "dataset", "entity_id", "timestamp", "date", "time_index", "y", "temp",
    "resolution_minutes", "home_id", "source_file"
  ) %in% names(dat)))
  expect_identical(unique(dat$dataset), "ideal")
  expect_identical(unique(dat$entity_id), "Home_1")
  expect_equal(nrow(dat), 24L)
  # The fixture has no temperature column.
  expect_true(all(is.na(dat$temp)))
  expect_equal(unique(dat$resolution_minutes), 60L)
  expect_equal(dim(seg$segments), c(1L, 24L))
  expect_identical(seg$covariates$home_id[[1L]], "Home_1")
  expect_identical(
    seg$covariates$source_file[[1L]],
    "ideal_auxiliary_hourly.csv"
  )
})

test_that("elcf4r_read_ideal errors when multiple directory candidates remain", {
  tmp_dir <- tempfile("ideal-ambiguous")
  dir.create(tmp_dir)
  on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE)
  utils::write.csv(
    make_ideal_fixture(),
    file.path(tmp_dir, "ideal_hourly_one.csv"),
    row.names = FALSE,
    na = ""
  )
  utils::write.csv(
    make_ideal_fixture(),
    file.path(tmp_dir, "ideal_hourly_two.csv"),
    row.names = FALSE,
    na = ""
  )

  expect_error(
    elcf4r_read_ideal(path = tmp_dir),
    "multiple IDEAL hourly candidates",
    ignore.case = TRUE
  )
})

test_that("elcf4r_read_gx normalizes a flat transformer-level export", {
  tmp_csv <- tempfile(fileext = ".csv")
  on.exit(unlink(tmp_csv), add = TRUE)
  utils::write.csv(make_gx_fixture(), tmp_csv, row.names = FALSE, na = "")

  dat <- elcf4r_read_gx(
    path = tmp_csv,
    ids = "GX_A",
    start = "2020-07-01 00:00:00",
    end = "2020-07-01 23:00:00"
  )
  seg <- elcf4r_build_daily_segments(
    data = dat,
    carry_cols = c(
      "dataset", "community_id", "source_file", "holiday", "extreme_weather"
    )
  )

  expect_true(all(c(
    "dataset", "entity_id", "timestamp", "date", "time_index", "y", "temp",
    "resolution_minutes", "community_id", "source_file", "humidity",
    "holiday", "extreme_weather"
  ) %in% names(dat)))
  expect_identical(unique(dat$dataset), "gx")
  expect_identical(unique(dat$entity_id), "GX_A")
  expect_equal(nrow(dat), 24L)
  expect_equal(unique(dat$resolution_minutes), 60L)
  expect_equal(dim(seg$segments), c(1L, 24L))
  expect_false(all(is.na(dat$temp)))
  expect_identical(seg$covariates$community_id[[1L]], "GX_A")
})

test_that("elcf4r_read_gx reads the matching SQLite table and keeps metadata", {
  # Skip cleanly instead of erroring when the database packages are absent.
  skip_if_not_installed("DBI")
  skip_if_not_installed("RSQLite")

  tmp_db <- tempfile(fileext = ".sqlite")
  con <- DBI::dbConnect(RSQLite::SQLite(), tmp_db)
  # Handlers run in registration order: close the connection first, then
  # delete the database file.
  on.exit(DBI::dbDisconnect(con), add = TRUE)
  on.exit(unlink(tmp_db), add = TRUE)

  # A second, unrelated table sits alongside the profile table.
  DBI::dbWriteTable(
    con,
    "notes",
    data.frame(id = 1, message = "ignore", stringsAsFactors = FALSE)
  )
  DBI::dbWriteTable(con, "gx_profiles", make_gx_fixture())

  dat <- elcf4r_read_gx(path = tmp_db, ids = "GX_B")

  expect_identical(unique(dat$entity_id), "GX_B")
  expect_true("source_table" %in% names(dat))
  expect_identical(unique(dat$source_table), "gx_profiles")
  expect_true("humidity" %in% names(dat))
  expect_true("holiday" %in% names(dat))
  expect_true("extreme_weather" %in% names(dat))
})