# Tests for R/data_conversion.R # wide_to_long, long_to_wide, prepare_for_tna, action_to_onehot, prepare_onehot # --- Fixtures ----------------------------------------------------------- make_wide <- function() { data.frame( V1 = c("A", "B", "C"), V2 = c("B", "C", "A"), V3 = c("C", "A", "B"), stringsAsFactors = FALSE ) } make_long <- function() { data.frame( Actor = rep(1:3, each = 3), Time = rep(1:3, 3), Action = c("A", "B", "C", "B", "C", "A", "C", "A", "B"), stringsAsFactors = FALSE ) } # --- wide_to_long -------------------------------------------------------- test_that("wide_to_long basic conversion", { w <- make_wide() l <- wide_to_long(w) expect_s3_class(l, "data.frame") expect_equal(nrow(l), 9) expect_true(all(c("id", "Time", "Action") %in% names(l))) # Row 1 sequence: A, B, C sub1 <- l[l$id == 1, ] expect_equal(sub1$Action, c("A", "B", "C")) expect_equal(sub1$Time, 1:3) }) test_that("wide_to_long auto-generates IDs when id_col is NULL", { w <- make_wide() l <- wide_to_long(w) expect_equal(sort(unique(l$id)), 1:3) }) test_that("wide_to_long uses existing id_col", { w <- make_wide() w$person <- c("x", "y", "z") l <- wide_to_long(w, id_col = "person") expect_true("person" %in% names(l)) expect_equal(sort(unique(l$person)), c("x", "y", "z")) expect_false("id" %in% names(l)) }) test_that("wide_to_long drops NA by default", { w <- data.frame(V1 = c("A", "B"), V2 = c("B", NA), stringsAsFactors = FALSE) l <- wide_to_long(w, drop_na = TRUE) expect_equal(nrow(l), 3) expect_false(any(is.na(l$Action))) }) test_that("wide_to_long preserves NA when drop_na = FALSE", { w <- data.frame(V1 = c("A", "B"), V2 = c("B", NA), stringsAsFactors = FALSE) l <- wide_to_long(w, drop_na = FALSE) expect_equal(nrow(l), 4) expect_equal(sum(is.na(l$Action)), 1) }) test_that("wide_to_long preserves extra columns", { w <- make_wide() w$group <- c("g1", "g1", "g2") l <- wide_to_long(w) expect_true("group" %in% names(l)) expect_equal(l$group[l$id == 1], rep("g1", 3)) }) test_that("wide_to_long custom column names", { w <- data.frame(T1 = c("X", "Y"), T2 = c("Y", "X"), stringsAsFactors = FALSE) l <- wide_to_long(w, time_prefix = "T", action_col = "State", time_col = "Step") expect_true(all(c("State", "Step") %in% names(l))) expect_equal(nrow(l), 4) }) test_that("wide_to_long errors on missing time prefix", { w <- data.frame(x = 1:3, y = 4:6) expect_error(wide_to_long(w), "No columns matching") }) test_that("wide_to_long errors on invalid id_col", { w <- make_wide() expect_error(wide_to_long(w, id_col = "nonexistent"), "not found") }) # --- long_to_wide -------------------------------------------------------- test_that("long_to_wide basic conversion", { l <- make_long() w <- long_to_wide(l) expect_s3_class(w, "data.frame") expect_equal(nrow(w), 3) expect_true(all(c("V1", "V2", "V3") %in% names(w))) # Check first actor's sequence row1 <- w[w$Actor == 1, ] expect_equal(as.character(row1$V1), "A") expect_equal(as.character(row1$V3), "C") }) test_that("long_to_wide handles unequal sequence lengths", { l <- data.frame( Actor = c(1, 1, 1, 2, 2), Time = c(1, 2, 3, 1, 2), Action = c("A", "B", "C", "X", "Y"), stringsAsFactors = FALSE ) w <- long_to_wide(l) expect_equal(nrow(w), 2) expect_true("V3" %in% names(w)) expect_true(is.na(w$V3[w$Actor == 2])) }) test_that("long_to_wide without Time column uses row order", { l <- data.frame( Actor = c(1, 1, 2, 2), Action = c("A", "B", "C", "D"), stringsAsFactors = FALSE ) w <- long_to_wide(l, time_col = "Time") expect_equal(nrow(w), 2) expect_equal(as.character(w$V1[w$Actor == 1]), "A") expect_equal(as.character(w$V2[w$Actor == 1]), "B") }) test_that("long_to_wide custom prefix", { l <- make_long() w <- long_to_wide(l, time_prefix = "T") expect_true(all(c("T1", "T2", "T3") %in% names(w))) }) test_that("long_to_wide errors on missing columns", { l <- data.frame(x = 1:3, y = letters[1:3]) expect_error(long_to_wide(l), "Missing required columns") }) # --- Round-trip tests ---------------------------------------------------- test_that("wide -> long -> wide round-trip preserves data", { w_orig <- make_wide() l <- wide_to_long(w_orig) w_back <- long_to_wide(l, id_col = "id") # Compare just the V columns vcols <- grep("^V[0-9]+$", names(w_back), value = TRUE) recovered <- w_back[order(w_back$id), vcols] rownames(recovered) <- NULL expect_equal(recovered, w_orig) }) test_that("long -> wide -> long round-trip preserves data", { l_orig <- make_long() w <- long_to_wide(l_orig) l_back <- wide_to_long(w, id_col = "Actor") l_back <- l_back[order(l_back$Actor, l_back$Time), ] rownames(l_back) <- NULL expect_equal(l_back$Actor, l_orig$Actor) expect_equal(l_back$Action, l_orig$Action) }) # --- prepare_for_tna ----------------------------------------------------- test_that("prepare_for_tna sequences type returns only V columns", { w <- make_wide() w$extra <- "meta" result <- prepare_for_tna(w, type = "sequences") expect_equal(names(result), c("V1", "V2", "V3")) expect_false("extra" %in% names(result)) }) test_that("prepare_for_tna long type converts to wide", { l <- make_long() result <- prepare_for_tna(l, type = "long") expect_true(all(grepl("^V[0-9]+$", names(result)))) expect_equal(nrow(result), 3) }) test_that("prepare_for_tna auto-detects wide", { w <- make_wide() result <- prepare_for_tna(w, type = "auto") expect_equal(ncol(result), 3) }) test_that("prepare_for_tna auto-detects long", { l <- make_long() result <- prepare_for_tna(l, type = "auto") expect_true(all(grepl("^V[0-9]+$", names(result)))) }) test_that("prepare_for_tna validates state names", { w <- make_wide() expect_warning( prepare_for_tna(w, type = "sequences", state_names = c("A", "B"), validate = TRUE), "Unknown states" ) }) test_that("prepare_for_tna skips validation when validate = FALSE", { w <- make_wide() expect_no_warning( prepare_for_tna(w, type = "sequences", state_names = c("A", "B"), validate = FALSE) ) }) test_that("prepare_for_tna converts factors to characters", { w <- data.frame(V1 = factor(c("A", "B")), V2 = factor(c("B", "A"))) result <- prepare_for_tna(w, type = "sequences") expect_true(all(vapply(result, is.character, logical(1)))) }) test_that("prepare_for_tna errors on no V columns", { d <- data.frame(x = 1:3, y = letters[1:3]) expect_error(prepare_for_tna(d, type = "sequences"), "No sequence columns") }) test_that("prepare_for_tna auto errors when ambiguous", { d <- data.frame(x = 1:3) expect_error(prepare_for_tna(d, type = "auto"), "Cannot auto-detect") }) test_that("prepare_for_tna long errors on missing action col", { d <- data.frame(Actor = 1:3, Time = 1:3, State = c("A", "B", "C")) expect_error(prepare_for_tna(d, type = "long"), "Action column") }) # --- action_to_onehot ---------------------------------------------------- test_that("action_to_onehot basic encoding", { d <- data.frame(id = 1:4, Action = c("A", "B", "A", "C"), stringsAsFactors = FALSE) result <- action_to_onehot(d) expect_true(all(c("A", "B", "C") %in% names(result))) expect_false("Action" %in% names(result)) expect_equal(result$A, c(1L, 0L, 1L, 0L)) expect_equal(result$C, c(0L, 0L, 0L, 1L)) }) test_that("action_to_onehot keeps action when drop_action = FALSE", { d <- data.frame(Action = c("X", "Y"), stringsAsFactors = FALSE) result <- action_to_onehot(d, drop_action = FALSE) expect_true("Action" %in% names(result)) }) test_that("action_to_onehot sorts states", { d <- data.frame(Action = c("C", "A", "B"), stringsAsFactors = FALSE) result <- action_to_onehot(d, sort_states = TRUE) state_cols <- setdiff(names(result), "Action") expect_equal(state_cols, c("A", "B", "C")) }) test_that("action_to_onehot custom states includes extras as all-zero", { d <- data.frame(Action = c("A", "B"), stringsAsFactors = FALSE) result <- action_to_onehot(d, states = c("A", "B", "Z")) expect_true("Z" %in% names(result)) expect_equal(result$Z, c(0L, 0L)) }) test_that("action_to_onehot prefix works", { d <- data.frame(Action = c("A", "B"), stringsAsFactors = FALSE) result <- action_to_onehot(d, prefix = "s_") expect_true(all(c("s_A", "s_B") %in% names(result))) }) test_that("action_to_onehot errors on missing column", { d <- data.frame(State = c("A", "B")) expect_error(action_to_onehot(d), "action_col") }) # --- prepare_onehot ------------------------------------------------------ test_that("prepare_onehot basic conversion", { d <- data.frame(A = c(1, 0, 1), B = c(0, 1, 0)) result <- prepare_onehot(d, cols = c("A", "B")) expect_s3_class(result, "data.frame") expect_equal(nrow(result), 1) # Row 1 time 1: A active expect_equal(result[[1]], "A") expect_true(attr(result, "windowed") == FALSE) }) test_that("prepare_onehot with actor grouping", { d <- data.frame( actor = c(1, 1, 2, 2), X = c(1, 0, 0, 1), Y = c(0, 1, 1, 0) ) result <- prepare_onehot(d, cols = c("X", "Y"), actor = "actor") expect_equal(nrow(result), 2) }) test_that("prepare_onehot non-overlapping window", { d <- data.frame(A = c(1, 0, 1, 0), B = c(0, 1, 0, 1)) result <- prepare_onehot(d, cols = c("A", "B"), window_size = 2, window_type = "non-overlapping") expect_true(attr(result, "windowed")) expect_equal(attr(result, "window_size"), 2L) }) test_that("prepare_onehot overlapping window", { d <- data.frame(A = c(1, 0, 1, 0), B = c(0, 1, 0, 1)) result <- prepare_onehot(d, cols = c("A", "B"), window_size = 2, window_type = "overlapping") expect_true(attr(result, "windowed")) }) test_that("prepare_onehot aggregate within windows", { d <- data.frame(A = c(1, 0, 1, 0), B = c(0, 1, 0, 1)) result <- prepare_onehot(d, cols = c("A", "B"), window_size = 2, window_type = "non-overlapping", aggregate = TRUE) expect_true(attr(result, "windowed")) }) test_that("prepare_onehot errors on missing cols", { d <- data.frame(A = c(1, 0)) expect_error(prepare_onehot(d, cols = c("A", "Z")), "cols") }) test_that("prepare_onehot codes attribute set", { d <- data.frame(X = c(1, 0), Y = c(0, 1)) result <- prepare_onehot(d, cols = c("X", "Y")) expect_equal(attr(result, "codes"), c("X", "Y")) }) # --- prepare_for_tna auto-detect: both time-cols AND action col present ---- test_that("prepare_for_tna auto detects long when nrow >> n_ids (both cols present)", { # Data has both V-pattern cols AND an action column, with many rows per id d <- data.frame( V1 = rep(NA_character_, 8), Action = rep(c("A", "B"), 4), id = rep(1:2, each = 4), stringsAsFactors = FALSE ) # Many rows (8) vs 2 unique ids => auto should pick "long" result <- prepare_for_tna(d, type = "auto", id_col = "id", action_col = "Action") expect_true(all(grepl("^V[0-9]+$", names(result)))) }) test_that("prepare_for_tna auto falls back to sequences when nrow <= n_ids * 2", { # Each id has just 1 row -> nrow(2) is NOT > n_ids(2) * 2 -> sequences d <- data.frame( V1 = c("A", "B"), Action = c("X", "Y"), id = c(1, 2), stringsAsFactors = FALSE ) result <- prepare_for_tna(d, type = "auto", id_col = "id", action_col = "Action") expect_equal(names(result), "V1") }) test_that("prepare_for_tna auto falls back to sequences when no id_col", { # Both V-cols and Action col present but no id_col -> sequences d <- data.frame( V1 = c("A", "B"), Action = c("X", "Y"), stringsAsFactors = FALSE ) result <- prepare_for_tna(d, type = "auto") expect_equal(names(result), "V1") }) test_that("prepare_for_tna long errors on missing id col", { d <- data.frame(Actor = 1:3, Time = 1:3, Action = c("A", "B", "C")) expect_error( prepare_for_tna(d, type = "long", id_col = "no_such_col"), "not found" ) }) # --- prepare_onehot: overlapping window with group shorter than window_size --- test_that("prepare_onehot overlapping window: group shorter than window_size produces NA row", { # Group has 1 row, window_size = 3 -> n < window_size -> that group's row is NA # actor=1 has 1 row < ws=3; actor=2 has 3 rows -> 1 window # Both groups still produce 1 row each (2 total), actor=1 row has NA content d <- data.frame(actor = c(1, 2, 2, 2), A = c(1, 1, 0, 1), B = c(0, 0, 1, 0)) result <- prepare_onehot(d, cols = c("A", "B"), actor = "actor", window_size = 3, window_type = "overlapping") expect_s3_class(result, "data.frame") expect_true(attr(result, "windowed")) # actor=2 contributes a non-NA window expect_true(any(!is.na(result[[grep("^W", names(result))[1]]]))) }) # --- prepare_onehot: interval grouping --- test_that("prepare_onehot with interval creates wider columns", { # interval creates W0_T*, W1_T* columns within the single output row d <- data.frame(A = c(1, 0, 1, 0, 1, 0), B = c(0, 1, 0, 1, 0, 1)) result <- prepare_onehot(d, cols = c("A", "B"), interval = 2L) expect_s3_class(result, "data.frame") # Multiple W* columns created for the interval windows w_cols <- grep("^W", names(result), value = TRUE) expect_true(length(w_cols) > 2L) })