# ---- Synthetic fixtures ---- make_events <- function() { data.frame( student = c("s1", "s1", "s1", "s2", "s2"), code = c("A", "B", "C", "A", "B"), timestamp = as.POSIXct(c( "2024-01-01 10:00:00", "2024-01-01 10:05:00", "2024-01-01 10:10:00", "2024-01-01 10:00:00", "2024-01-01 10:05:00" )), stringsAsFactors = FALSE ) } # ---- Basic: actor + action, no time ---- test_that("basic actor + action returns nestimate_data with correct structure", { ev <- make_events() res <- prepare_data(ev, actor = "student", action = "code") expect_s3_class(res, "nestimate_data") expect_named(res, c("sequence_data", "long_data", "meta_data", "time_data", "statistics")) expect_null(res$time_data) expect_equal(res$statistics$total_sessions, 2L) expect_equal(res$statistics$total_actions, 5L) expect_equal(res$statistics$unique_actors, 2L) # Wide columns named T1, T2, ... expect_true(all(grepl("^T\\d+$", names(res$sequence_data)))) expect_equal(nrow(res$sequence_data), 2L) }) test_that("sequence order follows row order when no time given", { ev <- make_events() res <- prepare_data(ev, actor = "student", action = "code") s1_row <- which(res$meta_data[[grep("student|actor", names(res$meta_data), value = TRUE)[1]]] == "s1") expect_equal(as.character(res$sequence_data[s1_row, 1:3]), c("A", "B", "C")) }) # ---- With time ---- test_that("time column is parsed and time_data is returned", { ev <- make_events() res <- prepare_data(ev, actor = "student", action = "code", time = "timestamp") expect_false(is.null(res$time_data)) expect_equal(ncol(res$time_data), res$statistics$max_sequence_length) expect_true(all(grepl("^time_T\\d+$", names(res$time_data)))) expect_s3_class(res$time_data[[1]], "POSIXct") }) # ---- Session splitting by time_threshold ---- test_that("time_threshold splits sessions within same actor", { ev <- data.frame( student = rep("s1", 4), code = c("A", "B", "C", "D"), timestamp = as.POSIXct(c( "2024-01-01 10:00:00", "2024-01-01 10:05:00", "2024-01-01 12:00:00", "2024-01-01 12:05:00" )), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = "student", action = "code", time = "timestamp", time_threshold = 900) expect_equal(res$statistics$total_sessions, 2L) expect_equal(nrow(res$sequence_data), 2L) # First session: A, B; Second session: C, D expect_equal(as.character(res$sequence_data[1, 1:2]), c("A", "B")) expect_equal(as.character(res$sequence_data[2, 1:2]), c("C", "D")) }) test_that("large time_threshold keeps everything in one session", { ev <- make_events() res <- prepare_data(ev, actor = "student", action = "code", time = "timestamp", time_threshold = 1e6) expect_equal(res$statistics$total_sessions, 2L) }) # ---- Missing actor ---- test_that("missing actor treats all data as one actor", { ev <- data.frame(code = c("A", "B", "C"), stringsAsFactors = FALSE) res <- prepare_data(ev, action = "code") expect_equal(res$statistics$total_sessions, 1L) expect_null(res$statistics$unique_actors) expect_equal(as.character(res$sequence_data[1, ]), c("A", "B", "C")) }) # ---- Multiple actor columns (interaction) ---- test_that("multiple actor columns create interaction grouping", { ev <- data.frame( student = c("s1", "s1", "s2", "s2"), group = c("g1", "g1", "g1", "g2"), code = c("A", "B", "C", "D"), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = c("student", "group"), action = "code") expect_equal(res$statistics$unique_actors, 3L) expect_equal(res$statistics$total_sessions, 3L) }) # ---- Explicit session column ---- test_that("session column creates separate sessions per actor-session combo", { ev <- data.frame( student = c("s1", "s1", "s1", "s1"), course = c("math", "math", "bio", "bio"), code = c("A", "B", "C", "D"), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = "student", action = "code", session = "course") expect_equal(res$statistics$total_sessions, 2L) }) test_that("multiple session columns create interaction sessions", { ev <- data.frame( student = rep("s1", 4), course = c("math", "math", "bio", "bio"), semester = c("fall", "spring", "fall", "fall"), code = c("A", "B", "C", "D"), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = "student", action = "code", session = c("course", "semester")) expect_equal(res$statistics$total_sessions, 3L) }) # ---- Order column for tie-breaking ---- test_that("order column controls sequence within tied timestamps", { ev <- data.frame( student = rep("s1", 3), code = c("C", "A", "B"), timestamp = rep(as.POSIXct("2024-01-01 10:00:00"), 3), priority = c(3, 1, 2), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = "student", action = "code", time = "timestamp", order = "priority") expect_equal(as.character(res$sequence_data[1, 1:3]), c("A", "B", "C")) }) # ---- Time parsing: ISO8601 string ---- test_that("ISO8601 T-separator timestamps are parsed", { ev <- data.frame( student = rep("s1", 2), code = c("A", "B"), ts = c("2024-01-01T10:00:00", "2024-01-01T10:05:00"), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = "student", action = "code", time = "ts") expect_false(is.null(res$time_data)) expect_equal(nrow(res$sequence_data), 1L) }) # ---- Time parsing: numeric auto-detected as unix ---- test_that("numeric time column auto-detected as unix timestamps", { ev <- data.frame( student = rep("s1", 3), code = c("A", "B", "C"), ts = c(1704100000, 1704100300, 1704100600), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = "student", action = "code", time = "ts") expect_false(is.null(res$time_data)) expect_s3_class(res$time_data[[1]], "POSIXct") }) # ---- Time parsing: explicit unix milliseconds ---- test_that("unix milliseconds are correctly converted", { ev <- data.frame( student = rep("s1", 2), code = c("A", "B"), ts = c(1704100000000, 1704100300000), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = "student", action = "code", time = "ts", is_unix_time = TRUE, unix_time_unit = "milliseconds") t1 <- res$time_data[[1]][1] expect_equal(as.numeric(t1), 1704100000, tolerance = 1) }) # ---- Custom time format ---- test_that("custom_format parses non-standard timestamps", { ev <- data.frame( student = rep("s1", 2), code = c("A", "B"), ts = c("01-Jan-2024 10:00", "01-Jan-2024 10:05"), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = "student", action = "code", time = "ts", custom_format = "%d-%b-%Y %H:%M") expect_false(is.null(res$time_data)) }) # ---- Extra columns aggregated per session ---- test_that("numeric extra columns are aggregated as mean", { ev <- data.frame( student = c("s1", "s1", "s1"), code = c("A", "B", "C"), score = c(10, 20, 30), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = "student", action = "code") expect_true("score" %in% names(res$meta_data)) expect_equal(res$meta_data$score, 20) }) test_that("character extra columns are aggregated as mode", { ev <- data.frame( student = c("s1", "s1", "s1"), code = c("A", "B", "C"), level = c("high", "high", "low"), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = "student", action = "code") expect_equal(res$meta_data$level, "high") }) # ---- print method ---- test_that("print.nestimate_data produces expected output", { ev <- make_events() res <- prepare_data(ev, actor = "student", action = "code", time = "timestamp") out <- capture.output(print(res)) expect_true(any(grepl("Prepared Data", out))) expect_true(any(grepl("Sessions:", out))) expect_true(any(grepl("Actors:", out))) expect_true(any(grepl("Time data: available", out))) }) test_that("print without actors omits Actors line", { ev <- data.frame(code = c("A", "B"), stringsAsFactors = FALSE) res <- prepare_data(ev, action = "code") out <- capture.output(print(res)) expect_false(any(grepl("Actors:", out))) }) # ---- Error conditions ---- test_that("non-data.frame input errors", { expect_error(prepare_data("not_a_df", actor = "a", action = "b")) }) test_that("missing action column errors", { ev <- make_events() expect_error(prepare_data(ev, actor = "student", action = "nonexistent")) }) test_that("missing actor column errors", { ev <- make_events() expect_error(prepare_data(ev, actor = "nonexistent", action = "code")) }) test_that("missing time column errors", { ev <- make_events() expect_error(prepare_data(ev, actor = "student", action = "code", time = "nonexistent")) }) test_that("invalid time_threshold errors", { ev <- make_events() expect_error(prepare_data(ev, actor = "student", action = "code", time_threshold = -1)) }) test_that("missing session column errors", { ev <- make_events() expect_error(prepare_data(ev, actor = "student", action = "code", session = "nonexistent")) }) test_that("missing order column errors", { ev <- make_events() expect_error(prepare_data(ev, actor = "student", action = "code", order = "nonexistent")) }) # ---- NA padding in wide format ---- test_that("shorter sessions are NA-padded in wide format", { ev <- data.frame( student = c("s1", "s1", "s1", "s2"), code = c("A", "B", "C", "X"), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = "student", action = "code") expect_equal(res$statistics$max_sequence_length, 3L) s2_row <- which(res$meta_data$student == "s2") expect_true(is.na(res$sequence_data[s2_row, 2])) expect_true(is.na(res$sequence_data[s2_row, 3])) }) # ---- Unix timestamp numeric path (L333-338) ---- test_that(".parse_time numeric unix path with milliseconds divisor", { # Call .parse_time directly via prepare_data with is_unix_time + milliseconds ev <- data.frame( student = rep("s1", 2), code = c("A", "B"), ts = c(1704100000000, 1704100300000), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = "student", action = "code", time = "ts", is_unix_time = TRUE, unix_time_unit = "milliseconds") expect_false(is.null(res$time_data)) # Should be about 2024 timestamps expect_s3_class(res$time_data[[1]], "POSIXct") }) test_that(".parse_time numeric unix path with microseconds divisor", { ev <- data.frame( student = rep("s1", 2), code = c("A", "B"), ts = c(1704100000000000, 1704100300000000), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = "student", action = "code", time = "ts", is_unix_time = TRUE, unix_time_unit = "microseconds") expect_false(is.null(res$time_data)) expect_s3_class(res$time_data[[1]], "POSIXct") }) test_that(".parse_time errors on unparseable string timestamps (L341-343)", { # Provide completely unparseable strings that cannot be coerced to numeric ev <- data.frame( student = rep("s1", 2), code = c("A", "B"), ts = c("not-a-date-xyz", "also-garbage"), stringsAsFactors = FALSE ) expect_error( prepare_data(ev, actor = "student", action = "code", time = "ts"), "Could not parse" ) }) # ---- .aggregate_metadata: all-NA extra column (L362) ---- test_that("extra column all-NA returns NA in meta_data", { ev <- data.frame( student = c("s1", "s1"), code = c("A", "B"), level = c(NA_character_, NA_character_), stringsAsFactors = FALSE ) res <- prepare_data(ev, actor = "student", action = "code") # level is extra column; all NA -> aggregated to NA expect_true("level" %in% names(res$meta_data)) expect_true(is.na(res$meta_data$level)) }) # ---- .aggregate_metadata: tied mode emits message (L366-373) ---- test_that("tied-mode character extra column emits message and returns first value", { ev <- data.frame( student = c("s1", "s1", "s1", "s1"), code = c("A", "B", "A", "B"), level = c("high", "low", "medium", "low"), stringsAsFactors = FALSE ) # "low" appears twice, "high" and "medium" once each -> tie between all equal # Actually: high=1, low=2, medium=1 -> mode is "low" (unique), no tie # To get a tie: use equal counts ev2 <- data.frame( student = c("s1", "s1"), code = c("A", "B"), level = c("high", "low"), stringsAsFactors = FALSE ) expect_message( res <- prepare_data(ev2, actor = "student", action = "code"), "ties resolved by first occurrence" ) expect_true(res$meta_data$level %in% c("high", "low")) })