# splitByDate tests
#
# Use a small slice of mydata plus a tiny synthetic frame for exact checks.

# Restrict the example data to calendar year 2003 so every split date used
# below falls inside the data range.
dat <- selectByDate(mydata, year = 2003)

# Shared results ---------------------------------------------------------------
# Read-only fixtures reused by several tests. Date strings are dd/mm/yyyy
# (the tests below compare "1/7/2003" against 2003-07-01).

sbd_single <- splitByDate(
  dat,
  dates = "1/7/2003",
  labels = c("before", "after")
)

sbd_multi <- splitByDate(
  dat,
  dates = c("1/4/2003", "1/10/2003"),
  labels = c("early", "mid", "late")
)

# --- Output structure --------------------------------------------------------

test_that("splitByDate returns a data frame with one more column than input", {
  expect_s3_class(sbd_single, "data.frame")
  expect_equal(ncol(sbd_single), ncol(dat) + 1L)
  expect_true("split.by" %in% names(sbd_single))
})

test_that("row count is unchanged", {
  expect_equal(nrow(sbd_single), nrow(dat))
})

test_that("custom name argument is used for the new column", {
  result <- splitByDate(
    dat,
    dates = "1/7/2003",
    labels = c("before", "after"),
    name = "period"
  )

  expect_true("period" %in% names(result))
  expect_false("split.by" %in% names(result))
})

test_that("new column is an ordered factor with the supplied labels", {
  expect_true(is.factor(sbd_single$split.by))
  expect_true(is.ordered(sbd_single$split.by))
  expect_equal(levels(sbd_single$split.by), c("before", "after"))
})

# --- Correctness with known split --------------------------------------------

test_that("single split: all rows are labelled 'before' or 'after'", {
  expect_true(all(sbd_single$split.by %in% c("before", "after")))
})

test_that(
  "single split: rows before split date are 'before', on/after are 'after'",
  {
    split_dt <- as.POSIXct("2003-07-01", tz = "GMT")

    # NOTE(review): both comparisons are strict, so a row stamped exactly at
    # the split instant is not asserted either way -- confirm the intended
    # boundary label and pin it with an explicit expectation.
    expect_true(all(sbd_single$split.by[dat$date < split_dt] == "before"))
    expect_true(all(sbd_single$split.by[dat$date > split_dt] == "after"))
  }
)

test_that("multiple splits produce correct label for each partition", {
  early_dt <- as.POSIXct("2003-04-01", tz = "GMT")
  late_dt <- as.POSIXct("2003-10-01", tz = "GMT")

  expect_true(all(sbd_multi$split.by[dat$date < early_dt] == "early"))
  expect_true(all(sbd_multi$split.by[dat$date > late_dt] == "late"))

  # Rows strictly between the two split dates belong to the middle partition.
  mid_rows <- dat$date > early_dt & dat$date < late_dt
  expect_true(all(sbd_multi$split.by[mid_rows] == "mid"))
})

test_that("all three labels are present in a three-partition split", {
  expect_setequal(levels(sbd_multi$split.by), c("early", "mid", "late"))
  expect_true(all(c("early", "mid", "late") %in% sbd_multi$split.by))
})

# --- Date input formats ------------------------------------------------------

test_that("YYYY/MM/DD string format is accepted", {
  result <- splitByDate(
    dat,
    dates = "2003/07/01",
    labels = c("before", "after")
  )

  expect_setequal(as.character(unique(result$split.by)), c("before", "after"))
})

# The split point here is supplied as a POSIXct date-time, not a Date.
test_that("POSIXct object is accepted as dates argument", {
  result <- splitByDate(
    dat,
    dates = as.POSIXct("2003-07-01", tz = "GMT"),
    labels = c("before", "after")
  )

  expect_true(all(result$split.by %in% c("before", "after")))
})

# --- Synthetic data (Date class, not POSIXct) --------------------------------

test_that("works correctly with a Date-class date column", {
  days <- seq(as.Date("2020-01-01"), as.Date("2020-12-31"), by = "day")
  df <- data.frame(
    date = days,
    # Deterministic filler column sized from `days`; its values are never
    # inspected, so no random draw (and no hard-coded length) is needed.
    x = seq(0, 1, length.out = length(days))
  )

  result <- splitByDate(df, dates = "2020/07/01", labels = c("h1", "h2"))
  split_day <- as.Date("2020-07-01")

  expect_true(is.ordered(result$split.by))
  expect_true(all(result$split.by[df$date < split_day] == "h1"))
  expect_true(all(result$split.by[df$date > split_day] == "h2"))
})

# --- Input validation --------------------------------------------------------

test_that("mismatched dates and labels raises an informative error", {
  # 1 date needs exactly 2 labels: too few ...
  expect_error(
    splitByDate(dat, dates = "1/7/2003", labels = c("only_one")),
    regexp = "mis-match"
  )

  # ... and too many are both rejected.
  expect_error(
    splitByDate(dat, dates = "1/7/2003", labels = c("a", "b", "c")),
    regexp = "mis-match"
  )
})