test_that("constructor has sensible defaults", { first <- step_first(data.table(x = 1), "DT") step <- step_mutate(first) expect_s3_class(step, "dtplyr_step_mutate") expect_equal(step$parent, first) expect_equal(step$vars, "x") expect_equal(step$groups, character()) expect_equal(step$new_vars, list()) }) # copies ------------------------------------------------------------------ test_that("need to copy when there's a mutate", { dt <- lazy_dt(data.table(x = 1)) expect_false(dt %>% .$needs_copy) expect_false(dt %>% filter(x == 1) %>% .$needs_copy) expect_false(dt %>% head() %>% .$needs_copy) expect_true(dt %>% mutate(y = 1) %>% .$needs_copy) expect_true(dt %>% mutate(y = 1) %>% filter(x == 1) %>% .$needs_copy) expect_true(dt %>% mutate(y = 1) %>% head() %>% .$needs_copy) }) test_that("unless there's already an implicit copy", { dt <- lazy_dt(data.table(x = 1)) expect_true(dt %>% filter(x == 1) %>% .$implicit_copy) expect_false(dt %>% filter(x == 1) %>% mutate(y = 1) %>% .$needs_copy) expect_true(dt %>% head() %>% .$implicit_copy) expect_false(dt %>% head() %>% mutate(y = 1) %>% .$needs_copy) }) test_that("properly copies with chained operations, #210", { dt <- lazy_dt(data.table(x = 1)) query <- dt %>% mutate(z1 = 1) %>% summarize(z2 = 2) %>% mutate(z3 = 4) expect_true(query$implicit_copy) expect_true(query$needs_copy) }) # dplyr verbs ------------------------------------------------------------- test_that("generates single calls as expect", { dt <- lazy_dt(data.table(x = 1), "DT") expect_equal( dt %>% mutate(x2 = x * 2) %>% show_query(), expr(copy(DT)[, `:=`(x2 = x * 2)]) ) expect_equal( dt %>% group_by(x) %>% mutate(x2 = x * 2) %>% show_query(), expr(copy(DT)[, `:=`(x2 = x * 2), by = .(x)]) ) }) test_that("mutate generates compound expression if needed", { dt <- lazy_dt(data.table(x = 1, y = 2), "DT") expect_equal( dt %>% mutate(x2 = x * 2, x4 = x2 * 2) %>% show_query(), expr(copy(DT)[, c("x2", "x4") := { x2 <- x * 2 x4 <- x2 * 2 .(x2, x4) }]) ) }) test_that("allows multiple assignment to the same variable", { dt <- lazy_dt(data.table(x = 1, y = 2), "DT") # when nested expect_equal( dt %>% mutate(x = x * 2, x = x * 2) %>% show_query(), expr(copy(DT)[, c("x") := { x <- x * 2 x <- x * 2 .(x) }]) ) # when not nested expect_equal( dt %>% mutate(z = 2, z = 3) %>% show_query(), expr(copy(DT)[, `:=`(c("z"), { z <- 2 z <- 3 .(z) })]) ) }) test_that("can use across", { dt <- lazy_dt(data.table(x = 1, y = 2), "DT") expect_equal( dt %>% mutate(across(everything(), ~ . + 1)) %>% show_query(), expr(copy(DT)[, `:=`(x = x + 1, y = y + 1)]) ) expect_equal( dt %>% mutate(across(.fns = ~ . + 1)) %>% show_query(), expr(copy(DT)[, `:=`(x = x + 1, y = y + 1)]) ) }) test_that("across() can access previously created variables", { dt <- lazy_dt(data.frame(x = 1), "DT") step <- mutate(dt, y = 2, across(y, sqrt)) expect_equal( collect(step), tibble(x = 1, y = sqrt(2)) ) expect_equal( step$vars, c("x", "y") ) expect_equal( show_query(step), expr(copy(DT)[, `:=`(c("y"), { y <- 2 y <- sqrt(y) .(y) })]) ) }) test_that("vars set correctly", { dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) expect_equal(dt %>% mutate(z = 1) %>% .$vars, c("x", "y", "z")) expect_equal(dt %>% mutate(x = NULL, z = 1) %>% .$vars, c("y", "z")) }) test_that("empty mutate returns input", { dt <- lazy_dt(data.frame(x = 1)) expect_equal(mutate(dt), dt) expect_equal(mutate(dt, !!!list()), dt) }) test_that("unnamed arguments matching column names are ignored", { dt <- lazy_dt(data.frame(x = 1), "DT") expect_identical(mutate(dt, x), dt) expect_snapshot(mutate(dt, y), error = TRUE) }) test_that("new columns take precedence over global variables", { dt <- lazy_dt(data.frame(x = 1), "DT") y <- 'global var' step <- mutate(dt, y = 2, z = y + 1) expect_equal( collect(step), tibble(x = 1, y = 2, z = 3) ) expect_equal( show_query(step), expr(copy(DT)[, `:=`(c("y", "z"), { y <- 2 z <- y + 1 .(y, z) })]) ) }) test_that("works with `.by`", { dt <- lazy_dt(data.table(x = 1:3, y = c("a", "a", "b"))) step <- dt %>% mutate(row_num = row_number(), .by = y) expect_equal(as_tibble(step), tibble(x = 1:3, y = c("a", "a", "b"), row_num = c(1, 2, 1))) expect_true(length(step$groups) == 0) }) test_that("Using `.by` doesn't group prior step, #439", { dt <- lazy_dt(data.table(x = 1:3, y = c(1, 1, 2), z = 1), "DT") res <- dt %>% select(x, y) %>% mutate(row_num = row_number(), .by = y) %>% filter(row_num < 3, .by = y) %>% as.data.frame() # Note: Why this test catches the potential error... # data.frames/data.tables allow duplicate column names. # If using `.by` affected the `select` step, data.table would duplicate the "y" column. # and there would therefore be two "y" columns in the result. expect_equal(names(res), c("x", "y", "row_num")) }) # var = NULL ------------------------------------------------------------- test_that("var = NULL works when var is in original data", { dt <- lazy_dt(data.frame(x = 1), "DT") step <- dt %>% mutate(x = 2, z = x*2, x = NULL) expect_equal( collect(step), tibble(z = 4) ) expect_equal( step$vars, "z" ) expect_equal( show_query(step), expr(copy(DT)[, `:=`(c("x", "z"), { x <- 2 z <- x * 2 .(x, z) })][, `:=`("x", NULL)] ) ) }) test_that("var = NULL when var is in final output", { dt <- lazy_dt(data.frame(x = 1), "DT") step <- mutate(dt, y = NULL, y = 3) expect_equal( collect(step), tibble(x = 1, y = 3) ) expect_equal( step$vars, c("x", "y") ) expect_equal( show_query(step), expr(copy(DT)[, `:=`(c("y"), { y <- NULL y <- 3 .(y) })]) ) }) test_that("temp var with nested arguments", { dt <- lazy_dt(data.frame(x = 1), "DT") step <- mutate(dt, y = 2, z = y*2, y = NULL) expect_equal( collect(step), tibble(x = 1, z = 4) ) expect_equal( step$vars, c("x", "z") ) expect_equal( show_query(step), expr(copy(DT)[, `:=`(c("y", "z"), { y <- 2 z <- y * 2 .(y, z) })][, `:=`("y", NULL)]) ) }) test_that("temp var with no new vars added", { dt <- lazy_dt(data.frame(x = 1), "DT") # when no other vars are added step <- mutate(dt, y = 2, y = NULL) expect_equal( collect(step), tibble(x = 1) ) expect_equal( step$vars, "x" ) expect_equal( show_query(step), expr(copy(DT)[, `:=`(c("y"), { y <- 2 .(y) })][, `:=`("y", NULL)]) ) }) test_that("var = NULL works when data is grouped", { dt <- lazy_dt(data.frame(x = 1, g = 1), "DT") %>% group_by(g) # when var is not in original data step <- mutate(dt, y = 2, z = y*2, y = NULL) expect_equal( collect(step), tibble(x = 1, g = 1, z = 4) %>% group_by(g) ) expect_equal( step$vars, c("x", "g", "z") ) expect_equal( show_query(step), expr(copy(DT)[, `:=`(c("y", "z"), { y <- 2 z <- y * 2 .(y, z) }), by = .(g)][, `:=`("y", NULL)]) ) # when var is in original data step <- dt %>% mutate(x = 2, z = x*2, x = NULL) expect_equal( collect(step), tibble(g = 1, z = 4) %>% group_by(g) ) expect_equal( step$vars, c("g", "z") ) expect_equal( show_query(step), expr(copy(DT)[, `:=`(c("x", "z"), { x <- 2 z <- x * 2 .(x, z) }), by = .(g)][, `:=`("x", NULL)]) ) }) # .before and .after ----------------------------------------------------------- test_that("can use .before and .after to control column position", { dt <- lazy_dt(data.frame(x = 1, y = 2)) expect_named( mutate(dt, z = 1) %>% as_tibble(), c("x", "y", "z") ) expect_named( mutate(dt, z = 1, .before = x) %>% as_tibble(), c("z", "x", "y") ) expect_named( mutate(dt, z = 1, .after = x) %>% as_tibble(), c("x", "z", "y") ) # but doesn't affect order of existing columns expect_named( mutate(dt, x = 1, .after = y) %>% as_tibble(), c("x", "y") ) }) # .before and .after ----------------------------------------------------------- test_that(".keep = 'unused' keeps variables explicitly mentioned", { df <- data.table(x = 1, y = 2) out <- df %>% mutate(x1 = x + 1, y = y, .keep = "unused") %>% as.data.table() expect_named(out, c("y", "x1")) }) test_that(".keep = 'used' not affected by across()", { df <- data.table(x = 1, y = 2, z = 3, a = "a", b = "b", c = "c") # This must evaluate every column in order to figure out if should # be included in the set or not, but that shouldn't be counted for # the purposes of "used" variables out <- df %>% mutate(across(c(x, y, z), identity), .keep = "unused") %>% as.data.table() expect_named(out, names(df)) }) test_that(".keep = 'used' keeps variables used in expressions", { df <- data.table(a = 1, b = 2, c = 3, x = 1, y = 2) out <- df %>% mutate(xy = x + y, .keep = "used") %>% as.data.table() expect_named(out, c("x", "y", "xy")) }) test_that(".keep = 'none' only keeps grouping variables", { df <- data.table(x = 1, y = 2) gf <- df %>% group_by(x) out1 <- df %>% mutate(z = 1, .keep = "none") %>% as.data.table() expect_named(out1, "z") out2 <- gf %>% mutate(z = 1, .keep = "none") %>% as.data.table() expect_named(out2, c("x", "z")) }) test_that(".keep = 'none' retains original ordering", { df <- data.table(x = 1, y = 2) out1 <- df %>% mutate(y = 1, x = 2, .keep = "none") %>% as.data.table() expect_named(out1, c("x", "y")) # even when grouped out2 <- df %>% group_by(x) %>% mutate(y = 1, x = 2, .keep = "none") %>% as.data.table() expect_named(out2, c("x", "y")) }) test_that("works with empty dots", { df <- data.table(x = 1, y = 2) out <- df %>% mutate(.keep = "used") %>% as.data.table() expect_equal(ncol(out), 0) }) test_that("works with trivial dots", { out <- lazy_dt(mtcars) %>% mutate(mpg, .keep = 'used') %>% as.data.table() expect_named(out, "mpg") })