# Tests for duckplyr_distinct().
#
# Several tests are skipped when the DUCKPLYR_FORCE environment variable is
# set to "TRUE"; the remaining tests run regardless of that setting.

test_that("distinct equivalent to local unique when keep_all is TRUE", {
  input <- data.frame(
    x = rep(1, 4),
    y = rep(c(1, 2), each = 2),
    z = rep(c(1, 2), times = 2)
  )

  # With no columns named, the result should match base unique().
  expect_equal(duckplyr_distinct(input), unique(input))
})

test_that("distinct for single column works as expected (#1937)", {
  input <- tibble(
    x = rep(1, 4),
    y = rep(c(1, 2), each = 2),
    z = rep(c(1, 2), times = 2)
  )

  expect_equal(
    duckplyr_distinct(input, x, .keep_all = FALSE),
    unique(input["x"])
  )
  expect_equal(
    duckplyr_distinct(input, y, .keep_all = FALSE),
    unique(input["y"])
  )
})

test_that("distinct works for 0-sized columns (#1437)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")

  # Drop the only column so the input has rows but no columns.
  no_cols <- duckplyr_select(tibble(x = 1:10), -x)
  result <- duckplyr_distinct(no_cols)

  expect_equal(ncol(result), 0L)
})

test_that("if no variables specified, uses all", {
  input <- tibble(x = c(1, 1), y = c(2, 2))
  expected <- tibble(x = 1, y = 2)

  expect_equal(duckplyr_distinct(input), expected)
})

test_that("distinct keeps only specified cols", {
  input <- tibble(x = c(1, 1, 1), y = c(1, 1, 1))

  expect_equal(duckplyr_distinct(input, x), tibble(x = 1))
})

test_that("unless .keep_all = TRUE", {
  input <- tibble(x = c(1, 1, 1), y = 3:1)

  expect_equal(duckplyr_distinct(input, x), tibble(x = 1))
  # With .keep_all = TRUE the remaining columns come from the first row.
  expect_equal(
    duckplyr_distinct(input, x, .keep_all = TRUE),
    tibble(x = 1, y = 3L)
  )
})

test_that("distinct doesn't duplicate columns", {
  input <- tibble(a = 1:3, b = 4:6)

  # Naming the same column twice.
  expect_named(duckplyr_distinct(input, a, a), "a")

  # Naming a column that is also the grouping variable.
  grouped <- duckplyr_group_by(input, a)
  expect_named(duckplyr_distinct(grouped, a), "a")
})

test_that("grouped distinct always includes group cols", {
  input <- tibble(g = c(1, 2), x = c(1, 2))

  result <- input %>%
    duckplyr_group_by(g) %>%
    duckplyr_distinct(x)

  expect_named(result, c("g", "x"))
})

test_that("empty grouped distinct equivalent to empty ungrouped", {
  input <- tibble(g = c(1, 2), x = c(1, 2))

  distinct_then_group <- input %>%
    duckplyr_distinct() %>%
    duckplyr_group_by(g)
  group_then_distinct <- input %>%
    duckplyr_group_by(g) %>%
    duckplyr_distinct()

  expect_equal(distinct_then_group, group_then_distinct)
})

test_that("distinct on a new, mutated variable is equivalent to mutate followed by distinct", {
  input <- tibble(g = c(1, 2), x = c(1, 2))

  direct <- duckplyr_distinct(input, aa = g * 2)
  two_step <- input %>%
    duckplyr_mutate(aa = g * 2) %>%
    duckplyr_distinct(aa)

  expect_equal(direct, two_step)
})

test_that("distinct on a new, copied variable is equivalent to mutate followed by distinct (#3234)", {
  input <- tibble(g = c(1, 2), x = c(1, 2))

  direct <- duckplyr_distinct(input, aa = g)
  two_step <- input %>%
    duckplyr_mutate(aa = g) %>%
    duckplyr_distinct(aa)

  expect_equal(direct, two_step)
})

# NOTE(review): despite the wording of the description, the expectations below
# require duckplyr_distinct() to succeed on list columns, not to error.
test_that("distinct on a dataframe or tibble with columns of type list throws an error", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")

  list_tbl <- tibble(
    a = c("1", "1", "2", "2", "3", "3"),
    b = list("A")
  )
  list_df <- data.frame(x = 1:5, y = I(list(1:3, 2:4, 3:5, 4:6, 5:7)))

  # Every row of list_df is unique, so it should come back unchanged.
  expect_identical(duckplyr_distinct(list_df), list_df)
  # Rows 1, 3 and 5 are the first occurrence of each distinct row in list_tbl.
  expect_identical(
    duckplyr_distinct(list_tbl),
    duckplyr_slice(list_tbl, c(1, 3, 5))
  )
})

test_that("distinct handles 0 columns edge case (#2954)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")

  # Two rows, zero columns.
  no_cols <- data.frame(x = c(1, 1)) %>%
    duckplyr_select(one_of(character(0)))
  result <- duckplyr_distinct(no_cols)

  expect_equal(nrow(result), 1L)
  expect_equal(nrow(duckplyr_distinct(tibble())), 0L)
})

test_that("distinct respects order of the specified variables (#3195, #6156)", {
  input <- data.frame(x = 1:2, y = 3:4)

  expect_named(duckplyr_distinct(input, y, x), c("y", "x"))
})

test_that("distinct adds grouping variables to front if missing", {
  input <- data.frame(x = 1:2, y = 3:4)

  # Grouping column not requested: it is added in front.
  expect_named(
    duckplyr_distinct(duckplyr_group_by(input, y), x),
    c("y", "x")
  )
  # Grouping column requested explicitly: the requested order is kept.
  expect_named(
    duckplyr_distinct(duckplyr_group_by(input, y), x, y),
    c("x", "y")
  )
})

test_that("duckplyr_distinct() understands both NA variants (#4516)", {
  # The arithmetic followed by an explicit NA assignment is what builds the
  # "NA variants" named in the description (presumably differing internal
  # representations of a missing double -- confirm against #4516).
  na_df <- data.frame(col_a = c(1, NA, NA))
  na_df$col_a <- na_df$col_a + 0
  na_df$col_a[2] <- NA_real_
  expect_equal(nrow(duckplyr_distinct(na_df)), 2L)

  lhs <- data.frame(col_a = c(1, NA))
  rhs <- data.frame(col_a = c(1, NA))
  lhs$col_a <- lhs$col_a + 0
  rhs$col_a <- rhs$col_a + 0
  lhs$col_a[2] <- NA
  expect_equal(nrow(duckplyr_setdiff(lhs, rhs)), 0L)
})

test_that("duckplyr_distinct() handles auto splicing", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")

  # A data frame expression is spliced into its columns.
  expect_equal(
    iris %>% duckplyr_distinct(Species),
    iris %>% duckplyr_distinct(data.frame(Species = Species))
  )

  # Same for pick().
  expect_equal(
    iris %>% duckplyr_distinct(Species),
    iris %>% duckplyr_distinct(pick(Species))
  )

  # Same for across(), compared against an explicit mutate-then-distinct.
  expect_equal(
    iris %>%
      duckplyr_mutate(across(starts_with("Sepal"), round)) %>%
      duckplyr_distinct(Sepal.Length, Sepal.Width),
    iris %>%
      duckplyr_distinct(across(starts_with("Sepal"), round))
  )
})

test_that("distinct preserves grouping", {
  grouped <- tibble(x = c(1, 1, 2, 2), y = x) %>%
    duckplyr_group_by(x)

  # Plain distinct: no regrouping expected.
  n_regroups <- count_regroups(result <- duckplyr_distinct(grouped))
  expect_equal(n_regroups, 0)
  expect_equal(duckplyr_group_vars(result), "x")

  # Overwriting the grouping column: exactly one regrouping expected.
  n_regroups <- count_regroups(result <- duckplyr_distinct(grouped, x = x + 2))
  expect_equal(n_regroups, 1)
  expect_equal(duckplyr_group_vars(result), "x")
})

test_that("duckplyr_distinct() preserves attributes on bare data frames (#6318)", {
  bare <- vctrs::data_frame(x = c(1, 1))
  attr(bare, "foo") <- "bar"

  # Selecting an existing column.
  result <- duckplyr_distinct(bare, x)
  expect_identical(attr(result, "foo"), "bar")

  # Computing a new column.
  result <- duckplyr_distinct(bare, y = x + 1L)
  expect_identical(attr(result, "foo"), "bar")
})

# Errors ------------------------------------------------------------------

test_that("distinct errors when selecting an unknown column (#3140)", {
  skip_if(Sys.getenv("DUCKPLYR_FORCE") == "TRUE")

  # The code below is recorded in the snapshot, so it is kept verbatim.
  expect_snapshot({
    df <- tibble(g = c(1, 2), x = c(1, 2))

    (expect_error(df %>% duckplyr_distinct(aa, x)))
    (expect_error(df %>% duckplyr_distinct(aa, bb)))
    (expect_error(df %>% duckplyr_distinct(.data$aa)))

    (expect_error(df %>% duckplyr_distinct(y = a + 1)))
  })
})