context("test_group_by") library("dplyr") library("tidylog") test_that("join", { expect_message({ out <- tidylog::inner_join(dplyr::band_members, dplyr::band_instruments, by = "name") }) expect_equal(out, dplyr::inner_join(dplyr::band_members, dplyr::band_instruments, by = "name")) expect_message({ out <- tidylog::full_join(dplyr::band_members, dplyr::band_instruments, by = "name") }) expect_equal(out, dplyr::full_join(dplyr::band_members, dplyr::band_instruments, by = "name")) expect_message({ out <- tidylog::left_join(dplyr::band_members, dplyr::band_instruments, by = "name") }) expect_equal(out, dplyr::left_join(dplyr::band_members, dplyr::band_instruments, by = "name")) expect_message({ out <- tidylog::right_join(dplyr::band_members, dplyr::band_instruments, by = "name") }) expect_equal(out, dplyr::right_join(dplyr::band_members, dplyr::band_instruments, by = "name")) expect_message({ out <- tidylog::anti_join(dplyr::band_members, dplyr::band_instruments, by = "name") }) expect_equal(out, dplyr::anti_join(dplyr::band_members, dplyr::band_instruments, by = "name")) expect_message({ out <- tidylog::semi_join(dplyr::band_members, dplyr::band_instruments, by = "name") }) expect_equal(out, dplyr::semi_join(dplyr::band_members, dplyr::band_instruments, by = "name")) expect_silent({ out <- dplyr::inner_join(dplyr::band_members, dplyr::band_instruments, by = "name") out <- dplyr::full_join(dplyr::band_members, dplyr::band_instruments, by = "name") out <- dplyr::left_join(dplyr::band_members, dplyr::band_instruments, by = "name") out <- dplyr::right_join(dplyr::band_members, dplyr::band_instruments, by = "name") out <- dplyr::anti_join(dplyr::band_members, dplyr::band_instruments, by = "name") out <- dplyr::semi_join(dplyr::band_members, dplyr::band_instruments, by = "name") }) }) test_that("join: argument order", { expect_message({ out <- tidylog::inner_join(y = dplyr::band_instruments, x = dplyr::band_members) }) expect_equal(out, dplyr::inner_join(y = dplyr::band_instruments, x = dplyr::band_members)) expect_equal(out, dplyr::inner_join(x = dplyr::band_members, y = dplyr::band_instruments)) expect_message({ out <- tidylog::inner_join(by = "name", x = dplyr::band_members, y = dplyr::band_instruments) }) expect_equal(out, dplyr::inner_join(by = "name", x = dplyr::band_members, y = dplyr::band_instruments)) }) # adapted from dplyr tests a <- data.frame(x = c(1, 1, 2, 3), y = 1:4) b <- data.frame(x = c(1, 2, 2, 4), z = 1:4) test_that("univariate inner join has all columns, repeated matching rows", { msg <- capture_messages({ j <- tidylog::inner_join(a, b, "x", relationship = "many-to-many") }) # one row from x not included expect_match(msg, "rows only in a\\s*\\(1\\)", all = FALSE) # one row from y not included expect_match(msg, "rows only in b\\s*\\(1\\)", all = FALSE) # four rows matched, including duplicates expect_match(msg, "matched rows\\s*4", all = FALSE) expect_match(msg, "duplicate", all = FALSE) # total rows expect_match(msg, paste0("rows total\\s*", nrow(j)), all = FALSE) }) test_that("univariate semi join has x columns, matching rows", { msg <- capture_messages({ j <- tidylog::semi_join(a, b, "x") }) # one row from x not included expect_match(msg, "rows only in a\\s*\\(1\\)", all = FALSE) # one row from y not included expect_match(msg, "rows only in b\\s*\\(1\\)", all = FALSE) # three rows matched, no duplication expect_match(msg, "matched rows\\s*3", all = FALSE) expect_true(!any(grepl("duplicate", msg))) # total rows expect_match(msg, paste0("rows total\\s*", nrow(j)), all = FALSE) }) test_that("univariate full join has all columns, all rows", { msg <- capture_messages({ j <- tidylog::full_join(a, b, "x", relationship = "many-to-many") }) # one row from x included expect_match(msg, "rows only in a\\s*1", all = FALSE) # one row from y included expect_match(msg, "rows only in b\\s*1", all = FALSE) # four rows matched, including duplicates expect_match(msg, "matched rows\\s*4", all = FALSE) expect_match(msg, "duplicate", all = FALSE) # total rows expect_match(msg, paste0("rows total\\s*", nrow(j)), all = FALSE) }) test_that("univariate left join has all columns, all rows", { msg <- capture_messages({ j <- tidylog::left_join(a, b, "x", relationship = "many-to-many") }) # one row from x included expect_match(msg, "rows only in a\\s*1", all = FALSE) # one row from y not included expect_match(msg, "rows only in b\\s*\\(1\\)", all = FALSE) # four rows matched, including duplicates expect_match(msg, "matched rows\\s*4", all = FALSE) expect_match(msg, "duplicate", all = FALSE) # total rows expect_match(msg, paste0("rows total\\s*", nrow(j)), all = FALSE) }) test_that("univariate right join has all columns, all rows", { msg <- capture_messages({ j <- tidylog::right_join(a, b, "x", relationship = "many-to-many") }) # one row from x not included expect_match(msg, "rows only in a\\s*\\(1\\)", all = FALSE) # one row from y included expect_match(msg, "rows only in b\\s*1", all = FALSE) # four rows matched, including duplicates expect_match(msg, "matched rows\\s*4", all = FALSE) expect_match(msg, "duplicate", all = FALSE) # total rows expect_match(msg, paste0("rows total\\s*", nrow(j)), all = FALSE) }) test_that("univariate anti join has x columns, missing rows", { msg <- capture_messages({ j <- tidylog::anti_join(a, b, "x") }) # one row from x included expect_match(msg, "rows only in a\\s*1", all = FALSE) # one row from y not included expect_match(msg, "rows only in b\\s*\\(1\\)", all = FALSE) # three matched rows not included expect_match(msg, "matched rows\\s*\\(3\\)", all = FALSE) # total rows expect_match(msg, paste0("rows total\\s*", nrow(j)), all = FALSE) }) # edge cases for duplication test_that("no duplication", { a <- data.frame(x = c(1, 2, 3, 5), y = 1:4) b <- data.frame(x = c(1, 2, 3, 4), z = 1:4) msg <- capture_messages(tidylog::inner_join(a, b, "x")) expect_true(!any(grepl("duplicate", msg))) msg <- capture_messages(tidylog::full_join(a, b, "x")) expect_true(!any(grepl("duplicate", msg))) msg <- capture_messages(tidylog::left_join(a, b, "x")) expect_true(!any(grepl("duplicate", msg))) }) get_indices <- function(msg) { matched <- gregexpr(pattern =',', msg[c(2, 3, 4, 6)]) unique(sapply(matched, function(x) x[[1]])) } test_that("correct alignment", { msg <- function(fun) { capture_messages({ j <- fun(nycflights13::flights, nycflights13::weather, by = c("year", "month", "day", "origin", "hour", "time_hour")) }) } expect_equal(length(get_indices(msg(inner_join))), 1) expect_equal(length(get_indices(msg(semi_join))), 1) expect_equal(length(get_indices(msg(left_join))), 1) expect_equal(length(get_indices(msg(right_join))), 1) expect_equal(length(get_indices(msg(full_join))), 1) expect_equal(length(get_indices(msg(anti_join))), 1) }) test_that("correct alignment -- long df name", { msg <- function(fun) { capture_messages({ verylongnameverylongnameverylongname <- nycflights13::flights j <- fun(verylongnameverylongnameverylongname, nycflights13::weather, by = c("year", "month", "day", "origin", "hour", "time_hour")) }) } expect_equal(length(get_indices(msg(inner_join))), 1) expect_equal(length(get_indices(msg(semi_join))), 1) expect_equal(length(get_indices(msg(left_join))), 1) expect_equal(length(get_indices(msg(right_join))), 1) expect_equal(length(get_indices(msg(full_join))), 1) expect_equal(length(get_indices(msg(anti_join))), 1) msg <- function(fun) { capture_messages({ verylongnameverylongnameverylongname <- nycflights13::weather j <- fun(nycflights13::flights, verylongnameverylongnameverylongname, by = c("year", "month", "day", "origin", "hour", "time_hour")) }) } expect_equal(length(get_indices(msg(inner_join))), 1) expect_equal(length(get_indices(msg(semi_join))), 1) expect_equal(length(get_indices(msg(left_join))), 1) expect_equal(length(get_indices(msg(right_join))), 1) expect_equal(length(get_indices(msg(full_join))), 1) expect_equal(length(get_indices(msg(anti_join))), 1) })