# Tests for merge() between an EdSurvey data object and a plain data.frame.
# Not run on CRAN.
skip_on_cran()
# library() stops with an error when a package is missing; require() would only
# return FALSE with a warning and let the tests fail later with a less obvious error.
library(testthat)
library(EdSurvey)
options(width = 500)
options(useFancyQuotes = FALSE)
options(digits = 7)

context("merge with NAEPprimer") # When this fails all regression tests are invalid.
test_that("merge with NAEPprimer", {
  sdf <- readNAEP(system.file("extdata/data", "M36NT2PM.dat", package = "NAEPprimer"))
  # file doesn't have a true 'ID' field, so a synthetic one is built here.
  # for NAEP be mindful of 'defaultConditions' as that impacts size of 'cache'
  # assignment but not nrow/dim
  sdf$pseudoID <- 10 * seq_along(sdf$scrpsu)

  # Data frame to merge in: two copies of the key (to exercise by.x/by.y),
  # two synthetic variables, and two copies of known sdf columns that are used
  # to check row alignment after the merge.
  tmpDF <- data.frame(
    pseudoID = sdf$pseudoID,
    otherID = sdf$pseudoID,
    tVar1 = ifelse((seq_along(sdf$pseudoID) %% 2) == 0, "Even", "Odd"),
    tVar2 = ifelse((seq_along(sdf$pseudoID) %% 7) == 0, "Seven", "Not Seven"),
    knownVar1 = sdf$dsex,
    knownVar2 = sdf$origwt
  )

  # shuffle the dataframe by rows for testing
  tmpDF <- tmpDF[sample(seq_len(nrow(tmpDF))), ]

  # test - normal merge (ensure vectors are ordered properly on return)
  res <- merge(sdf, tmpDF, by = "pseudoID")
  expect_equal(res$knownVar1, res$dsex)
  expect_equal(res$knownVar2, res$origwt)

  # shuffle the dataframe by rows for testing
  tmpDF <- tmpDF[sample(seq_len(nrow(tmpDF))), ]

  # test - different by.x and by.y
  res <- merge(sdf, tmpDF, by.x = "pseudoID", by.y = "otherID")
  expect_equal(res$knownVar1, res$dsex)
  expect_equal(res$knownVar2, res$origwt)

  # test - smaller sample
  smallerSamp <- sample(seq_len(nrow(tmpDF)), 100)
  tmpDF2 <- tmpDF[smallerSamp, ]

  # options are set only for the duration of this block
  withr::with_options(list(digits = 7, scipen = 999), {
    res <- merge(sdf, tmpDF2,
      by.x = "pseudoID", by.y = "otherID",
      all.x = FALSE, all.y = TRUE, suffixes = c("", ".dupe")
    )
    expect_equal(table(res$knownVar1), table(tmpDF2$knownVar1))
    # res will have many NA values with small merge, so drop them before summarizing
    expect_equal(summary(res$knownVar2[!is.na(res$knownVar2)]), summary(tmpDF2$knownVar2))
  })
})

context("merge with TIMSS") # When this fails all regression tests are invalid.
test_that("merge with TIMSS", {
  skip_on_cran()

  # Read the 2015 TIMSS data for "usa" (third argument 4) from the local
  # EdSurvey data directory.
  usa <- readTIMSS(
    file.path(edsurveyHome, "TIMSS", "2015"),
    "usa",
    4,
    verbose = FALSE
  )

  # acbg20 = years principal at school
  schoolInfo <- getData(usa, c("idschool", "acbg20"), dropOmittedLevels = FALSE)
  schoolInfo$Prin10 <- schoolInfo$acbg20 > 10

  # drop to small subset (22 records): keep only rows where Prin10 is TRUE
  # (rows with NA are dropped as well), then de-duplicate
  keepRows <- which(schoolInfo$Prin10)
  schoolInfo <- unique(schoolInfo[keepRows, , drop = FALSE])

  # test merge without idstud being the cache
  usa2 <- merge(usa, schoolInfo, by = "idschool")

  # Prin10 is expected to be TRUE where acbg20 > 10 and NA everywhere else
  expect_equal(
    ifelse(usa$acbg20 > 10, TRUE, NA),
    usa2$Prin10
  )
})