context("Model matrix") set.seed(1, kind="Mersenne-Twister", normal.kind="Inversion") library(survey) data(api) model <- ~ stype + api99 + sch.wide*meals + ell + pct.resp test_that("model.matrix and model_matrix yield same result", { M1 <- model.matrix(model, apipop) M2 <- model_matrix(model, apipop, sparse=FALSE) expect_equivalent(M1, M2) expect_identical(order_interactions(colnames(M1)), order_interactions(colnames(M2))) }) test_that("contrasts as a list work", { expect_equal( model_matrix(model, apipop, contrasts.arg=list(sch.wide=levels(apipop$sch.wide)[1L], stype=levels(apipop$stype)[1L])), model_matrix(model, apipop) ) expect_equal( model_matrix(model, apipop, contrasts.arg=list(stype="E")), model_matrix(model, apipop) ) }) test_that("contrasts work as intended", { n <- 100 dat <- data.frame( f = as.factor(sample(1:3, n, replace=TRUE)) ) expect_equivalent( model.matrix(~ f, dat), model_matrix(~ f, dat, sparse=FALSE) ) expect_equivalent( model.matrix(~ 0 + f, dat), model_matrix(~ 0 + f, dat, sparse=FALSE) ) expect_equivalent( model_matrix(~ f, dat, contrasts.arg="contr.none", sparse=FALSE), cbind(1, model.matrix(~ 0 + f, dat)) ) expect_equal(sum(abs( model_matrix(~ f, dat, contrasts.arg="contr.none", sparse=FALSE) - model_matrix(~ f, dat, contrasts.arg="contr.none", sparse=TRUE) )), 0 ) }) test_that("aggregation with model_matrix works", { expect_equivalent( model_matrix(model, apipop, by=apipop$sch.wide, sparse=FALSE), rowsum(model.matrix(model, apipop), apipop$sch.wide) ) expect_equivalent( as.matrix(model_matrix(model, apipop, by=apipop$sch.wide, sparse=TRUE)), rowsum(model.matrix(model, apipop), apipop$sch.wide) ) }) test_that("matrix term works", { x <- rnorm(3) M <- matrix(1:6, 3, 2) a <- model.matrix(~ M) b <- model_matrix(~ M) expect_equivalent(a, b) expect_identical(colnames(a), colnames(b)) a <- model.matrix(~ x + M + -1) b <- model_matrix(~ x + M + -1) expect_equivalent(a, b) expect_identical(colnames(a), colnames(b)) }) test_that("mix of various term types works", { n <- 100 dat <- data.frame( x = rnorm(n), f = factor(sample(1:10, n, replace=TRUE)), g = factor(sample(1:25, n, replace=TRUE)), st = sample(c("a", "b", "c"), n, replace=TRUE), i = 1:n, d = as.Date(sample(c("21/5/69", "22/7/67"), n, replace=TRUE), "%d/%m/%y"), stringsAsFactors = FALSE ) mod <- ~ d + i + g + st + g:f + x + f + I(g) + I(2*x) + g*x + sin(x) + interaction(g,f) a <- model.matrix(mod, dat) b <- model_matrix(mod, dat, sparse=FALSE) expect_equivalent(a, b) expect_identical(order_interactions(colnames(a)), order_interactions(colnames(b))) a <- model_matrix(mod, dat, sparse=TRUE) expect_equal(sum(abs(a - b)), 0) expect_identical(order_interactions(colnames(a)), order_interactions(colnames(b))) }) n <- 100 data <- data.frame( t = 1:n, x = rnorm(n), z = as.numeric(sample(c(0, 1), n, replace=TRUE)), z0 = rep.int(0, n), z1 = rep.int(1, n), f1 = as.factor(sample(1:10, n, replace=TRUE)), f2 = as.factor(sample(1:16, n, replace=TRUE)), f3 = as.factor(sample(1:7, n, replace=TRUE)), f4 = as.factor(1) ) formulas <- list( ~ 0, ~ 1, ~ x, ~ 0 + x, ~ 0 + exp(x), ~ 1 + offset(x), ~ poly(x, 3), ~ 0 + f1 + f2, ~ 0 + f2 + f1, ~ 0 + f1*f2 + f2*f3 + f1*f3, ~ 0 + x:f2 + f1*f2 + f2*f3 + f1*f3, ~ 0 + f2:f1, ~ 0 + f1:f2 + f3:f2, ~ x*t, ~ I(x*t), ~ x + t*f1*f2 + f1*f2*f3, ~ 0 + t:x:f1:f2, ~ 0 + f1:z, ~ 0 + f1:z0, ~ 0 + f1:z1, ~ f2:z + f2*f1*z + f3:f1*z*z0 + z + z0 + z1 + z1*z:f2 ) test_that("many special case formulas work", { for (mod in formulas) { expect_equivalent( model.matrix(mod, data), model_matrix(mod, data, sparse=FALSE) ) } expect_error(model.matrix(~ f4, data)) expect_warning(model_matrix(~ f4, data)) }) data <- data.frame( Stratum = c("1", "1"), # character/factor variable with a single level S2 = c("2", "2"), C2 = factor(c("1", "2"), levels=1:2), C3 = factor("1", levels=1:2), C4 = factor(c("1", "3"), levels=1:4) ) test_that("single-level factors are allowed and corresponding terms are removed when contrasts are applied", { expect_warning(model_matrix(~ 0 + Stratum + S2, data)) expect_warning(model_matrix(~ 0 + S2 + Stratum + S2, data)) expect_equivalent(suppressWarnings(model_matrix(~ S2 + Stratum + S2, data)), matrix(1, nrow=2)) expect_identical(ncol(suppressWarnings(model_matrix(~ C2 + Stratum + S2 + C2, data))), 2L) expect_identical(ncol(suppressWarnings(model_matrix(~ C3 + Stratum + S2 + C2, data))), 3L) expect_identical(ncol(suppressWarnings(model_matrix(~ C4 + C3 + S2 + Stratum + S2 + C2, data))), 6L) expect_identical(colnames(model_matrix(~ 0 + C4, data)), c("C41", "C42", "C43", "C44")) expect_identical(colnames(model_matrix(~ 0 + C4, data, drop.unused.levels=TRUE)), c("C41", "C43")) expect_warning(model_matrix(~ C4 + C3 + S2 + Stratum + S2 + C2, data, drop.unused.levels=TRUE)) expect_equivalent(suppressWarnings(model_matrix(~ S2*Stratum + S2, data)), matrix(1, nrow=2)) }) test_that("special characters in variable names pose no problem", { dat <- data.frame(runif(10)); names(dat) <- "a[b]" # also for ;,! etc etc expect_equivalent( model.matrix(~ ., dat), model_matrix(~ ., dat, sparse=FALSE) ) })