## for R_DEFAULT_PACKAGES=NULL : library(stats) library(utils) library(Matrix) ## This is example(sp....) -- much extended mEQ <- function(x, y, check.attributes = NA, ...) { ## first drop columns from y which are all 0 : if(any(i0 <- colSums(abs(x)) == 0)) { message(gettextf("x had %d zero-columns", sum(i0))) x <- x[, !i0, drop = FALSE] } if(any(i0 <- colSums(abs(y)) == 0)) { message(gettextf("y had %d zero-columns", sum(i0))) y <- y[, !i0, drop = FALSE] } isTRUE(all.equal(x, y, tolerance = 0, check.attributes = check.attributes, ...)) } ##' Is sparse.model.matrix() giving the "same" as dense model.matrix() ? ##' ##' @return logical ##' @param frml formula ##' @param dat data frame ##' @param showFactors ##' @param ... further arguments passed to {sparse.}model.matrix() isEQsparseDense <- function(frml, dat, showFactors = isTRUE(getOption("verboseSparse")), ...) { ## Author: Martin Maechler, Date: 21 Jul 2009 stopifnot(inherits(frml, "formula"), is.data.frame(dat)) if(showFactors) print(attr(terms(frml, data=dat), "factors")) smm <- sparse.model.matrix(frml, dat, ...) mm <- model.matrix(frml, dat, ...) sc <- smm@contrasts mEQ(as(smm, "generalMatrix"), Matrix(mm, sparse=TRUE)) & identical(smm@assign, attr(mm, "assign")) & (if(is.null(mc <- attr(mm, "contrasts"))) length(sc) == 0 else identical(sc, mc)) } ### ------------ all the "datasets" we construct for use ------------- dd <- data.frame(a = gl(3,4), b = gl(4,1,12))# balanced 2-way (dd3 <- cbind(dd, c = gl(2,6), d = gl(3,8))) dd. <- dd3[- c(1, 13:15, 17), ] set.seed(17) dd4 <- cbind(dd, c = gl(2,6), d = gl(8,3)) dd4 <- cbind(dd4, x = round(rnorm(nrow(dd4)), 1)) dd4 <- dd4[- c(1, 13:15, 17), ] ##-> 'd' has unused levels dM <- dd4 dM$X <- outer(10*rpois(nrow(dM), 2), 1:3) dM$Y <- cbind(pmax(0, dM$x - .3), floor(4*rnorm(nrow(dM)))) str(dM)# contains *matrices* options("contrasts") # the default: "contr.treatment" op <- options(sparse.colnames = TRUE) # for convenience stopifnot(identical(## non-sensical, but "should work" (with a warning each): sparse.model.matrix(a~ 1, dd), sparse.model.matrix( ~ 1, dd))) sparse.model.matrix(~ a + b, dd, contrasts.arg = list(a="contr.sum")) sparse.model.matrix(~ a + b, dd, contrasts.arg = list(b="contr.SAS")) xm <- sparse.model.matrix(~ x, dM) # {no warning anymore ...} dxm <- Matrix(model.matrix(~ x, dM), sparse=TRUE) stopifnot(is(xm, "sparseMatrix"), mEQ(as(xm,"generalMatrix"), dxm)) ## Sparse method is equivalent to the traditional one : stopifnot(isEQsparseDense(~ a + b, dd), suppressWarnings(isEQsparseDense(~ x, dM)), isEQsparseDense(~ 0 + a + b, dd), identical(sparse.model.matrix(~ 0 + a + b, dd), sparse.model.matrix(~ -1 + a + b, dd)), isEQsparseDense(~ a + b, dd, contrasts.arg = list(a="contr.sum")), isEQsparseDense(~ a + b, dd, contrasts.arg = list(a="contr.SAS")), ## contrasts as *functions* or contrast *matrices* : isEQsparseDense(~ a + b, dd, contrasts.arg = list( a=contr.sum, b=contr.treatment(4))), isEQsparseDense(~ a + b, dd, contrasts.arg = list( a=contr.SAS(3), b = function(n, contr=TRUE, sparse=FALSE) contr.sum(n=n, contrasts=contr, sparse=sparse)))) sm <- sparse.model.matrix(~a * b, dd, contrasts.arg = list(a=contr.SAS(3, sparse=TRUE))) sm ## FIXME: Move part of this to ../../MatrixModels/tests/ ##stopifnot(all(sm == model.Matrix( ~a * b, dd, contrasts= list(a= contr.SAS(3))))) ## stopifnot(isEQsparseDense(~ a + b + c + d, dd.)) stopifnot(isEQsparseDense(~ a + b:c + c + d, dd.)) ## no intercept -- works too stopifnot(isEQsparseDense(~ -1+ a + b + c + d, dd.)) stopifnot(isEQsparseDense(~ 0 + a + b:c + c + d, dd.)) Sparse.model.matrix <- function(...) { s <- sparse.model.matrix(...) as(s, "generalMatrix")# dropping 'assign',.. slots } ## dim(mm <- Matrix(model.matrix(~ a + b + c + d, dd4), sparse=TRUE)) dim(sm <- Sparse.model.matrix(~ a + b + c + d, dd4)) ## was (19 13), when 'drop.unused.levels' was implicitly TRUE dim(sm. <- Sparse.model.matrix(~ a + b + c + d, dd4, drop.unused.levels=TRUE)) stopifnot(mEQ(sm , mm), ## (both have a zero column) mEQ(sm., mm)) ## << that's ok, since mm has all-0 column ! ## look at this : all(mm[,"d5"] == 0) ## !!!! --- correct: a column of all 0 <--> dropped level! stopifnot(all.equal(sm., mm[, - which("d5" == colnames(mm))], ## indeed ! check.attributes = NA)) ## i.e., sm has just dropped an all zero column --- which it should! stopifnot(isEQsparseDense(~ 1 + sin(x) + b*c + a:x, dd4, showFactors=TRUE)) stopifnot(isEQsparseDense(~ I(a) + b*c + a:x, dd4, showFactors=TRUE)) ## no intercept -- works too stopifnot(isEQsparseDense(~ 0+ I(a) + b*c + a:x, dd4, showFactors=TRUE)) f <- ~ 1 + a + b*c + a*x attr(terms(f, data=dd4), "factors") dim(mm <- Matrix(model.matrix(f, data=dd4), sparse=TRUE)) dim(sm <- Sparse.model.matrix(f, data=dd4)) # == stopifnot(mEQ(sm, mm)) f <- ~ a*X + X*Y + a*c attr(terms(f, data=dM), "factors") dim(mm <- Matrix(model.matrix(f, data=dM), sparse=TRUE)) dim(sm <- Sparse.model.matrix(f, data=dM, verbose=TRUE)) stopifnot(mEQ(sm, mm)) ## high order f <- ~ a:b:X:c:Y mm <- Matrix(model.matrix(f, data=dM), sparse=TRUE) sm <- Sparse.model.matrix(f, data=dM, verbose=2) stopifnot(mEQ(sm, mm)) f <- ~ 1 + a + b*c + a*x + b*d*x + b:c:d attr(terms(f, data=dd4), "factors") dim(mm <- Matrix(model.matrix(f, data=dd4), sparse=TRUE)) ## 19 100 dim(sm <- Sparse.model.matrix(f, data=dd4)) ## (ditto) dim(sm. <- Sparse.model.matrix(f, data=dd4, drop.unused.levels=TRUE)) # 19 88 stopifnot(mEQ(sm, mm), mEQ(sm., mm))# {32, 32; 20 and 32 zero-columns ..} ## now get a bit courageous: ## ## stopifnot(isEQsparseDense(~ 1 + c + a:b:d, dat=dd4)) dim(mm <- Matrix(model.matrix(~ 1 + a + b*c + a:b:c:d, data=dd4), sparse=TRUE)) ## 19 202 dim(sm <- Sparse.model.matrix(~ 1 + a + b*c + a:b:c:d, data=dd4)) dim(sm. <- Sparse.model.matrix(~ 1 + a + b*c + a:b:c:d, data=dd4, drop.unused.levels=TRUE)) stopifnot(mEQ(sm, mm), mEQ(sm., mm))# {173, 173, 149 and 173 zero-columns !} ## stopifnot(isEQsparseDense(~ 1 + a + b*c + a:b:c:d, dat=dd4)) dim(mm <- Matrix(model.matrix(~ 1 + a + b:c + a:b:d, data=dd4), sparse=TRUE)) ## 19 107 dim(sm <- Sparse.model.matrix(~ 1 + a + b:c + a:b:d, data=dd4)) dim(sm. <- Sparse.model.matrix(~ 1 + a + b:c + a:b:d, data=dd4, drop.unused.levels=TRUE)) stopifnot(mEQ(sm, mm), mEQ(sm., mm)) dim(mm <- Matrix(model.matrix(~ a*b*c +c*d, dd4), sparse=TRUE)) ## 19 38 dim(sm <- Sparse.model.matrix(~ a*b*c +c*d, dd4))# (ditto) dim(sm. <- Sparse.model.matrix(~ a*b*c +c*d, dd4, drop.unused.levels=TRUE)) stopifnot(mEQ(sm, mm), mEQ(sm., mm)) f1 <- ~ (a+b+c+d)^2 + (a+b):c:d + a:b:c:d f2 <- ~ (a+b+c+d)^4 - a:b:c - a:b:d mm1 <- Matrix(model.matrix(f1, dd4), sparse=TRUE) dim(mm2 <- Matrix(model.matrix(f2, dd4), sparse=TRUE)) sm1 <- sparse.model.matrix(f1, dd4) dim(sm2 <- sparse.model.matrix(f2, dd4)) s.1 <- sparse.model.matrix(f1, dd4, drop.unused.levels=TRUE) dim(s.2 <- sparse.model.matrix(f2, dd4, drop.unused.levels=TRUE)) stopifnot(identical(mm1,mm2), identical(sm1,sm2), identical(s.1,s.2), mEQ(sm1,mm1), mEQ(s.1,mm1)) str(dd <- data.frame(d = gl(10,6), a = ordered(gl(3,20)))) X. <- sparse.model.matrix(~ a + d, data = dd) ## failed because of contr.poly default in Matrix 0.999375-33 stopifnot(dim(X.) == c(60, 12), nnzero(X.) == 234, isEQsparseDense(~ 0 + d + I(as.numeric(d)^2), dd)) ## I(.) failed (upto 2010-05-07) ## When the *contrasts* are sparse : spC <- as(contrasts(dd$d), "sparseMatrix") ddS <- dd contrasts(ddS$d) <- spC Xs <- sparse.model.matrix(~ a + d, data=ddS) stopifnot(exprs = { inherits(spC, "sparseMatrix") identical(spC, contrasts(ddS[,"d"])) mEQ(X., Xs) }) ## Fixing matrix-Bugs [#6673] by Davor Josipovic df <- data.frame('a' = factor(1:3), 'b' = factor(4:6)) Cid <- lapply(df, contrasts, contrasts=FALSE) CidS <- lapply(df, contrasts, contrasts=FALSE, sparse=TRUE) X2 <- sparse.model.matrix(~ . -1, data = df, contrasts.arg = Cid) X2S <- sparse.model.matrix(~ . -1, data = df, contrasts.arg = CidS) X2 stopifnot(all.equal(X2, X2S, tolerance = 0, check.attributes = NA)) ## X2S was missing the last column ('b6') in Matrix <= 1.x-y ## Fixing (my repr.ex.) of Matrix bug [#6657] by Nick Hanewinckel mkD <- function(n, p2 = 2^ceiling(log2(n)), sd = 10, rf = 4) { stopifnot(p2 >= n, n >= 0, p2 %% 2 == 0) G <- gl(2, p2/2, labels=c("M","F"))[sample.int(p2, n)] data.frame(sex = G, age = round(rf*rnorm(n, mean=32 + 2*as.numeric(G), sd=sd)) / rf) } set.seed(101) D1 <- mkD(47) Xs <- sparse.model.matrix(~ sex* poly(age, 2), data = D1) ## Error in model.spmatrix(..): no slot of name "i" for .. class "dgeMatrix" validObject(Xs) stopifnot(exprs = { identical(c(47L, 6L), dim(Xs)) identical(colnames(Xs)[3:6], c(1:2, outer("sexF", 1:2, paste, sep=":"))) all(Xs == model.matrix(~ sex* poly(age, 2), data = D1)) }) cat('Time elapsed: ', proc.time(),'\n') # for ``statistical reasons'' if(!interactive()) warnings()