suppressWarnings(RNGversion("3.5")) library(testthat) library(rpf) library(mvtnorm) options(mc.cores=1L) #otherwise not picked up by covr context("chen & thissen 1997") test_that("collapseCategoricalCells", { O = matrix(c(7,31,42,20,0), 1,5, dimnames=list('a',letters[1:5])) E = matrix(c(3,39,50,8,0), 1,5, dimnames=list('a',letters[1:5])) got <- collapseCategoricalCells(O,E,9) expect_equal(O['a','a'],7) expect_true(is.na(got$O['a','a'])) expect_equal(got$collapsed, 2) expect_equal(E[,letters[2:3]], got$E[,letters[2:3]]) }) mxSimplify2Array <- function(x, higher=FALSE) { if (higher) { stop("higher=TRUE is not implemented. Consider using simplify2array") } len <- sapply(x, length) biggest <- which(len == max(len))[1] out <- matrix(NA, nrow=max(len), ncol=length(x)) for (iter in 1:length(x)) { if (len[iter] == 0) next out[1:len[iter],iter] <- x[[iter]] } colnames(out) <- names(x) rownames(out) <- names(x[[biggest]]) out } test_that("ct1997", { set.seed(1) spec <- list() spec[1:5] <- list(rpf.grm(factors=2)) spec[6] <- list(rpf.drm(factors=2)) gen.param <- mxSimplify2Array(lapply(spec, rpf.rparam, version=1)) colnames(gen.param) <- paste("i", 1:ncol(gen.param), sep="") gen.param[2,] <- c(0,0,.5,.5,1,1) theta <- rmvnorm(1000, c(0,0), diag(2)) resp <- rpf.sample(t(theta), spec, gen.param) # hide latent factor that we don't know about tspec <- list() tspec[1:length(spec)] <- lapply(spec, rpf.modify, factors=1) grp <- list(spec=tspec, param=gen.param[-2,], mean=c(0), cov=diag(1), data=resp) got <- ChenThissen1997(grp) #cat(deparse(round(got$pval[!is.na(got$pval)], 2))) expect_equal(got$pval[!is.na(got$pval)], c(-1.31, -0.35, 3.15, 5.7, -5.66, 0.54, 3.43, -5.45, -0.57, 7.5, 8.11, 1.78, 15.37, 2.79, 5.68), tolerance=.001) #cat(deparse(round(got$gamma[!is.na(got$gamma)], 3))) expect_equal(got$gamma[!is.na(got$gamma)], c(-0.064, -0.002, 0.058, 0.062, -0.197, 0.023, 0.055, -0.008, -0.025, 0.158, 0.13, 0.081, 0.232, 0.02, 0.052), tolerance=.01) }) test_that("ct1997 2tier", { set.seed(1) require(rpf) spec <- list() spec[1:5] <- list(rpf.drm(factors=3)) gen.param <- sapply(spec, rpf.rparam) gen.param['a2', 1:2] <- 0 gen.param['a3', 3] <- 0 gen.param[c('a2','a3'), 4:5] <- 0 colnames(gen.param) <- paste("i", 1:ncol(gen.param), sep="") resp <- rpf.sample(500, spec, gen.param) grp <- list(spec=spec, param=gen.param, mean=runif(3, 0, 1), cov=diag(runif(3,1,2)), data=resp, qpoints=13L, qwidth=4) slow <- ChenThissen1997(grp, .twotier=FALSE) fast <- ChenThissen1997(grp, .twotier=TRUE) expect_equal(slow$raw[!is.na(slow$raw)], fast$raw[!is.na(fast$raw)], tolerance=.001) grp$data <- rpf.sample(200, spec, gen.param, mcar=.2) fast <- ChenThissen1997(grp) got <- fast$pval[,'i1'] names(got) <- NULL # cat(deparse(round(fast$pval[,'i1'],2))) expect_equal(got, c(1.49, 1.34, 2.79, -1.54), tolerance=.1) }) mxSimplify2Array <- function(x) { par <- x len <- sapply(par, length) biggest <- which(len == max(len))[1] out <- matrix(NA, nrow=max(len), ncol=length(par)) for (x in 1:length(par)) { out[1:len[x],x] <- par[[x]] } rownames(out) <- names(par[[biggest]]) out } test_that("ct1997 permutations", { require(rpf) set.seed(1) grp <- list(spec=list()) grp$spec[1:10] <- lapply(sample.int(6, 10, replace=TRUE), function(o) rpf.grm(outcomes=1+o)) grp$param <- mxSimplify2Array(lapply(grp$spec, rpf.rparam)) colnames(grp$param) <- paste("i", 1:10, sep="") grp$mean <- 0 grp$cov <- diag(1) grp$free <- !is.na(grp$param) & grp$param != 0 grp$data <- rpf.sample(500, grp=grp) grp$data <- grp$data[,colnames(grp$data)[sample.int(10)]] ChenThissen1997(grp, inames = colnames(grp$param)[sample.int(10, 9)]) # will stop if something is wrong grp1 <- grp grp1$data <- grp$data[1:250,] grp2 <- grp grp2$data <- grp$data[251:500,] r1 <- ChenThissen1997(grp) r2 <- ChenThissen1997(grp1) + ChenThissen1997(grp2) expect_equal(r1$pval, r2$pval) expect_equal(r1$gamma, r2$gamma) }) drawRandomProportion <- function(expected) { total <- sum(expected) prob <- expected / total sim <- rep(NA, length(expected)) rowSim <- sample.int(length(expected), size=total, prob=prob, replace=TRUE) sim <- tabulate(rowSim, length(expected)) sim } if (0) { spec <- list() spec[1:6] <- rpf.grm(factors=1) gen.param <- sapply(spec, rpf.rparam) grp <- list(spec=spec, param=gen.param, mean=c(0), cov=diag(1)) pair <- c(1L,2L) numPeople <- 185 E <- (numPeople * pairwiseExpected(grp, pair)) ms <- pairwiseItemDistribution(grp, pair) hist(ms) quantile(ms, c(.95, .99)) # 25 38 trials <- 10000 ms <- rep(NA, trials) for (tx in 1:trials) { O <- drawPairwiseSample(grp, pair, numPeople) ms[tx] <- sum((c(E) - O)^2) } hist(ms) quantile(ms, c(.95, .99)) # 25 38 } pearson.gof <- function(observed, expected, df) { x2 <- sum((observed - expected)^2/expected) if (missing(df)) { df <- (dim(observed)[1]-1) * (dim(observed)[2]-1) } pchisq(x2, df, lower.tail=FALSE) } if (0) { spec <- list() spec[1:6] <- rpf.grm(factors=1) gen.param <- sapply(spec, rpf.rparam) grp <- list(spec=spec, param=gen.param, mean=c(0), cov=diag(1)) pair <- c(1L,2L) numPeople <- 500 E <- (numPeople * pairwiseExpected(grp, pair)) trials <- 50 got <- expand.grid(trial=1:trials, method=c("pearson","rms"), pval=NA) for (rep in 1:trials) { O <- drawPairwiseSample(grp, pair, 50, qpts=11L, qwidth=4) # doesn't work! O <- O * numPeople / 50 got[got$trial==rep,'pval'] <- c(pearson.gof(O, E), pairwiseItemTest(grp, pair, O, qpts=11, qwidth=4)) # ptw2011.gof.test(O, E) print(rep) } # require(ggplot2) mask <- got$method=="rms" #mask <- rep(TRUE, nrow(got)) pval <- got[mask,'pval'] got[mask,'pval'] <- 1 / (1+exp(-(logit(pval) - 2.8))) tbl <- expand.grid(alpha=seq(0,1,.01), method=c("pearson","rms"), emp=NA) tbl[tbl$method=="rms",'emp'] <- Vectorize(function(x) sum(got[got$method=="rms",'pval'] < x))(seq(0,1,.01)) tbl[tbl$method=="pearson",'emp'] <- Vectorize(function(x) sum(got[got$method=="pearson",'pval'] < x))(seq(0,1,.01)) tbl$emp <- tbl$emp / trials ggplot(tbl, aes(alpha, emp, color=method)) + geom_line() + geom_abline(slope=1, color="yellow")+ coord_fixed() }