# Copyright 2018 Steven E. Pav. All Rights Reserved.
# Author: Steven E. Pav
# This file is part of ohenery.
#
# ohenery is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ohenery is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with ohenery. If not, see .
# env var:
# nb:
# see also:
# todo:
# changelog:
#
# Created: 2018.09.25
# Copyright: Steven E. Pav, 2018-2019
# Author: Steven E. Pav
# Comments: Steven E. Pav
# helpers#FOLDUP
set.char.seed <- function(str) {
set.seed(as.integer(charToRaw(str)))
}
#UNFOLD
library(dplyr)
library(numDeriv)
context("code runs at all") #FOLDUP
test_that("rsm bits", {
#FOLDUP
# travis only?
#skip_on_cran()
nfeat <- 5
set.seed(1234)
g <- ceiling(seq(0.1, 100, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
expect_error(mu <- smax(eta, g = g), NA)
set.seed(5234)
expect_error(y1 <- rsm(eta, g = g), NA)
# first, is it deterministic?
set.seed(5234)
expect_error(y2 <- rsm(eta, g = g), NA)
expect_equal(y1, y2)
# secondly, does it also accept mu?
set.seed(5234)
expect_error(y3 <- rsm(g = g, mu = mu), NA)
expect_equal(y1, y3)
# rsm for henery
set.seed(1212)
expect_error(ya <- rsm(g = g, mu = mu, gamma = c(1, 1, 1)), NA)
set.seed(1212)
expect_error(yb <- rsm(g = g, mu = mu), NA)
expect_equal(ya, yb)
# rsm henery: is this ok?
set.seed(1212)
expect_error(ya <- rsm(g = g, mu = mu, gamma = c(1, 0.8, 0.8, 0.8)), NA)
expect_error(ya <- rsm(g = g, mu = mu, gamma = c(1, rep(0.8, 15))), NA)
}) #UNFOLD
test_that("harsmlik bits", {
#FOLDUP
# travis only?
#skip_on_cran()
nfeat <- 8
set.seed(321)
g <- ceiling(seq(0.1, 100, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
y <- rsm(eta, g = g)
idx <- order(g, y, decreasing = TRUE) - 1
fastval <- attr(harsmlik(g, idx, eta, deleta = X), 'gradient')
numap <- grad(
function(beta, g, idx, X) {
eta <- X %*% beta
as.numeric(harsmlik(g, idx, eta))
},
x = beta,
g = g,
idx = idx,
X = X
)
expect_equal(fastval, numap, tolerance = 0.1)
# now reweight them
wt <- runif(length(g))
wt <- wt / mean(wt) # mean 1 is recommended
expect_error(foores <- harsmlik(g, idx, eta, wt = wt), NA)
}) #UNFOLD
test_that("hensmlik and harsmlik nest", {
#FOLDUP
# travis only?
#skip_on_cran()
nfeat <- 8
set.seed(321)
g <- ceiling(seq(0.1, 100, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
y <- rsm(eta, g = g)
# hensmlik and harsmlik should return the same for gamma=1
gams <- c(1.0, 1.0, 1.0)
idx <- order(g, y, decreasing = TRUE) - 1
expect_error(
hooval <- hensmlik(g = g, idx = idx, eta = eta, gamma = gams, deleta = X),
NA
)
expect_true(!is.na(hooval))
expect_error(sooval <- harsmlik(g = g, idx = idx, eta = eta, deleta = X), NA)
expect_equal(as.numeric(sooval), as.numeric(hooval))
}) #UNFOLD
test_that("hensmlik bits", {
#FOLDUP
# travis only?
#skip_on_cran()
nfeat <- 8
set.seed(321)
g <- ceiling(seq(0.1, 100, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
y <- rsm(eta, g = g)
gams <- c(1.0, 1.0, 1.0)
idx <- order(g, y, decreasing = TRUE) - 1
expect_error(
hooval <- hensmlik(g = g, idx = idx, eta = eta, gamma = gams, deleta = X),
NA
)
expect_true(!is.na(hooval))
fastval <- attr(hooval, 'gradient')
numap <- grad(
function(beta, g, idx, X, gams) {
eta <- X %*% beta
as.numeric(hensmlik(g, idx, eta, gamma = gams))
},
x = beta,
g = g,
idx = idx,
X = X,
gams = gams
)
expect_equal(fastval, numap, tolerance = 0.1)
gams <- c(1.0, 0.9, 0.8)
idx <- order(g, y, decreasing = TRUE) - 1
expect_error(
hooval <- hensmlik(g = g, idx = idx, eta = eta, gamma = gams, deleta = X),
NA
)
expect_true(!is.na(hooval))
fastval <- attr(hooval, 'gradient')
numap <- grad(
function(beta, g, idx, X, gams) {
eta <- X %*% beta
as.numeric(hensmlik(g, idx, eta, gamma = gams))
},
x = beta,
g = g,
idx = idx,
X = X,
gams = gams
)
expect_equal(fastval, numap, tolerance = 0.1)
# now reweight them
wt <- runif(length(g))
wt <- wt / mean(wt) # mean 1 is recommended
expect_error(foores <- hensmlik(g, idx, eta, wt = wt, gamma = gams), NA)
}) #UNFOLD
test_that("smax bits", {
#FOLDUP
# location invariant
nfeat <- 10
set.seed(1001)
eta <- rnorm(nfeat)
keta <- eta + 10
expect_error(mu <- smax(eta), NA)
expect_error(kmu <- smax(keta), NA)
expect_equal(mu, kmu, tolerance = 1e-7)
expect_error(smax(c(1, 0)), NA)
expect_error(mubig <- smax(c(10000, 0, 0, 0)), NA)
expect_equal(mubig, c(1, 0, 0, 0))
nfeat <- 5
set.seed(4234)
g <- ceiling(seq(0.1, 100, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
eta <- data.frame(eta = as.numeric(eta), g = g) %>%
group_by(g) %>%
mutate(eta = eta - mean(eta)) %>%
ungroup() %>%
{
.$eta
}
expect_error(mu <- smax(eta, g = g), NA)
expect_error(eta0 <- inv_smax(mu, g = g), NA)
expect_equal(eta, eta0, tolerance = 1e-14)
nfeat <- 8
set.seed(808)
expect_error(mu <- normalize(runif(nfeat)), NA)
expect_error(eta0 <- inv_smax(mu), NA)
expect_error(mu1 <- smax(eta0), NA)
expect_equal(mu, mu1, tolerance = 1e-10)
# fingers crossed.
mu <- c(1, 0, 0, 0)
expect_error(eta0 <- inv_smax(mu), NA)
expect_error(mu1 <- smax(eta0), NA)
expect_equal(mu, mu1, tolerance = 1e-10)
}) #UNFOLD
test_that("rhenery bits", {
#FOLDUP
# travis only?
#skip_on_cran()
nfeat <- 5
set.seed(1234)
g <- ceiling(seq(0.1, 100, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
eta <- rnorm(10)
mu <- smax(eta)
gamma <- runif(length(eta) - 1, min = 0.5, max = 1.5)
set.seed(5234)
expect_error(y1 <- rhenery(mu, gamma), NA)
# first, is it deterministic?
set.seed(5234)
expect_error(y2 <- rhenery(mu, gamma), NA)
expect_equal(y1, y2)
}) #UNFOLD
test_that("erank bits", {
#FOLDUP
set.seed(321)
for (nfeat in c(1, 2, 3, 4, 8)) {
probs <- runif(nfeat)
probs <- probs / sum(probs)
expect_error(erank(probs), NA)
}
set.seed(2345)
eta <- rnorm(7)
val1 <- rowMeans(replicate(5000, rsm(eta)))
val2 <- erank(smax(eta))
expect_equal(val1, val2, tolerance = 0.05)
}) #UNFOLD
test_that("utils", {
#FOLDUP
set.seed(321)
x <- rnorm(10)
expect_error(normalize(x), NA)
expect_error(smax(x), NA)
}) #UNFOLD
test_that("normalize ok", {
#FOLDUP
set.seed(111)
x <- rnorm(10)
kx <- 4 * x
expect_error(y <- normalize(x), NA)
expect_error(ky <- normalize(kx), NA)
expect_equal(y, ky, tolerance = 1e-8)
}) #UNFOLD
#UNFOLD
context("group invariance") # FOLDUP
test_that("smax", {
#FOLDUP
nfeat <- 8
set.seed(123)
g <- ceiling(seq(0.1, 100, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
mu <- smax(eta, g = g)
set.seed(789)
newidx <- sample.int(length(g), length(g))
gi <- g[newidx]
ei <- eta[newidx]
mui <- smax(ei, g = gi)
expect_equal(mu[newidx], mui)
}) #UNFOLD
test_that("harsm_invlink", {
#FOLDUP
nfeat <- 8
set.seed(123)
g <- ceiling(seq(0.1, 100, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
expect_error(ernk <- harsm_invlink(eta), NA)
expect_error(ernk <- harsm_invlink(eta, g = g), NA)
expect_error(ernk2 <- harsm_invlink(eta, g = rep(g[1], length(g))), NA)
# get warning
expect_error(mu <- smax(eta, g), NA)
expect_warning(ernk2 <- harsm_invlink(eta = eta, mu = mu, g = g))
set.seed(789)
newidx <- sample.int(length(g), length(g))
gi <- g[newidx]
ei <- eta[newidx]
expect_error(ernki <- harsm_invlink(ei, g = gi), NA)
expect_equal(ernk[newidx], ernki)
}) #UNFOLD
test_that("rsm invariant wrt reordering", {
#FOLDUP
#nfeat <- 8
for (nfeat in c(4, 8)) {
set.seed(123)
for (g in list(rep(1, 20), ceiling(seq(0.1, 100, by = 0.1)))) {
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
set.seed(115)
y <- rsm(eta, g = g)
set.seed(789)
newidx <- sample.int(length(y), length(y))
gi <- g[newidx]
ei <- eta[newidx]
set.seed(115)
yo <- rsm(ei, g = gi)
yi <- y[newidx]
expect_equal(yo, yi)
}
}
}) #UNFOLD
test_that("rhenery not invariant wrt reordering", {
#FOLDUP
set.seed(432)
mu <- normalize(runif(40))
set.seed(1111)
expect_error(r1 <- rhenery(mu), NA)
set.seed(1111)
expect_error(r2 <- rhenery(rev(mu)), NA)
expect_true(!all(r1 == rev(r2)))
}) #UNFOLD
# UNFOLD
context("sm makes sense") # FOLDUP
test_that("sm foo", {
#FOLDUP
nfeat <- 8
set.seed(123)
g <- ceiling(seq(0.1, 1000, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
set.seed(678)
beta <- rnorm(nfeat, sd = 3)
eta <- X %*% beta
y <- rsm(eta, g = g)
# now the consensus
beta0 <- beta
beta0[1] <- 0
beta0[2] <- 0
beta0[3] <- 0
eta0 <- X %*% beta0
expect_error(foo <- harsmfit(y = y, g = g, X = X, eta0 = eta0), NA)
donotuse <- capture.output(expect_error(print(foo), NA))
#.mse <- function(x,y) sum((x-y)^2)
#ppooh <- data.frame(y=foo$y,
#g=foo$g,
#er=foo$erank) %>%
#group_by(g) %>%
#mutate(qo=rank(er),
#qdumb=(n() + 1)/2) %>%
#ungroup() %>%
#summarize(ssre=.mse(qo,y),
#ssto=.mse(qdumb,y))
}) #UNFOLD
# UNFOLD
context("modeling functions") #FOLDUP
test_that("harsmfit bits", {
#FOLDUP
# travis only?
#skip_on_cran()
nfeat <- 5
set.seed(1234)
g <- ceiling(seq(0.1, 100, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
expect_error(y <- rsm(eta, g = g), NA)
expect_error(mod0 <- harsmfit(y = y, g = g, X = X), NA)
expect_equal(as.numeric(coefficients(mod0)), beta, tolerance = 0.1)
# now the pretty frontend
data <- cbind(data.frame(outcome = y, race = g), as.data.frame(X))
fmla <- outcome ~ V1 + V2 + V3 + V4 + V5
expect_error(fitm <- harsm(fmla, group = race, data = data), NA)
expect_equal(
as.numeric(coefficients(mod0)),
as.numeric(coefficients(fitm)),
tolerance = 0.0001
)
donotuse <- capture.output(expect_error(print(fitm), NA))
expect_error(vcov(fitm), NA)
# check broom methods
expect_error(foo1 <- tidy(fitm), NA)
expect_error(foo2 <- glance(fitm), NA)
# group given by name
expect_error(fitm2 <- harsm(fmla, group = 'race', data = data), NA)
expect_equal(
as.numeric(coefficients(fitm2)),
as.numeric(coefficients(fitm)),
tolerance = 0.0001
)
# can deal with a single offset
fmla <- outcome ~ offset(V1) + V2
expect_error(fitm <- harsm(fmla, group = race, data = data), NA)
fmla <- outcome ~ V1 + offset(V2)
expect_error(fitm <- harsm(fmla, group = race, data = data), NA)
# with consensus odds, but there
eta0 <- rowMeans(X)
data <- cbind(
data.frame(outcome = y, race = g, eta0 = eta0),
as.data.frame(X)
)
fmla <- outcome ~ offset(eta0) + V1 + V2 + V3 + V4 + V5
expect_error(fitm <- harsm(fmla, group = race, data = data), NA)
# with consensus odds, but there isn't Z1 and Z2
fmla <- outcome ~ offset(eta0) + Z1 + Z2
expect_error(fitm <- harsm(fmla, group = race, data = data))
# fit with weights
wt <- runif(length(y))
data <- cbind(data.frame(outcome = y, race = g, wt = wt), as.data.frame(X))
fmla <- outcome ~ V1 + V2 + V3 + V4 + V5
expect_error(fitm <- harsm(fmla, data, group = race, weights = wt), NA)
# weights given by name
expect_error(fitm2 <- harsm(fmla, data, group = race, weights = 'wt'), NA)
expect_equal(
as.numeric(coefficients(fitm2)),
as.numeric(coefficients(fitm)),
tolerance = 0.0001
)
# this should error: negative weights
data <- cbind(
data.frame(outcome = y, race = g, wt = rep(-1, length(y))),
as.data.frame(X)
)
fmla <- outcome ~ V1 + V2 + V3 + V4 + V5
expect_error(fitm <- harsm(fmla, data, group = race, weights = 'wt'))
# check on non-numeric race ids
data <- cbind(data.frame(outcome = y, race = g), as.data.frame(X))
data <- data[data$race <= 26, ]
data$letrace <- letters[data$race]
data$facrace <- factor(data$letrace)
data$intrace <- as.integer(data$race)
expect_error(fitnum <- harsm(outcome ~ V1 + V2, data, group = race), NA)
expect_error(fitlet <- harsm(outcome ~ V1 + V2, data, group = letrace), NA)
expect_error(fitfac <- harsm(outcome ~ V1 + V2, data, group = facrace), NA)
expect_error(fitint <- harsm(outcome ~ V1 + V2, data, group = intrace), NA)
expect_equal(
as.numeric(coefficients(fitnum)),
as.numeric(coefficients(fitlet)),
tolerance = 1e-7
)
expect_equal(
as.numeric(coefficients(fitnum)),
as.numeric(coefficients(fitfac)),
tolerance = 1e-7
)
# warm start
expect_error(fitnum <- harsm(outcome ~ V1 + V2, data, group = race), NA)
expect_error(
fitnum2 <- harsm(outcome ~ V1 + V2, data, group = race, fit0 = fitnum),
NA
)
expect_error(
fitnum3 <- harsm(outcome ~ V2, data, group = race, fit0 = fitnum),
NA
)
expect_error(
fitnum4 <- harsm(outcome ~ V2 + V3, data, group = race, fit0 = fitnum),
NA
)
expect_error(
fitnum5 <- harsm(outcome ~ V3 + V4, data, group = race, fit0 = fitnum),
NA
)
}) #UNFOLD
test_that("harsm vs logistic", {
#FOLDUP
# travis only?
#skip_on_cran()
library(dplyr)
nop <- 5000
set.seed(1234)
adf <- data.frame(eventnum = floor(seq(1, nop + 0.7, by = 0.5))) %>%
mutate(x = rnorm(n())) %>%
mutate(program_num = rep(c(1, 2), nop)) %>%
mutate(intercept = as.numeric(program_num == 1)) %>%
mutate(eta = 1.5 * x + 0.3 * intercept) %>%
mutate(place = ohenery::rsm(eta, g = eventnum))
expect_error(
modo <- harsm(place ~ intercept + x, data = adf, group = eventnum),
NA
)
ddf <- adf %>%
arrange(eventnum, program_num) %>%
group_by(eventnum) %>%
summarize(
resu = as.numeric(first(place) == 1),
delx = first(x) - last(x),
deli = first(intercept) - last(intercept)
) %>%
ungroup()
modg <- glm(resu ~ delx + 1, data = ddf, family = binomial(link = 'logit'))
expect_equal(as.numeric(coef(modo)), as.numeric(coef(modg)), tolerance = 1e-3)
expect_equal(as.numeric(vcov(modo)), as.numeric(vcov(modg)), tolerance = 1e-5)
}) #UNFOLD
test_that("harsmfit prediction", {
#FOLDUP
# travis only?
#skip_on_cran()
nfeat <- 2
set.seed(1234)
g <- ceiling(seq(0.1, 100, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
y <- rsm(eta, g = g)
# check on non-numeric race ids
data <- cbind(data.frame(outcome = y, race = g), as.data.frame(X))
data <- data[data$race <= 26, ]
data$letrace <- letters[data$race]
data$facrace <- factor(data$letrace)
expect_error(fitnum <- harsm(outcome ~ V1 + V2, data, group = race), NA)
expect_error(fitlet <- harsm(outcome ~ V1 + V2, data, group = letrace), NA)
expect_error(fitfac <- harsm(outcome ~ V1 + V2, data, group = facrace), NA)
expect_error(
fitoff <- harsm(outcome ~ V1 + offset(V2), data, group = race),
NA
)
for (ttype in c('eta', 'mu', 'erank')) {
expect_error(fuh <- predict(fitnum, newdata = data, type = ttype), NA)
expect_error(fuh <- predict(fitlet, newdata = data, type = ttype), NA)
expect_error(
fuh <- predict(fitnum, newdata = data, type = ttype, group = race),
NA
)
expect_error(
fuh <- predict(fitlet, newdata = data, type = ttype, group = letrace),
NA
)
expect_error(
fuh <- predict(fitoff, newdata = data, type = ttype, group = race),
NA
)
}
# given by name
ttype <- 'eta'
expect_error(
fuhbar <- predict(fitnum, newdata = data, type = ttype, group = race),
NA
)
expect_error(
barfuh <- predict(fitnum, newdata = data, type = ttype, group = 'race'),
NA
)
expect_equal(fuhbar, barfuh, tolerance = 1e-7)
# if predict data is missing some columns, should error
baddata <- data[c('race', 'V1')]
ttype <- 'eta'
expect_error(
fuhbar <- predict(fitnum, newdata = baddata, type = ttype, group = race)
)
# deal with na actions
expect_error(
fuh <- as.numeric(predict(
fitlet,
newdata = data,
type = 'eta',
group = letrace,
na.action = na.pass
)),
NA
)
expect_equal(length(fuh), nrow(data))
# outcome should have no affect on the na action
badata <- data
badata$outcome[1] <- NA
expect_error(
fuh <- as.numeric(predict(
fitlet,
newdata = badata,
type = 'eta',
group = letrace,
na.action = na.pass
)),
NA
)
expect_equal(length(fuh), nrow(badata))
expect_error(
fuh <- as.numeric(predict(
fitlet,
newdata = badata,
type = 'eta',
group = letrace,
na.action = na.omit
)),
NA
)
expect_equal(length(fuh), nrow(badata))
# but for V1
badata <- data
badata$V1[1] <- NA
expect_error(
fuh <- as.numeric(predict(
fitlet,
newdata = badata,
type = 'eta',
group = letrace,
na.action = na.pass
)),
NA
)
expect_equal(length(fuh), nrow(badata))
expect_true(all(is.na(fuh[is.na(badata$V1)])))
expect_true(all(!is.na(fuh[!is.na(badata$V1)])))
expect_error(
fuh <- as.numeric(predict(
fitlet,
newdata = badata,
type = 'eta',
group = letrace,
na.action = na.omit
)),
NA
)
expect_equal(length(fuh), sum(!is.na(badata$V1)))
expect_true(all(!is.na(fuh)))
}) #UNFOLD
test_that("harsmfit regularization", {
#FOLDUP
# travis only?
#skip_on_cran()
nfeat <- 5
set.seed(1234)
g <- ceiling(seq(0.1, 100, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
expect_error(y <- rsm(eta, g = g), NA)
# these should error out for having bad weights or powers or indices, etc.
expect_error(
ignore <- harsmfit(
y = y,
g = g,
X = X,
reg_wt = rep(-1, nfeat),
reg_power = 1,
reg_zero = 0,
reg_coef_idx = 1:nfeat
)
)
expect_error(
ignore <- harsmfit(
y = y,
g = g,
X = X,
reg_wt = rep(1, nfeat),
reg_power = 0,
reg_zero = 0,
reg_coef_idx = 1:nfeat
)
)
expect_error(
ignore <- harsmfit(
y = y,
g = g,
X = X,
reg_wt = rep(1, nfeat),
reg_power = 1,
reg_zero = 0,
reg_coef_idx = (1:nfeat) + 2
)
)
expect_error(
ignore <- harsmfit(
y = y,
g = g,
X = X,
reg_wt = rep(1, nfeat),
reg_power = 1,
reg_zero = 0,
reg_coef_idx = (1:nfeat) - 2
)
)
# smoke test.
expect_error(mod0 <- harsmfit(y = y, g = g, X = X), NA)
expect_error(
mod0r1 <- harsmfit(
y = y,
g = g,
X = X,
reg_wt = rep(2, nfeat),
reg_power = 1,
reg_zero = 0,
reg_coef_idx = 1:nfeat
),
NA
)
expect_error(
mod0r2 <- harsmfit(
y = y,
g = g,
X = X,
reg_wt = rep(2, nfeat),
reg_power = 2,
reg_zero = 0,
reg_coef_idx = 1:nfeat
),
NA
)
# expect smaller coefficients for regularization terms.
expect_true(norm(coefficients(mod0), '2') > norm(coefficients(mod0r2), '2'))
# the zeroes should default to zero.
expect_error(
mod0r2_alt <- harsmfit(
y = y,
g = g,
X = X,
reg_wt = rep(2, nfeat),
reg_power = 2,
reg_zero = NULL,
reg_coef_idx = 1:nfeat
),
NA
)
expect_equal(
as.numeric(coefficients(mod0r2)),
as.numeric(coefficients(mod0r2_alt)),
tolerance = 0.0001
)
# can we drive a coefficient to zero via l1 regression?
expect_error(
mod0r1_harsh <- harsmfit(
y = y,
g = g,
X = X,
reg_wt = rep(200, nfeat),
reg_power = 1,
reg_zero = 0,
reg_coef_idx = 1:nfeat
),
NA
)
expect_true(min(abs(coefficients(mod0r1_harsh))) < 1e-7)
# now the pretty frontend
data <- cbind(data.frame(outcome = y, race = g), as.data.frame(X))
fmla <- outcome ~ V1 + V2 + V3 + V4 + V5
expect_error(fitm <- harsm(fmla, group = race, data = data), NA)
expect_error(
fitmr1 <- harsm(
fmla,
group = race,
data = data,
reg_wt = rep(2, nfeat),
reg_power = 1,
reg_zero = 0,
reg_coef_idx = 1:nfeat
),
NA
)
expect_error(
fitmr2 <- harsm(
fmla,
group = race,
data = data,
reg_wt = rep(2, nfeat),
reg_power = 2,
reg_zero = 0,
reg_coef_idx = 1:nfeat
),
NA
)
expect_equal(
as.numeric(coefficients(mod0r1)),
as.numeric(coefficients(fitmr1)),
tolerance = 0.0001
)
expect_equal(
as.numeric(coefficients(mod0r2)),
as.numeric(coefficients(fitmr2)),
tolerance = 0.0001
)
skip_on_cran()
# test the standardization
inflate <- 5
fiveX <- inflate * X
expect_error(mod0 <- harsmfit(y = y, g = g, X = X), NA)
expect_error(mod5 <- harsmfit(y = y, g = g, X = fiveX), NA)
expect_equal(
as.numeric(inflate * coefficients(mod5)),
as.numeric(coefficients(mod0)),
tolerance = 0.002
)
WW <- 100
expect_error(
mod0_r1 <- harsmfit(
y = y,
g = g,
X = X,
reg_wt = WW,
reg_power = 2,
reg_coef_idx = 1:nfeat,
reg_standardize = FALSE
),
NA
)
expect_error(
mod0_r1_t <- harsmfit(
y = y,
g = g,
X = X,
reg_wt = WW,
reg_power = 2,
reg_coef_idx = 1:nfeat,
reg_standardize = TRUE
),
NA
)
expect_error(
mod5_r1 <- harsmfit(
y = y,
g = g,
X = fiveX,
reg_wt = WW,
reg_power = 2,
reg_coef_idx = 1:nfeat,
reg_standardize = FALSE
),
NA
)
expect_error(
mod5_r1_t <- harsmfit(
y = y,
g = g,
X = fiveX,
reg_wt = WW,
reg_power = 2,
reg_coef_idx = 1:nfeat,
reg_standardize = TRUE
),
NA
)
expect_equal(
as.numeric(coefficients(mod0_r1_t)),
as.numeric(inflate * coefficients(mod5_r1_t)),
tolerance = 0.002
)
}) #UNFOLD
test_that("hensm bits", {
#FOLDUP
# travis only?
#skip_on_cran()
nfeat <- 5
set.seed(1234)
g <- 1 + ((1:1000) %% 100)
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
expect_error(y <- rsm(eta, g = g), NA)
expect_error(mod0 <- hensmfit(y = y, g = g, X = X), NA)
# now the pretty frontend
data <- cbind(data.frame(outcome = y, race = g), as.data.frame(X))
fmla <- outcome ~ V1 + V2 + V3 + V4 + V5
# runs?
expect_error(fitm <- hensm(fmla, data, group = race), NA)
expect_equal(
as.numeric(coefficients(mod0)),
as.numeric(coefficients(fitm)),
tolerance = 0.0001
)
# check for multiple gamma
for (ngam in c(2, 5)) {
expect_error(tmp0 <- hensmfit(y = y, g = g, X = X, ngamma = ngam), NA)
expect_error(tmp1 <- hensm(fmla, data, group = race, ngamma = ngam), NA)
expect_equal(
as.numeric(coefficients(tmp0)),
as.numeric(coefficients(tmp1)),
tolerance = 0.0001
)
}
# now the pretty frontend
data <- cbind(data.frame(outcome = y, race = g), as.data.frame(X))
fmla <- outcome ~ V1 + V2 + V3 + V4 + V5
# runs?
expect_error(fitm <- hensm(fmla, data, group = race), NA)
expect_equal(
as.numeric(coefficients(mod0)),
as.numeric(coefficients(fitm)),
tolerance = 0.0001
)
donotuse <- capture.output(expect_error(print(fitm), NA))
expect_error(vcov(fitm), NA)
# deterministic?
expect_error(fitm2 <- hensm(fmla, data, group = race), NA)
# fails
expect_equal(
as.numeric(coefficients(fitm2)),
as.numeric(coefficients(fitm)),
tolerance = 1e-7
)
# check broom methods
expect_error(foo1 <- tidy(fitm), NA)
expect_error(foo2 <- glance(fitm), NA)
# group given by name
expect_error(fitm3 <- hensm(fmla, data, group = 'race'), NA)
expect_equal(fitm$coefficients, fitm3$coefficients)
# can deal with a single offset
fmla <- outcome ~ offset(V1) + V2
expect_error(fitm <- hensm(fmla, data, group = race), NA)
fmla <- outcome ~ V1 + offset(V2)
expect_error(fitm <- hensm(fmla, data, group = race), NA)
# with consensus odds, but there
eta0 <- rowMeans(X)
data <- cbind(
data.frame(outcome = y, race = g, eta0 = eta0),
as.data.frame(X)
)
fmla <- outcome ~ offset(eta0) + V1 + V2 + V3 + V4 + V5
expect_error(fitm <- hensm(fmla, data, group = race), NA)
# with consensus odds, but there isn't Z1 and Z2
fmla <- outcome ~ offset(eta0) + Z1 + Z2
expect_error(fitm <- hensm(fmla, data, group = race))
# fit with weights
wt <- runif(length(y))
data <- cbind(data.frame(outcome = y, race = g, wt = wt), as.data.frame(X))
fmla <- outcome ~ V1 + V2 + V3 + V4 + V5
expect_error(fitm <- hensm(fmla, data, group = race, weights = wt), NA)
# weights given by name
expect_error(fitm2 <- hensm(fmla, data, group = race, weights = 'wt'), NA)
expect_equal(
as.numeric(coefficients(fitm2)),
as.numeric(coefficients(fitm)),
tolerance = 1e-7
)
# this should error: negative weights
data <- cbind(
data.frame(outcome = y, race = g, wt = rep(-1, length(y))),
as.data.frame(X)
)
fmla <- outcome ~ V1 + V2 + V3 + V4 + V5
expect_error(fitm <- hensm(fmla, data, group = race, weights = wt))
# check on non-numeric race ids
data <- cbind(data.frame(outcome = y, race = g), as.data.frame(X))
data <- data[data$race <= 26, ]
data$letrace <- letters[data$race]
data$facrace <- factor(data$letrace)
data$intrace <- as.integer(data$race)
expect_error(fitnum <- hensm(outcome ~ V1 + V2, data, group = race), NA)
expect_error(fitlet <- hensm(outcome ~ V1 + V2, data, group = letrace), NA)
expect_error(fitfac <- hensm(outcome ~ V1 + V2, data, group = facrace), NA)
expect_error(fitint <- hensm(outcome ~ V1 + V2, data, group = intrace), NA)
# if predict data is missing some columns, should error
baddata <- data[c('race', 'V1')]
ttype <- 'eta'
expect_error(
fuhbar <- predict(fitnum, newdata = baddata, type = ttype, group = race)
)
expect_equal(
as.numeric(coefficients(fitnum)),
as.numeric(coefficients(fitlet)),
tolerance = 1e-7
)
expect_equal(
as.numeric(coefficients(fitnum)),
as.numeric(coefficients(fitfac)),
tolerance = 1e-7
)
# warm start!
expect_error(fitnum <- hensm(outcome ~ V1 + V2, data, group = race), NA)
expect_error(
fitnum2 <- hensm(outcome ~ V1 + V2, data, group = race, fit0 = fitnum),
NA
)
# why are they not closer?
expect_equal(
as.numeric(coefficients(fitnum)),
as.numeric(coefficients(fitnum2)),
tolerance = 1e-3
)
expect_error(
fitnum3 <- hensm(
outcome ~ V1 + V2,
data,
group = race,
fit0 = fitnum,
ngamma = 3
),
NA
)
expect_error(
fitnum4 <- hensm(
outcome ~ V1 + V2,
data,
group = race,
fit0 = fitnum,
ngamma = 4
),
NA
)
expect_error(
fitnum2b <- hensm(outcome ~ V1, data, group = race, fit0 = fitnum),
NA
)
expect_error(
fitnum2c <- hensm(outcome ~ V1 + V3, data, group = race, fit0 = fitnum),
NA
)
# warm start from harsm object.
expect_error(har_fitnum <- harsm(outcome ~ V1 + V2, data, group = race), NA)
expect_error(
fitnum2 <- hensm(outcome ~ V1 + V2, data, group = race, fit0 = har_fitnum),
NA
)
#for (ttype in c('eta','mu','erank')) {
#expect_error(fuh <- predict(fitnum,newdata=data,type=ttype),NA)
#expect_error(fuh <- predict(fitlet,newdata=data,type=ttype),NA)
#expect_error(fuh <- predict(fitnum,newdata=data,type=ttype,group=race),NA)
#expect_error(fuh <- predict(fitlet,newdata=data,type=ttype,group=letrace),NA)
#}
}) #UNFOLD
test_that("hensm consistency", {
#FOLDUP
# travis only?
skip_on_cran()
nfeat <- 2
set.seed(1234)
g <- 1 + ((1:120000) %% 20000)
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
mu <- smax(eta, g = g)
gammas <- c(0.9, 0.8, 0.7, rep(1, 6))
# now the pretty frontend
data <- cbind(data.frame(mu = mu, race = g), as.data.frame(X)) %>%
group_by(race) %>%
mutate(outcome = rhenery(mu, gammas[1:(n() - 1)])) %>%
ungroup()
fmla <- outcome ~ V1 + V2
expect_error(fitm <- hensm(fmla, data, group = race, ngamma = 5), NA)
# close enough
expect_equal(fitm$gammas[1:3], gammas[1:3], tolerance = 0.03)
expect_equal(as.numeric(fitm$beta), beta, tolerance = 0.03)
}) #UNFOLD
test_that("hensmfit regularization", {
#FOLDUP
# travis only?
#skip_on_cran()
nfeat <- 5
ngam <- 3
set.seed(1234)
g <- 1 + ((1:1000) %% 100)
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
expect_error(y <- rsm(eta, g = g), NA)
# now the pretty frontend
data <- cbind(data.frame(outcome = y, race = g), as.data.frame(X))
fmla <- outcome ~ V1 + V2 + V3 + V4 + V5
ncoef <- nfeat + ngam - 1
# these should error out for having bad weights or powers or indices, etc.
expect_error(
ignore <- hensm(
fmla,
data,
group = race,
reg_wt = rep(-1, ncoef),
reg_power = 1,
reg_zero = 0,
reg_coef_idx = 1:ncoef
)
)
expect_error(
ignore <- hensm(
fmla,
data,
group = race,
reg_wt = rep(1, ncoef),
reg_power = 0,
reg_zero = 0,
reg_coef_idx = 1:ncoef
)
)
expect_error(
ignore <- hensm(
fmla,
data,
group = race,
reg_wt = rep(1, ncoef),
reg_power = 1,
reg_zero = 0,
reg_coef_idx = (1:ncoef) + 2
)
)
expect_error(
ignore <- hensm(
fmla,
data,
group = race,
reg_wt = rep(1, ncoef),
reg_power = 1,
reg_zero = 0,
reg_coef_idx = (1:ncoef) - 2
)
)
# smoke test.
expect_error(mod0 <- hensm(fmla, data, group = race, ngamma = ngam), NA)
expect_error(
mod0r1 <- hensm(
fmla,
data,
group = race,
ngamma = ngam,
reg_wt = rep(2, ncoef),
reg_power = 1,
reg_zero = 0,
reg_coef_idx = 1:(ncoef)
),
NA
)
expect_error(
mod0r2 <- hensm(
fmla,
data,
group = race,
ngamma = ngam,
reg_wt = rep(50, ncoef),
reg_power = 2,
reg_zero = 0,
reg_coef_idx = 1:(ncoef)
),
NA
)
# expect smaller coefficients for regularization terms.
expect_true(norm(coefficients(mod0), '2') > norm(coefficients(mod0r2), '2'))
# the zeroes should default to zero or one as appropriate
expect_error(
mod0r2_a <- hensm(
fmla,
data,
group = race,
ngamma = ngam,
reg_wt = rep(2, ncoef),
reg_power = 2,
reg_zero = c(rep(0, nfeat), rep(1, ngam - 1)),
reg_coef_idx = 1:ncoef
),
NA
)
expect_error(
mod0r2_b <- hensm(
fmla,
data,
group = race,
ngamma = ngam,
reg_wt = rep(2, ncoef),
reg_power = 2,
reg_zero = NULL,
reg_coef_idx = 1:ncoef
),
NA
)
expect_equal(
as.numeric(coefficients(mod0r2_a)),
as.numeric(coefficients(mod0r2_b)),
tolerance = 0.0001
)
skip_on_cran()
# test the standardization
inflate <- 5
five_data <- cbind(
data.frame(outcome = y, race = g),
as.data.frame(inflate * X)
)
expect_error(mod0 <- hensm(fmla, data, group = race, ngamma = ngam), NA)
expect_error(mod5 <- hensm(fmla, five_data, group = race, ngamma = ngam), NA)
expect_equal(
as.numeric(inflate * coefficients(mod5)),
as.numeric(coefficients(mod0)),
tolerance = 0.002
)
WW <- 100
expect_error(
mod0_r1 <- hensm(
fmla,
data,
group = race,
ngamma = ngam,
reg_wt = WW,
reg_power = 2,
reg_zero = 0,
reg_coef_idx = 1:(ncoef),
reg_standardize = FALSE
),
NA
)
expect_error(
mod0_r1_t <- hensm(
fmla,
data,
group = race,
ngamma = ngam,
reg_wt = WW,
reg_power = 2,
reg_zero = 0,
reg_coef_idx = 1:(ncoef),
reg_standardize = TRUE
),
NA
)
expect_error(
mod5_r1 <- hensm(
fmla,
five_data,
group = race,
ngamma = ngam,
reg_wt = WW,
reg_power = 2,
reg_zero = 0,
reg_coef_idx = 1:(ncoef),
reg_standardize = FALSE
),
NA
)
expect_error(
mod5_r1_t <- hensm(
fmla,
five_data,
group = race,
ngamma = ngam,
reg_wt = WW,
reg_power = 2,
reg_zero = 0,
reg_coef_idx = 1:(ncoef),
reg_standardize = TRUE
),
NA
)
expect_equal(
as.numeric(coefficients(mod0_r1_t)),
as.numeric(inflate * coefficients(mod5_r1_t)),
tolerance = 0.002
)
}) #UNFOLD
test_that("predictions with factors", {
#FOLDUP
# travis only?
#skip_on_cran()
nfeat <- 2
set.seed(1234)
g <- ceiling(seq(0.1, 30, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
y <- rsm(eta, g = g)
# create a variable V3, which is a factor variable.
# in the training data is has values 'a':'q'; we build a model.
# in the test data we try data with values 'a':'m' and only those levels.
# and also test data with values 'a':'m' but levels 'a':'z'.
# In each case we should be able to call predict on the model.
data <- cbind(data.frame(outcome = y, race = g), as.data.frame(X))
data_AQ <- data
data_AQ$V3 <- factor(sample(letters[1:17], nrow(data_AQ), replace = TRUE))
data_AM <- data
data_AM$V3 <- factor(sample(letters[1:13], nrow(data_AM), replace = TRUE))
data_AMZ <- data_AM
data_AMZ$V3 <- factor(as.character(data_AMZ$V3), levels = letters)
expect_error(fitmod <- harsm(outcome ~ V1 + V3, data_AQ, group = race), NA)
expect_error(henmod <- hensm(outcome ~ V1 + V3, data_AQ, group = race), NA)
for (ttype in c('eta', 'mu', 'erank')) {
expect_error(fuh <- predict(fitmod, newdata = data_AQ, type = ttype), NA)
expect_error(
fuh <- predict(fitmod, newdata = data_AQ, type = ttype, group = race),
NA
)
expect_error(fuh <- predict(fitmod, newdata = data_AM, type = ttype), NA)
expect_error(
fuh <- predict(fitmod, newdata = data_AM, type = ttype, group = race),
NA
)
expect_error(fuh <- predict(fitmod, newdata = data_AMZ, type = ttype), NA)
expect_error(
fuh <- predict(fitmod, newdata = data_AMZ, type = ttype, group = race),
NA
)
expect_error(fuh <- predict(henmod, newdata = data_AQ, type = ttype), NA)
expect_error(
fuh <- predict(henmod, newdata = data_AQ, type = ttype, group = race),
NA
)
expect_error(fuh <- predict(henmod, newdata = data_AM, type = ttype), NA)
expect_error(
fuh <- predict(henmod, newdata = data_AM, type = ttype, group = race),
NA
)
expect_error(fuh <- predict(henmod, newdata = data_AMZ, type = ttype), NA)
expect_error(
fuh <- predict(henmod, newdata = data_AMZ, type = ttype, group = race),
NA
)
}
}) #UNFOLD
#UNFOLD
context("weighting") #FOLDUP
test_that("harsmfit zero weights", {
#FOLDUP
# confirm that zero weights are equivalent to removing the data altogether
# travis only?
#skip_on_cran()
nfeat <- 5
set.seed(1234)
g <- ceiling(seq(0.1, 100, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
expect_error(y <- rsm(eta, g = g), NA)
# usually we use weights for the first k outcomes.
wt <- ifelse(y <= 3, 1, 0)
data <- cbind(data.frame(outcome = y, race = g, wt = wt), as.data.frame(X))
fmla <- outcome ~ V1 + V2 + V3 + V4 + V5
expect_error(fitm <- harsm(fmla, data, group = race, weights = wt), NA)
# now say we want to ignore some of the races
ignore <- g <= 10
data <- cbind(
data.frame(
outcome = y,
race = g,
pre_wt = wt,
wt = as.numeric(!ignore) * wt
),
as.data.frame(X)
)
# fit twice
expect_error(fitm1 <- harsm(fmla, data, group = race, weights = wt), NA)
subdata <- data[!ignore, ]
expect_error(
fitm2 <- harsm(fmla, subdata, group = race, weights = pre_wt),
NA
)
expect_equal(
as.numeric(coefficients(fitm2)),
as.numeric(coefficients(fitm1)),
tolerance = 0.0001
)
}) #UNFOLD
test_that("hensmfit zero weights", {
#FOLDUP
# confirm that zero weights are equivalent to removing the data altogether
# travis only?
#skip_on_cran()
nfeat <- 5
set.seed(1234)
g <- ceiling(seq(0.1, 100, by = 0.1))
X <- matrix(rnorm(length(g) * nfeat), ncol = nfeat)
beta <- rnorm(nfeat)
eta <- X %*% beta
expect_error(y <- rsm(eta, g = g), NA)
# usually we use weights for the first k outcomes.
wt <- ifelse(y <= 3, 1, 0)
data <- cbind(data.frame(outcome = y, race = g, wt = wt), as.data.frame(X))
fmla <- outcome ~ V1 + V2 + V3 + V4 + V5
expect_error(
fitm <- hensm(fmla, data, group = race, weights = wt, ngamma = 2),
NA
)
# now say we want to ignore some of the races
ignore <- g <= 10
data <- cbind(
data.frame(
outcome = y,
race = g,
pre_wt = wt,
wt = as.numeric(!ignore) * wt
),
as.data.frame(X)
)
# fit twice
expect_error(
fitm1 <- hensm(fmla, data, group = race, weights = wt, ngamma = 2),
NA
)
subdata <- data[!ignore, ]
expect_error(
fitm2 <- hensm(fmla, subdata, group = race, weights = pre_wt, ngamma = 2),
NA
)
expect_equal(
as.numeric(coefficients(fitm2)),
as.numeric(coefficients(fitm1)),
tolerance = 0.0001
)
}) #UNFOLD
#UNFOLD
#for vim modeline: (do not edit)
# vim:ts=2:sw=2:tw=79:fdm=marker:fmr=FOLDUP,UNFOLD:cms=#%s:syn=r:ft=r:ai:si:cin:nu:fo=croql:cino=p0t0c5(0: