library("model4you")
library("survival")

### survreg
set.seed(1)
data(GBSG2, package = "TH.data")

## base model
bmod <- survreg(Surv(time, cens) ~ horTh, data = GBSG2, model = TRUE)
survreg_plot(bmod)
grid.newpage()
coeftable.survreg(bmod)


## partitioned model
tr <- pmtree(bmod)
plot(tr, terminal_panel = node_pmterminal(tr, plotfun = survreg_plot,
                                          confint = TRUE))
summary(tr)
summary(tr, node = 1:2)

coef(tr)
coef(tr, node = 1)
coef(bmod)

logLik(bmod)
logLik(tr)

## alternative table in plot
plot(tr, terminal_panel = node_pmterminal(tr, plotfun = survreg_plot,
  confint = TRUE, coeftable = coeftable.survreg))


### glm binomial
set.seed(2)
n <- 1000
trt <- factor(rep(1:2, each = n/2))
age <- sample(40:60, size = n, replace = TRUE)
eff <- -1 + I(trt == 2) + 1 * I(trt == 2) * I(age > 50)
expit <- function(x) 1/(1 + exp(-x))

success <- rbinom(n = n, size = 1, prob = expit(eff))

dat <- data.frame(success, trt, age)
library("plyr")
dattab <- ddply(.data = dat, .variables = .(trt, age),
                function(x) data.frame(nsuccess = sum(x$success),
                                       nfailure = NROW(x) - sum(x$success)))

bmod1 <- glm(success ~ trt, family = binomial)
bmod2 <- glm(success ~ trt, family = "binomial")
bmod3 <- glm(success ~ trt, data = dat, family = binomial)
bmod4 <- glm(cbind(nsuccess, nfailure) ~ trt, data = dattab, family = binomial)

(tr1 <- pmtree(bmod1, data = dat))
(tr2 <- pmtree(bmod2, data = dat))
(tr3 <- pmtree(bmod3))
(tr4 <- pmtree(bmod4))

(mtr1 <- glmtree(success ~ trt | age, data = dat, family = binomial))
# (mtr2 <- glmtree(cbind(nsuccess, nfailure) ~ trt | age, data = dattab, family = binomial))

library("strucchange")
sctest(tr3)
sctest(tr4)

## check logLik
logLik(mtr1)
logLik(tr1)

sum(objfun(tr1, newdata = dat))
objfun(tr1, newdata = dat, sum = TRUE)
sum(objfun(tr1))
objfun(tr1, sum = TRUE)


## variable importance
logLik(tr1)
logLik(tr1, perm = "age")
a1 <- predict.party(tr1, perm = "age", type = "node")
a2 <- predict(tr1, perm = "age", type = "node")
a3 <- predict(tr1, perm = 3, type = "node")
b <- predict.party(tr1, type = "node")
varimp(tr1, nperm = 5)


library("ggplot2")
ofs <- data.frame(objfun_bmod1 = objfun(bmod1),
                  objfun_tr1 = objfun(tr1))
ggplot(ofs, aes(objfun_bmod1, objfun_tr1)) + geom_jitter(alpha = 0.3)


### linear model and missings
data("MathExam14W", package = "psychotools")

## scale points achieved to [0, 100] percent
MathExam14W$tests <- 100 * MathExam14W$tests/26
MathExam14W$pcorrect <- 100 * MathExam14W$nsolved/13

## select variables to be used
MathExam <- MathExam14W[ , c("pcorrect", "group", "tests", "study",
                             "attempt", "semester", "gender")]

## compute base model
bmod_math <- lm(pcorrect ~ group, data = MathExam)

## compute tree
(tr_math <- pmtree(bmod_math, control = ctree_control(maxdepth = 2)))

## create data with NAs
Math_mx <- Math_mz <- MathExam
Math_mx$group[1:2] <- NA
Math_mz$tests[1:20] <- NA

bmod_math_mx <- lm(pcorrect ~ group, data = Math_mx)

(tr_math_mx1 <- pmtree(bmod_math, control = ctree_control(maxdepth = 2), data = Math_mx))
(tr_math_mx2 <- pmtree(bmod_math_mx, control = ctree_control(maxdepth = 2)))
(tr_math_mz <- pmtree(bmod_math, control = ctree_control(maxdepth = 2), data = Math_mz))


## check logLik
(tr_math_mob <- lmtree(pcorrect ~  group | ., data = MathExam, maxdepth = 2))

logLik(bmod_math)
logLik(tr_math)
logLik(tr_math_mob)

sum(bmod_math$residuals^2)

## varimp
of <- function(x, newdata = NULL, weights = NULL,
               perm = NULL, ...) {
  - objfun(x, newdata = newdata, weights = weights, perm = perm, sum = TRUE, ...)
}
varimp(tr_math, nperm = 2, risk = of)

# deeper tree
w <- rep(1, nrow(Math_mx))
w[5:100] <- 0
tr_math_d <- pmtree(bmod_math, data = Math_mx, weights = w,
                    control = ctree_control(alpha = 0.7))
varimp(tr_math_d, risk = of)


### check different formulas
set.seed(1212)
n <- 90
d1 <- d2 <- d3 <- data.frame(y = abs(rnorm(n) + 5), x = rep(1:(n/15), each = 15), #1:n - 10,
  trt = rep(1:3, each = n/3), z1 = rnorm(n))
d2$trt <- factor(d2$trt)
d3$trt <- ordered(d3$trt)

f <- list(
  y ~ 1,
  y ~ x,
  y ~ trt,
  y ~ trt + x,
  y ~ trt + offset(x),
  y ~ trt + x + offset(x),
  y ~ trt + offset(as.numeric(trt)),
  y ~ factor(trt),
  y ~ factor(trt) + offset(x),
  y ~ factor(x > as.numeric(trt)),
  # y ~ interaction(x, trt),
  y ~ 0 + trt
)

try_pmtree <- function(bmod, data) {
  "pmtree" %in% class(tryCatch(pmtree(bmod, data = data), 
                               error = function(e) e, 
                               warning = function(w) w))
  # "pmtree" %in% class(try(pmtree(bmod, data = data), silent = TRUE))
}

run_lm <- function(formula, data, ...) {
  eval(substitute(lm(fm, data = data, ...), list(fm = formula)))
}

run_survreg <- function(formula, data, ...) {
  eval(substitute(survreg(fm, data = data, ...), list(fm = formula)))
}

run_coxph <- function(formula, data, ...) {
  eval(substitute(coxph(fm, data = data, ...), list(fm = formula)))
}

## expected results
ok1 <- list(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE)
ok2 <- list(FALSE, FALSE, TRUE,  FALSE, TRUE,  FALSE, TRUE,  TRUE, TRUE, TRUE, TRUE)
ok3 <- ok2


## checks with lm
lm1 <- lapply(f, run_lm, data = d1, model = FALSE)
identical(lapply(lm1, try_pmtree, data = d1), ok1)

lm2 <- lapply(f, run_lm, data = d2, model = FALSE)
identical(lapply(lm2, try_pmtree, data = d2), ok2)

lm3 <- lapply(f, run_lm, data = d3, model = FALSE)
identical(lapply(lm3, try_pmtree, data = d3), ok3)

## checks with survreg
library("survival")
d1$y <- d2$y <- d3$y <- Surv(d1$y + 0.5)

survreg1 <- lapply(f, run_survreg, data = d1, model = FALSE)
identical(lapply(survreg1, try_pmtree, data = d1), ok1)

survreg2 <- lapply(f, run_survreg, data = d2, model = FALSE)
identical(lapply(survreg2, try_pmtree, data = d2), ok2)

survreg3 <- lapply(f, run_survreg, data = d3, model = FALSE)
identical(lapply(survreg3, try_pmtree, data = d3), ok3)


## checks with coxph
coxph1 <- lapply(f, run_coxph, data = d1, model = FALSE)
identical(lapply(coxph1, try_pmtree, data = d1), ok1)

coxph2 <- lapply(f, run_coxph, data = d2, model = FALSE)
identical(lapply(coxph2, try_pmtree, data = d2), ok2)

coxph3 <- lapply(f, run_coxph, data = d3, model = FALSE)
identical(lapply(coxph3, try_pmtree, data = d3), ok3)