# Testing consistency
## predict methods
test_that("Testing consistency of predict methods", {
  library(BranchGLM)
  set.seed(8621)
  x <- sapply(rep(0, 10), rnorm, n = 1000, simplify = TRUE)
  x <- cbind(1, x)
  x <- rbind(x, matrix(NA, nrow = 100, ncol = 11))
  beta <- rnorm(11, sd = 0.1)
  y <- exp(x %*% beta)
  Data <- cbind(y, x[,-1]) |>
    as.data.frame()
  colnames(Data)[1] <- "y"
  Fit <- BranchGLM(y ~ ., data = Data, family = "gamma", link = "log", 
                   offset = rep(0.01, 1100))
  VS <- VariableSelection(Fit)
  
  ### getting predictions with Fit
  preds <- predict(Fit)
  predsNew <- predict(Fit, newdata = Data, offset = rep(0.01, 1100))
  linpreds <- predict(Fit, type = "linpreds")
  linpredsNew <- predict(Fit, newdata = Data, type = "linpreds", offset = rep(0.01, 1100))
  predsNA <- predict(Fit, na.action = na.omit)
  predsNewNA <- predict(Fit, newdata = Data, offset = rep(0.01, 1100), na.action = na.omit)
  
  #### Testing for rownames and equality
  expect_equal(names(preds), rownames(Data))
  expect_equal(names(linpreds), rownames(Data))
  expect_equal(preds, predsNew)
  expect_equal(linpreds, linpredsNew)
  expect_equal(predsNA, predsNewNA)
  expect_equal(length(predsNA), 1000)
  
  ### getting predictions with VS
  preds <- predict(VS)
  predsNew <- predict(VS, newdata = Data, offset = rep(0.01, 1100))
  linpreds <- predict(VS, type = "linpreds")
  linpredsNew <- predict(VS, newdata = Data, type = "linpreds", offset = rep(0.01, 1100))
  predsNA <- predict(VS, na.action = na.omit)
  predsNewNA <- predict(VS, newdata = Data, offset = rep(0.01, 1100), na.action = na.omit)
  
  #### Testing for rownames and equality
  expect_equal(names(preds), rownames(Data))
  expect_equal(names(linpreds), rownames(Data))
  expect_equal(preds, predsNew)
  expect_equal(linpreds, linpredsNew)
  expect_equal(predsNA, predsNewNA)
  expect_equal(length(predsNA), 1000)
  
})

## Testing BranchGLM accessor functions
test_that("BranchGLM accessor functions", {
  library(BranchGLM)
  set.seed(8621)
  x <- sapply(rep(0, 5), rnorm, n = 1000, simplify = TRUE)
  x <- cbind(1, x)
  x <- rbind(x, matrix(NA, nrow = 100, ncol = 6))
  beta <- rnorm(6)
  y <- c(rgamma(n = 1000, shape = 1 / 2, scale = exp(x %*% beta) * 2), 
         rep(NA, 100))
  
  Data <- cbind(y, x[,-1]) |>
    as.data.frame()
  
  ## Fitting model while keeping data and y
  Fit <- BranchGLM(y ~ ., data = Data, family = gaussian())
  
  ### Testing nobs and formula
  expect_equal(formula(Fit), formula(y ~ .))
  expect_equal(nobs(Fit), 1000)
  expect_equal(Fit$missing, 100)
  expect_equal(nobs(Fit), attr(logLik(Fit), "nobs"))
  expect_equal(Fit$logLik, as.numeric(logLik(Fit)))
  expect_equal(length(coef(Fit)), 6)
  
  ### Performing variable selection
  BB <- VariableSelection(Fit)
  BB5 <- VariableSelection(Fit, bestmodels = 5)
  forward <- VariableSelection(Fit, type = "forward")
  
  ### Checking bestmodels
  expect_equal(is.matrix(forward$bestmodels), TRUE)
  expect_equal(is.matrix(BB$bestmodels), TRUE)
  expect_equal(nrow(forward$bestmodels), nrow(BB$bestmodels))
  expect_equal(is.matrix(BB5$bestmodels), TRUE)
  
  ### Checking beta
  expect_equal(is.matrix(forward$beta), TRUE)
  expect_equal(is.matrix(BB$beta), TRUE)
  expect_equal(nrow(forward$beta), nrow(BB$beta))
  expect_equal(is.matrix(BB5$beta), TRUE)
  
  ### Testing coef
  expect_equal(dim(coef(BB)), dim(coef(BB)))
  expect_equal(dim(coef(BB5)), dim(coef(forward)))
  expect_equal(dim(coef(BB5, which = "ALL")), c(6, 5))
  
  ## Fitting model without keeping data and y
  Fit <- BranchGLM(y ~ ., data = Data, family = "Gamma", link = "log", 
                   keepData = FALSE, keepY = FALSE)
  
  ### Testing functions
  expect_equal(formula(Fit), formula(y ~ .))
  expect_equal(nobs(Fit), 1000)
  expect_equal(nobs(Fit), attr(logLik(Fit), "nobs"))
  expect_equal(Fit$logLik, as.numeric(logLik(Fit)))
  expect_equal(length(coef(Fit)), 6)
  
})

# Testing predict function
## Gaussian and Gamma regression
test_that("Continuous regression tests", {
  library(BranchGLM)
  set.seed(8621)
  x <- sapply(rep(1, 10), rgamma, n = 1000, simplify = TRUE)
  x <- cbind(1, x)
  beta <- rgamma(11, 1)
  y <- rgamma(n = 1000, shape = 1, scale = x %*% beta)
  Data <- cbind(y, x[,-1]) |>
    as.data.frame()
  yInv <- rgamma(n = 1000, shape = 1, scale = 1 / (x %*% beta))
  DataInv <- cbind(y = yInv, x[,-1]) |>
    as.data.frame()
  
  ### Fitting models with BranchGLM
  #### Gaussian
  FitGausIden <- BranchGLM(y ~ ., data = Data, family = "gaussian", link = "identity")
  FitGausLog <- BranchGLM(y ~ ., data = Data, family = "gaussian", link = "log")
  FitGausInv <- BranchGLM(y ~ ., data = DataInv, family = "gaussian", link = "inverse")
  
  #### Gamma
  FitGamIden <- BranchGLM(y ~ ., data = Data, family = "gamma", link = "identity")
  FitGamLog <- BranchGLM(y ~ ., data = Data, family = "gamma", link = "log")
  FitGamInv <- BranchGLM(y ~ ., data = DataInv, family = "gamma", link = "inverse")
  
  ### Fitting models with glm
  #### Gaussian
  GLMGausIden <- glm(y ~ ., data = Data, family = gaussian(link = "identity"))
  GLMGausLog <- glm(y ~ ., data = Data, family = gaussian(link = "log"))
  GLMGausInv <- glm(y ~ ., data = DataInv, family = gaussian(link = "inverse"))
  
  #### Gamma
  ##### adding start since this didn't converge
  GLMGamIden <- glm(y ~ ., data = Data, family = Gamma(link = "identity"), 
                    start = beta)
  GLMGamLog <- glm(y ~ ., data = Data, family = Gamma(link = "log"))
  GLMGamInv <- glm(y ~ ., data = DataInv, family = Gamma(link = "inverse"))
  
  ### Checking results
  #### model.frame, and nobs
  expect_equal(model.frame(GLMGausIden), model.frame(FitGausIden))
  expect_equal(nobs(GLMGausIden), nobs(FitGausIden))
  
  #### gaussian
  ##### Checking log-likelihood
  expect_equal(logLik(FitGausIden)[[1]], 
               sum(dnorm(y, FitGausIden$pred, sigma(FitGausIden)[[1]], log = TRUE)))
  
  ##### Identity
  expect_equal(coef(FitGausIden), coef(GLMGausIden), tolerance = 1e-3)
  expect_equal(residuals(FitGausIden), residuals(GLMGausIden, type = "pearson"), 
               tolerance = 1e-3)
  expect_equal(predict(FitGausIden), predict(GLMGausIden, type = "response"), 
               tolerance = 1e-3)
  expect_equal(deviance(FitGausIden), deviance(GLMGausIden), tolerance = 1e-3)
  expect_equal(logLik(FitGausIden), logLik(GLMGausIden), tolerance = 1e-3)
  expect_equal(extractAIC(FitGausIden)[[2]], extractAIC(GLMGausIden)[[2]], tolerance = 1e-3)
  expect_equal(sigma(GLMGausIden), sigma(FitGausIden)[[2]], tolerance = 1e-3)
  expect_equal(as.data.frame(coef(summary(GLMGausIden))), 
                             coef(FitGausIden, type = "all"), tolerance = 1e-3)
  expect_equal(vcov(GLMGausIden), vcov(FitGausIden) * (1000 / (1000 - 11)), tolerance = 1e-3)
  expect_equal(family(GLMGausIden),  family(FitGausIden))
  
  ##### Log
  expect_equal(coef(FitGausLog), coef(GLMGausLog), tolerance = 1e-3)
  expect_equal(predict(FitGausLog), predict(GLMGausLog, type = "response"), 
               tolerance = 1e-3)
  expect_equal(residuals(FitGausLog), residuals(GLMGausLog, type = "pearson"), 
               tolerance = 1e-3)
  expect_equal(deviance(FitGausLog), deviance(GLMGausLog), tolerance = 1e-3)
  expect_equal(logLik(FitGausLog), logLik(GLMGausLog), tolerance = 1e-3)
  expect_equal(extractAIC(FitGausLog), extractAIC(GLMGausLog), tolerance = 1e-3)
  expect_equal(sigma(GLMGausLog), sigma(FitGausLog)[[2]], tolerance = 1e-3)
  expect_equal(as.data.frame(coef(summary(GLMGausLog))), 
                             coef(FitGausLog, type = "all"), tolerance = 1e-3)
  expect_equal(family(GLMGausLog),  family(FitGausLog))
  
  ##### Inverse
  expect_equal(coef(FitGausInv), coef(GLMGausInv), tolerance = 1e-3)
  expect_equal(predict(FitGausInv), predict(GLMGausInv, type = "response"), 
               tolerance = 1e-3)
  expect_equal(residuals(FitGausInv), residuals(GLMGausInv, type = "pearson"), 
               tolerance = 1e-3)
  expect_equal(deviance(FitGausInv), deviance(GLMGausInv), tolerance = 1e-3)
  expect_equal(logLik(FitGausInv), logLik(GLMGausInv), tolerance = 1e-3)
  expect_equal(extractAIC(FitGausInv), extractAIC(GLMGausInv), tolerance = 1e-3)
  expect_equal(sigma(GLMGausInv), sigma(FitGausInv)[[2]], tolerance = 1e-3)
  expect_equal(as.data.frame(coef(summary(GLMGausInv))), 
                             coef(FitGausInv, type = "all"), tolerance = 1e-2)
  expect_equal(family(GLMGausInv),  family(FitGausInv))
  
  #### gamma
  ##### Checking log-likelihood
  expect_equal(logLik(FitGamIden)[[1]], 
               sum(dgamma(y, 1 / sigma(FitGamIden)[[1]]^2, 
                          scale = FitGamIden$preds * sigma(FitGamIden)[[1]]^2, 
                          log = TRUE)))
  
  ##### Identity
  expect_equal(coef(FitGamIden), coef(GLMGamIden), tolerance = 1e-3)
  expect_equal(predict(FitGamIden), predict(GLMGamIden, type = "response"), 
               tolerance = 1e-3)
  expect_equal(residuals(FitGamIden), residuals(GLMGamIden, type = "pearson"), 
               tolerance = 1e-3)
  expect_equal(deviance(FitGamIden), deviance(GLMGamIden), tolerance = 1e-3)
  expect_equal(sigma(GLMGamIden), sigma(FitGamIden)[[2]], tolerance = 1e-3)
  expect_equal(as.data.frame(coef(summary(GLMGamIden))), 
                             coef(FitGamIden, type = "all"), tolerance = 1e-3)
  expect_equal(family(FitGamIden), family(GLMGamIden))
  
  ##### Log
  expect_equal(coef(FitGamLog), coef(GLMGamLog), tolerance = 1e-3)
  expect_equal(predict(FitGamLog), predict(GLMGamLog, type = "response"), 
               tolerance = 1e-3)
  expect_equal(residuals(FitGamLog), residuals(GLMGamLog, type = "pearson"), 
               tolerance = 1e-3)
  expect_equal(deviance(FitGamLog), deviance(GLMGamLog), tolerance = 1e-3)
  expect_equal(sigma(GLMGamLog), sigma(FitGamLog)[[2]], tolerance = 1e-3)
  expect_equal(as.data.frame(coef(summary(GLMGamLog))), 
                             coef(FitGamLog, type = "all"), tolerance = 1e-3)
  expect_equal(family(FitGamLog), family(GLMGamLog))
  
  ##### Inverse
  expect_equal(coef(FitGamInv), coef(GLMGamInv), tolerance = 1e-3)
  expect_equal(predict(FitGamInv), predict(GLMGamInv, type = "response"), 
               tolerance = 1e-3)
  expect_equal(residuals(FitGamInv), residuals(GLMGamInv, type = "pearson"), 
               tolerance = 1e-3)
  expect_equal(deviance(FitGamInv), deviance(GLMGamInv), tolerance = 1e-3)
  expect_equal(sigma(GLMGamInv), sigma(FitGamInv)[[2]], tolerance = 1e-3)
  expect_equal(as.data.frame(coef(summary(GLMGamInv))), 
               coef(FitGamInv, type = "all"), tolerance = 1e-3)
  expect_equal(family(FitGamInv), family(GLMGamInv))
  
})


## Binomial regression
test_that("Binomial regression tests", {
  library(BranchGLM)
  set.seed(8621)
  x <- sapply(rep(0, 10), rnorm, n = 1000, simplify = TRUE)
  x <- cbind(1, x)
  beta <- rnorm(11)
  y <- rbinom(n = 1000, size = 1, p = 1 / (1 + exp(-x %*% beta)))
  Data <- cbind(y, x[,-1]) |>
    as.data.frame()
  
  ### Fitting models with BranchGLM
  FitClog <- BranchGLM(y ~ ., data = Data, family = "binomial", link = "cloglog")
  FitLogit <- BranchGLM(y ~ ., data = Data, family = "binomial", link = "logit")
  FitProbit <- BranchGLM(y ~ ., data = Data, family = "binomial", link = "probit")
  
  ### Fitting models with glm
  GLMClog <- suppressWarnings(glm(y ~ ., data = Data, family = binomial(link = "cloglog")))
  GLMLogit <- glm(y ~ ., data = Data, family = binomial(link = "logit"))
  GLMProbit <- glm(y ~ ., data = Data, family = binomial(link = "probit"))
  
  ##### Checking log-likelihood
  expect_equal(logLik(FitClog)[[1]], 
               sum(dbinom(y, 1, FitClog$preds, log = TRUE)))
  
  ### Checking results
  #### Cloglog
  expect_equal(coef(FitClog), coef(GLMClog), tolerance = 1e-3)
  expect_equal(predict(FitClog), predict(GLMClog, type = "response"), 
               tolerance = 1e-3)
  expect_equal(residuals(FitClog), residuals(GLMClog, type = "pearson"), 
               tolerance = 1e-3)
  expect_equal(deviance(FitClog), deviance(GLMClog), tolerance = 1e-3)
  expect_equal(logLik(FitClog), logLik(GLMClog), tolerance = 1e-3)
  expect_equal(extractAIC(FitClog), extractAIC(GLMClog), tolerance = 1e-3)
  expect_equal(vcov(FitClog), vcov(GLMClog), tolerance = 1e-3)
  expect_equal(coef(FitClog, type = "all"), as.data.frame(coef(summary(GLMClog))), 
               tolerance = 1e-3)
  expect_equal(unname(sigma(FitClog)), c(1, 1))
  expect_equal(family(FitClog), family(GLMClog), tolerance = 1e-3)
  
  #### Logit
  expect_equal(coef(FitLogit), coef(GLMLogit), tolerance = 1e-3)
  expect_equal(predict(FitLogit), predict(GLMLogit, type = "response"), 
               tolerance = 1e-3)
  expect_equal(residuals(FitLogit), residuals(GLMLogit, type = "pearson"), 
               tolerance = 1e-3)
  expect_equal(deviance(FitLogit), deviance(GLMLogit), tolerance = 1e-3)
  expect_equal(logLik(FitLogit), logLik(GLMLogit), tolerance = 1e-3)
  expect_equal(extractAIC(FitLogit), extractAIC(GLMLogit), tolerance = 1e-3)
  expect_equal(vcov(FitLogit), vcov(GLMLogit), tolerance = 1e-3)
  expect_equal(coef(FitLogit, type = "all"), as.data.frame(coef(summary(GLMLogit))), 
               tolerance = 1e-3)
  expect_equal(unname(sigma(FitLogit)), c(1, 1))
  expect_equal(family(FitLogit), family(GLMLogit), tolerance = 1e-3)
  
  #### Probit
  expect_equal(coef(FitProbit), coef(GLMProbit), tolerance = 1e-3)
  expect_equal(predict(FitProbit), predict(GLMProbit, type = "response"), 
               tolerance = 1e-3)
  expect_equal(residuals(FitProbit), residuals(GLMProbit, type = "pearson"), 
               tolerance = 1e-3)
  expect_equal(deviance(FitProbit), deviance(GLMProbit), tolerance = 1e-3)
  expect_equal(logLik(FitProbit), logLik(GLMProbit), tolerance = 1e-3)
  expect_equal(extractAIC(FitProbit), extractAIC(GLMProbit), tolerance = 1e-3)
  expect_equal(vcov(FitProbit), vcov(GLMProbit), tolerance = 1e-3)
  expect_equal(coef(FitProbit, type = "all"), as.data.frame(coef(summary(GLMProbit))), 
               tolerance = 1e-3)
  expect_equal(unname(sigma(FitProbit)), c(1, 1))
  expect_equal(family(FitProbit), family(GLMProbit), tolerance = 1e-3)
})

## Poisson regression
test_that("Poisson regression tests", {
  library(BranchGLM)
  set.seed(8621)
  x <- sapply(rep(1, 10), rgamma, n = 1000, simplify = TRUE)
  x <- cbind(1, x)
  beta <- rgamma(11, 1)
  y <- rpois(n = 1000, lambda = x %*% beta)
  Data <- cbind(y, x[,-1]) |>
    as.data.frame()
  
  ### Fitting models with BranchGLM
  FitIden <- BranchGLM(y ~ ., data = Data, family = "poisson", link = "identity")
  FitLog <- BranchGLM(y ~ ., data = Data, family = "poisson", link = "log")
  FitSqrt <- BranchGLM(y ~ ., data = Data, family = "poisson", link = "sqrt")
  
  ### Fitting models with glm
  GLMIden <- glm(y ~ ., data = Data, family = poisson(link = "identity"), 
                 start = beta)
  GLMLog <- glm(y ~ ., data = Data, family = poisson(link = "log"))
  GLMSqrt <- glm(y ~ ., data = Data, family = poisson(link = "sqrt"))
  
  ##### Checking log-likelihood
  expect_equal(logLik(FitLog)[[1]], 
               sum(dpois(y, FitLog$pred, log = TRUE)))
  
  ### Checking results
  #### Identity
  expect_equal(coef(FitIden), coef(GLMIden), tolerance = 1e-3)
  expect_equal(predict(FitIden), predict(GLMIden, type = "response"), 
               tolerance = 1e-3)
  expect_equal(residuals(FitIden), residuals(GLMIden, type = "pearson"), 
               tolerance = 1e-3)
  expect_equal(deviance(FitIden), deviance(GLMIden), tolerance = 1e-3)
  expect_equal(logLik(FitIden), logLik(GLMIden), tolerance = 1e-3)
  expect_equal(extractAIC(FitIden), extractAIC(GLMIden), tolerance = 1e-3)
  expect_equal(vcov(FitIden), vcov(GLMIden), tolerance = 1e-3)
  expect_equal(coef(FitIden, type = "all"), as.data.frame(coef(summary(GLMIden))), 
               tolerance = 1e-3)
  expect_equal(unname(sigma(FitIden)), c(1, 1))
  expect_equal(family(FitIden), family(GLMIden), tolerance = 1e-3)
  
  #### Log
  expect_equal(coef(FitLog), coef(GLMLog), tolerance = 1e-3)
  expect_equal(predict(FitLog), predict(GLMLog, type = "response"), 
               tolerance = 1e-3)
  expect_equal(residuals(FitLog), residuals(GLMLog, type = "pearson"), 
               tolerance = 1e-3)
  expect_equal(deviance(FitLog), deviance(GLMLog), tolerance = 1e-3)
  expect_equal(logLik(FitLog), logLik(GLMLog), tolerance = 1e-3)
  expect_equal(extractAIC(FitLog), extractAIC(GLMLog), tolerance = 1e-3)
  expect_equal(vcov(FitLog), vcov(GLMLog), tolerance = 1e-3)
  expect_equal(coef(FitLog, type = "all"), as.data.frame(coef(summary(GLMLog))), 
               tolerance = 1e-3)
  expect_equal(unname(sigma(FitLog)), c(1, 1))
  expect_equal(family(FitLog), family(GLMLog), tolerance = 1e-3)
  
  ## Square root
  expect_equal(coef(FitSqrt), coef(GLMSqrt), tolerance = 1e-3)
  expect_equal(predict(FitSqrt), predict(GLMSqrt, type = "response"), 
               tolerance = 1e-3)
  expect_equal(residuals(FitSqrt), residuals(GLMSqrt, type = "pearson"), 
               tolerance = 1e-3)
  expect_equal(deviance(FitSqrt), deviance(GLMSqrt), tolerance = 1e-3)
  expect_equal(logLik(FitSqrt), logLik(GLMSqrt), tolerance = 1e-3)
  expect_equal(extractAIC(FitSqrt), extractAIC(GLMSqrt), tolerance = 1e-3)
  expect_equal(vcov(FitSqrt), vcov(GLMSqrt), tolerance = 1e-3)
  expect_equal(coef(FitSqrt, type = "all"), as.data.frame(coef(summary(GLMSqrt))), 
               tolerance = 1e-3)
  expect_equal(unname(sigma(FitSqrt)), c(1, 1))
  expect_equal(family(FitSqrt), family(GLMSqrt), tolerance = 1e-3)
})