# Setup models for tests
library("testthat")
library("gratia")
library("mgcv")
library("gamm4")
library("scam")
library("dplyr")
library("tibble")
library("nlme")

## Need a local wrapper to allow conditional use of vdiffr
`expect_doppelganger` <- function(title, fig, ...) {
  testthat::skip_if_not_installed("vdiffr")
  vdiffr::expect_doppelganger(title, fig, ...)
}

## Fit models
quick_eg1 <- data_sim("eg1", n = 200, seed = 1)
su_eg1 <- data_sim("eg1", n = 1000,  dist = "normal", scale = 2, seed = 1)
su_eg2 <- data_sim("eg2", n = 2000, dist = "normal", scale = 0.5, seed = 42)
su_eg3 <- data_sim("eg3", n = 400, seed = 32)
su_eg4 <- data_sim("eg4", n = 400,  dist = "normal", scale = 2, seed = 1)

su_m_quick_eg1 <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3),
                      data = quick_eg1,
                      method = "REML")

su_m_quick_eg1_shrink <- gam(y ~ s(x0, bs = "ts") + s(x1, bs = "ts") +
                               s(x2, bs = "ts") + s(x3, bs = "ts"),
                             data = quick_eg1,
                             method = "REML")

su_m_univar_4 <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3),
                     data = su_eg1,
                     method = "REML")

su_m_penalty <- gam(y ~ s(x0, bs = 'cr') + s(x1, bs = 'bs') +
                      s(x2, k = 15) + s(x3, bs = 'ps'),
                    data = su_eg1,
                    method = "REML")

su_m_bivar <- gam(y ~ s(x, z, k = 40),
                  data = su_eg2,
                  method = "REML")

su_m_trivar <- gam(y ~ s(x0, x1, x2), data = su_eg1, method = "REML")

su_m_quadvar <- gam(y ~ s(x0, x1, x2, x3), data = su_eg1, method = "REML")

su_m_bivar_te <- gam(y ~ te(x, z, k = c(5, 5)), data = su_eg2, method = "REML")

su_m_bivar_t2 <- gam(y ~ t2(x, z, k = c(5, 5)), data = su_eg2, method = "REML")

su_m_trivar_te <- gam(y ~ te(x0, x1, x2, k = c(3, 3, 3)),
                      data = su_eg1, method = "REML")

su_m_quadvar_te <- gam(y ~ te(x0, x1, x2, x3, k = c(3, 3, 3, 3)),
                       data = su_eg1, method = "REML")

su_m_trivar_t2 <- gam(y ~ t2(x0, x1, x2, k = c(3, 3, 3)),
                      data = su_eg1, method = "REML")

su_m_quadvar_t2 <- gam(y ~ t2(x0, x1, x2, x3, k = c(3, 3, 3, 3)),
                       data = su_eg1, method = "REML")

su_m_cont_by <- gam(y ~ s(x2, by = x1), data = su_eg3, method = "REML")

su_m_factor_by <- gam(y ~ fac + s(x2, by = fac) + s(x0),
                      data = su_eg4,
                      method = "REML")

su_m_factor_by_gamm <- gamm(y ~ fac + s(x2, by = fac) + s(x0),
                            data = su_eg4, REML = TRUE)

su_m_factor_by_gamm4 <- gamm4(y ~ fac + s(x2, by = fac) + s(x0),
                              data = su_eg4, REML = TRUE)

su_m_factor_by_bam <- bam(y ~ fac + s(x2, by = fac) + s(x0), data = su_eg4)

su_m_factor_by_x2 <- gam(y ~ fac + s(x2, by = fac),
  data = su_eg4,
  method = "REML")

if (packageVersion("mgcv") >= "1.8.41") {
  m_sz <- gam(y ~ s(x2) + s(fac, x2, bs = "sz") + s(x0),
    data = su_eg4, method = "REML")

  # two factor sz smooth example from ?smooth.construct.sz.smooth.spec
  ## Example involving 2 factors
  two_factor_sz_example <- function(seed = NULL) {
    ## sort out the seed
    if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) {
      runif(1)
    }
    if (is.null(seed)) {
      RNGstate <- get(".Random.seed", envir = .GlobalEnv)
    } else {
      R.seed <- get(".Random.seed", envir = .GlobalEnv)
      set.seed(seed)
      RNGstate <- structure(seed, kind = as.list(RNGkind()))
      on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
    }
    f1 <- function(x2) 2 * sin(pi * x2)
    f2 <- function(x2) exp(2 * x2) - 3.75887
    f3 <- function(x2) 0.2 * x2^11 * (10 * (1 - x2))^6 + 10 * (10 * x2)^3 *
      (1 - x2)^10

    n <- 600
    x <- runif(n)
    f1 <- factor(sample(c("a", "b", "c"), n, replace = TRUE))
    f2 <- factor(sample(c("foo", "bar"), n, replace = TRUE))

    mu <- f3(x)
    for (i in 1:3) mu <- mu + exp(2 * (2 - i) * x) * (f1 == levels(f1)[i])
    for (i in 1:2) mu <- mu + 10 * i * x * (1 - x) * (f2 == levels(f2)[i])
    y <- mu + rnorm(n)
    dat <- data.frame(y = y, x = x, f1 = f1, f2 = f2)
    dat
  }
  su_eg_sz_2_factor <- two_factor_sz_example(seed = 42)
  # using bam as it is so much faster
  m_sz_2f <- bam(y ~ s(x) + s(f1, x, bs = "sz") + s(f2, x, bs = "sz") +
    s(f1, f2, x, bs = "sz", id = 1),
  data = su_eg_sz_2_factor, method = "fREML")
}

su_eg2_by <- su_eg2 %>%
  mutate(y = y + y^2 + y^3) %>%
  bind_rows(su_eg2) %>%
  mutate(fac = factor(rep(c("A", "B"), each = nrow(su_eg2))))
su_m_bivar_by_fac <- gam(y ~ fac + s(x, z, k = 40, by = fac),
                  data = su_eg2_by,
                  method = "REML")

su_gamm_univar_4 <- gamm(y ~ s(x0) + s(x1) + s(x2) + s(x3),
                         data = su_eg1,
                         method = "REML")

m_1_smooth <- gam(y ~ s(x0), data = quick_eg1, method = "REML")

m_1_smooth_offset <- gam(y ~ s(x0) + offset(log(off)),
  data = quick_eg1 |> mutate(off = 2), method = "REML")

m_gam <- su_m_univar_4

m_gamm <- su_gamm_univar_4

m_bam    <- bam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = su_eg1,
  method = "fREML")

m_gamgcv <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = su_eg1,
  method = "GCV.Cp")

m_gamm4  <- gamm4(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = su_eg1,
  REML = TRUE)
m_gamm4_real <- m_gamm4
  class(m_gamm4_real) <- append("gamm4", class(m_gamm4_real[-1L]))

m_gaulss <- gam(list(y ~ s(x0) + s(x1) + s(x2) + s(x3), ~ 1), data = su_eg1,
                family = gaulss)

m_scat <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = su_eg1,
              family = scat(), method = "REML")

m_lm  <- lm(y ~ x0 + x1 + x2 + x3, data = quick_eg1)

m_glm <- glm(y ~ x0 + x1 + x2 + x3, data = quick_eg1)

# rootogram models
df_pois  <- data_sim("eg1", dist = "poisson", n = 500L, scale = 0.2, seed = 42)
## fit the model
b_pois  <-  bam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = df_pois,
                method = "fREML", family = poisson())
m_nb    <-  gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = df_pois,
                method = "REML", family = nb())
m_negbin    <-  gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = df_pois,
                    method = "REML", family = negbin(25))
m_tw    <-  gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = df_pois,
                method = "REML", family = tw())

#-- A standard GAM with a simple random effect ---------------------------------
su_re <- quick_eg1
set.seed(42)
su_re$fac <- as.factor(sample(seq_len(10), 200, replace = TRUE))
su_re$X <- model.matrix(~ fac - 1, data = su_re)
su_re <- transform(su_re, y = y + X %*% rnorm(10) * 0.5)
rm1 <- gam(y ~ s(fac, bs = "re") + s(x0) + s(x1) + s(x2) + s(x3),
           data = su_re, method = "ML")

#-- A factor by GAM with random effects ----------------------------------------
su_re2 <- su_eg4
set.seed(42)
su_re2$ranef <- as.factor(sample(1:20, 400, replace = TRUE))
su_re2$X <- model.matrix(~ ranef - 1, data = su_re2)
su_re2 <- transform(su_re2, y = y + X %*% rnorm(20) * 0.5)
rm2 <- gam(y ~ fac + s(ranef, bs = "re", by = fac) + s(x0) + s(x1) + s(x2),
           data = su_re2, method = "ML")

#-- A distributed lag model example --------------------------------------------
su_dlnm <- su_eg1 %>%
  mutate(f_lag = cbind(dplyr::lag(f, 1),
                       dplyr::lag(f, 2),
                       dplyr::lag(f, 3),
                       dplyr::lag(f, 4),
                       dplyr::lag(f, 5)),
         lag = matrix(1:5, ncol = 5)) %>%
  filter(!is.na(f_lag[, 5]))

# fit DLNM GAM
dlnm_m <- gam(y ~ te(f_lag, lag), data = su_dlnm,
              method = "REML")

#-- An AR(1) example using bam() with factor by --------------------------------
# from ?magic
## simulate truth
set.seed(1)
n <- 400
sig <- 2
x <- 0:(n-1)/(n-1)
## produce scaled covariance matrix for AR1 errors...
rho <- 0.6
V <- corMatrix(Initialize(corAR1(rho), data.frame(x = x)))
Cv <- chol(V)  # t(Cv) %*% Cv=V
## Simulate AR1 errors ...
e1 <- t(Cv) %*% rnorm(n, 0, sig) # so cov(e) = V * sig^2
e2 <- t(Cv) %*% rnorm(n, 0, sig) # so cov(e) = V * sig^2
## Observe truth + AR1 errors
f1 <- 0.2*x^11*(10*(1-x))^6+10*(10*x)^3*(1-x)^10
f2 <- (1280 * x^4) * (1- x)^4
df <- data.frame(x = rep(x, 2), f = c(f1, f2), y = c(f1 + e1, f2 + e2),
                 series = as.factor(rep(c("A", "B"), each = n)))
#rm(x, f1, f2, e1, e2, V, Cv)
AR.start <- rep(FALSE, n*2)
AR.start[c(1, n+1)] <- TRUE
## fit GAM using `bam()` with known correlation
## first just to a single series
m_ar1 <- bam(y ~ s(x, k = 20), data = df[seq_len(n), ], rho = rho,
             AR.start = NULL)
## now as a factor by smooth to model both series
m_ar1_by <- bam(y ~ series + s(x, k = 20, by = series), data = df, rho = rho,
                AR.start = AR.start)

# A standard GAM with multiple factors
set.seed(1)
df_2_fac <- add_column(su_eg4,
                       ff = factor(sample(LETTERS[1:4], nrow(su_eg4),
                                                replace = TRUE)))
# a GAM with multiple factor parametric terms
m_2_fac <- gam(y ~ fac * ff + s(x0) + s(x1) + s(x2),
               data = df_2_fac, method = "REML")
# a GAM with parametric terms (factor and linear) and smooth terms
m_para_sm <- gam(y ~ fac * ff + x0 + s(x1) + s(x2),
                 data = df_2_fac, method = "REML")
# a GAM with parametric terms (factor and linear) and smooth terms
m_only_para <- gam(y ~ fac * ff + x0 + x1 + x2,
                   data = df_2_fac, method = "REML")

##-- scam models ---------------------------------------------------------------
data(smallAges)
smallAges$Error[1] <- 1.1
sw <- scam(Date ~ s(Depth, k = 5, bs = "mpd"), data = smallAges,
  weights = 1 / smallAges$Error, gamma = 1.4)

# this should be folded into data_sim()
`sim_scam` <- function(n, seed = NULL) {
      ## sort out the seed
      if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) {
        runif(1)
      }
      if (is.null(seed)) {
          RNGstate <- get(".Random.seed", envir = .GlobalEnv)
      }
      else {
          R.seed <- get(".Random.seed", envir = .GlobalEnv)
          set.seed(seed)
          RNGstate <- structure(seed, kind = as.list(RNGkind()))
          on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
      }
      # from ?scam, first example
      x1 <- runif(n) * 6 - 3
      f1 <- 3 * exp(-x1^2) # unconstrained term
      x2 <- runif(n) * 4 - 1
      f2 <- exp(4 * x2) / (1 + exp(4 * x2)) # monotone increasing smooth
      y <- f1 + f2 + rnorm(n) * .5
      tibble(x1 = x1, x2 = x2, y = y)
}
dat <- sim_scam(n = 200, seed = 4)
## fit model, get results, and plot...
m_scam <- scam(y ~ s(x1, bs = "cr") + s(x2, bs = "mpi"), data = dat)