## Test for boot_nlme
require(nlme)
require(nlraa)
require(car)

## I only run this on my computer and occasionally
run.boot.test <- Sys.info()[["user"]] == "fernandomiguez" && FALSE

data(barley, package = "nlraa")
set.seed(101)
## Does Boot produce 'good' confidence intervals?

if(run.boot.test){
  
  fit.nls <- nls(yield ~ SSlinp(NF, a, b, xs), data = barley)
  
  ## Profiled confidence intervals
  confint(fit.nls)

  ## Does bootstrap as implemented in car underestimates confidence intervals?  
  fit.nls.bt <- Boot(fit.nls)
  ## It is not possible to parallelize the previous code, why?
  
  fit.gnls <- gnls(yield ~ SSlinp(NF, a, b, xs), data = barley)
  
  intervals(fit.gnls)
  
  ## This takes: 9.3 seconds
  system.time(fit.gnls.bt <- boot_nlme(fit.gnls, R = 999, cores = 4))
  
  ## This takes also <10 seconds 
  fit.gnls.bt2 <- boot_nlme(fit.gnls, R = 999, psim = 0, cores = 4)
  
  ## Compare confidence intervals
  confint(fit.nls) ## nls profile
  confint(fit.nls.bt) ## Boot
  confint(fit.gnls.bt) ## boot_nlme psim = 1
  confint(fit.gnls.bt2) ## boot_nlme psim = 0
  ## These intervals do not agree exaclty because different assumptions 
  ## are made and I'm running bootstrap a relatively small number of times
}

if(run.boot.test){
  
  ## Simple test for barley and an object of class 'gnls'
  ## Simplify the dataset to make the set up simpler
  barley2 <- subset(barley, year < 1974)

  fit.lp.gnls2 <- gnls(yield ~ SSlinp(NF, a, b, xs), data = barley2)

  intervals(fit.lp.gnls2)

  ## Compare this to the bootstrapping approach
  ## This take about ~13 seconds
  system.time(fit.lp.gnls2.bt <- boot_nlme(fit.lp.gnls2, R = 2000, cores = 4))

  summary(fit.lp.gnls2.bt) ## Bias is low, which is good

  confint(fit.lp.gnls2.bt, type = "perc")

  hist(fit.lp.gnls2.bt, 1, ci = "perc")
  hist(fit.lp.gnls2.bt, 2, ci = "perc")
  hist(fit.lp.gnls2.bt, 3, ci = "perc")

  ## Testing the bootstrap function with an object which
  ## contains factors
  
  barley2$year.f <- as.factor(barley2$year)

  cfs <- coef(fit.lp.gnls2)

  fit.lp.gnls3 <- update(fit.lp.gnls2, 
                         params = list(a + b + xs ~ year.f),
                         start = c(cfs[1], 0, 0, 0, 
                                   cfs[2], 0, 0, 0,
                                   cfs[3], 0, 0, 0))

  intervals(fit.lp.gnls3)

  ## This takes 13 seconds (Mac), not bad. Windows (21 seconds)
  system.time(fit.lp.gnls3.bt <- boot_nlme(fit.lp.gnls3, R = 3000, cores = 4))

  summary(fit.lp.gnls3.bt)
  
  confint(fit.lp.gnls3.bt, type = "perc")
  
  hist(fit.lp.gnls3.bt, 1, ci = "perc")
  hist(fit.lp.gnls3.bt, 2, ci = "perc")
  hist(fit.lp.gnls3.bt, 3, ci = "perc")

  ## Testing the function for 'nlme'. 
  barley$year.f <- as.factor(barley$year)

  barleyG <- groupedData(yield ~ NF | year.f, data = barley)

  fitL.bar <- nlsList(yield ~ SSlinp(NF, a, b, xs), data = barleyG)

  fit.bar.nlme <- nlme(fitL.bar, random = pdDiag(a + b + xs ~ 1))

  plot(augPred(fit.bar.nlme, level = 0:1))
  ## Confidence intervals of the model fixed parameters
  intervals(fit.bar.nlme, which = "fixed")

  ## Bootstrap for the asymptote
  fna <- function(x) fixef(x)[1] + fixef(x)[2] * fixef(x)[3]

  ## This takes much longer... 215-237 seconds on my laptop
  system.time(fit.bar.nlme.bt <- boot_nlme(fit.bar.nlme, f = fna, R = 2000, cores = 4))

  confint(fit.bar.nlme.bt, type = "perc")

  ## This is good because the estimate on the original data is
  ## 348.9912, which is in the middle of the confidence interval
  hist(fit.bar.nlme.bt, 1, ci = "perc")

}

## Trying to fit the model using different approaches
## This time with the SSasymp function

if(run.boot.test){
  
  fmL1 <- nlsList(yield ~ SSasymp(NF, Asym, R0, lrc), data = barleyG)
  
  fm1 <- nlme(fmL1)
  
  fm2 <- update(fm1, random = pdDiag(Asym + R0 + lrc ~ 1))
  ## Arguably the second model is better
  
  ## fm3 <- nlmer(yield ~ SSasymp(NF, Asym, R0, lrc) ~ (Asym | year.f) + (R0 | year.f), data = barley,
  ##             start = getInitial(yield ~ SSasymp(NF, Asym, R0, lrc), data = barley))
  ## This does not fail now, but it did before. 
  
  ## This is very slow... 17 seconds for 10 attempts
  ## system.time(fm1.bt <- boot_nlme(fm1, R = 10, cores = 4))
  ## What about the second model?
  ## This one takes about 39 sec (Mac and Windows?)
  system.time(fm2.bt <- boot_nlme(fm2, R = 1000, cores = 4))
  
  summary(fm2.bt)
  
  confint(fm2.bt)
  
  hist(fm2.bt, 1)
  hist(fm2.bt, 2)
  hist(fm2.bt, 3)
  
  ## I feel validated! These intervals are very, very close to the ones
  ## obtained through normal approximation
  ## I wrote the previous statement a long time aga, I don't know
  ## if it is still true (2020-5-22)

  ## rstanarm failed with this example. Chains did not converge (2020-05-22)
  ## fm3 <- stan_nlmer(yield ~ SSasymp(NF, Asym, R0, lrc) ~ Asym + R0 + lrc | year.f, 
  ##                  data = barley, cores = 2)  
  
  ## What is the impact of psim = 2 vs. psim = 3 on bootstrapping?
  data(barley, package = "nlraa")
  barley$year.f <- as.factor(barley$year)
  barleyG <- groupedData(yield ~ NF | year.f, data = barley)
  fmL1 <- nlsList(yield ~ SSasymp(NF, Asym, R0, lrc), data = barleyG)
  fm1 <- nlme(fmL1)
  fm2 <- update(fm1, random = pdDiag(Asym + R0 + lrc ~ 1))
  fm3 <- update(fm1, random = pdDiag(Asym + R0 ~ 1))

  anova(fm2, fm3)
  ## Bootstrap the random effects
  vcov_est <- function(x) diag(var_cov(x, type = "random"))
  
  system.time(fm3.vc.bt2 <- boot_nlme(fm3, vcov_est, psim = 2, cores = 4))
  
  fm3.vc.bt2
  
  hist(fm3.vc.bt2, 1, ci = "perc")
  hist(fm3.vc.bt2, 2, ci = "perc")
  
  system.time(fm3.vc.bt3 <- boot_nlme(fm3, vcov_est, psim = 3, cores = 4))

  fm3.vc.bt3
  
  hist(fm3.vc.bt3, 1, ci = "perc")
  hist(fm3.vc.bt3, 2, ci = "perc")
  
  ## Does this show that this method results in biased estimates of the
  ## variance components? Bootstrapping has substantial bias
  ## Is it because I attempt to fix the variance (I use empirical = TRUE in MASS::rmvnorm)
  ## I could either set empirical to FALSE or simulate new variance components...
  
}