context("n_mixture") # Simulations take a bit of time to set up skip_on_cran() set.seed(100) poisdat <- sim_mvgam() test_that("only count data allowed for nmixtures", { gaus_data$data_train$cap <- 100 expect_error(mvgam(y ~ s(season), trend_formula = ~ trend, family = nmix(), data = gaus_data$data_train), 'Values < 0 not allowed for count family responses', fixed = TRUE) }) test_that("cap must be supplied in data", { expect_error(get_mvgam_priors(formula = y ~ s(season), trend_formula = ~ s(season) + trend, trend_model = 'None', family = nmix(), data = poisdat$data_train), 'Max abundances must be supplied as a variable named "cap" for N-mixture models', fixed = TRUE) poisdat$data_train$cap <- rpois(NROW(poisdat$data_train), lambda = 5) + max(poisdat$data_train$y, na.rm = TRUE) expect_error(mvgam(formula = y ~ s(season), trend_formula = ~ s(season) + trend, trend_model = 'None', family = nmix(), data = poisdat$data_train, newdata = poisdat$data_test), '"data" and "newdata" have different numbers of columns', fixed = TRUE) poisdat$data_test$emu <- 50 expect_error(mvgam(formula = y ~ s(season), trend_formula = ~ s(season) + trend, trend_model = 'None', family = nmix(), data = poisdat$data_train, newdata = poisdat$data_test), 'Max abundances must be supplied in test data as a variable named "cap" for N-mixture models', fixed = TRUE) }) poisdat$data_train$cap <- rpois(NROW(poisdat$data_train), lambda = 5) + max(poisdat$data_train$y, na.rm = TRUE) poisdat$data_test$cap <- rpois(NROW(poisdat$data_test), lambda = 5) + max(poisdat$data_test$y, na.rm = TRUE) test_that("latent process intercept is allowed in nmixtures", { prior_df <- get_mvgam_priors(formula = y ~ s(season), trend_formula = ~ s(season) + trend, trend_model = 'None', family = nmix(), data = poisdat$data_train) expect_true(any(grepl('(Intercept)_trend', prior_df$param_name, fixed = TRUE))) mod <- mvgam(formula = y ~ s(season), trend_formula = ~ s(season) + trend, trend_model = 'None', family = nmix(), data = poisdat$data_train, newdata = poisdat$data_test, priors = prior(std_normal(), class = '(Intercept)_trend'), run_model = FALSE) expect_true(any(grepl('(Intercept)_trend', mod$model_file, fixed = TRUE))) expect_true(any(grepl('b_raw_trend[1] ~ std_normal();', mod$model_file, fixed = TRUE))) # Can also test that 'cap' is properly included in model_data # The caps should be arranged by series and then by time train_cap = poisdat$data_train %>% dplyr::arrange(series, time) %>% dplyr::pull(cap) test_cap = poisdat$data_test %>% dplyr::arrange(series, time) %>% dplyr::pull(cap) expect_true(all(mod$model_data$cap == c(train_cap, test_cap))) }) # Check that the model fits and post-processing works using the # example from the families man page test_that("nmix() post-processing works", { set.seed(0) data.frame(site = 1, # five replicates per year; six years replicate = rep(1:5, 6), time = sort(rep(1:6, 5)), species = 'sp_1', # true abundance declines nonlinearly truth = c(rep(28, 5), rep(26, 5), rep(23, 5), rep(16, 5), rep(14, 5), rep(14, 5)), # observations are taken with detection prob = 0.7 obs = c(rbinom(5, 28, 0.7), rbinom(5, 26, 0.7), rbinom(5, 23, 0.7), rbinom(5, 15, 0.7), rbinom(5, 14, 0.7), rbinom(5, 14, 0.7))) %>% # add 'series' information, which is an identifier of site, replicate and species dplyr::mutate(series = paste0('site_', site, '_', species, '_rep_', replicate), time = as.numeric(time), # add a 'cap' variable that defines the maximum latent N to # marginalize over when estimating latent abundance; in other words # how large do we realistically think the true abundance could be? cap = 100) %>% dplyr::select(- replicate) -> testdat # Now add another species that has a different temporal trend and a smaller # detection probability (0.45 for this species) testdat = testdat %>% dplyr::bind_rows(data.frame(site = 1, replicate = rep(1:5, 6), time = sort(rep(1:6, 5)), species = 'sp_2', truth = c(rep(4, 5), rep(7, 5), rep(15, 5), rep(16, 5), rep(19, 5), rep(18, 5)), obs = c(rbinom(5, 4, 0.45), rbinom(5, 7, 0.45), rbinom(5, 15, 0.45), rbinom(5, 16, 0.45), rbinom(5, 19, 0.45), rbinom(5, 18, 0.45))) %>% dplyr::mutate(series = paste0('site_', site, '_', species, '_rep_', replicate), time = as.numeric(time), cap = 50) %>% dplyr::select(-replicate)) # series identifiers testdat$species <- factor(testdat$species, levels = unique(testdat$species)) testdat$series <- factor(testdat$series, levels = unique(testdat$series)) # The trend_map to state how replicates are structured testdat %>% # each unique combination of site*species is a separate process dplyr::mutate(trend = as.numeric(factor(paste0(site, species)))) %>% dplyr::select(trend, series) %>% dplyr::distinct() -> trend_map # Fit a model mod <- SW(mvgam( # the observation formula sets up linear predictors for # detection probability on the logit scale formula = obs ~ species - 1, # the trend_formula sets up the linear predictors for # the latent abundance processes on the log scale trend_formula = ~ s(time, by = trend, k = 4) + species, # the trend_map takes care of the mapping trend_map = trend_map, # nmix() family and data family = nmix(), data = testdat, # priors can be set in the usual way priors = c(prior(std_normal(), class = b), prior(normal(1, 1.5), class = Intercept_trend)), samples = 300, residuals = FALSE, chains = 2)) expect_no_error(summary(mod)) expect_no_error(print(mod)) expect_true(inherits(hindcast(mod), 'mvgam_forecast')) expect_true(inherits(hindcast(mod, type = 'latent_N'), 'mvgam_forecast')) expect_true(inherits(hindcast(mod, type = 'detection'), 'mvgam_forecast')) preds <- predict(mod, summary = FALSE, type = 'response') expect_true(NCOL(preds) == NROW(testdat)) expect_true(all(preds >= 0L)) preds <- predict(mod, summary = FALSE, type = 'detection') expect_true(NCOL(preds) == NROW(testdat)) expect_true(all(preds <= 1L & preds >= 0L)) preds <- predict(mod, summary = FALSE, type = 'latent_N') expect_true(NCOL(preds) == NROW(testdat)) expect_true(all(preds >= 0L)) expect_no_error(plot(mod, type = 'smooths', trend_effects = TRUE)) expect_no_error(plot(mod, type = 'smooths', realisations = TRUE, trend_effects = TRUE)) expect_no_error(plot(mod, type = 'smooths', residuals = TRUE, trend_effects = TRUE)) options(mc.cores = 1) expect_loo(SW(loo(mod))) })