## Spatial integrated test
## - Uses the sdmTMB package to simulate spatial clusters and generate cluster IDs
## - Uses the MixSim and mvtnorm package to simulate observations based on cluster IDs

test_that("test integrated spatial", {
  library(sdmTMB)
  library(clustTMB)
  library(MixSim)
  library(mvtnorm)
  library(ggplot2)
  library(fmesher)

  ## Simulate spatial clusters
  # generate location data
  set.seed(123)
  loc <- data.frame(x = runif(500), y = runif(500))

  Mesh.sim <- fmesher::fm_mesh_2d(loc, max.edge = c(.03, .05))
  loc.sim <- data.frame(x = Mesh.sim$loc[, 1], y = Mesh.sim$loc[, 2])
  mesh.sim <- make_mesh(
    data = loc.sim, xy_cols = c("x", "y"),
    mesh = Mesh.sim
  )

  # Generate three spatial fields:
  sim_dat_1 <- sdmTMB_simulate(
    formula = ~1,
    data = loc.sim,
    mesh = mesh.sim,
    family = gaussian(),
    range = 1 / 3,
    phi = 0.1,
    sigma_O = 1,
    seed = 1,
    B = 1
  )

  sim_dat_2 <- sdmTMB_simulate(
    formula = ~1,
    data = loc.sim,
    mesh = mesh.sim,
    family = gaussian(),
    range = 1 / 3,
    phi = 0.1,
    sigma_O = 1,
    seed = 2,
    B = 1
  )

  sim_dat_3 <- sdmTMB_simulate(
    formula = ~1,
    data = loc.sim,
    mesh = mesh.sim,
    family = gaussian(),
    range = 1 / 3,
    phi = 0.1,
    sigma_O = 1,
    seed = 3,
    B = 1
  )

  # Spatial omega values
  omega_clusters <- cbind(sim_dat_1$omega_s, sim_dat_2$omega_s, sim_dat_3$omega_s)
  # Probability vectors
  cluster_pi <- t(apply(omega_clusters, 1, function(x) exp(x) / (sum(exp(x)))))
  # Cluster ID
  cluster_id <- apply(cluster_pi, 1, function(x) which(x == max(x)))

  sim1 <- sim_dat_1[which(cluster_id == 1), ]
  sim1$cluster_id <- 1
  sim2 <- sim_dat_2[which(cluster_id == 2), ]
  sim2$cluster_id <- 2
  sim3 <- sim_dat_3[which(cluster_id == 3), ]
  sim3$cluster_id <- 3

  cluster_dat <- rbind(
    sim1, sim2, sim3
  ) %>% dplyr::select(x, y, omega_s, cluster_id)

  cluster_samp <- cluster_dat[cluster_dat$x > 0 & cluster_dat$x < 1 & cluster_dat$y > 0 & cluster_dat$y < 1, ]
  cluster_samp_ <- cluster_samp[sample(1:nrow(cluster_samp), 500), ]
  loc.samp <- cluster_samp_[, 1:2]
  Mesh.fit <- fmesher::fm_mesh_2d(loc.samp, max.edge = c(.1, .5))

  ggplot(cluster_dat, aes(x = x, y = y, color = factor(cluster_id))) +
    geom_point()
  table(cluster_id) / nrow(loc.sim)
  table(cluster_samp_$cluster_id) / nrow(cluster_samp_)
  # convert locations to spatial coordinates
  Loc.samp <- sf::st_as_sf(loc.samp, coords = c("x", "y"))

  ## Simulate clustered observations
  ## Generate multivariate mean and covariances for
  ## 3 groups and 4 responses using 1% overlap in clusters
  set.seed(123)
  Q <- MixSim::MixSim(BarOmega = .01, K = 3, p = 4)

  # cluster probability from simulated spatial data (sample):
  cluster_counts <- table(cluster_samp_$cluster_id)

  simdat <- data.frame()
  id <- c()
  for (g in 1:3) {
    set.seed(g * 100)
    simdat <- rbind(
      simdat,
      mvtnorm::rmvnorm(
        n = cluster_counts[g],
        mean = Q$Mu[g, ],
        sigma = as.matrix(Q$S[, , g])
      )
    )
    id <- c(id, rep(g, cluster_counts[g]))
  }

  simdat$id <- id

  ## Fit model
  mod <- suppressWarnings(
    clustTMB(
      response = as.matrix(simdat[, 1:4]),
      family = gaussian(),
      gatingformula = ~ gmrf(0 + 1 | loc),
      G = 3, covariance.structure = "VVV",
      spatial.list = list(loc = Loc.samp, mesh = Mesh.fit)
    )
  )

  expect_equal(mod$opt$convergence, 0)
  expect_equal(mod$sdr$pdHess, TRUE)
  expect_equal(mod$obj$gr(mod$opt$par) < .01, rep(TRUE, length(mod$opt$par)))
  # expect classification to be within 1%
  expect_equal(1, MixSim::ClassProp(mod$report$classification, simdat$id),
    tolerance = .01
  )


  # re-order by estimate due to non-identifiability of cluster ID
  beta.df <- as.data.frame(summary(mod$sdr, "fixed")[3:14, ])
  row.names(beta.df) <- NULL
  beta.ord <- beta.df[order(beta.df$Estimate), ]

  beta.ord$true <- sort(Q$Mu)

  expect_equal(
    abs(beta.ord$true - beta.ord$Estimate) <
      qnorm(.975) * beta.ord$`Std. Error`,
    rep(TRUE, nrow(beta.ord))
  )
})