test_that("ML fit to simple example give reasonable output", {
  set.seed(4323)
  X <- cbind(1,rep(c(0,1),each = 20))
  J <- 10
  n <- 40
  b1 <- 1:J - mean(1:J)
  Y <- radEmu:::simulate_data(n = n,
                              J = J,
                              X = X,
                              b0 = rnorm(J),
                              b1 = b1,
                              distn = "Poisson",
                              mean_z = 10)
  
  ml_fit <- emuFit_micro(X = X,
                         Y = Y,
                         constraint_fn = rep(list(function(x) mean(x)), 2), 
                         maxit = 200,
                         tolerance = 1e-3,
                         verbose = FALSE)

  # plot(b1-mean(b1),ml_fit[2,])
  # abline(a = 0,b = 1,lty =2,col = "red")

  expect_true(max(abs(ml_fit[2,] - (b1 - mean(b1)))) < 1) # increased to 1
})

test_that("With or without 'working_constraint' we get same results", {
  set.seed(4323)
  X <- cbind(1,rep(c(0,1),each = 20))
  J <- 10
  n <- 40
  Y <- radEmu:::simulate_data(n = n,
                              J = J,
                              X = X,
                              b0 = rnorm(J),
                              b1 = seq(1,10,length.out = J),
                              distn = "Poisson",
                              mean_z = 5)
  
  ml_fit <- emuFit_micro(X,
                         Y,
                         constraint_fn = rep(list(function(x) mean(x)), 2),
                         maxit = 200,
                         tolerance = 1e-6,
                         verbose= FALSE)
  
  ml_fit_direct <- emuFit_micro(X,
                                Y,
                                constraint_fn = rep(list(function(x) mean(x)), 2),
                                maxit = 200,
                                use_working_constraint = FALSE,
                                tolerance = 1e-6,
                                verbose = FALSE)


  expect_equal(ml_fit,ml_fit_direct,tolerance = 1e-4)
})


test_that("PL fit with categorical predictor matches analytical form of MPLE in this case,
          and does NOT match MLE when group sizes are equal", {
            set.seed(90333)
            X <- cbind(1,rep(c(0,1),each = 20))
            z <- rnorm(40) 
            J <- 10
            p <- 2
            n <- 40
            b0 <- rnorm(J)
            b1 <- seq(1,10,length.out = J)/5
            b <- rbind(b0,b1)
            Y <- matrix(NA,ncol = J, nrow = 40)
            
            for(i in 1:40){
              for(j in 1:J){
                temp_mean <- exp(X[i,,drop = FALSE]%*%b[,j,drop = FALSE] + z[i])
                Y[i,j] <- rpois(1, lambda = temp_mean)
              }
            }
            pl_fit <- emuFit_micro_penalized(X,
                                             Y,
                                             B = matrix(rnorm(20),nrow = 2),
                                             constraint_fn = rep(list(function(x) mean(x)), 2),
                                             maxit = 200,
                                             tolerance = 1e-8,
                                             verbose= FALSE)
            
            ml_fit <- emuFit_micro(X,
                                   Y,
                                   B = matrix(rnorm(20),nrow = 2),
                                   constraint_fn = rep(list(function(x) mean(x)), 2),
                                   maxit = 200,
                                   tolerance = 1e-8,
                                   verbose= FALSE)
            
            cs_grp1 <- colSums(Y[1:20,])
            cs_grp2 <- colSums(Y[21:40,])
            
            bhat1 <- log(cs_grp1 + 0.5) - log(cs_grp1[1] + 0.5)
            bhat2 <- log(cs_grp2 + 0.5) - log(cs_grp2[1] +0.5)
            bhat2 <- bhat2 - bhat1
            bhat1 <- bhat1 - mean(bhat1)
            bhat2 <- bhat2 - mean(bhat2)
            analytical_B <- rbind(bhat1,bhat2)
            
            expect_true(max(abs(pl_fit$B - analytical_B))< 1e-7)
            expect_true(max(abs(ml_fit - analytical_B))>0.01)
          })


test_that("PL fit with categorical predictor matches analytical form of MPLE in this case,
          and does NOT match MLE when group sizes are unequal", {
            set.seed(90333)
            X <- cbind(1,rep(c(0,0,0,1),each = 10))
            z <- rnorm(40) 
            J <- 10
            p <- 2
            n <- 40
            b0 <- rnorm(J)
            b1 <- seq(1,10,length.out = J)/5
            b <- rbind(b0,b1)
            Y <- matrix(NA,ncol = J, nrow = 40)
            
            for(i in 1:40){
              for(j in 1:J){
                temp_mean <- exp(X[i,,drop = FALSE]%*%b[,j,drop = FALSE] + z[i])
                Y[i,j] <- rpois(1, lambda = temp_mean)
              }
            }
            pl_fit <- emuFit_micro_penalized(X,
                                             Y,
                                             B = matrix(rnorm(20),nrow = 2),
                                             constraint_fn = rep(list(function(x) mean(x)), 2),
                                             maxit = 200,
                                             tolerance = 1e-8,
                                             verbose= FALSE)
            
            ml_fit <- emuFit_micro(X,
                                   Y,
                                   B = matrix(rnorm(20),nrow = 2),
                                   constraint_fn = rep(list(function(x) mean(x)), 2),
                                   maxit = 200,
                                   tolerance = 1e-8,
                                   verbose= FALSE)
            
            cs_grp1 <- colSums(Y[1:30,])
            cs_grp2 <- colSums(Y[31:40,])
            
            bhat1 <- log(cs_grp1 + 0.5) - log(cs_grp1[1] + 0.5)
            bhat2 <- log(cs_grp2 + 0.5) - log(cs_grp2[1] +0.5)
            bhat2 <- bhat2 - bhat1
            bhat1 <- bhat1 - mean(bhat1)
            bhat2 <- bhat2 - mean(bhat2)
            analytical_B <- rbind(bhat1,bhat2)
            
            expect_true(max(abs(pl_fit$B - analytical_B))< 1e-7)
            expect_true(max(abs(ml_fit - analytical_B))>0.01)
          })

test_that("We get same results with and without warm start", {
  set.seed(4323)
  X <- cbind(1,rep(c(0,1),each = 20))
  J <- 10
  n <- 40
  Y <- radEmu:::simulate_data(n = n,
                              J = J,
                              X = X,
                              b0 = rnorm(J),
                              b1 = 1:J,
                              distn = "Poisson",
                              mean_z = 2)

  # may need to have large number of iterations and small tolerance
  ml_fit <- emuFit_micro(X,
                         Y,
                         constraint_fn = rep(list(function(x) mean(x)), 2),
                         maxit = 1e3,
                         tolerance = 1e-14,
                         verbose = FALSE)
  
  ml_fit_direct <- emuFit_micro(X,
                                Y,
                                constraint_fn = rep(list(function(x) mean(x)), 2),
                                maxit = 1e3,
                                warm_start = FALSE,
                                tolerance = 1e-14,
                                verbose = FALSE)

  expect_equal(ml_fit, ml_fit_direct, tolerance = 1e-6)
})


test_that("We get a fit if we don't specify constraint", {
  set.seed(4323)
  X <- cbind(1,rep(c(0,1),each = 20))
  J <- 10
  n <- 40
  Y <- radEmu:::simulate_data(n = n,
                              J = J,
                              X = X,
                              b0 = rnorm(J),
                              b1 = 1:J,
                              distn = "Poisson",
                              mean_z = 2)
  
  ml_fit <- emuFit_micro(X,
                         Y,
                         maxit = 200,
                         tolerance = 1e-6,
                         verbose = FALSE)


  expect_true(is.matrix(ml_fit))
})

test_that("ML fit to simple example give reasonable output with J >> n", {
  set.seed(4323)
  n <- 10
  X <- cbind(1,rep(c(0,1),each = n/2))
  J <- 1000
  b0 <- rnorm(J)
  b1 <- seq(-5,5,length.out = J)
  b <- rbind(b0,b1)
  
  Y <- radEmu:::simulate_data(n = n,
                              J = J,
                              X = X,
                              b0 = b0,
                              b1 = b1,
                              distn = "Poisson",
                              mean_z = 10)

  ml_fit <- emuFit_micro(X,
                         Y,
                         constraint_fn = rep(list(function(x) mean(x)), 2),
                         maxit = 500,
                         tolerance = 1e-3,
                         verbose = FALSE)
  
  expect_true(max(abs(ml_fit[2,] - b1))<.5)
})

test_that("unpenalized fit uses B if given, and therefore fit is quicker", {
  set.seed(4323)
  X <- cbind(1,rnorm(10))
  J <- 10
  n <- 10
  Y <- radEmu:::simulate_data(n = n,
                              J = J,
                              X = X,
                              b0 = rnorm(J),
                              b1 = seq(1,5,length.out = J),
                              distn = "ZINB",
                              zinb_size = 2,
                              zinb_zero_prop = 0.7,
                              mean_z = 10)
  
  start <- proc.time()
  pl_fit_one <- emuFit_micro(X,
                             Y,
                             B = NULL,
                             constraint_fn = rep(list(function(x) mean(x)), 2),
                             maxit = 10000,
                             tolerance = 0.01,
                             verbose= FALSE)
  end <- proc.time() - start 
  start_refit <- proc.time() 
  pl_fit_two <- emuFit_micro(X,
                             Y,
                             B = pl_fit_one,
                             constraint_fn = rep(list(function(x) mean(x)), 2),
                             maxit = 10000,
                             tolerance = 0.01,
                             verbose= FALSE)
  end_refit <- proc.time() - start_refit 
  expect_true(end_refit[3] < end[3])
  
})

test_that("unpenalized fit converges quicker if optimize_rows is set to TRUE", {
  set.seed(4323)
  J <- 100
  n <- 100
  X <- cbind(1,rnorm(n))
  Y <- radEmu:::simulate_data(n = n,
                              J = J,
                              X = X,
                              b0 = rnorm(J),
                              b1 = seq(1,5,length.out = J),
                              distn = "Poisson",
                              # zinb_size = 2,
                              # zinb_zero_prop = 0.7,
                              mean_z = 10)
  
  start <- proc.time()
  pl_fit_one <- emuFit_micro(X,
                             Y,
                             B = NULL,
                             # constraint_fn = function(x) mean(x),
                             maxit = 10000,
                             tolerance = 1e-6,
                             verbose= FALSE,
                             optimize_rows = FALSE)
  end <- proc.time() - start 
  start_refit <- proc.time() 
  pl_fit_two <- emuFit_micro(X,
                             Y,
                             B = NULL,
                             # constraint_fn = function(x) mean(x),
                             maxit = 10000,
                             tolerance =1e-6,
                             verbose= FALSE,
                             optimize_rows = TRUE)
  end_refit <- proc.time() - start_refit 
  # confirm that new approach is faster
  expect_true(end_refit[3] < end[3])
  # confirm that two estimates are very similar
  expect_true(max(pl_fit_one - pl_fit_two) < 1e-5)
  
})

test_that("unpenalized fit converges quicker if optimize_rows is set to TRUE with large p", {
  
  skip(message = "Skipping because test is very slow with J = 100 and p = 8")
  
  set.seed(4323)
  J <- 100
  n <- 100
  X <- cbind(1,rnorm(n),rnorm(n), rep(c(0, 1, 0, 0), each = 25), 
             rep(c(0, 0, 1, 0), each = 25), rep(c(0, 0, 0, 1), each = 25), rnorm(n),
             rep(0:1, 50))
  B <- rbind(rnorm(J), seq(1, 5, length.out = J),
            rnorm(J), rnorm(J), rnorm(J), rnorm(J),
            rnorm(J), rnorm(J))
  for (k in 1:ncol(X)) {
    B[k, ] <- B[k, ] - radEmu:::pseudohuber_median(B[k, ], 0.1)
  }
  Y <- radEmu:::simulate_data(n = n,
                              J = J,
                              X = X,
                              B = B,
                              distn = "Poisson",
                              mean_z = 10)
  
  start <- proc.time()
  pl_fit_one <- emuFit_micro(X,
                             Y,
                             B = NULL,
                             maxit = 10000,
                             tolerance = 1e-6,
                             verbose= FALSE,
                             optimize_rows = FALSE)
  end <- proc.time() - start 
  start_refit <- proc.time() 
  pl_fit_two <- emuFit_micro(X,
                             Y,
                             B = NULL,
                             maxit = 10000,
                             tolerance =1e-6,
                             verbose= FALSE,
                             optimize_rows = TRUE)
  end_refit <- proc.time() - start_refit 
  expect_true(end_refit[3] < end[3])
  max_est_error1 <- max(pl_fit_one - B)
  max_est_error2 <- max(pl_fit_two - B)
  max_est_diff <- max(pl_fit_one - pl_fit_two)
  expect_true(max_est_error1 > max_est_error2)
  mean_est_error1 <- mean(sqrt((pl_fit_one - B)^2))
  mean_est_error2 <- mean(sqrt((pl_fit_two - B)^2))
  mean_est_diff <- mean(sqrt((pl_fit_one - pl_fit_two)^2))
  expect_true(mean_est_error1 > mean_est_error2)
  expect_true(mean_est_diff < 1e-3)
    
})
#
#
#

#
# test_that("ML fit to multiple regressors, large n, and excess-Poisson variance gives reasonable output", {
#   set.seed(4323)
#   n <- 800
#   X <- cbind(1,rep(c(0,1),each = n/2),rnorm(n),rnorm(n),rnorm(n))
#   z <- rnorm(n) +5
#   J <- 10
#   p <- 2
#
#   b0 <- rnorm(J)
#   b1 <- seq(1,10,length.out = J)
#   b <- rbind(b0,b1)
#   Y <- matrix(NA,ncol = J, nrow = n)
#
#   for(i in 1:n){
#     for(j in 1:J){
#       temp_mean <- exp(X[i,1:2,drop = FALSE]%*%b[,j,drop = FALSE] + z[i])
#       Y[i,j] <- rnbinom(1,mu= temp_mean,size = 0.25)#rpois(1, lambda = temp_mean)
#     }
#   }
#   ml_fit <- emuFit_micro(X,
#                          Y,
#                          constraint_fn = function(x) mean(x),
#                          maxit = 200,
#                          tolerance = 1e-2)
#
#   # plot(b1-mean(b1),ml_fit[2,])
#   # abline(a = 0,b = 1,lty =2,col = "red")
#
#   expect_true(max(abs(ml_fit[2,] - (b1 - mean(b1))))<.5)
# })
#
#
# test_that("ML fit to multiple regressors, moderate n, moderate J, and excess-Poisson variance gives reasonable output", {
#   set.seed(4323)
#   n <- 40
#   X <- cbind(1,rep(c(0,1),each = n/2),rnorm(n),rnorm(n),rnorm(n))
#   z <- rnorm(n)
#   J <- 100
#   p <- 2
#
#   b0 <- rnorm(J)
#   b1 <- seq(1,10,length.out = J)
#   b <- rbind(b0,b1)
#   Y <- matrix(NA,ncol = J, nrow = n)
#
#   for(i in 1:n){
#     for(j in 1:J){
#       temp_mean <- exp(X[i,1:2,drop = FALSE]%*%b[,j,drop = FALSE] + z[i])
#       Y[i,j] <- rnbinom(1,mu= temp_mean,size = 0.25)#rpois(1, lambda = temp_mean)
#     }
#   }
#   ml_fit <- emuFit_micro(X,
#                          Y,
#                          constraint_fn = function(x) mean(x),
#                          maxit = 200,
#                          tolerance = 1e-1,
#                          max_step = 0.5)
#
#   # plot(b1-mean(b1),ml_fit[2,])
#   # abline(a = 0,b = 1, lty = 2, col = "red")
#
#   b1_hat <- ml_fit[2,]
#   expect_true(abs(lm(b1_hat~b1)$coef[2] - 1) < 0.2)
#   # abline(a = 0,b = 1,lty =2,col = "red")
#
# })
#
# # t(ml_fit) %>% as.data.frame() %>% mutate(j = 1:J) %>%
# #   pivot_longer(-j) %>%
# #   ggplot() +
# #   geom_point(aes(x = j,y = value, color = name)) +
# #   geom_line(aes(x = j, y  = value, group = name, color = name)) +
# #   theme_bw()
#
#
#
# test_that("ML fit to simple example give reasonable output with J > n", {
#   set.seed(4323)
#   X <- cbind(1,rep(c(0,1),each = 20))
#   z <- rnorm(40) +8
#   b0 <- rnorm(80)
#   b1 <- seq(-5,5,length.out = 80)
#   b <- rbind(b0,b1)
#   Y <- matrix(NA,ncol = 80, nrow = 40)
#
#   for(i in 1:40){
#     for(j in 1:80){
#       temp_mean <- exp(X[i,,drop = FALSE]%*%b[,j,drop = FALSE] + z[i])
#       Y[i,j] <- rpois(1, lambda = temp_mean)
#     }
#   }
#   ml_fit <- emuFit_micro(X,
#                          Y,
#                          B = NULL,
#                          constraint_fn = function(x) mean(x),
#                          maxit = 500,
#                          tolerance = 0.01)
#
#
#
#
#
#   expect_true(max(abs(ml_fit[2,] - b1))<.1)
#
#   # X_cup <- X_cup_from_X(X,10)
#   # B_cup <- B_cup_from_B(ml_fit)
#
#
#
# })
#
#