# mkuhn, 2021-04-07
# testing the delay estimation,
# in particular parameter estimates, convergence etc. from the model fit object

test_that('Parameter extraction and transformation', {

  # test getDist for parameter names:
  # on transformed scale (for optimization)
  expect_identical(incubate:::getDist(distribution = "weibull", type = "param", twoPhase = FALSE, twoGroup = TRUE,
                                      bind = "shape1", profiled = FALSE, transformed = TRUE),
                   expected = c("shape1_tr", "delay1_tr.x", "scale1_tr.x", "delay1_tr.y", "scale1_tr.y"))
  expect_identical(incubate:::getDist(distribution = "weibull", type = "param", twoPhase = FALSE, twoGroup = TRUE,
                                      bind = "shape1", profiled = TRUE, transformed = TRUE),
                   expected = c("shape1_tr", "delay1_tr.x", "delay1_tr.y"))
  # original (=non-transformed) scale
  expect_identical(incubate:::getDist(distribution = "weibull", type = "param", twoPhase = FALSE, twoGroup = TRUE,
                                      bind = "shape1", profiled = FALSE, transformed = FALSE),
                   expected = c("shape1", "delay1.x", "scale1.x", "delay1.y", "scale1.y"))
  # getDist takes out profiled parameters even when for original (=non-transformed) scale
  #+ this is needed in delay_estimation (at extractParOptInd)
  expect_identical(incubate:::getDist(distribution = "weibull", type = "param", twoPhase = FALSE, twoGroup = TRUE,
                                      bind = "shape1", profiled = TRUE, transformed = FALSE),
                   expected = c("shape1", "delay1.x", "delay1.y"))

  #' Slow extract-routine based on R's name-matching capabilities as gold standard function.
  #'
  #' Extract certain parameters from a given named parameter vector. External, slow variant that parses parameter names.
  #'
  #' This routine handles parameter vectors for one or two groups. To this end, it parses the parameter names.
  #' Bound parameters (cf. `bind=`) are deduced from the naming: they lack the .x or .y suffix.
  #' it can also transform the given parameters from the standard form (as used in the delayed distribution functions)
  #' to the internal transformed parametrization as used by the optimization function, and vice versa.
  #'
  #' When parameters for a single group are requested the parameters are always returned in the canonical order of the distribution.
  #' Otherwise, the order of parameters is unchanged.
  #' It parses the parameter names to find out if it is twoPhase, twoGroup and, when `transform=TRUE`, which direction to transform.
  #'
  #' @param parV numeric named parameter vector (for single group or two groups)
  #' @param distribution character name. Exponential or Weibull
  #' @param twoPhase logical. Do we model two phases?
  #' @param group character. Extract only parameters for specified group. Recognizes 'x' and 'y'. Other values are ignored.
  #' @param isTransformed logical. Are the parameters transformed? If `NULL` (default) it is deduced from the parameter names.
  #' @param transform logical. Transform the parameters!?
  #' @return requested parameters as named numeric vector
  extractParsTest <- function(parV, distribution = c('exponential', 'weibull'), twoPhase = NULL, group = NULL, isTransformed = NULL, transform = FALSE) {

    stopifnot( is.numeric(parV), all(nzchar(names(parV))) )
    distribution <- match.arg(distribution)

    # contract: parV is canonically ordered (see below as well)
    # extractPars will not check if it needs first to reorder the parameters given by checking the names.
    pNames <- names(parV)

    # isTransformed when all parameter names contain '_tr'
    if (is.null(isTransformed)) {
      trNames <- grepl(pattern = "_tr", pNames, fixed = TRUE)
      stopifnot( sum(trNames) %in% c(0L, length(parV)) ) #all or nothing
      isTransformed <- all(trNames)
    }
    if (is.null(twoPhase)) {
      twoPhase <- any(grepl(pattern = "delay2", pNames, fixed = TRUE))
    }
    # parameter names of single group where we start from!
    oNames <- incubate:::getDist(distribution, type = "param", twoPhase = twoPhase, twoGroup = FALSE, transformed = isTransformed)

    # index of parameters that are *not* group-specific
    idx.nongrp <- which(!grepl(pattern = paste0("[.][xy]$"), pNames))
    #Is there any .x or .y parameter name?
    #+when two groups but all parameters bound then isTwoGroup is FALSE
    isTwoGroup <- length(idx.nongrp) < length(parV)


    # transform parameters if requested
    # if no transformation required, simply extract the relevant parameters
    if ( transform ){

      # transform parameter vector for a single group. Also transforms parameter names.
      # @param parV1: parameter vector for a single group
      # @return transformed parameter vector
      transformPars1Test <- function(parV1){
        # parameter transformation matrices
        PARAM_TRANSF_M <- switch(distribution,
                                 exponential = matrix(c( 1, 0, 0, 0,
                                                         0, 1, 0, 0,
                                                         -1, 0, 1, 0,
                                                         0, 0, 0, 1), nrow = 4L, byrow = TRUE,
                                                      dimnames = list(c("delay1_tr", "rate1_tr", "delay2_tr", "rate2_tr"))),
                                 weibull = matrix(c( 1, 0, 0, 0, 0, 0,
                                                     0, 1, 0, 0, 0, 0,
                                                     0, 0, 1, 0, 0, 0,
                                                     -1, 0, 0, 1, 0, 0,
                                                     0, 0, 0, 0, 1, 0,
                                                     0, 0, 0, 0, 0, 1), nrow = 6L, byrow = TRUE,
                                                  dimnames = list(paste0(c("delay1", "shape1", "scale1", "delay2", "shape2", "scale2"), "_tr"))),
                                 stop("Unknown distribution!", call. = FALSE)
        )
        PARAM_TRANSF_Minv <- switch(distribution,
                                    exponential = matrix(c(1, 0, 0, 0,
                                                           0, 1, 0, 0,
                                                           1, 0, 1, 0,
                                                           0, 0, 0, 1), nrow = 4L, byrow = TRUE,
                                                         dimnames = list(c("delay1", "rate1", "delay2", "rate2"))),
                                    weibull = matrix(c(1, 0, 0, 0, 0, 0,
                                                       0, 1, 0, 0, 0, 0,
                                                       0, 0, 1, 0, 0, 0,
                                                       1, 0, 0, 1, 0, 0,
                                                       0, 0, 0, 0, 1, 0,
                                                       0, 0, 0, 0, 0, 1), nrow = 6L, byrow = TRUE,
                                                     dimnames = list(c("delay1", "shape1", "scale1", "delay2", "shape2", "scale2"))),
                                    stop("Unknown distribution", call. = FALSE)
        )


        PARAM_TRANSF_F <- list(exponential = c(identity, log, log, log),
                               weibull = c(identity, log, #log1p, #identity, #=shape1
                                           log, log, log, log))[[distribution]]
        PARAM_TRANSF_Finv <- list(exponential = c(identity, exp, exp, exp),
                                  weibull = c(identity, exp, #expm1, #identity, #=shape1
                                              exp, exp, exp, exp))[[distribution]]

        stopifnot( length(parV1) <= NCOL(PARAM_TRANSF_M), NCOL(PARAM_TRANSF_M) == length(PARAM_TRANSF_F) )


        if (isTransformed) {
          # b = Ainv %*% Finv(b')
          rlang::set_names(
            as.numeric(PARAM_TRANSF_Minv[seq_along(parV1), seq_along(parV1)] %*%
                         as.numeric(.mapply(FUN = function(f, x) f(x),
                                            dots = list(PARAM_TRANSF_Finv[seq_along(parV1)], parV1),
                                            MoreArgs = NULL))),
            nm = rownames(PARAM_TRANSF_Minv)[seq_along(parV1)])
        } else {
          # b' = F(A %*% b)
          rlang::set_names(
            as.numeric(.mapply(FUN = function(f, x) f(x),
                               dots = list(PARAM_TRANSF_F[seq_along(parV1)],
                                           as.numeric(PARAM_TRANSF_M[seq_along(parV1), seq_along(parV1)] %*% parV1)),
                               MoreArgs = NULL)),
            nm = rownames(PARAM_TRANSF_M)[seq_along(parV1)])
        }
      }# fn transformPars1Test


      # update parameter vector according to transformation
      parV <- if (! isTwoGroup) {
        transformPars1Test(parV1 = parV)
      } else {
        # **twoGroup** case (effectively)
        # target parameter names
        pNames_trgt <- if (isTransformed) {
          sub(pattern = "_tr", replacement = "", pNames, fixed = TRUE) # remove '_tr' string
        } else {
          purrr::map_chr(.x = strsplit(pNames, split = ".", fixed = TRUE),
                         .f = function(s) if (length(s) == 1L) paste0(s, "_tr") else paste0(s[[1L]], "_tr.", s[[2L]]))
        }

        # list of transformed parameters, for both groups x and y
        h <- list(
          x = { idx.grp <- which(endsWith(x = pNames, suffix = ".x"))
          # drop group naming: last two letters
          pNms_grp <- substr(pNames[idx.grp], start = 1L, stop = nchar(pNames[idx.grp], type = "chars") - 2L)
          # apply transform on 1-group parameter vector in canonical order!
          transformPars1Test(parV1 = c(parV[idx.nongrp], rlang::set_names(parV[idx.grp], nm = pNms_grp))[intersect(oNames, c(pNames, pNms_grp))]) },
          y = { idx.grp <- which(endsWith(x = pNames, suffix = ".y"))
          # drop group naming: last two letters
          pNms_grp <- substr(pNames[idx.grp], start = 1L, stop = nchar(pNames[idx.grp], type = "chars") - 2L)
          # apply transform on 1-group parameter vector in canonical order!
          transformPars1Test(parV1 = c(parV[idx.nongrp], rlang::set_names(parV[idx.grp], nm = pNms_grp))[intersect(oNames, c(pNames, pNms_grp))]) }
        )

        # parV transformed for effective twoGroup-setting
        rlang::set_names(c(h[[1L]][pNames_trgt[idx.nongrp]],
                           h[[1L]][! names(h[[1L]]) %in% pNames_trgt[idx.nongrp]],
                           h[[2L]][! names(h[[2L]]) %in% pNames_trgt[idx.nongrp]]),
                         nm = pNames_trgt)
      }

      # reflect transformation in meta-data
      isTransformed <- ! isTransformed
      pNames <- names(parV)
      oNames <- incubate:::getDist(distribution, type = "param", twoPhase = twoPhase, twoGroup = FALSE, transformed = isTransformed)
    }# transform


    # return parameter vector
    if ( ! isTwoGroup || is.null(group) || length(group) != 1L || ! group %in% c('x', 'y') ) {
      parV
    } else {
      # single group extraction
      stopifnot( is.character(group), nzchar(group) )

      # extract all group parameters (=contain .x or .y at the end)
      #+and restore original name (=remove ".x" or ".y")
      # contract: parV is canonically ordered
      parV.gr <- parV[grepl(pattern = paste0(".", group), x = pNames, fixed = TRUE)]
      names(parV.gr) <- substr(names(parV.gr), start = 1L, stop = nchar(names(parV.gr))-2L)
      #parV.gr <- rlang::set_names(parV.gr, nm = setdiff(oNames, pNames[idx.nongrp]))

      # restore original order
      # contract: bind is intersected (only valid names, canonical order)
      c(parV.gr, parV[pNames[idx.nongrp]])[intersect(oNames, c(pNames, names(parV.gr)))]
    }
  }

  extPars_exp1 <- incubate:::objFunFactory(x = rexp_delayed(n=11, delay1 = 5, rate1 = .2), distribution = "expon", twoPhase = FALSE, profiled = FALSE) %>%
    rlang::fn_env() %>% rlang::env_get("extractPars")

  extPars_exp1P <- incubate:::objFunFactory(x = rexp_delayed(n=11, delay1 = 5, rate1 = .2), distribution = "expon", twoPhase = FALSE, profiled = TRUE) %>%
    rlang::fn_env() %>% rlang::env_get("extractPars")

  par_exp1 <- c(delay1 = 3, rate1 = .8)

  expect_identical(extPars_exp1(par_exp1, isOpt = FALSE, named = TRUE), par_exp1)
  expect_identical(extPars_exp1(par_exp1, isOpt = FALSE, named = FALSE), as.vector(par_exp1))
  expect_identical(extPars_exp1(par_exp1, isOpt = FALSE, group = "x", named = TRUE), par_exp1)
  expect_identical(extPars_exp1(par_exp1, isOpt = FALSE, group = "y", named = TRUE), par_exp1) # group= is ignored for single group!
  expect_identical(extPars_exp1(par_exp1, isOpt = FALSE, group = "z", named = TRUE), par_exp1) # group= is ignored for single group!
  expect_identical(extPars_exp1(par_exp1, isOpt = FALSE, transform = TRUE, named = TRUE), c(delay1_tr = par_exp1[[1L]], rate1_tr = log(par_exp1[[2L]])))
  expect_identical(extPars_exp1(par_exp1, isOpt = FALSE, transform = TRUE, named = FALSE), c(par_exp1[[1L]], log(par_exp1[[2L]])))

  # objective function with profiled=TRUE is different
  expect_identical(extPars_exp1P(par_exp1, isOpt = FALSE, named = TRUE), par_exp1)
  expect_identical(extPars_exp1P(par_exp1, isOpt = FALSE, transform = TRUE, named = TRUE), c(delay1_tr = par_exp1[[1L]]))
  expect_identical(extPars_exp1P(par_exp1, isOpt = FALSE, transform = TRUE, named = FALSE), par_exp1[[1L]])

  # my original (but slow) implementation
  expect_identical(extractParsTest(parV = par_exp1), par_exp1)
  expect_identical(extractParsTest(parV = par_exp1, group = "x"), par_exp1)
  expect_identical(extractParsTest(parV = par_exp1, group = "y"), par_exp1) # group= is ignored here
  expect_identical(extractParsTest(parV = par_exp1, transform = TRUE), c(delay1_tr = par_exp1[[1L]], rate1_tr = log(par_exp1[[2L]])))

  # exponential, two groups, unbound
  objFun_exp2 <- incubate:::objFunFactory(x = rexp_delayed(n=3, delay1 = 5, rate1 = .2), y = rexp_delayed(n=3, delay1 = 3, rate1 = .1), distribution = "expon", twoPhase = FALSE)
  extPars_exp2 <- rlang::env_get(rlang::fn_env(objFun_exp2), "extractPars")

  par_exp2 <- c(delay1.x = 2.8, rate1.x = .81, delay1.y = 5.1, rate1.y = 1.1)

  expect_identical(extPars_exp2(par_exp2, isOpt = FALSE, named = TRUE), par_exp2)
  expect_identical(extPars_exp2(par_exp2, isOpt = FALSE, named = TRUE, group = "z"), NULL) # strange groups gives NULL in two group setting
  expect_identical(extPars_exp2(par_exp2, isOpt = FALSE, named = TRUE, group = "x"), setNames(par_exp2[1:2], c("delay1", "rate1")))
  expect_identical(extPars_exp2(par_exp2, isOpt = FALSE, named = TRUE, group = "y"), setNames(par_exp2[3:4], c("delay1", "rate1")))

  # my original (but slow) implementation
  expect_identical(extractParsTest(parV = par_exp2), par_exp2)
  expect_identical(extractParsTest(parV = par_exp2, group = "z"), par_exp2) # strange groups are ignored with extractParsTest!
  expect_identical(extractParsTest(parV = par_exp2, group = "x"), setNames(par_exp2[1:2], c("delay1", "rate1")))
  expect_identical(extractParsTest(parV = par_exp2, group = "y"), setNames(par_exp2[3:4], c("delay1", "rate1")))

  # exponential, two groups, bound
  objFun_exp2b <- incubate:::objFunFactory(x = rexp_delayed(n=3, delay1 = 5, rate1 = .2), y = rexp_delayed(n=3, delay1 = 3, rate1 = .1),
                                           distribution = "expon", twoPhase = FALSE, bind = "rate1")
  extPars_exp2b <- rlang::env_get(rlang::fn_env(objFun_exp2b), "extractPars")

  par_exp2b <- c(rate1 = .23, delay1.x = 2.8, delay1.y = 5.1)
  expect_identical(extPars_exp2b(parV = par_exp2b, isOpt = FALSE, named = TRUE), par_exp2b)
  expect_identical(extPars_exp2b(parV = par_exp2b, isOpt = FALSE, named = TRUE, group = "x"), setNames(par_exp2b[c(2, 1)], c("delay1", "rate1")))
  expect_identical(extPars_exp2b(parV = par_exp2b, isOpt = FALSE, named = TRUE, group = "y"), setNames(par_exp2b[c(3, 1)], c("delay1", "rate1")))
  expect_identical(extPars_exp2b(parV = par_exp2b, isOpt = FALSE, named = TRUE, transform = TRUE),
                   c(rate1_tr = log(par_exp2b[["rate1"]]), delay1_tr.x = par_exp2b[[2]], delay1_tr.y = par_exp2b[[3]]))
  expect_identical(extPars_exp2b(parV = par_exp2b, isOpt = FALSE, named = TRUE, group = "x", transform = TRUE),
                   c(delay1_tr = par_exp2b[[2]], rate1_tr = log(par_exp2b[[1]])))
  # idem-potent (round-trip)
  expect_identical(extPars_exp2b(extPars_exp2b(par_exp2b, isOpt = FALSE, transform = TRUE),
                                 isOpt = TRUE, named = TRUE, transform = TRUE), par_exp2b)

  # my original (but slow) implementation
  expect_identical(extractParsTest(parV = par_exp2b), par_exp2b)
  expect_identical(extractParsTest(parV = par_exp2b, group = "x"), setNames(par_exp2b[c(2, 1)], c("delay1", "rate1")))
  expect_identical(extractParsTest(parV = par_exp2b, group = "y"), setNames(par_exp2b[c(3, 1)], c("delay1", "rate1")))
  expect_identical(extractParsTest(parV = par_exp2b, transform = TRUE),
                   c(rate1_tr = log(par_exp2b[["rate1"]]), delay1_tr.x = par_exp2b[[2]], delay1_tr.y = par_exp2b[[3]]))
  expect_identical(extractParsTest(parV = par_exp2b, group = "x", transform = TRUE),
                   c(delay1_tr = par_exp2b[[2]], rate1_tr = log(par_exp2b[[1]])))
  # idem-potent (round-trip)
  expect_identical(extractParsTest(extractParsTest(parV = par_exp2b, transform = TRUE), transform = TRUE), par_exp2b)


  # weibull
  objFun_weib1 <- incubate:::objFunFactory(x = rweib_delayed(n=3, delay1 = 5, shape1 = 1.2), distribution = "weib", twoPhase = FALSE)
  extPars_weib1 <- rlang::env_get(rlang::fn_env(objFun_weib1), "extractPars")

  par_weib1 <- c(delay1 = 5, shape1 = .8, scale1 = 1.2)
  expect_identical(extPars_weib1(par_weib1, isOpt = FALSE, named = TRUE), par_weib1)
  expect_identical(extractParsTest(par_weib1, distribution = "weibull"), par_weib1)

  # extractParsTest specific: incomplete parameter vector
  par_weib1s <- c(delay1 = 5, shape1 = .8)
  expect_identical(extPars_weib1(par_weib1s, isOpt = FALSE, named = TRUE), c(par_weib1s, scale1=NA)) #missing parameter gets named NA
  expect_identical(extractParsTest(par_weib1s, distribution = "weibull"), par_weib1s)
  expect_identical(extractParsTest(par_weib1s, distribution = "weibull", transform = TRUE),
                   c(delay1_tr = par_weib1s[["delay1"]], shape1_tr = log(par_weib1s[["shape1"]])))

  # weibull two groups
  objFun_weib2 <- incubate:::objFunFactory(x = rweib_delayed(n=3, delay1 = 5, shape1 = 1.2), y = rweib_delayed(n=4, delay1=3, shape1 = 2.8, scale1 = .9),
                                           distribution = "weib", twoPhase = FALSE)
  extPars_weib2 <- rlang::env_get(rlang::fn_env(objFun_weib2), "extractPars")

  par_weib2 <- c(delay1.x = 1, shape1.x = 1.8, scale1.x = 34, delay1.y = 4.2, shape1.y = 3.4, scale1.y = 12)
  par_weib2.y <- setNames(par_weib2[-c(1:3)], nm = c("delay1", "shape1", "scale1"))
  par_weib2.y.tr <- setNames(c(par_weib2.y[1], log(par_weib2.y[-1])), nm = paste0(names(par_weib2.y), "_tr"))

  expect_identical(extPars_weib2(par_weib2, isOpt = FALSE, named = TRUE), par_weib2)
  expect_identical(extPars_weib2(par_weib2, isOpt = FALSE, named = TRUE, group = "y"), par_weib2.y)
  expect_identical(extPars_weib2(par_weib2, isOpt = FALSE, named = TRUE, group = "y", transform = TRUE), par_weib2.y.tr)

  expect_identical(extractParsTest(par_weib2, distribution = "weibull"), par_weib2)
  expect_identical(extractParsTest(par_weib2, distribution = "weibull", group = "y"), par_weib2.y)
  expect_identical(extractParsTest(par_weib2, distribution = "weibull", group = "y", transform = TRUE), par_weib2.y.tr)

  # extractParsTest specific: incomplete parameter vector
  par_weib2s <- par_weib2[-c(3, 6)] # drop scale
  expect_identical(extractParsTest(par_weib2s, distribution = "weibull", group = "y"),
                   setNames(par_weib2s[c(3,4)], nm = c("delay1", "shape1")))

  expect_identical(extractParsTest(par_weib2s, distribution = "weibull", group = "y", transform = TRUE),
                   setNames(c(par_weib2s[3], log(par_weib2s[4])), nm = c("delay1_tr", "shape1_tr")))

})


test_that("Fit delayed Exponentials", {

  # single group ------------------------------------------------------------

  # from a call to rexp_delayed(16, delay = 9, rate = 0.5)
  exp_d9 <- c(9.3758422, 9.436878, 9.440794, 9.63324, 9.6421,
          9.76594, 9.80527, 9.907327, 10.357, 10.371,
          10.596, 10.623, 11.1074, 11.575, 11.849, 16.38)

  fd_exp <- delay_model(exp_d9, distribution = "expon", profiled = FALSE)
  coef_exp <- coef(fd_exp)

  expect_type(fd_exp$data, type = 'double')
  expect_identical(length(fd_exp$data), expected = 16L)
  expect_identical(fd_exp$method, expected = "MPSE")
  expect_named(coef(fd_exp), expected = c('delay1', 'rate1'))
  expect_named(fd_exp$optimizer, expected = c('parOpt', "valOpt", 'profiled', "methodOpt", 'convergence', 'message', 'counts', 'optim_args'))
  # optim converges properly for this data vector!
  # * convergence=51 is warning from L-BFGS-B
  # * convergence=52 is error from L-BFGS-B
  expect_identical(purrr::chuck(fd_exp, "optimizer", "valOpt"), fd_exp[["criterion"]]) # same for MSPE
  expect_identical(purrr::chuck(fd_exp, 'optimizer', 'convergence'), expected = 0L)
  expect_type(purrr::chuck(fd_exp, 'optimizer', 'optim_args'), type = 'list')

  expect_equal(coef_exp[["delay1"]], expected = 9, tolerance = .04)
  expect_equal(coef_exp[["rate1"]], expected = 0.5, tolerance = .37)

  # update does not change structure
  fd_exp_oa <- purrr::pluck(fd_exp, 'optimizer', 'optim_args')
  expect_identical(update(fd_exp, optim_args = fd_exp_oa), expected = fd_exp)
  # effect of worse start values
  fd_exp_updW <- update(fd_exp, optim_args = purrr::assign_in(fd_exp_oa, 'par', c(1, .1)))
  # we need more iterations during optimization
  expect_gt(min(fd_exp_updW$optimizer$counts), expected = min(fd_exp$optimizer$counts))
  # objective function does not reach a better (=lower) value when starting with worse start value
  # (add a little safety margin as buffer)
  expect_gte(fd_exp_updW$criterion + 1e-05, expected = fd_exp$criterion)

  # MPSE fit with profiled=TRUE
  fd_exp_P <- delay_model(exp_d9, distribution = "expon", profiled = TRUE)
  expect_equal(coef(fd_exp_P), coef_exp, tolerance = .01) # similar coefficients
  expect_named(fd_exp_P$optimizer$parOpt, expected = "delay1_tr")

  # another data set
  exp_d5 <- c(5.05133760899019, 5.21641483083995, 5.4131497041096, 5.62188922194764,
              5.75496241915971, 5.98260715969298, 6.18675520061515, 7.41160508326157,
              9.36626494375473, 10.5935673083787, 10.8441290040006)
  fd_exp1_mpseNP <- delay_model(exp_d5, method = "MPSE", profiled = FALSE)
  fd_exp1_mpseP  <- delay_model(exp_d5, method = "MPSE", profiled = TRUE)
  expect_identical(fd_exp1_mpseNP$optimizer$convergence, expected = 0L)
  expect_identical(fd_exp1_mpseP$optimizer$convergence, expected = 0L)
  expect_named(fd_exp1_mpseNP$optimizer$parOpt, expected = c("delay1_tr", "rate1_tr"))
  expect_named(fd_exp1_mpseP$optimizer$parOpt, expected = "delay1_tr")
  expect_equal(coef(fd_exp1_mpseP), expected = coef(fd_exp1_mpseNP), tolerance = .01)


  # MLE fits -----------------------------------------------------------

  # MLEn = naive MLE
  fd_exp_MLEn_NP <- delay_model(exp_d9, distribution = 'expon', method = 'MLEn')
  fd_exp_MLEn_P <- delay_model(exp_d9, distribution = 'expon', method = 'MLEn', profiled = TRUE)

  expect_type(fd_exp_MLEn_NP$data, type = 'double')
  expect_identical(length(fd_exp_MLEn_NP$data), expected = length(exp_d9))
  expect_identical(fd_exp_MLEn_NP$method, expected = "MLEn")
  expect_identical(length(coef(fd_exp_MLEn_NP)), expected = 2L)
  expect_named(coef(fd_exp_MLEn_NP), expected = c("delay1", "rate1"))
  expect_named(fd_exp_MLEn_NP$optimizer$parOpt, expected = c("delay1_tr", "rate1_tr"))

  # MLEn uses analytical solution when in single group mode
  expect_named(fd_exp_MLEn_NP$optimizer, expected = c("parOpt", "valOpt", 'profiled', "methodOpt", "convergence", "message", "counts")) ##analytic with no "optim_args"))
  expect_identical(purrr::chuck(fd_exp_MLEn_NP, 'optimizer', 'methodOpt'), expected = "analytic")
  expect_identical(purrr::chuck(fd_exp_MLEn_NP, 'optimizer', 'convergence'), expected = 0L)
  local({
    fd_exp_MLEn_NPopt <- attr(fd_exp_MLEn_NP$objFun, which = 'opt', exact = TRUE)
    expect_type(fd_exp_MLEn_NPopt, type = 'list')
    expect_named(fd_exp_MLEn_NPopt, expected = c("par_orig", 'par', 'value', "methodOpt", 'convergence', 'message', 'counts'))
  })
  # check objective function
  # objective function is for transformed parameters:
  purrr::walk2(.x = runif(n=7, min=0.1, max=9),   #delay1
               .y = runif(n=7, min=0.001, max=2), #rate1
               .f = ~ expect_equal(- length(exp_d9) * (log(.y) - .y * (mean(exp_d9) - .x)),
                                   expected = fd_exp_MLEn_NP$objFun(pars = c(delay1=.x, rate1=.y), criterion = TRUE))
  )

  expect_type(fd_exp_MLEn_P$data, type = 'double')
  expect_identical(length(fd_exp_MLEn_P$data), expected = length(exp_d9))
  expect_identical(fd_exp_MLEn_P$method, expected = "MLEn")
  expect_true(fd_exp_MLEn_P$optimizer$profiled)
  expect_identical(length(coef(fd_exp_MLEn_P)), expected = 2L)
  expect_named(coef(fd_exp_MLEn_P), expected = c("delay1", "rate1"))
  expect_named(fd_exp_MLEn_P$optimizer$parOpt, expected = "delay1_tr")


  # delay1 estimate = min(x)
  expect_identical(coef(fd_exp_MLEn_NP)[['delay1']], expected = exp_d9[[1L]])
  # MLEn's later delay is compensated for by higher estimated rate
  expect_true(all(coef(fd_exp_MLEn_NP) >  coef_exp))
  expect_equal(coef(fd_exp_MLEn_NP)[['rate1']], expected = (mean(exp_d9) - min(exp_d9))**-1)


  # MLEc
  fd_exp_MLEc_NP <- delay_model(exp_d9, distribution = 'expon', method = 'MLEc')
  fd_exp_MLEc_P <- delay_model(exp_d9, distribution = 'expon', method = 'MLEc', profiled = TRUE)

  expect_type(fd_exp_MLEc_NP$data, type = 'double')
  expect_identical(length(fd_exp_MLEc_NP$data), expected = length(exp_d9))
  expect_identical(length(coef(fd_exp_MLEc_NP)), expected = 2L)
  expect_named(coef(fd_exp_MLEc_NP), expected = c("delay1", "rate1"))
  expect_named(fd_exp_MLEc_NP$optimizer$parOpt, expected = c("delay1_tr", "rate1_tr"))

  expect_named(fd_exp_MLEc_NP$optimizer, expected = c("parOpt", "valOpt", "profiled", "methodOpt", "convergence", "message", "counts", "optim_args"))
  expect_identical(purrr::chuck(fd_exp_MLEc_NP, 'optimizer', 'convergence'), expected = 0L)
  # no exact solution for MLEc
  expect_null(attr(fd_exp_MLEc_NP$objFun, which = 'opt', exact = TRUE))

  expect_type(fd_exp_MLEc_P$data, type = 'double')
  expect_identical(length(fd_exp_MLEc_P$data), expected = length(exp_d9))
  expect_identical(length(coef(fd_exp_MLEc_P)), expected = 2L)
  expect_named(coef(fd_exp_MLEc_P), expected = c("delay1", "rate1"))
  expect_named(fd_exp_MLEc_P$optimizer$parOpt, expected = "delay1_tr")
  # profiled variant is an easier optimization
  expect_true(all(fd_exp_MLEc_P$optimizer$counts < fd_exp_MLEc_NP$optimizer$counts))
  # quite similar coefficients
  expect_equal(coef(fd_exp_MLEc_P), expected = coef(fd_exp_MLEc_NP), tolerance = .01)

  # MLEw
  fd_exp_MLEw <- delay_model(exp_d9, distribution = "expon", method = "MLEw")
  expect_identical(fd_exp_MLEw$method, expected = "MLEw")
  expect_true(fd_exp_MLEw$optimizer$profiled)
  expect_type(fd_exp_MLEw$data, type = "double")
  expect_identical(length(fd_exp_MLEw$data), expected = length(exp_d9))
  expect_identical(fd_exp_MLEw$optimizer$convergence, expected = 0L)
  expect_named(coef(fd_exp_MLEw), expected = c("delay1", "rate1"))
  expect_named(fd_exp_MLEw$optimizer$parOpt, expected = "delay1_tr")
  expect_equal(coef(fd_exp_MLEw), coef(fd_exp_MLEc_P), tolerance = .1) # similar coefficients


  # 2nd group ---------------------------------------------------------------

  set.seed(20220429)
  exp_d10 <- rexp_delayed(27L, delay1 = 10, rate1 = .9)

  fd_exp2 <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon")
  coef_exp2 <- coef(fd_exp2)

  expect_type(fd_exp2$data, type = 'list')
  # data gets sorted
  expect_identical(fd_exp2$data$x, sort(exp_d9))
  expect_identical(fd_exp2$data$y, sort(exp_d10))
  expect_named(fd_exp2$optimizer, expected = c("parOpt", "valOpt", "profiled", "methodOpt", 'convergence', 'message', 'counts', 'optim_args'))
  expect_identical(purrr::chuck(fd_exp2, 'optimizer', 'convergence'), expected = 0L)
  expect_type(purrr::chuck(fd_exp2, 'optimizer', 'optim_args'), type = 'list')
  # coefficient do not change much when adding a 2nd independent group and no binding
  expect_equal(as.numeric(coef_exp2[1:2]), expected = as.numeric(coef_exp), tolerance = .01)

  # no delay in 2nd group
  set.seed(20221124)
  exp_d0 <- rexp_delayed(13L, delay1=0, rate1=1.1)
  fd_exp2nd <- delay_model(x = exp_d9, y = exp_d0, distribution = "expon") #nd = no delay
  coef_exp2nd <- coef(fd_exp2nd)

  expect_identical(purrr::chuck(fd_exp2nd, "optimizer", "convergence"), expected = 0L)
  expect_equal(coef_exp2nd[[3]], 0) # no delay in 3rd group

  # MLE fits -----
  fd_exp2_MLEn_NP <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", method = "MLEn")
  coef_exp2_MLEn_NP <- coef(fd_exp2_MLEn_NP)
  expect_type(fd_exp2_MLEn_NP$data, type = "list")
  expect_identical(fd_exp2_MLEn_NP$data$y, sort(exp_d10))
  expect_named(fd_exp2_MLEn_NP$optimizer, expected = c("parOpt", "valOpt", "profiled", "methodOpt", 'convergence', 'message', 'counts', 'optim_args'))
  expect_identical(purrr::chuck(fd_exp2_MLEn_NP, 'optimizer', 'convergence'), expected = 0L) # converged
  expect_type(purrr::chuck(fd_exp2_MLEn_NP, 'optimizer', 'optim_args'), type = 'list')
  # coefficient do not change much when adding a 2nd independent group and no binding
  expect_equal(as.numeric(coef_exp2_MLEn_NP[1:2]), expected = as.numeric(coef(fd_exp_MLEn_NP)), tolerance = .01)
  # MLEn fits have larger (=later) delay and (to recover a higher rate)
  purrr::walk(.x = seq_along(coef(fd_exp2)), .f = ~ expect_gte(coef(fd_exp2_MLEn_NP)[.x], coef(fd_exp2)[.x]))

  fd_exp2_MLEn_P <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", method = "MLEn", profiled = TRUE)
  coef_exp2_MLEn_P <- coef(fd_exp2_MLEn_P)
  expect_type(fd_exp2_MLEn_P$data, type = "list")
  expect_identical(fd_exp2_MLEn_P$data$y, sort(exp_d10))
  expect_named(fd_exp2_MLEn_P$optimizer, expected = c("parOpt", "valOpt", "profiled", "methodOpt", 'convergence', 'message', 'counts', 'optim_args'))
  expect_identical(purrr::chuck(fd_exp2_MLEn_P, 'optimizer', 'convergence'), expected = 0L) # converged
  expect_type(purrr::chuck(fd_exp2_MLEn_P, 'optimizer', 'optim_args'), type = 'list')
  expect_equal(coef_exp2_MLEn_P, expected = coef_exp2_MLEn_NP, tolerance = .01) # parameters are quite similar

  fd_exp2_MLEc_NP <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", method = "MLEc")
  expect_type(fd_exp2_MLEc_NP$data, type = "list")
  expect_identical(fd_exp2_MLEc_NP$data$y, sort(exp_d10))
  expect_named(fd_exp2_MLEc_NP$optimizer, expected = c("parOpt", "valOpt", "profiled", "methodOpt", 'convergence', 'message', 'counts', 'optim_args'))
  expect_identical(purrr::chuck(fd_exp2_MLEc_NP, 'optimizer', 'convergence'), expected = 0L) # converged
  expect_type(purrr::chuck(fd_exp2_MLEc_NP, 'optimizer', 'optim_args'), type = 'list')
  # coefficient do not change much when adding a 2nd independent group and no binding
  expect_equal(as.numeric(coef(fd_exp2_MLEc_NP)[1:2]), expected = as.numeric(coef(fd_exp_MLEc_NP)), tolerance = .01)

  fd_exp2_MLEc_P <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", method = "MLEc", profiled = TRUE)
  expect_type(fd_exp2_MLEc_P$data, type = "list")
  expect_identical(fd_exp2_MLEc_P$data$y, sort(exp_d10))
  expect_named(fd_exp2_MLEc_P$optimizer, expected = c("parOpt", "valOpt", "profiled", "methodOpt", 'convergence', 'message', 'counts', 'optim_args'))
  expect_identical(purrr::chuck(fd_exp2_MLEc_P, 'optimizer', 'convergence'), expected = 0L) # converged
  expect_type(purrr::chuck(fd_exp2_MLEc_P, 'optimizer', 'optim_args'), type = 'list')
  expect_equal(coef(fd_exp2_MLEc_P), expected = coef(fd_exp2_MLEc_NP), tolerance = .01)


  # bind delay
  fd_exp2b_NP <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", bind = "delay1")
  coef_exp2b_NP <- coef(fd_exp2b_NP)

  expect_named(coef_exp2b_NP, expected = c("delay1", "rate1.x", "rate1.y"))
  expect_named(fd_exp2b_NP$optimizer, expected = c("parOpt", "valOpt", "profiled", "methodOpt", 'convergence', 'message', 'counts', 'optim_args'))
  expect_identical(purrr::chuck(fd_exp2b_NP, 'optimizer', 'convergence'), expected = 0L)
  # the bound delay is near the minimum of the two delay estimates from the individual group fits
  expect_equal(coef_exp2b_NP[[1L]],
               expected = min(coef_exp2[grepl(pattern = "delay1", names(coef_exp2), fixed = TRUE)]),
               tolerance = .01)

  fd_exp2b_P <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", bind = "delay1", profiled = TRUE)
  coef_exp2b_P <- coef(fd_exp2b_P)
  expect_named(coef_exp2b_P, expected = c("delay1", "rate1.x", "rate1.y"))
  expect_identical(purrr::chuck(fd_exp2b_P, 'optimizer', 'convergence'), expected = 0L)
  expect_equal(coef_exp2b_P, expected = coef_exp2b_NP, tolerance = .05) # profiled=T/F: parameters are similar

  # bind delay with MLEn
  fd_exp2b_MLEn_NP <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", bind = "delay1", method = "MLEn")
  coef_exp2b_MLEn_NP <- coef(fd_exp2b_MLEn_NP)

  expect_named(coef_exp2b_MLEn_NP, expected = c("delay1", "rate1.x", "rate1.y"))
  expect_named(fd_exp2b_MLEn_NP$optimizer, expected = c("parOpt", "valOpt", "profiled", "methodOpt", 'convergence', 'message', 'counts', 'optim_args'))
  expect_identical(purrr::chuck(fd_exp2b_MLEn_NP, 'optimizer', 'convergence'), expected = 0L)
  # the bound delay is near the minimum of the two delay estimates from the individual group fits
  expect_equal(coef_exp2b_MLEn_NP[[1L]],
               expected = min(coef(fd_exp2_MLEn_NP)[grepl(pattern = "delay1", names(coef(fd_exp2_MLEn_NP)), fixed = TRUE)]),
               tolerance = .01)

  fd_exp2b_MLEn_P <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", bind = "delay1", method = "MLEn", profiled = TRUE)
  coef_exp2b_MLEn_P <- coef(fd_exp2b_MLEn_P)
  expect_named(coef_exp2b_MLEn_P, expected = c("delay1", "rate1.x", "rate1.y"))
  expect_named(fd_exp2b_MLEn_P$optimizer, expected = c("parOpt", "valOpt", "profiled", "methodOpt", 'convergence', 'message', 'counts', 'optim_args'))
  expect_identical(purrr::chuck(fd_exp2b_MLEn_P, 'optimizer', 'convergence'), expected = 0L)
  expect_equal(coef_exp2b_MLEn_P, expected = coef_exp2b_MLEn_NP, tolerance = 1e-5) # profiled=T/F: similar coefficients

  # bind delay with MLEc
  fd_exp2b_MLEc_NP <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", bind = "delay1", method = "MLEc")
  coef_exp2b_MLEc_NP <- coef(fd_exp2b_MLEc_NP)

  expect_named(coef_exp2b_MLEc_NP, expected = c("delay1", "rate1.x", "rate1.y"))
  expect_named(fd_exp2b_MLEc_NP$optimizer, expected = c("parOpt", "valOpt", "profiled", "methodOpt", 'convergence', 'message', 'counts', 'optim_args'))
  expect_identical(purrr::chuck(fd_exp2b_MLEc_NP, 'optimizer', 'convergence'), expected = 0L)
  # the bound delay is near the minimum of the two delay estimates from the individual group fits
  expect_equal(coef_exp2b_MLEc_NP[[1L]],
               expected = min(coef(fd_exp2_MLEc_NP)[grepl(pattern = "delay1", names(coef(fd_exp2_MLEc_NP)), fixed = TRUE)]),
               tolerance = .01)

  fd_exp2b_MLEc_P <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", bind = "delay1", method = "MLEc", profiled = TRUE)
  coef_exp2b_MLEc_P <- coef(fd_exp2b_MLEc_P)

  expect_named(coef_exp2b_MLEc_P, expected = c("delay1", "rate1.x", "rate1.y"))
  expect_named(fd_exp2b_MLEc_P$optimizer, expected = c("parOpt", "valOpt", "profiled", "methodOpt", 'convergence', 'message', 'counts', 'optim_args'))
  expect_identical(purrr::chuck(fd_exp2b_MLEc_P, 'optimizer', 'convergence'), expected = 0L)
  expect_equal(coef_exp2b_MLEc_P, expected = coef_exp2b_MLEc_NP, tolerance = .005) # profiled=T/F: similar coefficients


  # bind delay + rate
  fd_exp2c_NP <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", bind = c("delay1", "rate1"))
  coef_exp2c_NP <- coef(fd_exp2c_NP)
  expect_named(coef_exp2c_NP, expected = c("delay1", "rate1"))

  # bind: order of parameters does not matter
  fd_exp2c_NP2 <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", bind = c("rate1", "delay1"))
  expect_identical(coef(fd_exp2c_NP2), expected = coef(fd_exp2c_NP))
  expect_identical(fd_exp2c_NP2$optimizer$convergence, expected = 0L)
  expect_identical(fd_exp2c_NP2$bind, fd_exp2c_NP$bind)

  # profiling and full bind throws a warning:
  #+if rate1 is to be profiled out but later inferred from the delay1-estimate and the observations per group it will not be the same
  fd_exp2c_P <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", bind = c("delay1", "rate1"), profiled = TRUE)
  coef_exp2c_P <- coef(fd_exp2c_P)
  expect_identical(fd_exp2c_P$optimizer$convergence, expected = 0L)
  expect_equal(coef_exp2c_P, coef_exp2c_NP, tolerance = .03) # parameters are similar (but because of bind= & profiled rate1 is simply averaged)

  # data combined
  fd_exp2comb_NP <- delay_model(x = c(exp_d9, exp_d10), distribution = "expon")
  coef_exp2comb_NP <- coef(fd_exp2comb_NP)

  fd_exp2comb_P <- delay_model(x = c(exp_d9, exp_d10), distribution = "expon", profiled = TRUE)
  coef_exp2comb_P <- coef(fd_exp2comb_P)

  expect_named(coef_exp2comb_NP, expected = c("delay1", "rate1"))
  expect_identical(length(coef_exp2comb_NP), length(coef_exp2c_NP))
  # similar coefficients (but the criterion is not identical, weighted mean for both groups vs overall mean)
  expect_equal(coef_exp2c_NP[[1L]], coef_exp2comb_NP[[1L]], tolerance = .01)
  expect_equal(coef_exp2c_NP[[2L]], coef_exp2comb_NP[[2L]], tolerance = .04)

  expect_named(coef_exp2comb_P, expected = c("delay1", "rate1"))
  expect_equal(coef_exp2comb_P, coef_exp2comb_NP, tolerance = .01)

  # bind delay + rate for MLE
  fd_exp2c_MLEn <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", bind = c("delay1", "rate1"), method = "MLEn")
  coef_exp2c_MLEn <- coef(fd_exp2c_MLEn)
  fd_exp2comb_MLEn <- delay_model(x = c(exp_d9, exp_d10), distribution = "expon", method = "MLEn")
  coef_exp2comb_MLEn <- coef(fd_exp2comb_MLEn)

  expect_named(coef_exp2c_MLEn, expected = c("delay1","rate1"))
  expect_identical(length(coef_exp2comb_MLEn), length(coef_exp2c_MLEn))
  purrr::walk(seq_along(coef_exp2c_MLEn), ~expect_equal(coef_exp2c_MLEn[[.x]], coef_exp2comb_MLEn[[.x]], tolerance = .05))

  # MLEc
  fd_exp2c_MLEc <- delay_model(x = exp_d9, y = exp_d10, distribution = "expon", bind = c("delay1", "rate1"), method = "MLEc")
  coef_exp2c_MLEc <- coef(fd_exp2c_MLEc)
  fd_exp2comb_MLEc <- delay_model(x = c(exp_d9, exp_d10), distribution = "expon", method = "MLEc")
  coef_exp2comb_MLEc <- coef(fd_exp2comb_MLEc)

  expect_named(coef_exp2c_MLEc, expected = c("delay1","rate1"))
  expect_identical(length(coef_exp2comb_MLEc), length(coef_exp2c_MLEc))
  purrr::walk(seq_along(coef_exp2c_MLEc), ~expect_equal(coef_exp2c_MLEc[[.x]], coef_exp2comb_MLEc[[.x]], tolerance = .05))

})


test_that("Fit delayed Weibull", {

  # single group ----

  # susquehanna is an example dataset within incubate
  fd_maxFl <- delay_model(susquehanna, distribution = "weib")
  coef_maxFl <- coef(fd_maxFl)

  expect_identical(purrr::chuck(fd_maxFl, 'optimizer', 'convergence'), expected = 0L)
  expect_lte(fd_maxFl$criterion, expected = 3.0987)
  expect_equal(coef_maxFl, expected = c(delay1=0.244, shape1=1.310, scale1=.202), tolerance = .005)

  # MLE-based fits to susquehanna --
  fd_maxFl_MLEn_NP <- delay_model(susquehanna, distribution = "weib", method = "MLEn")
  coef_maxFl_MLEn <- coef(fd_maxFl_MLEn_NP)
  expect_false(fd_maxFl_MLEn_NP$optimizer$profiled)
  expect_identical(purrr::chuck(fd_maxFl_MLEn_NP, "optimizer", "convergence"), expected = 0L)
  expect_named(fd_maxFl_MLEn_NP$optimizer, expected = c("parOpt", "valOpt", "profiled", "methodOpt", 'convergence', 'message', 'counts', 'optim_args'))
  expect_gte(coef_maxFl_MLEn[["delay1"]], coef_maxFl[["delay1"]])
  expect_lte(coef_maxFl_MLEn[["scale1"]], coef_maxFl[["scale1"]])

  # check the objective function
  purrr::pwalk(.l = list(delay1=runif(7, max=0.25),
                         shape1=runif(7, max=3),
                         scale1=runif(7, max=5)),
               .f = ~ {
                 xc <- susquehanna - ..1
                 expect_equal(fd_maxFl_MLEn_NP$objFun(c(delay1=..1, shape1=..2, scale1=..3), criterion = TRUE),
                              expected = -length(susquehanna) * (log(..2) - ..2 * log(..3) + (..2 - 1L) * mean(log(xc)) - mean(xc**..2)/..3**..2))
               })

  fd_maxFl_MLEn_P <- delay_model(susquehanna, distribution = "weibu", method = "MLEn", profiled = TRUE)
  expect_true(fd_maxFl_MLEn_P$optimizer$profiled)
  expect_identical(fd_maxFl_MLEn_P$optimizer$convergence, expected = 0L)
  expect_named(coef(fd_maxFl_MLEn_P), expected = c("delay1", "shape1", "scale1"))
  expect_equal(coef(fd_maxFl_MLEn_P), expected = coef_maxFl_MLEn, tolerance = .001)
  expect_equal(fd_maxFl_MLEn_P$objFun(pars = fd_maxFl_MLEn_P$par, criterion = TRUE), fd_maxFl_MLEn_NP$criterion, tolerance = .001)

  # MLEn profiling by hand, using the indirect criterion of min(f')
  llProfObjFun_ind <- function(theta){
    stopifnot( is.numeric(theta), length(theta) == 2L )
    a <- theta[[1L]]
    k <- theta[[2L]]
    (1/k + mean(log(susquehanna-a)) - sum(log(susquehanna - a) * (susquehanna - a)**k)/sum((susquehanna-a)**k))^2 +
      # first factor inverse of harmonic mean of (susquehanna - a)
      (mean(1/(susquehanna-a)) * sum((susquehanna-a)**k)/sum((susquehanna-a)**(k-1)) - k/(k-1))^2
  }

  # the indirect objective function variant (using min(f')) is indeed close to 0 at the fit for the coefficients
  expect_equal(llProfObjFun_ind(coef(fd_maxFl_MLEn_P)[1:2]), expected = 0, tolerance = .01)

  # manual optimization, using the indirect objective function
  opt_maxFl_MLEn_Pman <- optim(par = c(a=0.255, k=1.8), #c(a=0.165, k=exp(1.3847)), #c(a=0.25, k=1.5),
                               fn = llProfObjFun_ind, method = "L-BFGS-B",
                      lower = c(0, 1 + 1.49e-8), upper = c(min(susquehanna)-1.49e-8, +Inf))
  coef_maxFl_MLEnp_man <- purrr::set_names(c(opt_maxFl_MLEn_Pman$par, mean((susquehanna-opt_maxFl_MLEn_Pman$par["a"])^opt_maxFl_MLEn_Pman$par["k"])^(1/opt_maxFl_MLEn_Pman$par["k"])),
                                  nm = c("delay1", "shape1", "scale1"))
  # estimates are only **roughly** equal
  expect_equal(coef_maxFl_MLEnp_man, coef(fd_maxFl_MLEn_P), tolerance = .25)

  fd_maxFl_MLEw <- delay_model(x = susquehanna, distribution = "weib", method = "MLEw")
  expect_identical(fd_maxFl_MLEw$optimizer$convergence, expected = 0L)
  expect_named(coef(fd_maxFl_MLEw), expected = c("delay1", "shape1", "scale1"))
  expect_gte(fd_maxFl_MLEw$criterion, fd_maxFl_MLEn_NP$criterion) #MLEn directly optimizes the criterion

  #llProfObjFun(coef_maxFl_MLEnp_man[1:2])
  #llProfObjFun(coef(fd_maxFl_MLEnp)[1:2])

  # # visualize the optimization function landscape
  # objFunLS_maxFl_MLEnp <- tidyr::expand_grid(a = seq.int(0, .26, length.out = 17),
  #                                            k = seq.int(1.3, 3, length.out = 17)) %>%
  #   #head %>%
  #   rowwise() %>%
  #   dplyr::mutate(ll = llProfObjFun(theta = c(a,k))) %>%
  #   ungroup()
  #
  # ggplot(objFunLS_maxFl_MLEnp, aes(x = a, y = k, z = log(ll+.1), colour = after_stat(level))) +
  #   geom_contour() +
  #   scale_colour_distiller(palette = "YlGn", direction = -1)


  # pollution is an example dataset within incubate
  fd_poll <- delay_model(pollution, distribution = "weib")
  objFun_poll <- fd_poll$objFun
  coef_poll <- coef(fd_poll)

  expect_identical(purrr::chuck(fd_poll, 'optimizer', 'convergence'), expected = 0L)
  expect_identical(length(coef_poll), expected = 3L)
  expect_lt(objFun_poll(coef_poll, criterion = TRUE), expected = 3.75)
  expect_equal(coef_poll, expected = c(delay1=1085, shape1=0.95, scale1=6562), tolerance = .001)

  # MLE-based fits for pollution
  fd_poll_MLEn <- delay_model(pollution, distribution = "weibu", method = "MLEn", profile = FALSE)
  coef_poll_MLEn <- coef(fd_poll_MLEn)
  expect_identical(fd_poll_MLEn$optimizer$convergence, expected = 0L)
  expect_named(coef_poll_MLEn, expected = c("delay1", "shape1", "scale1"))
  # naive MLE gives later delay estimates than MPSE, close to minimal observation
  expect_gt(coef_poll_MLEn[["delay1"]], coef_poll[["delay1"]])
  expect_equal(coef_poll_MLEn[["delay1"]], expected = min(pollution), tolerance = 1e-4)
  # MLEn allows for shape estimates k below 1
  expect_lt(coef_poll_MLEn[["shape1"]], expected = 1)

  fd_poll_MLEnp <- delay_model(pollution, distribution = "weibu", method = "MLEn", profile = TRUE)
  coef_poll_MLEnp <- coef(fd_poll_MLEnp)
  expect_identical(fd_poll_MLEnp$optimizer$convergence, expected = 0L)
  expect_named(coef_poll_MLEnp, expected = c("delay1", "shape1", "scale1"))
  # MLEn and MLEnp give very similar parameter estimates
  expect_equal(coef_poll_MLEn, expected = coef_poll_MLEnp, tolerance = 1e-4)
  # profiled MLEn allows for shape estimates k below 1
  expect_lt(coef_poll_MLEnp[["shape1"]], expected = 1)

  fd_poll_MLEw <- delay_model(pollution, distribution = "weibu", method = "MLEw", profile = TRUE)
  expect_identical(fd_poll_MLEw$optimizer$convergence, expected = 0L)
  expect_true(fd_poll_MLEw$optimizer$profiled)
  expect_named(coef(fd_poll_MLEw), expected = c("delay1", "shape1", "scale1"))


  # Cousineau's numerical example, taken from Weibull with delay = 300, shape k = 2 and scale = 100
  x <- c(310, 342, 353, 365, 383, 393, 403, 412, 451, 456)
  densFun_wb <- incubate:::getDist(distribution = "weib", type = "density")

  fd_wbc_mlen <- delay_model(x = x, distribution = "weib", method = "MLEn")
  expect_equal(coef(fd_wbc_mlen), expected = c(delay1 = 274.8, shape1 = 2.80, scale1 = 126.0), tolerance = .001)
  fd_wbc_mlenp <- delay_model(x = x, distribution = "weib", method = "MLEn", profile = TRUE)
  expect_equal(coef(fd_wbc_mlenp), expected = c(delay1=280.9, shape1=2.62, scale1=119.0), tolerance = .05)
  # our implementation finds a smaller objective value (which we minimize)
  expect_lte(fd_wbc_mlenp$optimizer$valOpt, fd_wbc_mlenp$objFun(c(delay1=280.9, shape1=log(2.62))))
  # but log-likelihood is in fact quite similar (actually, Cousineau's solution is slightly worse)
  expect_equal(fd_wbc_mlenp$criterion, fd_wbc_mlenp$objFun(pars = c(delay1 = 280.9, shape1=2.62, scale1=119.0), criterion = TRUE), tolerance = .01)
  expect_equal(fd_wbc_mlenp$criterion, -sum(densFun_wb(x, delay1 = 280.9, shape1=2.62, scale1=119.0, log = TRUE)), tolerance = .01)
  fd_wbc_mlewp <- delay_model(x = x, distribution = "weib", method = "MLEw", profile = TRUE)
  expect_true(fd_wbc_mlewp$optimizer$profiled)
  expect_equal(coef(fd_wbc_mlewp), c(delay1=283.7, shape1=2.29, scale1=116.0), tolerance = .04)
  # our implementation finds a smaller objective value.
  expect_lte(fd_wbc_mlewp$optimizer$valOpt, fd_wbc_mlewp$objFun(c(delay1=283.7, shape1=log(2.29))))
  # but log-likelihood is in fact quite similar (actually, Cousineau's solution is slightly better)
  expect_equal(fd_wbc_mlewp$criterion, -sum(densFun_wb(x, delay1=283.7, shape1=2.29, scale1=116.0, log = TRUE)), tolerance = .01)


  # two groups -----

  fd_wb2 <- incubate::delay_model(x = susquehanna, y = pollution, distribution = "weib")
  coef_wb2 <- coef(fd_wb2)

  expect_identical(purrr::chuck(fd_wb2, 'optimizer', 'convergence'), expected = 0L)
  expect_identical(length(coef_wb2), expected = 2L*3L)
  expect_named(coef_wb2, expected = c("delay1.x", "shape1.x", "scale1.x", "delay1.y", "shape1.y", "scale1.y"))
  expect_equal(as.numeric(coef_wb2[1:3]), expected = as.numeric(coef_maxFl), tolerance = .001)
  # there might occur quite some differences in the coefficients
  expect_equal(as.numeric(coef_wb2[4:6]), expected = as.numeric(coef_poll), tolerance = .25)

  # MLE based fits
  fd_wb2_MLEn_NP <- incubate::delay_model(x = susquehanna, y = pollution, distribution = "weib", method = "MLEn")
  expect_identical(names(coef(fd_wb2_MLEn_NP)), expected = names(coef(fd_wb2)))
  expect_equal(coef(fd_wb2_MLEn_NP), expected = coef(fd_wb2), tolerance = .3)
  # MLEn has later delay estimates
  expect_gt(coef(fd_wb2_MLEn_NP)[["delay1.x"]], coef(fd_wb2)[["delay1.x"]])
  expect_gt(coef(fd_wb2_MLEn_NP)[["delay1.y"]], coef(fd_wb2)[["delay1.y"]])

  fd_wb2_MLEn_P <- delay_model(x = susquehanna, y = pollution, distribution = "weib", method = "MLEn", profile = TRUE)
  expect_identical(fd_wb2_MLEn_P$optimizer$convergence, expected = 0L)
  # roughly equal coefficients as compared to single fits
  expect_equal(coef(fd_wb2_MLEn_P, group = "x"), expected = coef(fd_maxFl_MLEn_P), tolerance = .2)
  expect_equal(coef(fd_wb2_MLEn_P, group = "y"), expected = coef(fd_poll_MLEnp), tolerance = .01)

  # MLEc
  fd_wb2_MLEc_NP <- delay_model(x = susquehanna, y = pollution, distribution = "weib", method = "MLEc")
  expect_named(coef(fd_wb2_MLEc_NP), expected = names(coef(fd_wb2)))
  expect_false(fd_wb2_MLEc_NP$optimizer$profiled)
  expect_equal(coef(fd_wb2_MLEc_NP), expected = coef(fd_wb2), tolerance = .3)
  expect_gt(coef(fd_wb2_MLEc_NP)[["delay1.x"]], coef(fd_wb2)[["delay1.x"]])
  expect_gt(coef(fd_wb2_MLEc_NP)[["delay1.y"]], coef(fd_wb2)[["delay1.y"]])

  fd_wb2_MLEc_P <- delay_model(x = susquehanna, y = pollution, distribution = "weib", method = "MLEc", profiled = TRUE)
  expect_true(fd_wb2_MLEc_P$optimizer$profiled)
  expect_identical(fd_wb2_MLEc_P$optimizer$convergence, expected = 0L)
  # MLEc profiling has very little impact on coefficient estimates
  expect_equal(coef(fd_wb2_MLEc_P), expected = coef(fd_wb2_MLEc_NP), tolerance = .01)

  # MLEw with two groups
  fd_wb2_MLEw_P <- incubate::delay_model(x = susquehanna, y = pollution, distribution = "weib", method = "MLEw", profile = TRUE)
  expect_identical(names(coef(fd_wb2_MLEw_P)), expected = names(coef(fd_wb2)))
  #expect_equal(coef(fd_wb2_MLEw_P, group = "x"), expected = coef(fd_maxFl_MLEw), tolerance = .01)
  expect_equal(coef(fd_wb2_MLEw_P, group = "y"), expected = coef(fd_poll_MLEw), tolerance = .01)


  # two groups with binding
  set.seed(20210430)
  fd_wb2b <- delay_model(x = rweib_delayed(n=37, delay1 = 7, shape1 = 1.8, scale1 = 3),
                         y = rweib_delayed(n=51, delay1 = 5, shape1 = 1.2, scale1 = 1.5),
                         distribution = "weib", bind = "delay1")
  coef_wb2b <- coef(fd_wb2b)

  expect_identical(purrr::chuck(fd_wb2b, 'optimizer', 'convergence'), expected = 0L)
  #delay is bound
  expect_identical(length(coef_wb2b), expected = 2L*3L-1L)
  expect_named(coef_wb2b, c("delay1", "shape1.x", "scale1.x", "shape1.y", "scale1.y"))
  # expect a delay close to the minimum of the two true delay parameters
  expect_equal(coef_wb2b[[1L]], expected = 5, tolerance = .02)

  fd_wb2b_MLEn_NP <- delay_model(x = fd_wb2b$data$x, y = fd_wb2b$data$y, distribution = "weib", method = "MLEn", bind = "delay1")
  expect_identical(fd_wb2b_MLEn_NP$optimizer$convergence, expected = 0L)
  expect_named(coef(fd_wb2b_MLEn_NP), c("delay1", "shape1.x", "scale1.x", "shape1.y", "scale1.y"))
  expect_equal(coef(fd_wb2b_MLEn_NP), coef_wb2b, tolerance = .03)

  fd_wb2b_MLEn_P <- delay_model(x = fd_wb2b$data$x, y = fd_wb2b$data$y, distribution = "weib", method = "MLEn", profile = TRUE, bind = "delay1")
  expect_identical(fd_wb2b_MLEn_P$optimizer$convergence, expected = 0L)
  expect_equal(coef(fd_wb2b_MLEn_P), coef(fd_wb2b_MLEn_NP), tolerance = .01) # profiling has little effect on coefficients

  fd_wb2b_MLEc_NP <- delay_model(x = fd_wb2b$data$x, y = fd_wb2b$data$y, distribution = "weib", method = "MLEc", profile = FALSE, bind = "delay1")
  expect_identical(fd_wb2b_MLEc_NP$optimizer$convergence, expected = 0L)
  fd_wb2b_MLEc_P <- delay_model(x = fd_wb2b$data$x, y = fd_wb2b$data$y, distribution = "weib", method = "MLEc", profile = TRUE, bind = "delay1")
  expect_identical(fd_wb2b_MLEc_P$optimizer$convergence, expected = 0L)
  expect_named(coef(fd_wb2b_MLEc_P), c("delay1", "shape1.x", "scale1.x", "shape1.y", "scale1.y"))
  expect_equal(coef(fd_wb2b_MLEc_P), coef(fd_wb2b_MLEc_NP), tolerance = .001) # profiling has hardly an effect on the coefficients

  fd_wb2b_MLEw_P <- delay_model(x = fd_wb2b$data$x, y = fd_wb2b$data$y, distribution = "weib", method = "MLEw", profile = TRUE, bind = "delay1")
  expect_identical(fd_wb2b_MLEw_P$optimizer$convergence, expected = 0L)

  # bind shape
  fd_wb2bb <- delay_model(x = rweib_delayed(n=37, delay1 = 7, shape1 = 1.8, scale1 = 3),
                          y = rweib_delayed(n=51, delay1 = 5, shape1 = 1.5, scale1 = 1.5),
                          distribution = "weib", bind = "shape1")
  coef_wb2bb <- coef(fd_wb2bb)

  expect_identical(fd_wb2bb$optimizer$convergence, expected = 0L)
  expect_identical(length(coef_wb2bb), expected = 2L*3L-1L)
  expect_identical(names(coef_wb2bb), c("shape1", "delay1.x", "scale1.x", "delay1.y", "scale1.y"))

  fd_wb2bb_MLEn_NP <- delay_model(x = fd_wb2bb$data$x, y = fd_wb2bb$data$y, distribution = "weib", method = "MLEn", bind = "shape1")
  expect_identical(fd_wb2bb_MLEn_NP$optimizer$convergence, expected = 0L)
  expect_identical(names(coef(fd_wb2bb_MLEn_NP)), c("shape1", "delay1.x", "scale1.x", "delay1.y", "scale1.y"))
  expect_equal(coef(fd_wb2bb_MLEn_NP), coef_wb2bb, tolerance = .05)

  fd_wb2bb_MLEn_P <- delay_model(x = fd_wb2bb$data$x, y = fd_wb2bb$data$y, distribution = "weib", method = "MLEn", profile = TRUE, bind = "shape1")
  expect_identical(fd_wb2bb_MLEn_P$optimizer$convergence, expected = 0L)
  expect_true(fd_wb2bb_MLEn_P$optimizer$profiled)
  expect_identical(names(coef(fd_wb2bb_MLEn_P)), c("shape1", "delay1.x", "scale1.x", "delay1.y", "scale1.y"))
  expect_equal(coef(fd_wb2bb_MLEn_P), coef(fd_wb2bb_MLEn_NP), tolerance = .01) # profiling has hardly an effect

  fd_wb2bb_MLEc_NP <- delay_model(x = fd_wb2bb$data$x, y = fd_wb2bb$data$y, distribution = "weib", method = "MLEc", profile = FALSE, bind = "shape1")
  expect_identical(fd_wb2bb_MLEc_NP$optimizer$convergence, expected = 0L)
  expect_identical(names(coef(fd_wb2bb_MLEc_NP)), c("shape1", "delay1.x", "scale1.x", "delay1.y", "scale1.y"))
  fd_wb2bb_MLEc_P <- delay_model(x = fd_wb2bb$data$x, y = fd_wb2bb$data$y, distribution = "weib", method = "MLEc", profile = TRUE, bind = "shape1")
  expect_identical(fd_wb2bb_MLEc_P$optimizer$convergence, expected = 0L)
  expect_equal(coef(fd_wb2bb_MLEc_P), coef(fd_wb2bb_MLEc_NP), tolerance = .001) # profiling has hardly an effect on the coefficients

  fd_wb2bb_MLEw_P <- delay_model(x = fd_wb2bb$data$x, y = fd_wb2bb$data$y, distribution = "weib", method = "MLEw", profile = TRUE, bind = "shape1")
  expect_identical(fd_wb2bb_MLEw_P$optimizer$convergence, expected = 0L)
  expect_identical(names(coef(fd_wb2bb_MLEw_P)), c("shape1", "delay1.x", "scale1.x", "delay1.y", "scale1.y"))
  expect_equal(coef(fd_wb2bb_MLEw_P), coef(fd_wb2bb_MLEc_P), tolerance = .1) # MLEw and MLEc coefficients are only rougly similar
})


test_that('Confidence intervals', {
  testthat::skip_on_cran()
  set.seed(1234)

  obs_sim <- rexp_delayed(n = 29L, delay = 5, rate = .3)

  fm <- delay_model(x = obs_sim, distribution = 'expon')

  # use default smd_factor
  bs_data <- incubate:::bsDataStep(fm, R = 1199, useBoot = FALSE)
  # set up identical bs_data from boot-package
  bs_data_bt <- incubate:::bsDataStep(fm, R = 3, useBoot = TRUE)
  bs_data_bt$R <- NCOL(bs_data)
  bs_data_bt$t <- t(bs_data)

  # agreement of own implementation and boot-implementation
  purrr::walk(.x = c('normal', 'lognormal', 'quantile', 'logquantile', 'quantile0'),
              .f = ~ {
                ci_own <- confint(fm, bs_data = bs_data, bs_infer = .x)
                ci_boot <- confint(fm, bs_data = bs_data_bt, bs_infer = .x, useBoot = TRUE)
                expect_equal(ci_own, ci_boot, tolerance = 1e-3)})

})