test_that("check calib_mlr output", {

  ## Reduce to 150 individuals
  # Extract the predicted transition probabilities out of state j = 1 for first 150 individuals
  tp.pred <- tp.cmprsk.j0 |>
    dplyr::filter(id %in% 1:150) |>
    dplyr::select(any_of(paste("pstate", 1:6, sep = "")))
  # Reduce ebmtcal to first 150 individuals
  ebmtcal <- ebmtcal |> dplyr::filter(id %in% 1:150)
  # Reduce msebmtcal.cmprsk to first 150 individuals
  msebmtcal <- msebmtcal.cmprsk |> dplyr::filter(id %in% 1:150)

  ## Expect error if generate with CI
  expect_error(calib_mlr(data.mstate = msebmtcal,
                         data.raw = ebmtcal,
                         j=1,
                         s=0,
                         t = 1826,
                         tp.pred = tp.pred,
                         w.covs = c("year", "agecl", "proph", "match"),
                         ps.int = 2,
                         degree = 2,
                         CI = 95,
                         CI.R.boot = 5))

  ## Calculate observed event probabilities
  dat.calib.mlr <-
    calib_mlr(data.mstate = msebmtcal,
              data.raw = ebmtcal,
              j=1,
              s=0,
              t = 1826,
              tp.pred = tp.pred,
              w.covs = c("year", "agecl", "proph", "match"),
              ps.int = 2,
              degree = 2)

  expect_type(dat.calib.mlr, "list")
  expect_equal(class(dat.calib.mlr), "calib_mlr")
  expect_length(dat.calib.mlr, 2)
  expect_length(dat.calib.mlr[["plotdata"]], 5)
  expect_length(dat.calib.mlr[["plotdata"]][["state3"]]$id, 150)
  expect_length(dat.calib.mlr[["plotdata"]][["state6"]]$id, 150)
  expect_error(dat.calib.mlr[["plotdata"]][[6]])
  expect_length(dat.calib.mlr[["metadata"]], 4)
  expect_no_error(summary(dat.calib.mlr))

})

test_that("check calib_mlr output, (j = 1, s = 0),
          Manually define function to estimate weights", {

            ## Reduce to 150 individuals
            # Extract the predicted transition probabilities out of state j = 1 for first 150 individuals
            tp.pred <- tp.cmprsk.j0 |>
              dplyr::filter(id %in% 1:150) |>
              dplyr::select(any_of(paste("pstate", 1:6, sep = "")))
            # Reduce ebmtcal to first 150 individuals
            ebmtcal <- ebmtcal |> dplyr::filter(id %in% 1:150)
            # Reduce msebmtcal.cmprsk to first 150 individuals
            msebmtcal <- msebmtcal.cmprsk |> dplyr::filter(id %in% 1:150)

            ###
            ### Calculate observed event probabilities
            dat.calib.mlr <-
              calib_mlr(data.mstate = msebmtcal,
                        data.raw = ebmtcal,
                        j = 1,
                        s = 0,
                        t = 1826,
                        tp.pred = tp.pred,
                        w.covs = c("year", "agecl", "proph", "match"),
                        ps.int = 2,
                        degree = 2)

            ###
            ### Calculate manual weights
            weights.manual <-
              calc_weights(data.mstate = msebmtcal,
                           data.raw = ebmtcal,
                           covs =  c("year", "agecl", "proph", "match"),
                           t = 1826,
                           s = 0,
                           landmark.type = "state",
                           j = 1,
                           max.weight = 10,
                           stabilised = FALSE)

            ###
            ### Calculate observed event probabilities same function as internal procedure, and check it agrees with dat.calib.mlr
            dat.calib.mlr.w.manual <-
              calib_mlr(data.mstate = msebmtcal,
                        data.raw = ebmtcal,
                        j = 1,
                        s = 0,
                        t = 1826,
                        tp.pred = tp.pred,
                        weights = weights.manual$ipcw,
                        ps.int = 2,
                        degree = 2)

            expect_equal(dat.calib.mlr[["plotdata"]][[1]], dat.calib.mlr.w.manual[["plotdata"]][[1]])

            ###
            ### Calculate observed event probabilities using an incorrect vector of weights, and see if its different from dat.calib.mlr
            dat.calib.mlr.w.manual <-
              calib_mlr(data.mstate = msebmtcal,
                        data.raw = ebmtcal,
                        j = 1,
                        s = 0,
                        t = 1826,
                        tp.pred = tp.pred,
                        weights = rep(1,nrow(weights.manual)),
                        ps.int = 2,
                        degree = 2)

            expect_false(any(dat.calib.mlr[["plotdata"]][[1]]$obs == dat.calib.mlr.w.manual[["plotdata"]][[1]]$obs))

            ###
            ### Calculate observed event probabilities with w.function, where calc_weights_manual = calc_weights (exactly same as internal procedure)
            calc_weights_manual <- calc_weights

            dat.calib.mlr.w.function <-
              calib_mlr(data.mstate = msebmtcal,
                        data.raw = ebmtcal,
                        j = 1,
                        s = 0,
                        t = 1826,
                        tp.pred = tp.pred,
                        w.function = calc_weights_manual,
                        w.covs = c("year", "agecl", "proph", "match"),
                        ps.int = 2,
                        degree = 2)

            expect_type(dat.calib.mlr.w.function, "list")
            expect_equal(class(dat.calib.mlr.w.function), "calib_mlr")
            expect_length(dat.calib.mlr.w.function, 2)
            expect_length(dat.calib.mlr.w.function[["plotdata"]], 5)
            expect_length(dat.calib.mlr.w.function[["plotdata"]][[1]]$id, 150)
            expect_length(dat.calib.mlr.w.function[["plotdata"]][[4]]$id, 150)
            expect_length(dat.calib.mlr.w.function[["metadata"]], 4)

            ## Check answer is same whether w.function used or not
            expect_equal(dat.calib.mlr[["plotdata"]][[1]], dat.calib.mlr.w.function[["plotdata"]][[1]])
            expect_equal(dat.calib.mlr[["plotdata"]][[4]], dat.calib.mlr.w.function[["plotdata"]][[4]])

            ###
            ### Redefine calc_weights, but change order of all the input arguments (this shouldn't make a difference)
            calc_weights_manual <- function(stabilised = FALSE, max.follow = NULL, data.mstate, covs = NULL, landmark.type = "state", j = NULL, t, s, max.weight = 10, data.raw){

              ### Modify everybody to be censored after time t, if a max.follow has been specified
              if(!is.null(max.follow)){
                if (max.follow == "t"){
                  data.raw <- dplyr::mutate(data.raw,
                                            dtcens.s = dplyr::case_when(dtcens < t + 2 ~ dtcens.s,
                                                                        dtcens >= t + 2 ~ 0),
                                            dtcens = dplyr::case_when(dtcens < t + 2 ~ dtcens,
                                                                      dtcens >= t + 2 ~ t + 2))
                } else {
                  data.raw <- dplyr::mutate(data.raw,
                                            dtcens.s = dplyr::case_when(dtcens < max.follow + 2 ~ dtcens.s,
                                                                        dtcens >= max.follow + 2 ~ 0),
                                            dtcens = dplyr::case_when(dtcens < max.follow + 2 ~ dtcens,
                                                                      dtcens >= max.follow + 2 ~ max.follow + 2))
                }
              }

              ### Create a new outcome, which is the time until censored from s
              data.raw$dtcens.modified <- data.raw$dtcens - s

              ### Save a copy of data.raw
              data.raw.save <- data.raw

              ### If landmark.type = "state", calculate weights only in individuals in state j at time s
              ### If landmark.type = "all", calculate weights in all uncensored individuals at time s (note that this excludes individuals
              ### who have reached absorbing states, who have been 'censored' from the survival distribution is censoring)
              if (landmark.type == "state"){
                ### Identify individuals who are uncensored in state j at time s
                ids.uncens <- base::subset(data.mstate, from == j & Tstart <= s & s < Tstop) |>
                  dplyr::select(id) |>
                  dplyr::distinct(id) |>
                  dplyr::pull(id)

              } else if (landmark.type == "all"){
                ### Identify individuals who are uncensored time s
                ids.uncens <- base::subset(data.mstate, Tstart <= s & s < Tstop) |>
                  dplyr::select(id) |>
                  dplyr::distinct(id) |>
                  dplyr::pull(id)

              }

              ### Subset data.mstate and data.raw to these individuals
              data.mstate <- data.mstate |> base::subset(id %in% ids.uncens)
              data.raw <- data.raw |> base::subset(id %in% ids.uncens)

              ###
              ### Create models for censoring in order to calculate the IPCW weights
              ### Seperate models for estimating the weights, and stabilising the weights (intercept only model)
              ###
              if (!is.null(covs)){
                ### A model where we adjust for predictor variables
                cens.model <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ ",
                                                                      paste(covs, collapse = "+"),
                                                                      sep = "")),
                                              data = data.raw)

                ### Intercept only model (numerator for stabilised weights)
                cens.model.int <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ 1",
                                                                          sep = "")),
                                                  data = data.raw)
              } else if (is.null(covs)){
                ### If user has not input any predictors for estimating weights, the model for estimating the weights is the intercept only model (i.e. Kaplan Meier estimator)

                ### Intercept only model (numerator for stabilised weights)
                cens.model.int <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ 1",
                                                                          sep = "")),
                                                  data = data.raw)
                ### Assign cens.model to be the same
                cens.model <- cens.model.int


              }

              ### Calculate a data frame containing probability of censored and uncenosred at each time point
              ### The weights will be the probability of being uncensored, at the time of the event for each individual

              ## Extract baseline hazard
              data.weights <- survival::basehaz(cens.model, centered = FALSE)
              ## Add lp to data.raw.save
              data.raw.save$lp <- stats::predict(cens.model, newdata = data.raw.save, type = "lp", reference = "zero")

              ### Create weights for the cohort at time t - s
              ### Note for individuals who reached an absorbing state, we take the probability of them being uncensored at the time of reached the
              ### abosrbing state. For individuals still alive, we take the probability of being uncensored at time t - s.

              ### Get location of individuals who entered absorbing states or were censored prior to evaluation time
              obs.absorbed.prior <- which(data.raw.save$dtcens <= t & data.raw.save$dtcens.s == 0)
              obs.censored.prior <- which(data.raw.save$dtcens <= t & data.raw.save$dtcens.s == 1)

              ###
              ### Now create unstabilised probability of (un)censoring weights
              ### Note that weights are the probability of being uncensored, so if an individual has low probability of being uncesored,
              ### the inervse of this will be big, weighting them strongly
              ###

              ### First assign all individuals a weight of the probability of being uncensored at time t
              ### This is the linear predictor times the cumulative hazard at time t, and appropriate transformation to get a risk
              data.raw.save$pcw <- as.numeric(exp(-exp(data.raw.save$lp)*data.weights$hazard[max(which(data.weights$time <= t - s))]))

              ## Write a function which will extract the uncensored probability for an individual with linear predictor lp at a given time t
              prob.uncens.func <- function(input){

                ## Assign t and person_id
                t <- input[1]
                lp <- input[2]

                if (t <= 0){
                  return(NA)
                } else if (t > 0){
                  ## Get hazard at appropriate time
                  if (t < min(data.weights$time)){
                    bhaz.t <- 0
                  } else if (t >= min(data.weights$time)){
                    bhaz.t <- data.weights$hazard[max(which(data.weights$time <= t))]
                  }

                  ## Return risk
                  return(exp(-exp(lp)*bhaz.t))
                }
              }

              ### Apply this function to all the times at which individuals have entered an absorbing state prior to censoring
              data.raw.save$pcw[obs.absorbed.prior] <- apply(data.raw.save[obs.absorbed.prior, c("dtcens.modified", "lp")], 1, FUN = prob.uncens.func)

              ### For individuals who were censored prior to t, assign the weight as NA
              data.raw.save$pcw[obs.censored.prior] <- NA

              ### Invert these
              data.raw.save$ipcw <- 1/data.raw.save$pcw

              ###
              ### Stabilise these weights dependent on user-input
              ###
              if (stabilised == TRUE){

                ## Extract baseline hazard
                data.weights.numer <- survival::basehaz(cens.model.int, centered = TRUE)

                ### Assign all individuals a weight of the probability of being uncesored at time t
                data.raw.save$pcw.numer <- as.numeric(exp(-data.weights.numer$hazard[max(which(data.weights.numer$time <= t - s))]))

                ### Create stabilised weight
                data.raw.save$ipcw.stab <- data.raw.save$pcw.numer*data.raw.save$ipcw
              }

              ### Finally cap these at 10 and create output object

              ### Create output object
              if (stabilised == FALSE){
                data.raw.save$ipcw <- pmin(data.raw.save$ipcw, max.weight)
                output.weights <- data.frame("id" = data.raw.save$id, "ipcw" = data.raw.save$ipcw, "pcw" = data.raw.save$pcw)
              } else if (stabilised == TRUE){
                data.raw.save$ipcw <- pmin(data.raw.save$ipcw, max.weight)
                data.raw.save$ipcw.stab <- pmin(data.raw.save$ipcw.stab, max.weight)
                output.weights <- data.frame("id" = data.raw.save$id, "ipcw" = data.raw.save$ipcw, "ipcw.stab" = data.raw.save$ipcw.stab, "pcw" = data.raw.save$pcw)
              }

              return(output.weights)

            }

            ### Calculate observed event probabilities with new w.function
            dat.calib.mlr.w.function <-
              calib_mlr(data.mstate = msebmtcal,
                        data.raw = ebmtcal,
                        j = 1,
                        s = 0,
                        t = 1826,
                        tp.pred = tp.pred,
                        w.function = calc_weights_manual,
                        w.covs = c("year", "agecl", "proph", "match"),
                        ps.int = 2,
                        degree = 2)

            expect_type(dat.calib.mlr.w.function, "list")
            expect_equal(class(dat.calib.mlr.w.function), "calib_mlr")
            expect_length(dat.calib.mlr.w.function, 2)
            expect_length(dat.calib.mlr.w.function[["plotdata"]], 5)
            expect_length(dat.calib.mlr.w.function[["plotdata"]][[1]]$id, 150)
            expect_length(dat.calib.mlr.w.function[["plotdata"]][[4]]$id, 150)
            expect_length(dat.calib.mlr.w.function[["metadata"]], 4)

            ## Check answer is same whether w.function used or not
            expect_equal(dat.calib.mlr[["plotdata"]][[1]], dat.calib.mlr.w.function[["plotdata"]][[1]])
            expect_equal(dat.calib.mlr[["plotdata"]][[4]], dat.calib.mlr.w.function[["plotdata"]][[4]])

            ###
            ### Repeat this process (manual definition of calc_weights), again arguments are in different order, but this time an extra argument is added, which adds 'extra.arg' to every weight.
            ### This extra arguments is something that could be inputted by user, and want to check it does actually change the answer. It should no longer agree with dat.calb.mlr.
            calc_weights_manual <- function(stabilised = FALSE, max.follow = NULL, data.mstate, covs = NULL, landmark.type = "state", j = NULL, t, s, max.weight = 10, data.raw, extra.arg = NULL){

              ### Modify everybody to be censored after time t, if a max.follow has been specified
              if(!is.null(max.follow)){
                if (max.follow == "t"){
                  data.raw <- dplyr::mutate(data.raw,
                                            dtcens.s = dplyr::case_when(dtcens < t + 2 ~ dtcens.s,
                                                                        dtcens >= t + 2 ~ 0),
                                            dtcens = dplyr::case_when(dtcens < t + 2 ~ dtcens,
                                                                      dtcens >= t + 2 ~ t + 2))
                } else {
                  data.raw <- dplyr::mutate(data.raw,
                                            dtcens.s = dplyr::case_when(dtcens < max.follow + 2 ~ dtcens.s,
                                                                        dtcens >= max.follow + 2 ~ 0),
                                            dtcens = dplyr::case_when(dtcens < max.follow + 2 ~ dtcens,
                                                                      dtcens >= max.follow + 2 ~ max.follow + 2))
                }
              }

              ### Create a new outcome, which is the time until censored from s
              data.raw$dtcens.modified <- data.raw$dtcens - s

              ### Save a copy of data.raw
              data.raw.save <- data.raw

              ### If landmark.type = "state", calculate weights only in individuals in state j at time s
              ### If landmark.type = "all", calculate weights in all uncensored individuals at time s (note that this excludes individuals
              ### who have reached absorbing states, who have been 'censored' from the survival distribution is censoring)
              if (landmark.type == "state"){
                ### Identify individuals who are uncensored in state j at time s
                ids.uncens <- base::subset(data.mstate, from == j & Tstart <= s & s < Tstop) |>
                  dplyr::select(id) |>
                  dplyr::distinct(id) |>
                  dplyr::pull(id)

              } else if (landmark.type == "all"){
                ### Identify individuals who are uncensored time s
                ids.uncens <- base::subset(data.mstate, Tstart <= s & s < Tstop) |>
                  dplyr::select(id) |>
                  dplyr::distinct(id) |>
                  dplyr::pull(id)

              }

              ### Subset data.mstate and data.raw to these individuals
              data.mstate <- data.mstate |> base::subset(id %in% ids.uncens)
              data.raw <- data.raw |> base::subset(id %in% ids.uncens)

              ###
              ### Create models for censoring in order to calculate the IPCW weights
              ### Seperate models for estimating the weights, and stabilising the weights (intercept only model)
              ###
              if (!is.null(covs)){
                ### A model where we adjust for predictor variables
                cens.model <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ ",
                                                                      paste(covs, collapse = "+"),
                                                                      sep = "")),
                                              data = data.raw)

                ### Intercept only model (numerator for stabilised weights)
                cens.model.int <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ 1",
                                                                          sep = "")),
                                                  data = data.raw)
              } else if (is.null(covs)){
                ### If user has not input any predictors for estimating weights, the model for estimating the weights is the intercept only model (i.e. Kaplan Meier estimator)

                ### Intercept only model (numerator for stabilised weights)
                cens.model.int <- survival::coxph(stats::as.formula(paste("survival::Surv(dtcens.modified, dtcens.s) ~ 1",
                                                                          sep = "")),
                                                  data = data.raw)
                ### Assign cens.model to be the same
                cens.model <- cens.model.int


              }

              ### Calculate a data frame containing probability of censored and uncenosred at each time point
              ### The weights will be the probability of being uncensored, at the time of the event for each individual

              ## Extract baseline hazard
              data.weights <- survival::basehaz(cens.model, centered = FALSE)
              ## Add lp to data.raw.save
              data.raw.save$lp <- stats::predict(cens.model, newdata = data.raw.save, type = "lp", reference = "zero")

              ### Create weights for the cohort at time t - s
              ### Note for individuals who reached an absorbing state, we take the probability of them being uncensored at the time of reached the
              ### abosrbing state. For individuals still alive, we take the probability of being uncensored at time t - s.

              ### Get location of individuals who entered absorbing states or were censored prior to evaluation time
              obs.absorbed.prior <- which(data.raw.save$dtcens <= t & data.raw.save$dtcens.s == 0)
              obs.censored.prior <- which(data.raw.save$dtcens <= t & data.raw.save$dtcens.s == 1)

              ###
              ### Now create unstabilised probability of (un)censoring weights
              ### Note that weights are the probability of being uncensored, so if an individual has low probability of being uncesored,
              ### the inervse of this will be big, weighting them strongly
              ###

              ### First assign all individuals a weight of the probability of being uncensored at time t
              ### This is the linear predictor times the cumulative hazard at time t, and appropriate transformation to get a risk
              data.raw.save$pcw <- as.numeric(exp(-exp(data.raw.save$lp)*data.weights$hazard[max(which(data.weights$time <= t - s))]))

              ## Write a function which will extract the uncensored probability for an individual with linear predictor lp at a given time t
              prob.uncens.func <- function(input){

                ## Assign t and person_id
                t <- input[1]
                lp <- input[2]

                if (t <= 0){
                  return(NA)
                } else if (t > 0){
                  ## Get hazard at appropriate time
                  if (t < min(data.weights$time)){
                    bhaz.t <- 0
                  } else if (t >= min(data.weights$time)){
                    bhaz.t <- data.weights$hazard[max(which(data.weights$time <= t))]
                  }

                  ## Return risk
                  return(exp(-exp(lp)*bhaz.t))
                }
              }

              ### Apply this function to all the times at which individuals have entered an absorbing state prior to censoring
              data.raw.save$pcw[obs.absorbed.prior] <- apply(data.raw.save[obs.absorbed.prior, c("dtcens.modified", "lp")], 1, FUN = prob.uncens.func)

              ### For individuals who were censored prior to t, assign the weight as NA
              data.raw.save$pcw[obs.censored.prior] <- NA

              ### Invert these
              data.raw.save$ipcw <- 1/data.raw.save$pcw

              ###
              ### Stabilise these weights dependent on user-input
              ###
              if (stabilised == TRUE){

                ## Extract baseline hazard
                data.weights.numer <- survival::basehaz(cens.model.int, centered = TRUE)

                ### Assign all individuals a weight of the probability of being uncesored at time t
                data.raw.save$pcw.numer <- as.numeric(exp(-data.weights.numer$hazard[max(which(data.weights.numer$time <= t - s))]))

                ### Create stabilised weight
                data.raw.save$ipcw.stab <- data.raw.save$pcw.numer*data.raw.save$ipcw
              }

              ### Finally cap these at 10 and create output object

              ### Create output object
              if (stabilised == FALSE){
                data.raw.save$ipcw <- pmin(data.raw.save$ipcw, max.weight)
                output.weights <- data.frame("id" = data.raw.save$id, "ipcw" = data.raw.save$ipcw, "pcw" = data.raw.save$pcw)
              } else if (stabilised == TRUE){
                data.raw.save$ipcw <- pmin(data.raw.save$ipcw, max.weight)
                data.raw.save$ipcw.stab <- pmin(data.raw.save$ipcw.stab, max.weight)
                output.weights <- data.frame("id" = data.raw.save$id, "ipcw" = data.raw.save$ipcw, "ipcw.stab" = data.raw.save$ipcw.stab, "pcw" = data.raw.save$pcw)
              }

              ### Add this extra argument to the weights, to check it does something
              output.weights$ipcw <- output.weights$ipcw + extra.arg

              return(output.weights)

            }

            ### Calculate observed event probabilities with new w.function
            dat.calib.mlr.w.function <-
              calib_mlr(data.mstate = msebmtcal,
                        data.raw = ebmtcal,
                        j = 1,
                        s = 0,
                        t = 1826,
                        tp.pred = tp.pred,
                        w.function = calc_weights_manual,
                        w.covs = c("year", "agecl", "proph", "match"),
                        ps.int = 2,
                        degree = 2,
                        extra.arg = 0.1)

            expect_type(dat.calib.mlr.w.function, "list")
            expect_equal(class(dat.calib.mlr.w.function), "calib_mlr")
            expect_length(dat.calib.mlr.w.function, 2)
            expect_length(dat.calib.mlr.w.function[["plotdata"]], 5)
            expect_length(dat.calib.mlr.w.function[["plotdata"]][[1]]$id, 150)
            expect_length(dat.calib.mlr.w.function[["plotdata"]][[4]]$id, 150)
            expect_length(dat.calib.mlr.w.function[["metadata"]], 4)

            ## Check answer is same whether w.function used or not
            expect_false(any(dat.calib.mlr[["plotdata"]][[1]]$obs == dat.calib.mlr.w.function[["plotdata"]][[1]]$obs))

})