# Tests for milags_functions
#
# Author: Jungwirt
###############################################################################


# tests for get_n0
context("Test the get_n0 function")


test_that("Getting initial biomass works", {

  # data
  biom <- c(27727155, 28619420, 31695970, 35766010, 40113020, 44429830, 47754910, 49705080)
  method_1 <- "first.observation"
  method_2 <- "minimal.observation"
  first_calc <- biom[1]
  minim_calc <- min(biom)

  expect_equal(get_n0(biom, method_1), first_calc)
  expect_equal(get_n0(biom, method_2), minim_calc)
})

# context("Test the calc_lag function")

# test_that("Calculating if lag of given model works", {


#  expect_equal()
#})



context("Test the get_def_pars function")

test_that("Calculating if getting default parameters works", {

  expect_equal(get_def_pars(), list(model = "logistic",
                                    n0_method = "first.observation",
                                    tangent_method = "local.regression",
                                    threshold = 10^2,
                                    curve_points = 3,
                                    init_gr_rate = NULL,
                                    init_lag = NULL,
                                    algorithm = "auto",
                                    max_iter = 100))
})


database <- data.frame(time = c(1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5, 10),
                 biomass = c(4396571.976, 3807332.496, 4165206.611, 5690282.713, 7727252.94, 19381419.82,
                             13744788.86, 18066675.15, 23651017.71, 29465323.75, 28528881.12, 29448677.51,
                             29144257.31, 32086465.47, 29732262.17, 29888494.33, 30720558.23, 31220300,
                             32074919.83))

context("Test the lag_biomass_incr function")

test_that("Calculating if fitting the lag to multiple growth curves based on the biomass increase method works", {


  test_df <- database 
  test_threshold <- 5000
  test_n0 <- 0
  data_test <- test_df %>% filter(FALSE) %>% mutate(lag = numeric(0))
  for (this_curve_id in unique(test_df$curve_id)) {
    data_this_curve <- test_df %>%
      filter(curve_id == this_curve_id) %>%
      left_join(test_n0, by = "curve_id") %>%
      mutate(incr_from_n0 = biomass - test_n0)


    #find where second derivative is maximal
    threshold_diff <- which(data_this_curve$incr_from_n0 >= test_threshold)
    first_threshold_diff <- threshold_diff[1]
    lag_this_curve <- data_this_curve$time[first_threshold_diff]
    data_this_curve <- data_this_curve %>%
      mutate(
        lag = round(lag_this_curve,1))
    data_test <- rbind(data_test, data_this_curve)
}
  expect_equal(lag_biomass_incr(test_df, test_threshold, test_n0), data_test)
})



context("Test the get_init_pars_baranyi function")

test_that("Getting initial parameters for Baranyi algorithm works", {

  # data
  test_df <- database 
  init_lag <- NULL
  init_gr_rate <- NULL
  min_b <- 0.2
  min_a <- 0.8
  this_n0 <- 0.5
  if (is.null(init_lag)) {
    init_lag <- calc_lag(test_df, method = "tangent", pars = get_def_pars()) %>% pull(lag) %>% unique() %>% as.numeric()
  }

  if (is.null(init_gr_rate)) {
    data_this_curve_exp <- test_df %>%
      mutate(
        max_biomass = max(biomass),
        min_threshold = this_n0 + min_b * (max_biomass - this_n0),
        max_threshold = this_n0 + min_a * (max_biomass - this_n0)) %>%
      # take only the points that are in between min and max
      filter(biomass <= max_threshold & biomass >= min_threshold)
    data_this_curve_exp$logdata <- log(data_this_curve_exp$biomass/this_n0)
    if (nrow(data_this_curve_exp %>% filter(!is.na(time) & !is.na(logdata))  > 0)) {
      mod <- lm(logdata ~ time, data = data_this_curve_exp)
      # this growth rate is assuming an exponential model so it will be generally underestimated
      init_gr_rate <- mod$coefficients[2] %>% unname()
      # we have real r = r(1-N/K) so let us take
      n_mid <- median(data_this_curve_exp$biomass)
      init_mumax <- init_gr_rate
    } else {
      init_mumax <- 0.1
    }
  } else {
    init_mumax <- init_gr_rate
  }
  test_init <- list(init_mumax = init_mumax, init_lag = init_lag)
  expect_equal(get_init_pars_baranyi(test_df, 0.5, NULL, NULL), test_init )
})

context("Test the cut_data function")
test_that("Cutting biomass data works", {

  # data
  test_df <- database 
  max_time <- 5
  data_short <- test_df %>% filter(time <= max_time)
  expect_equal(cut_the_data(test_df, max_time), data_short )
})

 context("Test the fit_max_infl_lag function")
test_that("Fitting maximal biomass data to lag works", {

  # data
  test_df <- database 
  if (!("curve_id" %in% names(test_df))) {
    test_df$curve_id <- "growth.curve"
  }
  data_new <- test_df %>% filter(FALSE) %>% mutate(log_biomass = numeric(0), diff = numeric(0), lag = numeric(0))
  for (this_curve_id in unique(test_df$curve_id)) {
    data_this_curve <- test_df %>%
      filter(curve_id == this_curve_id) %>%
      arrange(time) %>%
      as.data.frame() %>%
      mutate(log_biomass = log(biomass),
             time_diff = mean(c(diff(time), diff(dplyr::lag(time))), na.rm = TRUE),
             time_av = (time + dplyr::lag(time))/2) %>%
      mutate(
        #second.deriv.b = c(NA,second_deriv(time, log.biomass), NA),
        # central scheme
        second_deriv_b = (dplyr::lead(log_biomass) + dplyr::lag(log_biomass) - 2 * log_biomass)/time_diff^2,
        # we only look at second derivative if we know the first derivative is positive!
        # In empirical data we sometimes see a decrease in biomass but we don;t want to look at the second derivative there!
        biomass_incr = dplyr::lead(log_biomass)  > dplyr::lag(log_biomass)
      )

    #find where second derivative is maximal
    max_second_deriv_b <- max(data_this_curve$second_deriv_b, na.rm=TRUE)
    # take first point when max derivative if there are multiple
    ind_max_second_deriv_b <- which(data_this_curve$second_deriv_b == max_second_deriv_b)[1]
    lag_this_curve <- data_this_curve$time[ind_max_second_deriv_b]
    data_this_curve <- data_this_curve %>%
      mutate(
        lag = round(lag_this_curve, 1))
    data_new <- rbind(data_new, data_this_curve)
  }
  expect_equal(fit_max_infl_lag(test_df), data_new )
})

 context("Test the lag_biomass_incr function")
test_that("Biomass increase method works", {

  # data
  test_df <- database 
  if (!("curve_id" %in% names(test_df))) {
    test_df$curve_id <- "growth.curve"
  }
  threshold <- 29465323.75
   n0 <- test_df %>%
    group_by(curve_id) %>%
    arrange(time) %>%
    summarise(n0 = get_n0(biomass, "minimal.observation")) %>%
    ungroup() %>%
    mutate(log_n0 = log(n0))
   data_new <- test_df %>% filter(FALSE) %>% mutate(lag = numeric(0))
  for (this_curve_id in unique(test_df$curve_id)) {
    data_this_curve <- test_df %>%
      filter(curve_id == this_curve_id) %>%
      left_join(n0, by = "curve_id") %>%
      mutate(incr_from_n0 = biomass - n0)


    #find where second derivative is maximal
    threshold_diff <- which(data_this_curve$incr_from_n0 >= threshold)
    first_threshold_diff <- threshold_diff[1]
    lag_this_curve <- data_this_curve$time[first_threshold_diff]
    data_this_curve <- data_this_curve %>%
      mutate(
        lag = round(lag_this_curve,1))
    data_new <- rbind(data_new, data_this_curve)
  }
  expect_equal(lag_biomass_incr(test_df, threshold, n0 ), data_new )
})



 context("Test the fit_exp_lag function")
test_that("Fitting biomass data to lag with tangent method works", {

  # data
  test_df <- database 
  if (!("curve_id" %in% names(test_df))) {
    test_df$curve_id <- "growth.curve"
  }
    n0 <- test_df %>%
    group_by(curve_id) %>%
    arrange(time) %>%
    summarise(n0 = get_n0(biomass, "minimal.observation")) %>%
    ungroup() %>%
    mutate(log_n0 = log(n0))
  tangent_method <- "local.regression"
  curve_points <- 3
  data_new <- test_df %>% filter(FALSE) %>% mutate(lag = numeric(0), line_intercept = numeric(0), line_slope = numeric(0))
  for (this_curve_id in unique(test_df$curve_id)) {
    data_this_curve <- test_df %>% filter(curve_id == this_curve_id)
    this_n0 <- n0 %>% filter(curve_id == this_curve_id) %>% pull(n0)
    lag_obj <- fit_exp_lag_to_curve(data_this_curve, this_n0, tangent_method, curve_points)
    data_this_curve <- data_this_curve %>%
      mutate(lag = round(lag_obj$lag,1)) %>%
      mutate(line_intercept = lag_obj$line_intercept,
             line_slope = lag_obj$line_slope) %>%
      left_join(lag_obj$tangent_points)
    data_new <- rbind(data_new, data_this_curve)
  }

  data_new$predicted <- NA
  expect_equal(fit_exp_lag(test_df, tangent_method, n0, curve_points), data_new )
})

context("Test the get_init_pars_logistic function")
test_that("Getting initial parameters for logistic growth curve works", {

  # data
  test_df <- database
  init_K <- NULL
  init_lag <- NULL
  init_gr_rate <- NULL
  this_n0 <- get_n0(test_df$biomass, "minimal.observation")
  min_b <- 0.2
  min_a <- 0.8
  
if (is.null(init_K)) {
    max_this_data <- max(test_df$biomass, na.rm = TRUE)
    init_K <- max_this_data %>% as.numeric()
  }
  if (is.null(init_lag)) {
    init_lag <- calc_lag(test_df, method = "tangent", pars = get_def_pars()) %>% pull(lag) %>% unique() %>% as.numeric()
  }

  if (is.null(init_gr_rate)) {
    data_this_curve_exp <- test_df %>%
      mutate(
        max_biomass = max(test_df$biomass),
        min_threshold = this_n0 + min_b*(max_biomass - this_n0),
        max_threshold = this_n0 + min_a*(max_biomass - this_n0)) %>%
      # take only the points that are in between min and max
      filter(biomass <= max_threshold & biomass >= min_threshold)
    data_this_curve_exp$logdata = log(data_this_curve_exp$biomass/this_n0)
    if (nrow(data_this_curve_exp %>% filter(!is.na(time) & !is.na(logdata)) > 0)) {
      mod = lm(logdata ~ time, data = data_this_curve_exp)
      # this growth rate is assuming an exponential model so it will be generally underestimated
      init_gr_rate = mod$coefficients[2] %>% unname()
      # we have real r = r(1-N/K) so let us take
      n_mid = median(data_this_curve_exp$biomass)
      init_gr_rate = init_gr_rate/(1-n_mid/init_K)
    } else {
      init_gr_rate = 0.1
    }
    #data_this_curve_exp$predicted = predict(mod, data_this_curve_exp)
  }
  data_new <- list(init_K = init_K, init_lag = init_lag, init_gr_rate = init_gr_rate)
  expect_equal(get_init_pars_logistic(test_df, this_n0, init_K, init_lag, init_gr_rate, min_b, min_a), data_new )
})

 context("Test the choose_lag_fit_algorithm_baranyi function")
test_that("Choosing the best nls model to fit to algorithm baranyi works", {

  # data
  test_df <- database 
  LOG10N0 <- NULL
  init_lag <- NULL
  init_mumax <- NULL
  init_LOG10Nmax <- NULL
  max_iter <- 100
  lower_bound <- c(0,0,0, 0)
  tryCatch(
    expr =
      {nlsres_LM <- nlsLM(formula = baranyi,
                        data = test_df,
                        start = list(lag=init_lag, mumax=init_mumax, LOG10N0 = LOG10N0, LOG10Nmax = init_LOG10Nmax),
                        control = nls.control(maxiter = max_iter),
                        lower = lower_bound)
      },
    error = function(cond) {
      # this operator assigns value outside the error environment
      nlsres_LM <<- NA
    })
  tryCatch(
    expr =
      {nls_LM_no_bound <- nlsLM(formula = baranyi,
                                 data = test_df,
                                 start = list(lag=init_lag, mumax=init_mumax, LOG10N0 = LOG10N0, LOG10Nmax = init_LOG10Nmax),
                                 control = nls.control(maxiter = max_iter))
      },
    error = function(cond) {
      nls_LM_no_bound <<- NA
    })
  tryCatch(
    expr =
      {nls_PORT <- nls(baranyi,
                        data = test_df,
                        start = list(lag=init_lag, mumax=init_mumax, LOG10N0 = LOG10N0, LOG10Nmax = init_LOG10Nmax),
                        algorithm = "port",
                        control = nls.control(maxiter = max_iter),
                        lower = lower_bound)
      },
    error = function(cond) {
      nls_PORT <<- NA
    })

  nls_a <- compare_algorithms(nls_LM_no_bound, nls_PORT, nlsres_LM)
  # consider the model without lower bounds only if the resulted estimates are above 0

  expect_equal(choose_lag_fit_algorithm_baranyi(test_df, LOG10N0 = NULL, init_lag = NULL, init_mumax = NULL, init_LOG10Nmax = NULL, max_iter = 100, lower_bound = c(0,0,0, 0)), nls_a )
})


context("Test the get_all_methods_lag function")
test_that("Getting lag by all methods works", {

  # data
  test_df <- database 
  biomass_incr_threshold <- NULL
  pars <- NULL
  if (is.null(pars)) {
    pars <- get_def_pars()
  }
  if (is.null(biomass_incr_threshold))  {
    biomass_incr_threshold <- pars$threshold
  }
  pars_logistic <- pars
  pars_logistic$model <- "logistic"
  data_new_logistic <- calc_lag(data = test_df,
                                    method = "parameter fitting to a model",
                                    pars = pars_logistic) %>%
    mutate(lag_calculation_method = "par. fitting to logistic model")

  pars_baranyi <- pars
  pars_baranyi$model <- "baranyi"
  data_new_baranyi <- calc_lag(data = test_df,
                                   method = "parameter fitting to a model",
                                   pars = pars_baranyi) %>%
    mutate(lag_calculation_method = "par. fitting to baranyi model")

  data_new_max_infl <- calc_lag(data = test_df,
                                    method = "max growth acceleration",
                                    pars = pars) %>%
    mutate(lag_calculation_method = "max growth acceleration")

  pars_to_point <- pars
  pars_to_point$tangent_method <- "to.point"
  data_new_exp <- calc_lag(data = test_df,
                                method = "tangent",
                                pars = pars_to_point
  ) %>%
    mutate(lag_calculation_method = "tangent to max growth point")

  pars_local_regr <- pars
  pars_local_regr$tangent_method <- "local.regression"
  data_new_exp2 <- calc_lag(data = test_df,
                                 method = "tangent",
                                 pars = pars_local_regr)%>%
    mutate(lag_calculation_method = "tangent to max growth line")

  pars$threshold <- biomass_incr_threshold
  data_new_biominc <-  calc_lag(data = test_df,
                                    method = "biomass increase",
                                    pars = pars)%>%
    mutate(lag_calculation_method = "biomass increase")


  data_all_with_lag <- data_new_max_infl %>%
    rbind(data_new_exp) %>%
    rbind(data_new_exp2) %>%
    rbind(data_new_biominc) %>%
    rbind(data_new_baranyi) %>%
    rbind(data_new_logistic)

  data_all_with_lag$lag[data_all_with_lag$lag < 0] = NA
  expect_equal(get_all_methods_lag(test_df, biomass_incr_threshold, pars ), data_all_with_lag )
})


context("Test the calc_lag function")
test_that("Calculating lag works", {

  # data
  method <- "tangent"
  test_df <- database 
if (!("curve_id" %in% names(test_df))) {
    test_df$curve_id = "growth.curve"
  }
 pars <- NULL
  if (is.null(pars)) {
    pars <- get_def_pars()
  }
  n0 <- test_df %>%
    group_by(curve_id) %>%
    arrange(time) %>%
    summarise(n0 = get_n0(biomass, pars$n0_method)) %>%
    ungroup() %>%
    mutate(log_n0 = log(n0))


  if (method == "tangent") {
    sel_tangent_method <- pars$tangent_method
    data_new <- fit_exp_lag(test_df,
                                   n0 = n0,
                                   tangent_method = sel_tangent_method,
                                   curve_points = pars$curve_points)
    data_new <- data_new %>%
      select(time, biomass, curve_id, lag, line_slope, line_intercept, tangent_point) %>%
      mutate(lag_calculation_method = "tangent",
             log_biomass = log(biomass),
             predicted_data = NA,
             diff = NA,
             second_deriv_b = NA,
             threshold = NA) %>%
      select(time, biomass, log_biomass, curve_id, lag, line_slope, line_intercept, lag_calculation_method, predicted_data, diff, second_deriv_b, tangent_point, threshold)


  } else if (method == "biomass increase") {
    sel_threshold <- pars$threshold
    data_new <- lag_biomass_incr(test_df,
                                             threshold = sel_threshold,
                                             n0 = n0)
    data_new <- data_new %>%
      select(time, biomass, curve_id, lag) %>%
      mutate(lag_calculation_method = "biomass increase",
             log_biomass = log(biomass),
             predicted_data = NA,
             second_deriv_b = NA,
             line_intercept = NA,
             line_slope = NA,
             tangent_point = NA,
             diff = NA) %>%
      left_join(n0, by = "curve_id") %>%
      mutate(threshold = n0 + sel_threshold) %>%
      select(time, biomass, log_biomass, curve_id, lag, line_slope, line_intercept, lag_calculation_method, predicted_data, diff, second_deriv_b, tangent_point, threshold)

  } else if (method == "max growth acceleration") {
    data_new <-  fit_max_infl_lag(test_df)
    data_new <- data_new %>%
      select(time, biomass, log_biomass, curve_id, lag, second_deriv_b) %>%
      mutate(lag_calculation_method = "max growth acceleration",
             line_intercept = NA,
             line_slope = NA,
             predicted_data = NA,
             diff = NA,
             tangent_point = NA,
             threshold= NA) %>%
      select(time, biomass, log_biomass, curve_id, lag, line_slope, line_intercept, lag_calculation_method, predicted_data, diff, second_deriv_b, tangent_point, threshold)

  } else if (method == "parameter fitting to a model") {
    sel_model <- pars$model
    if (sel_model == "logistic") {
      data_new <- calc_lagistic_fit_lag(test_df, n0,
                                            init_gr_rate = pars$init_gr_rate,
                                            init_K = pars$init_K,
                                            init_lag = pars$init_lag,
                                            algorithm = pars$algorithm,
                                            max_iter = pars$max_iter)

    } else if (sel_model == "baranyi") {
      data_new <- calc_baranyi_fit_lag(test_df,
                                           n0,
                                           init_lag = pars$init_lag,
                                           init_gr_rate = pars$init_gr_rate,
                                           algorithm = pars$algorithm,
                                           max_iter = pars$max_iter) %>%
        mutate(lag_calculation_method = "Fitting lagged baranyi")

    } else {
      error("model not implemented")
    }
    data_new <- data_new %>%
      select(time, biomass, curve_id, lag, predicted_data = predicted) %>%
      mutate(lag_calculation_method = "parameter fitting to a model",
             log_biomass = log(biomass),
             diff = NA,
             second_deriv_b = NA,
             line_intercept = NA,
             line_slope = NA,
             tangent_point = NA,
             threshold = NA) %>%
      select(time, biomass, log_biomass, curve_id, lag, line_slope, line_intercept, lag_calculation_method, predicted_data, diff, second_deriv_b, tangent_point, threshold)

  }
  data_new$lag[data_new$lag < 0] = NA
  data_new <- data_new %>%
    left_join(n0) #%>%
  
  expect_equal(calc_lag(test_df, method, pars ), data_new )
})

context("Test the get_lag function")
test_that("Getting lag duration value works", {

  # data
  method <- "tangent"
  test_df <- database 
  if (!("curve_id" %in% names(test_df))) {
    test_df$curve_id = "growth.curve"
  }
  pars <- get_def_pars()
  data_extended = calc_lag(test_df, method, pars)
  lags_data = data_extended %>%
    group_by(curve_id) %>% 
    summarise(lag = unique(lag)) %>% 
    ungroup()
  
  expect_equal(get_lag(test_df, method, pars ), lags_data )
})

context("Test the make_grwoth_curve_df function")

test_that("Calculating if getting global variables predefined works", {

curve_id = NULL
time = c(1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5, 10)
biomass = c(4396571.976, 3807332.496, 4165206.611, 5690282.713, 7727252.94, 19381419.82,
                             13744788.86, 18066675.15, 23651017.71, 29465323.75, 28528881.12, 29448677.51,
                             29144257.31, 32086465.47, 29732262.17, 29888494.33, 30720558.23, 31220300,
                             32074919.83)
  if(!(is.numeric(time) | is.integer(time))) stop("Time must be a numeric vector")
  if(!(is.numeric(biomass) | is.integer(biomass))) stop("Biomass must be a numeric vector")
  length_time <- length(time)
  length_biomass <- length(biomass)
  
  if(length_time != length_biomass)
    stop( "time and biomass vectors must be of the same length")
  
  if(!is.null(curve_id) & length(curve_id) != length_biomass)
    stop("time and curve_id vectors must be of the same length")
  
  if (is.null(curve_id)) {
    df = data.frame(time = time, biomass = biomass)
  } else {
    df = data.frame(time = time, biomass = biomass, curve_id = curve_id)
  }
  
  expect_equal(make_grwoth_curve_df(time, biomass), df)
})