# tests/testthat/test-synthetic_data.R

# Checks the categorical ('cat') output of generate_X_df(): it must be a
# data.frame with one distinct id per requested individual, time stamps whose
# minimum/maximum equal start/end, and a `state` column restricted to 0/1.
test_that("generate_X_df correctly generates CFD data", {
  nind <- 10
  start <- 0
  end <- 100

  # Generation for categorical data
  df <- generate_X_df(
    nind = nind, start = start, end = end, curve_type = "cat"
  )

  expect_s3_class(df, "data.frame")
  expect_equal(length(unique(df$id)), nind)
  # Check time bounds
  expect_equal(min(df$time), start)
  expect_equal(max(df$time), end)
  # The state column must exist: all(NULL %in% c(0, 1)) is TRUE, so without
  # this guard the binary check below would pass vacuously on a missing column.
  expect_true("state" %in% names(df))
  # Check binary states ([[ ]] is an exact lookup, unlike `$` which may
  # partially match another column name on a data.frame)
  expect_true(all(df[["state"]] %in% c(0, 1)))
})

# SFD ('num') generation: the result must be a data.frame holding one distinct
# id per requested individual and containing a `value` column.
test_that("generate_X_df correctly generates SFD data", {
  n_curves <- 5
  # Generate scalar functional data (noise_sd = 0.1 is forwarded as given)
  sfd <- generate_X_df(nind = n_curves, curve_type = "num", noise_sd = 0.1)

  expect_s3_class(sfd, "data.frame")

  # Count the distinct individuals present in the output
  distinct_ids <- unique(sfd$id)
  expect_equal(length(distinct_ids), n_curves)

  # For SFD, column should be 'value'
  expect_true(is.element("value", names(sfd)))
})

# generate_Y_df() on categorical input: the response has one row per
# individual, Y_noised equals Y_real when NotS_ratio = 0, and the two columns
# are no longer identical when NotS_ratio = 0.2.
test_that("generate_Y_df calculates Y with correct dimensions and noise", {
  n_subjects <- 20

  # Minimal seeded CFD input shared by both scenarios
  x_cat <- generate_X_df(nind = n_subjects, curve_type = "cat", seed = 123)

  slope_fn <- beta_1_real_func
  intercept <- 5.4321

  # Both scenarios differ only in the noise ratio, so wrap the call once
  simulate_y <- function(ratio) {
    generate_Y_df(
      x_cat,
      curve_type = "cat",
      beta_real_func_or_list = slope_fn,
      beta_0_real = intercept,
      NotS_ratio = ratio
    )
  }

  # Scenario 1: no noise requested -> real and noised responses coincide
  y_clean <- simulate_y(0)
  expect_equal(nrow(y_clean), n_subjects)
  expect_equal(y_clean$Y_real, y_clean$Y_noised)

  # Scenario 2: non-zero noise ratio -> noised response must differ
  y_perturbed <- simulate_y(0.2)
  same_response <- identical(y_perturbed$Y_real, y_perturbed$Y_noised)
  expect_false(same_response)
})

# Checks that generate_X_df_multistates() only emits state labels within
# 1..N_states when fed the `lambdas` returned by lambda_determination() and
# the `transition_df` returned by transfer_probabilities().
test_that("generate_X_df_multistates respects N_states", {
  N_states <- 3
  lambdas <- lambda_determination(N_states)
  transition_df <- transfer_probabilities(N_states)

  df_multi <- generate_X_df_multistates(nind = 5, N_states = N_states,
                                        lambdas = lambdas,
                                        transition_df = transition_df)

  # Guard against a vacuous pass: all(logical(0)) is TRUE, so a missing or
  # empty state column would otherwise satisfy the range check below.
  expect_true("state" %in% names(df_multi))
  states <- df_multi[["state"]]
  expect_gt(length(states), 0)

  # Check that all states are within the expected range
  expect_true(all(states >= 1 & states <= N_states))
})

# Reproducibility: generating categorical data twice with the same seed must
# give strictly identical data frames.
test_that("Synthetic data reproducibility via seed", {
  # Same arguments and same seed for every draw
  draw <- function() {
    generate_X_df(nind = 5, curve_type = "cat", seed = 42)
  }

  first_draw <- draw()
  second_draw <- draw()

  expect_identical(first_draw, second_draw)
})

# Each real beta function, evaluated on a vector of time points, must return
# a double vector with one value per time point.
test_that("beta functions return expected types and shapes", {
  # Evaluation grid: 0, 10, ..., 100
  grid <- seq(0, 100, 10)
  n_points <- length(grid)

  beta_1_out <- beta_1_real_func(grid)
  expect_type(beta_1_out, "double")
  expect_length(beta_1_out, n_points)

  beta_2_out <- beta_2_real_func(grid)
  expect_type(beta_2_out, "double")
  expect_length(beta_2_out, n_points)

  beta_3_out <- beta_3_real_func(grid)
  expect_type(beta_3_out, "double")
  expect_length(beta_3_out, n_points)
})

#### State_indicator_functions ####
# state_indicator() on a two-state trajectory: the result must contain one
# 0/1 column per state (state_A, state_B) matching the state held at each row.
test_that("state_indicator correctly transforms multi-state data", {
  # One individual: A at t=0, B at t=10, A again at t=20, final row at t=30
  obs_times <- c(0, 10, 20, 30)
  obs_states <- c("A", "B", "A", "A")
  one_subject <- data.frame(id = 1, time = obs_times, state = obs_states)

  indicators <- state_indicator(
    one_subject,
    id_col = "id",
    time_col = "time"
  )

  # Both indicator columns must be present in the output
  wanted_cols <- c("state_A", "state_B")
  expect_true(all(wanted_cols %in% names(indicators)))

  # Row by row: state_A is 1 wherever the state is "A", state_B wherever "B"
  expect_equal(indicators$state_A, c(1, 0, 1, 1))
  expect_equal(indicators$state_B, c(0, 1, 0, 0))
})

# cat_data_to_indicator() on a three-state trajectory: the result must be a
# list with one named element per state, and the element for state 2 must be
# a four-row data frame whose `state` values are all 0 or 1.
test_that("cat_data_to_indicator preserves timing and data structure", {
  # One individual visiting states 1 -> 2 -> 3 -> 1, with a final row at t=30
  event_times <- c(0, 5, 15, 25, 30)
  visited <- c(1, 2, 3, 1, 1)
  raw_path <- data.frame(
    id = rep(1, length(event_times)),
    time = event_times,
    state = visited
  )

  # Full processing pipeline
  per_state <- cat_data_to_indicator(
    raw_path,
    id_col = "id",
    time_col = "time"
  )

  # 1. List structure: one entry per state, named state_<label>
  expect_type(per_state, "list")
  expect_length(per_state, 3)
  expect_named(per_state, c("state_1", "state_2", "state_3"))

  # 2. Duplicate removal (presumably done by remove_duplicate_states, which
  # is not visible from this file). In the input, state 2 is entered at t=5
  # and left at t=15; the expected rows are t=0 (0), t=5 (1), t=15 (0) and
  # t=30 (0).
  state_2_path <- per_state$state_2
  expect_equal(nrow(state_2_path), 4)

  # 3. The per-state data frame only carries 0/1 indicator values
  expect_true(all(is.element(state_2_path$state, c(0, 1))))
})

# Character state labels: the returned elements must be named after the
# labels themselves (state_rest, state_run). The call relies on the default
# column arguments of cat_data_to_indicator().
test_that("cat_data_to_indicator handles character states correctly", {
  activity_labels <- c("rest", "run", "run")
  activity_path <- data.frame(
    id = 1,
    time = c(0, 10, 20),
    state = activity_labels
  )

  indicator_list <- cat_data_to_indicator(activity_path)

  expect_named(indicator_list, c("state_rest", "state_run"))
})

# Input validation: a data frame with four columns (id, time, state, extra)
# must make cat_data_to_indicator() fail with the three-column error message.
test_that("cat_data_to_indicator stops on invalid column number", {
  four_cols <- data.frame(id = 1, time = 0, state = 1, extra = 99)
  expected_msg <- "The dataframe should have 3 columns"

  expect_error(cat_data_to_indicator(four_cols), regexp = expected_msg)
})