test_that("extract_states basic functionality with existing column", {
  data_test <- tibble::tibble(
    Id = rep("A", 6),
    Datetime = lubridate::as_datetime(0) + lubridate::hours(0:5),
    Status = c("On", "On", "Off", "Off", "Off", "On")
  ) %>% dplyr::group_by(Id)
  
  result <- extract_states(data_test, Status, epoch = "1 hour")
  
  expect_s3_class(result, "tbl_df")
  expect_equal(nrow(result), 3)
  expect_named(result, c("Id", "Status", "state.count", "epoch", "start", "end", "duration"))
  
  expect_equal(result$Status, c("Off", "On", "On"))
  expect_equal(result$state.count, c("Off 1", "On 1", "On 2")) # Renumbered within Id-Status
  expect_equal(result$epoch, rep(lubridate::duration(1, "hour"), 3))
  
  # Check start/end times (adjusted by 0.5 * epoch)
  # On: 0-1h -> start = 0h - 0.5h, end = 1h + 0.5h
  expect_equal(result$start[2], lubridate::as_datetime(0) - lubridate::duration(30, "minutes"))
  expect_equal(result$end[2], lubridate::as_datetime(0) + lubridate::hours(1) + lubridate::duration(30, "minutes"))
  expect_equal(result$duration[2], lubridate::duration(2, "hours")) # (1h + 0.5h) - (0h - 0.5h) + epoch for interval counting
  
  # Off: 2-4h -> start = 2h - 0.5h, end = 4h + 0.5h
  expect_equal(result$start[1], lubridate::as_datetime(0) + lubridate::hours(2) - lubridate::duration(30, "minutes"))
  expect_equal(result$end[1], lubridate::as_datetime(0) + lubridate::hours(4) + lubridate::duration(30, "minutes"))
  expect_equal(result$duration[1], lubridate::duration(3, "hours")) # (4h+0.5h) - (2h-0.5h)
  
  # On: 5h -> start = 5h - 0.5h, end = 5h + 0.5h
  expect_equal(result$start[3], lubridate::as_datetime(0) + lubridate::hours(5) - lubridate::duration(30, "minutes"))
  expect_equal(result$end[3], lubridate::as_datetime(0) + lubridate::hours(5) + lubridate::duration(30, "minutes"))
  expect_equal(result$duration[3], lubridate::duration(1, "hour"))
})

test_that("extract_states with State.expression", {
  data_test <- tibble::tibble(
    Datetime = lubridate::as_datetime(0) + lubridate::days(0:4),
    Value = c(5, 15, 8, 12, 3)
  )
  
  result <- extract_states(data_test, ActivityState, Value > 10, epoch = "1 day")
  expect_named(result, c("ActivityState", "state.count", "epoch", "start", "end", "duration"))
  expect_equal(nrow(result), 5) # F, T, F, T, F
  expect_equal(result$ActivityState, c(FALSE, FALSE, FALSE, TRUE, TRUE))
  expect_equal(result$state.count, c("FALSE 1", "FALSE 2", "FALSE 3", "TRUE 1", "TRUE 2"))
  
  # Check TRUE state (Value > 10 at day 1, and day 3)
  # Day 1: Value = 15 (TRUE)
  true_state1 <- result %>% dplyr::filter(ActivityState == TRUE, grepl("TRUE 1", state.count))
  expect_equal(true_state1$start, lubridate::as_datetime(0) + lubridate::days(1) - lubridate::duration(12, "hours"))
  expect_equal(true_state1$end, lubridate::as_datetime(0) + lubridate::days(1) + lubridate::duration(12, "hours"))
  expect_equal(true_state1$duration, lubridate::duration(1, "day"))
})

test_that("extract_states grouping behavior", {
  data_test <- tibble::tibble(
    Id = rep(c("G1", "G2"), each = 3),
    Datetime = lubridate::as_datetime(0) + lubridate::hours(rep(0:2, 2)),
    Status = c("A", "A", "B", "C", "C", "C")
  ) %>% dplyr::group_by(Id)
  
  # Default: group.by.state = TRUE
  res_grp_true <- extract_states(data_test, Status, epoch = "1 hour")
  expect_equal(dplyr::group_vars(res_grp_true), c("Id", "Status"))
  expect_equal(nrow(res_grp_true), 3) # (G1,A), (G1,B), (G2,C)
  
  # group.by.state = FALSE
  res_grp_false <- extract_states(data_test, Status, epoch = "1 hour", group.by.state = FALSE)
  expect_equal(dplyr::group_vars(res_grp_false), "Id")
  expect_equal(nrow(res_grp_false), 3)
  # Check order within Id group
  g1_ordered <- res_grp_false %>% dplyr::filter(Id == "G1")
  expect_equal(g1_ordered$Status, c("A", "B")) # Ordered by start time
})

test_that("extract_states with handle.gaps", {
  data_gappy <- tibble::tibble(
    Datetime = lubridate::as_datetime(0) + lubridate::hours(c(0, 1, 3)), # Gap at 2h
    Value = c(100, 200, 50)
  )
  # With handle.gaps = FALSE (default)
  res_no_handle <- extract_states(data_gappy, State, Value > 75, epoch = "1 hour")
  # States: T, T, F. Should be 2 state episodes (T, F)
  expect_equal(nrow(res_no_handle), 2)
  expect_equal(res_no_handle$State, c(FALSE, TRUE))
  
  # With handle.gaps = TRUE
  # Data becomes: 0h(100,T), 1h(200,T), 2h(NA,NA), 3h(50,F)
  # States from Value > 75: T, T, NA, F.
  # consecutive_id(c(T,T,NA,F)) -> 1,1,2,3
  # State column will have NA where Value is NA (from gap_handler)
  # So, expression Value > 75 yields NA for the gap-filled row.
  # State: T, T, NA, F.
  # Episodes: T (0-1h), NA (2h), F (3h)
  res_handle <- extract_states(data_gappy, State, Value > 75, epoch = "1 hour", handle.gaps = TRUE)
  expect_equal(nrow(res_handle), 3)
  expect_true(all(res_handle$State %in% c(TRUE, FALSE, NA))) # One state is NA
  na_state_row <- res_handle %>% dplyr::filter(is.na(State))
  expect_equal(na_state_row$start, lubridate::as_datetime(0) + lubridate::hours(2) - lubridate::duration(30, "min"))
})

test_that("extract_states with different epoch settings", {
  data_test <- tibble::tibble(
    Datetime = lubridate::as_datetime(0) + lubridate::days(0:1),
    Status = c("X", "X")
  )
  # Default epoch ("dominant.epoch" should be 1 day)
  res_dom <- extract_states(data_test, Status) # epoch default is dominant.epoch
  expect_equal(res_dom$epoch, lubridate::duration(1, "day"))
  expect_equal(res_dom$duration, lubridate::duration(2, "days")) # (1d+0.5d) - (0d-0.5d)
  
  # String epoch
  res_str <- extract_states(data_test, Status, epoch = "1 day")
  expect_equal(res_str$epoch, lubridate::duration(1, "day"))
  
  # Duration object epoch
  res_dur_obj <- extract_states(data_test, Status, epoch = lubridate::duration(1, "day"))
  expect_equal(res_dur_obj$epoch, lubridate::duration(1, "day"))
})

test_that("extract_states with drop.empty.groups", {
  data_test_deg <- tibble::tibble(
    Category = factor(c("P1", "P1", "P2"), levels = c("P1", "P2", "P3")), # P3 is empty
    Datetime = lubridate::as_datetime(0) + lubridate::hours(0:2),
    Value = c(1,1,2)
  ) %>% dplyr::group_by(Category)
  
  # Default drop.empty.groups = TRUE
  res_drop_true <- extract_states(data_test_deg, MyState, Value > 0, epoch = "1 hour")
  expect_equal(length(unique(res_drop_true$Category)), 2) # P1, P2
  expect_false("P3" %in% res_drop_true$Category)
  
  # drop.empty.groups = FALSE
  expect_warning(
    res_drop_false <- extract_states(data_test_deg, MyState, Value > 0, epoch = "1 hour", drop.empty.groups = FALSE)
  )
  # When P3 is processed, Value > 0 will be on empty data. `dplyr::consecutive_id` on empty is empty.
  expect_equal(length(unique(res_drop_false$Category)), 3)
  expect_true("P3" %in% res_drop_false$Category)
  
  # Test with a group that becomes empty AFTER state extraction (all NA state)
  data_test_deg2 <- tibble::tibble(
    Category = c("P1", "P1", "P2", "P2"),
    Datetime = lubridate::as_datetime(0) + lubridate::hours(0:3),
    Value = c(1, 1, NA, NA) # P2 will have all NA states for Value > 0
  ) %>% dplyr::group_by(Category)
  # State for P2 will be NA.
  res_deg2_true <- extract_states(data_test_deg2, MyState, Value > 0, epoch = "1 hour", drop.empty.groups = TRUE)
  expect_equal(nrow(res_deg2_true), 2) # (P1, TRUE=1), (P2, NA=1)
  expect_true("P2" %in% res_deg2_true$Category) # P2 has an NA state.
  
  res_deg2_false <- extract_states(data_test_deg2, MyState, Value > 0, epoch = "1 hour", drop.empty.groups = FALSE)
  expect_equal(nrow(res_deg2_false), 2)
  expect_true("P2" %in% res_deg2_false$Category)
})

test_that("extract_states with single observation group returns NA epoch/duration", {
  data_single_obs_group <- tibble::tibble(
    Id = c("X", "Y", "Y"), # Group X has one obs
    Datetime = lubridate::as_datetime(0) + lubridate::days(0:2),
    Status = c("On", "Off", "Off")
  ) %>% dplyr::group_by(Id)
  
  result <- extract_states(data_single_obs_group, Status)
  row_X <- result %>% dplyr::filter(Id == "X")
  expect_equal(nrow(row_X), 1)
  expect_true(is.na(row_X$epoch))
  # Duration for X: (day0 + NA) - (day0 - NA) = NA
  expect_true(is.na(row_X$duration) || row_X$duration == lubridate::duration(0)) # depends on how NA*0.5 behaves. lubridate::as.duration(NA) is NA s
  expect_true(is.na(row_X$start))
  expect_true(is.na(row_X$end))
  
  
  row_Y <- result %>% dplyr::filter(Id == "Y")
  expect_equal(nrow(row_Y), 1) # One state "Off" for Y
  expect_false(is.na(row_Y$epoch)) # Epoch should be 1 day for Y
  expect_false(is.na(row_Y$duration))
})


# Tests for add_states

test_that("add_states basic functionality", {
  main_data <- tibble::tibble(
    BirdID = "B1",
    Timestamp = lubridate::as_datetime(0) + lubridate::hours(0:3),
    Temp = c(20,21,20,19)
  ) %>% dplyr::group_by(BirdID)
  
  states_data <- tibble::tibble(
    BirdID = "B1",
    start = lubridate::as_datetime(0) + lubridate::hours(c(0, 2)),
    end = lubridate::as_datetime(0) + lubridate::hours(c(1, 3)),
    Behavior = c("Fly", "Eat"),
    Intensity = c(5, 3)
  ) # No group needed if main has only one group that matches
  
  result <- add_states(main_data, states_data, Datetime.colname = Timestamp)
  
  expect_s3_class(result, "tbl_df")
  expect_equal(nrow(result), 4) # Same as main_data
  expect_true(all(c("Behavior", "Intensity") %in% names(result)))
  expect_false("start" %in% names(result))
  expect_false("end" %in% names(result))
  
  # Check values:
  # Timestamp 0h: Behavior=Fly, Intensity=5
  # Timestamp 1h: Behavior=Fly, Intensity=5
  # Timestamp 2h: Behavior=Eat, Intensity=3
  # Timestamp 3h: Behavior=Eat, Intensity=3
  expect_equal(result$Behavior, c("Fly", "Fly", "Eat", "Eat"))
  expect_equal(result$Intensity, c(5, 5, 3, 3))
})

test_that("add_states with multiple groups and NAs for non-matching times", {
  main_data <- tibble::tibble(
    Id = rep(c("A", "B"), each = 3),
    Datetime = lubridate::as_datetime(0) + lubridate::hours(rep(0:2, 2)),
    Value = 1:6
  ) %>% dplyr::group_by(Id)
  
  states_data <- tibble::tibble(
    Id = c("A", "B"),
    start = lubridate::as_datetime(0) + lubridate::hours(c(1, 0)), # A: 1-1, B: 0-0
    end = lubridate::as_datetime(0) + lubridate::hours(c(1, 0)),
    StateInfo = c("StateA_Mid", "StateB_Start")
  ) %>% dplyr::group_by(Id) # Match grouping
  
  result <- add_states(main_data, states_data)
  # For Id A: Datetime 0,1,2. State defined for 1h.
  # For Id B: Datetime 0,1,2. State defined for 0h.
  expect_equal(result$StateInfo[result$Id == "A"], c(NA, "StateA_Mid", NA))
  expect_equal(result$StateInfo[result$Id == "B"], c("StateB_Start", NA, NA))
})

test_that("add_states with custom column names and leave.out", {
  main_data <- tibble::tibble(
    DT = lubridate::as_datetime(0) + lubridate::hours(0:1)
  )
  states_data <- tibble::tibble(
    From = lubridate::as_datetime(0) + lubridate::hours(0),
    To = lubridate::as_datetime(0) + lubridate::hours(0),
    State = "S1",
    ExtraCol = "Keep",
    RemoveThis = "Remove"
  )
  result <- add_states(main_data, states_data,
                       Datetime.colname = DT,
                       start.colname = From, end.colname = To,
                       leave.out = "RemoveThis")
  expect_true("State" %in% names(result))
  expect_true("ExtraCol" %in% names(result))
  expect_false("RemoveThis" %in% names(result))
  expect_false("From" %in% names(result))
  expect_false("To" %in% names(result))
})

test_that("add_states with force.tz", {
  main_data_utc <- tibble::tibble(
    Timestamp = lubridate::as_datetime("2023-01-01 05:30:00", tz = "UTC") # 5:30 UTC
  )
  states_data_nyc <- tibble::tibble(
    start = lubridate::as_datetime("2023-01-01 00:00:00", tz = "America/New_York"), # 00:00 NYC = 05:00 UTC
    end = lubridate::as_datetime("2023-01-01 01:00:00", tz = "America/New_York"),   # 01:00 NYC = 06:00 UTC
    Event = "E_NYC"
  )
  
  # force.tz = FALSE (default). Timestamp 05:30 UTC. State NYC 00:00-01:00 (UTC 05:00-06:00). Match.
  res_no_force <- add_states(main_data_utc, states_data_nyc, Datetime.colname = Timestamp)
  expect_equal(res_no_force$Event, "E_NYC")
  
  # force.tz = TRUE. Start/end of states_data_nyc converted to UTC *as if they were UTC originally*.
  # So 00:00 NYC becomes 00:00 UTC. 01:00 NYC becomes 01:00 UTC. No match for 05:30 UTC.
  # This is how lubridate::with_tz works, which is used if force.tz=T.
  # The description says "forced to the same time zone ... using lubridate::force_tz()" - this is for `with_tz`.
  # `force_tz` changes the tz attribute without changing clock time. `with_tz` changes clock time to match new tz.
  # The code uses `with_tz`.
  # Original state: 2023-01-01 00:00:00 EST (UTC-5) to 2023-01-01 01:00:00 EST.
  # `with_tz(..., tzone = "UTC")` converts these to:
  # 2023-01-01 05:00:00 UTC to 2023-01-01 06:00:00 UTC. This is the same as no_force.
  # My understanding of the example was confused. with_tz correctly converts.
  # So, force.tz=TRUE should behave identically to force.tz=FALSE if timezones are handled correctly by join.
  # The purpose of force.tz=T using `with_tz` is to ensure start/end are in the same TZ as dataset$Datetime for the join logic.
  # If join correctly handles differing timezones (it should by converting to common internal representation),
  # then force.tz might only matter if there's an issue with that or for clarity.
  # Let's re-verify:
  # main_data_utc$Timestamp is 2023-01-01 05:30:00 UTC.
  # states_data_nyc start is 2023-01-01 00:00:00 America/New_York.
  # states_data_nyc end is 2023-01-01 01:00:00 America/New_York.
  # If force.tz=TRUE, states_data_nyc start becomes `with_tz(start, "UTC")` -> 2023-01-01 05:00:00 UTC.
  # states_data_nyc end becomes `with_tz(end, "UTC")` -> 2023-01-01 06:00:00 UTC.
  # Join condition: 05:30:00 UTC >= 05:00:00 UTC AND 05:30:00 UTC <= 06:00:00 UTC. This is TRUE.
  # So result should be "E_NYC".
  res_force <- add_states(main_data_utc, states_data_nyc, Datetime.colname = Timestamp, force.tz = TRUE)
  expect_equal(res_force$Event, "E_NYC")
  
  # What if main_data is NYC and states_data is UTC?
  main_data_nyc <- tibble::tibble(
    Timestamp = lubridate::as_datetime("2023-01-01 00:30:00", tz = "America/New_York") # 00:30 NYC
  )
  states_data_utc <- tibble::tibble(
    start = lubridate::as_datetime("2023-01-01 05:00:00", tz = "UTC"), # 05:00 UTC
    end = lubridate::as_datetime("2023-01-01 06:00:00", tz = "UTC"),   # 06:00 UTC
    Event = "E_UTC"
  )
  # force.tz=F: main is 00:30 NYC. states is 05:00-06:00 UTC.
  # Join compares these. 00:30 NYC is 05:30 UTC. Match.
  res_nf2 <- add_states(main_data_nyc, states_data_utc, Datetime.colname = Timestamp, force.tz = FALSE)
  expect_equal(res_nf2$Event, "E_UTC")
  
  # force.tz=T: states_data_utc start/end (already UTC) converted to "America/New_York"
  # start: 05:00 UTC -> `with_tz(start, "America/New_York")` -> 00:00 NYC
  # end:   06:00 UTC -> `with_tz(end, "America/New_York")` -> 01:00 NYC
  # Join condition: 00:30 NYC >= 00:00 NYC AND 00:30 NYC <= 01:00 NYC. TRUE.
  res_f2 <- add_states(main_data_nyc, states_data_utc, Datetime.colname = Timestamp, force.tz = TRUE)
  expect_equal(res_f2$Event, "E_UTC")
  # The force.tz logic seems to ensure all times are in a consistent TZ before comparison, which is good practice.
})

test_that("add_states error handling for inputs", {
  df <- tibble::tibble(Datetime = Sys.time())
  states_df <- tibble::tibble(start=Sys.time(), end=Sys.time()+1)
  
  expect_error(add_states(list(), states_df), "dataset is not a dataframe")
  expect_error(add_states(df, list()), "States.dataset is not a dataframe")
  expect_error(add_states(df, states_df, Datetime.colname = BadName), "Datetime.colname must be part of the dataset")
  expect_error(add_states(df, states_df, start.colname = BadName), "start.colname must be part of the States.dataset")
  expect_error(add_states(df, states_df, end.colname = BadName), "end.colname must be part of the States.dataset")
  expect_error(add_states(df, states_df, leave.out = 123), "leave.out must be a character vector")
  expect_error(add_states(df, states_df, force.tz = "TRUE"), "force.tz must be a logical")
  
  df_bad_dt <- tibble::tibble(Datetime = "2023-01-01")
  expect_error(add_states(df_bad_dt, states_df), "Datetime.colname must be a POSIXct")
  states_df_bad_start <- tibble::tibble(start="2023-01-01", end=Sys.time())
  expect_error(add_states(df, states_df_bad_start), "start.colname must be a POSIXct")
  states_df_bad_end <- tibble::tibble(start=Sys.time(), end="2023-01-01")
  expect_error(add_states(df, states_df_bad_end), "end.colname must be a POSIXct")
  
  # Grouping variables check
  df_grp <- tibble::tibble(Id="A", Datetime=Sys.time()) %>% dplyr::group_by(Id)
  states_df_no_grp <- tibble::tibble(start=Sys.time(), end=Sys.time()+1, State="S")
  expect_error(add_states(df_grp, states_df_no_grp), "The grouping variables in the dataset must be present in the States.dataset")
})

test_that("add_states handles column name conflicts (suffixing)", {
  main_data <- tibble::tibble(Id = "A", Timestamp = Sys.time(), Value = 10) %>% dplyr::group_by(Id)
  states_data <- tibble::tibble(Id = "A", start = Sys.time()-10, end = Sys.time()+10, Value = 20, State = "S1") %>% dplyr::group_by(Id)
  
  result <- add_states(main_data, states_data, Datetime.colname = Timestamp)
  expect_true("Value.x" %in% names(result)) # From main_data
  expect_true("Value.y" %in% names(result)) # From states_data
  expect_equal(result$Value.x, 10)
  expect_equal(result$Value.y, 20)
  expect_equal(result$State, "S1")
})

test_that("add_states where a Datetime falls into multiple state intervals", {
  # This tests how dplyr::left_join handles multiple matches with non-equi join conditions
  # It should duplicate the row from the left table (dataset) for each match.
  main_data <- tibble::tibble(
    Id = "M1",
    Datetime = lubridate::as_datetime(0) + lubridate::hours(1) # Single point: 1h
  ) %>% dplyr::group_by(Id)
  
  states_data <- tibble::tibble(
    Id = "M1",
    start = lubridate::as_datetime(0) + lubridate::hours(c(0, 0)), # Both states cover 1h
    end   = lubridate::as_datetime(0) + lubridate::hours(c(2, 3)),
    StateName = c("State_A_0_2", "State_B_0_3"),
    Detail    = c("DetailA", "DetailB")
  ) %>% dplyr::group_by(Id)
  
  result <- add_states(main_data, states_data)
  expect_equal(nrow(result), 2) # main_data row duplicated
  expect_equal(result$Datetime, rep(lubridate::as_datetime(0) + lubridate::hours(1), 2))
  expect_equal(sort(result$StateName), sort(c("State_A_0_2", "State_B_0_3")))
  expect_equal(sort(result$Detail), sort(c("DetailA", "DetailB")))
})