# tests/testthat/test-pbs.R
# PBS calculation and plotting tests

# --- plot_pbs using sample_pbs ---

test_that("plot_pbs produces expected layers with sample_pbs", {
  data("sample_pbs")

  p <- plot_pbs(data = sample_pbs)
  expect_gte(length(p$layers), 2) # At least geom_line and geom_point
})

test_that("plot_pbs handles empty data frame", {
  empty_df <- data.frame(
    activity_date = lubridate::as_date(character(0)),
    time_seconds = numeric(0),
    distance = numeric(0),
    is_pb = logical(0)
  )
  expect_error(plot_pbs(data = empty_df), "No PB data available to plot")
})

test_that("plot_pbs handles add_trend_line argument", {
  data("sample_pbs")

  p_trend <- plot_pbs(data = sample_pbs, add_trend_line = TRUE)
  p_no_trend <- plot_pbs(data = sample_pbs, add_trend_line = FALSE)

  has_smooth <- function(p) {
    sum(sapply(p$layers, function(l) inherits(l$geom, "GeomSmooth")))
  }
  expect_gt(has_smooth(p_trend), 0)
  expect_equal(has_smooth(p_no_trend), 0)
})

test_that("plot_pbs facets for multiple distances", {
  data("sample_pbs")

  if (length(unique(sample_pbs$distance)) > 1) {
    p <- plot_pbs(data = sample_pbs)
    expect_s3_class(p$facet, "FacetWrap")
    expect_equal(p$facet$params$free$y, TRUE,
      info = "Faceted PB plot should use free_y scales"
    )
  }
})

# ============================================================
# Numerical Value Validation
# ============================================================

test_that("sample_pbs has realistic pace ordering", {
  data("sample_pbs")

  pace_by_dist <- sample_pbs |>
    dplyr::group_by(distance) |>
    dplyr::summarise(
      avg_pace_per_km = mean(time_seconds / (distance / 1000)),
      .groups = "drop"
    ) |>
    dplyr::arrange(distance)

  for (i in 2:nrow(pace_by_dist)) {
    expect_true(
      pace_by_dist$avg_pace_per_km[i] > pace_by_dist$avg_pace_per_km[i - 1],
      info = sprintf(
        "Pace for %gm (%.1f s/km) should be slower than %gm (%.1f s/km)",
        pace_by_dist$distance[i], pace_by_dist$avg_pace_per_km[i],
        pace_by_dist$distance[i - 1], pace_by_dist$avg_pace_per_km[i - 1]
      )
    )
  }
})

test_that("sample_pbs individual data points satisfy pace ordering", {
  data("sample_pbs")

  multi_dist_dates <- sample_pbs |>
    dplyr::group_by(activity_date) |>
    dplyr::filter(dplyr::n_distinct(distance) > 1) |>
    dplyr::ungroup()

  if (nrow(multi_dist_dates) > 0) {
    per_point <- multi_dist_dates |>
      dplyr::mutate(pace_per_km = time_seconds / (distance / 1000)) |>
      dplyr::group_by(activity_date) |>
      dplyr::arrange(distance) |>
      dplyr::mutate(pace_ok = pace_per_km == cummax(pace_per_km)) |>
      dplyr::ungroup()

    expect_true(
      all(per_point$pace_ok),
      info = "Every data point must satisfy pace ordering"
    )
  }
})

test_that("sample_pbs PB flags are consistent with cumulative minima", {
  data("sample_pbs")

  pbs_check <- sample_pbs |>
    dplyr::group_by(distance) |>
    dplyr::arrange(activity_date) |>
    dplyr::mutate(
      expected_cummin = cummin(time_seconds),
      expected_is_pb = time_seconds == expected_cummin
    ) |>
    dplyr::ungroup()

  expect_equal(pbs_check$is_pb, pbs_check$expected_is_pb,
    info = "is_pb flags should match cumulative minimum of time_seconds"
  )
})

# ============================================================
# calculate_pbs Input Validation & Edge Cases
# ============================================================

test_that("calculate_pbs validates missing and invalid input", {
  expect_error(calculate_pbs(), "activities_data.*must be provided")
  expect_error(calculate_pbs(NULL), "activities_data.*must be provided")
  expect_error(calculate_pbs("not_a_df"), "must be a data frame")

  df_no_filename <- data.frame(date = Sys.Date(), type = "Run", distance = 5000)
  expect_error(calculate_pbs(df_no_filename, export_dir = tempdir()), "filename")
})

test_that("calculate_pbs returns empty with warning when no activities match", {
  activities <- load_extdata_activities()

  # Date range far in the future - no activities should match
  expect_warning(
    result <- calculate_pbs(activities,
      export_dir = tempdir(),
      start_date = "2099-01-01",
      end_date = "2099-01-31"
    ),
    "No activities meet the criteria"
  )
  expect_equal(nrow(result), 0)
})

test_that("calculate_pbs handles activity_type = NULL (no type filter)", {
  activities <- load_extdata_activities()

  result <- suppressWarnings(calculate_pbs(
    activities_data = activities,
    export_dir = extdata_dir,
    activity_type = NULL,
    distances_m = c(1000)
  ))
  expect_s3_class(result, "data.frame")
})

test_that("calculate_pbs handles invalid date arguments gracefully", {
  activities <- load_extdata_activities()

  result <- suppressWarnings(calculate_pbs(
    activities_data = activities,
    export_dir = extdata_dir,
    activity_type = "Run",
    end_date = "not-a-date",
    distances_m = c(1000)
  ))
  expect_s3_class(result, "data.frame")
})

test_that("calculate_pbs returns athlytics_pbs class with params", {
  activities <- load_extdata_activities()

  result <- suppressWarnings(calculate_pbs(
    activities_data = activities,
    export_dir = extdata_dir,
    activity_type = "Run",
    distances_m = c(1000, 5000)
  ))
  expect_s3_class(result, "data.frame")
  if (nrow(result) > 0) {
    expect_s3_class(result, "athlytics_pbs")
    expect_true(!is.null(attr(result, "params")))
    expect_contains(
      names(result),
      c("distance_label", "is_pb", "cumulative_pb_seconds")
    )
  }
})

# ============================================================
# find_best_effort Internal Tests
# ============================================================

test_that("find_best_effort returns NULL for insufficient data", {
  short_stream <- data.frame(
    time = as.POSIXct("2024-01-01") + 0:5,
    distance = 0:5
  )
  expect_null(Athlytics:::find_best_effort(short_stream, 1000))

  # Activity covers 500m but target is 1000m
  n <- 100
  stream_short_dist <- data.frame(
    time = as.POSIXct("2024-01-01") + 0:(n - 1),
    distance = seq(0, 500, length.out = n)
  )
  expect_null(Athlytics:::find_best_effort(stream_short_dist, 1000))
})

test_that("find_best_effort finds correct best effort in known data", {
  n <- 600
  # Constant 3 m/s for 600 seconds = 1800m total
  stream <- data.frame(
    time = as.POSIXct("2024-01-01 00:00:00") + 0:(n - 1),
    distance = seq(0, by = 3, length.out = n)
  )
  # Target 1000m at 3m/s should take ~333s
  result <- Athlytics:::find_best_effort(stream, 1000)
  expect_false(is.null(result))
  expect_true(abs(result$time_seconds - 333) < 5,
    info = sprintf("1km at 3m/s should take ~333s, got %.1f", result$time_seconds)
  )
})

# --- Integration tests with inst/extdata ---

test_that("calculate_pbs works with inst/extdata TCX files", {
  activities <- load_local_activities(extdata_csv)
  pbs_result <- suppressWarnings(calculate_pbs(
    activities_data = activities,
    export_dir = extdata_dir,
    activity_type = "Run",
    distances_m = c(1000, 3000, 5000)
  ))
  expect_s3_class(pbs_result, "data.frame")
  expect_contains(
    names(pbs_result),
    c("activity_id", "activity_date", "distance", "time_seconds", "is_pb")
  )
})

test_that("plot_pbs custom title and caption work", {
  data("sample_pbs")

  p <- plot_pbs(
    data = sample_pbs,
    title = "Custom Title",
    subtitle = "Custom Sub",
    caption = "Test caption"
  )
  expect_equal(p$labels$title, "Custom Title")
  expect_equal(p$labels$subtitle, "Custom Sub")
  expect_equal(p$labels$caption, "Test caption")
})

# ============================================================
# Regression tests — bug-fix guards grouped by version
# ============================================================

# --- zip support (v1.0.4) -----------------------------------------------

test_that("calculate_pbs accepts zip export_dir (regression)", {
  zip_path <- make_extdata_zip()
  on.exit(unlink(zip_path), add = TRUE)

  activities <- suppressWarnings(load_local_activities(zip_path))
  result <- suppressWarnings(calculate_pbs(
    activities_data = activities,
    export_dir = zip_path,
    activity_type = "Run",
    distances_m = c(1000)
  ))

  expect_s3_class(result, "data.frame")
  # Should not error; actual content may be empty depending on example files,
  # but the gate must not reject zip.
})

test_that("calculate_pbs rejects non-zip non-dir export_dir with a clear error", {
  expect_error(
    calculate_pbs(
      activities_data = data.frame(
        filename = character(),
        date = as.Date(character()),
        type = character(),
        distance = numeric()
      ),
      export_dir = "nonexistent_path_xyz"
    ),
    "directory or a .zip file"
  )
})

# --- Custom distance labels preserved (v1.0.4) --------------------------

test_that("calculate_pbs preserves custom distance labels like 3000m (regression)", {
  tmp_dir <- tempfile(pattern = "athlytics_pb_")
  dir.create(file.path(tmp_dir, "activities"), recursive = TRUE, showWarnings = FALSE)
  on.exit(unlink(tmp_dir, recursive = TRUE), add = TRUE)

  # 6km synthetic run so 1k, 3k, and 5k PBs all fit inside.
  tcx_path <- file.path(tmp_dir, "activities", "synthetic.tcx")
  write_synthetic_tcx(tcx_path, total_m = 6000, pace_s_per_m = 0.3)

  activities <- data.frame(
    id = 1L,
    name = "Synthetic run",
    type = "Run",
    date = as.Date("2025-01-01"),
    start_date_local = as.POSIXct("2025-01-01 00:00:00", tz = "UTC"),
    distance = 6000,
    moving_time = 1800L,
    elapsed_time = 1800L,
    average_heartrate = 150,
    filename = "activities/synthetic.tcx",
    stringsAsFactors = FALSE
  )

  result <- suppressWarnings(calculate_pbs(
    activities_data = activities,
    export_dir = tmp_dir,
    activity_type = "Run",
    distances_m = c(1000, 3000, 5000),
    start_date = as.Date("2024-01-01"),
    end_date = as.Date("2025-12-31")
  ))

  expect_s3_class(result, "data.frame")
  expect_gt(nrow(result), 0)

  # distance_label must not contain NA values
  expect_false(any(is.na(as.character(result$distance_label))),
    info = paste0(
      "distance_label contains NA; unique values: ",
      paste(unique(as.character(result$distance_label)), collapse = ", ")
    )
  )

  # 3000m row label must be "3000m", not NA
  rows_3000 <- result[result$distance == 3000, , drop = FALSE]
  expect_gt(nrow(rows_3000), 0)
  label_3000 <- as.character(rows_3000$distance_label)
  expect_true(all(label_3000 == "3000m"),
    info = sprintf("3000m row label expected '3000m', got: %s",
      paste(label_3000, collapse = ", "))
  )

  # factor levels must include "3000m"
  expect_true("3000m" %in% levels(result$distance_label))
})

# --- find_best_effort correctness & performance (v1.0.4) ----------------

test_that("find_best_effort interpolates target distance crossings (regression)", {
  # Stream sampled at 1 Hz, constant 3 m/s. Target = 1000 m → expected
  # elapsed time = 1000 / 3 ≈ 333.333 s. Nearest-row lookup would have
  # reported 334 s; linear interpolation recovers the fractional-second answer.
  d <- seq(0, 2000, by = 3)
  t <- seq(0, by = 1, length.out = length(d))
  stream <- data.frame(distance = d, time = t)

  eff <- Athlytics:::find_best_effort(stream, target_distance = 1000)

  expect_false(is.null(eff))
  expect_equal(eff$time_seconds, 1000 / 3, tolerance = 1e-6)
})

test_that("find_best_effort considers end-anchored interpolated starts", {
  stream <- data.frame(
    distance = 0:20,
    time = c(0:10, 10 + ((11:20) - 10) / 10)
  )

  eff <- Athlytics:::find_best_effort(stream, target_distance = 15.5)

  expect_false(is.null(eff))
  expect_equal(eff$time_seconds, 6.5, tolerance = 1e-9)
  expect_equal(eff$start_distance, 4.5, tolerance = 1e-9)
  expect_equal(eff$end_distance, 20, tolerance = 1e-9)
})

test_that("find_best_effort excludes distance plateaus from moving-time duration", {
  stream <- data.frame(
    distance = c(0:5, rep(5, 5), 6:20),
    time = 0:25
  )

  eff <- Athlytics:::find_best_effort(stream, target_distance = 15)

  expect_false(is.null(eff))
  expect_equal(eff$time_seconds, 15, tolerance = 1e-9)
})

test_that("GPS bounce-backs returning to the running max do not exclude moving time", {
  stream <- data.frame(
    distance = c(0:10, 9, 10, 11:25),
    time = c(seq(0, 100, by = 10), 110, 120, 121:135)
  )

  eff <- Athlytics:::find_best_effort(stream, target_distance = 15)

  expect_false(is.null(eff))
  expect_equal(eff$start_distance, 10, tolerance = 1e-9)
  expect_equal(eff$end_distance, 25, tolerance = 1e-9)
  expect_equal(eff$time_seconds, 35, tolerance = 1e-9)
})

test_that("find_best_effort drops non-monotonic distance samples (regression)", {
  # Inject a GPS bounce-back that previously manufactured fake sub-second
  # 100 m segments via pure nearest-row lookup. After the fix, the monotonic-
  # distance filter discards the bounce and returns the true ~33 s time.
  d_clean <- seq(0, 600, by = 3)
  t_clean <- seq(0, by = 1, length.out = length(d_clean))
  stream <- data.frame(
    distance = c(d_clean[1:50], 10, 20, 30, d_clean[51:length(d_clean)]),
    time = c(t_clean[1:50], t_clean[50] + 0.25,
      t_clean[50] + 0.5, t_clean[50] + 0.75,
      t_clean[51:length(d_clean)])
  )

  eff <- Athlytics:::find_best_effort(stream, target_distance = 100)

  expect_false(is.null(eff))
  # 100 m at 3 m/s = ~33.33 s; must not drop below 20 s due to GPS bounce.
  expect_gt(eff$time_seconds, 25)
})

test_that("find_best_effort remains fast on large streams (regression)", {
  # A 10-hour run (36000 rows) used to be O(n^2) in the number of start
  # indices. The bounded candidate sweep should remain comfortably below
  # a few seconds even in a cold R session.
  n <- 36000
  d <- seq(0, by = 3, length.out = n)
  t <- seq(0, by = 1, length.out = n)
  stream <- data.frame(distance = d, time = t)

  timing <- system.time({
    eff <- Athlytics:::find_best_effort(stream, target_distance = 5000)
  })

  expect_false(is.null(eff))
  # Generous bound to avoid flaky CI failures while still catching an
  # O(n^2) regression (the old algorithm took tens of seconds for n = 36000).
  expect_lt(as.numeric(timing["elapsed"]), 5)
})

# --- time_basis output column (v1.0.5) ----------------------------------

test_that("calculate_pbs output contract includes time_basis column", {
  # Empty-frame path still exercises the output contract shape.
  set.seed(6)
  empty <- calculate_pbs(
    activities_data = data.frame(
      id = integer(0),
      date = as.Date(character(0)),
      type = character(0),
      filename = character(0),
      distance = numeric(0),
      moving_time = numeric(0),
      stringsAsFactors = FALSE
    ),
    export_dir = tempdir(),
    distances_m = c(1000)
  ) |> suppressWarnings()

  expect_true("time_basis" %in% colnames(empty))
})

test_that("calculate_pbs no-best-efforts empty path keeps output columns", {
  activities <- data.frame(
    id = 1,
    name = "Missing detailed stream",
    date = as.Date("2024-01-01"),
    start_date_local = lubridate::as_datetime("2024-01-01 08:00:00"),
    type = "Run",
    filename = "missing_activity.tcx",
    distance = 5000,
    stringsAsFactors = FALSE
  )

  warnings <- character()
  result <- withCallingHandlers(
    calculate_pbs(
      activities_data = activities,
      export_dir = tempdir(),
      start_date = "2024-01-01",
      end_date = "2024-01-02",
      distances_m = c(1000)
    ),
    warning = function(w) {
      warnings <<- c(warnings, conditionMessage(w))
      invokeRestart("muffleWarning")
    }
  )

  expect_true(any(grepl("No best efforts could be calculated", warnings)))
  expect_equal(nrow(result), 0)
  expect_named(result, c(
    "activity_id",
    "activity_date",
    "distance",
    "elapsed_time",
    "moving_time",
    "time_seconds",
    "cumulative_pb_seconds",
    "is_pb",
    "distance_label",
    "time_period",
    "time_basis"
  ))
  expect_true(all(c("elapsed_time", "moving_time", "time_basis") %in% colnames(result)))
})

test_that("calculate_pbs accepts documented minimum activity columns structurally", {
  activities <- data.frame(
    date = as.Date("2024-01-01"),
    type = "Run",
    filename = "missing_activity.tcx",
    distance = 5000,
    stringsAsFactors = FALSE
  )

  expect_silent({
    result <- withCallingHandlers(
      calculate_pbs(
        activities_data = activities,
        export_dir = tempdir(),
        start_date = "2024-01-01",
        end_date = "2024-01-02",
        distances_m = c(1000)
      ),
      warning = function(w) invokeRestart("muffleWarning")
    )
  })
  expect_equal(nrow(result), 0)
  expect_named(result, c(
    "activity_id", "activity_date", "distance",
    "elapsed_time", "moving_time", "time_seconds",
    "cumulative_pb_seconds", "is_pb",
    "distance_label", "time_period", "time_basis"
  ))
})

# --- End-to-end PB extraction from a zip export_dir (v1.0.5) -----------

test_that("calculate_pbs reads streams from a zip export and emits v1.0.5 output columns (regression)", {
  # Build a self-contained .zip export so this test covers the full
  # read path (zip resolver in parse_activity_file() + zip-aware gate
  # in calculate_pbs()) rather than the unpacked-dir branch.
  staging <- tempfile(pattern = "athlytics_pb_zip_src_")
  dir.create(file.path(staging, "activities"), recursive = TRUE, showWarnings = FALSE)
  on.exit(unlink(staging, recursive = TRUE), add = TRUE)

  # 6 km synthetic run at 3 m/s → 1k / 3k / 5k PBs all fit inside.
  tcx_rel <- file.path("activities", "synthetic.tcx")
  write_synthetic_tcx(file.path(staging, tcx_rel),
    total_m = 6000, pace_s_per_m = 1 / 3, step_s = 1
  )

  # Write an activities.csv matching Strava's English-export schema so
  # load_local_activities() can consume it. The column set mirrors the
  # one shipped in inst/extdata/activities.csv (same 18 columns). Strava
  # stores Distance in METRES (not km), so a 6 km run is 6000.
  activities_csv <- data.frame(
    `Activity ID` = 1L,
    `Activity Date` = "Jan 1, 2025, 12:00:00 AM",
    `Activity Name` = "Synthetic PB run",
    `Activity Type` = "Run",
    `Elapsed Time` = 1800L,
    `Moving Time` = 1800L,
    `Distance` = 6000,
    `Filename` = tcx_rel,
    `Elevation Gain` = 0,
    `Elevation Loss` = 0,
    `Average Heart Rate` = 150,
    `Max Heart Rate` = 160,
    `Average Speed` = 3.0,
    `Max Speed` = 3.0,
    `Average Watts` = NA_real_,
    `Max Watts` = NA_real_,
    `Calories` = 500,
    `Relative Effort` = 50,
    check.names = FALSE,
    stringsAsFactors = FALSE
  )
  utils::write.csv(activities_csv,
    file = file.path(staging, "activities.csv"),
    row.names = FALSE
  )

  # Package staging/ as a .zip whose internal layout is
  # activities.csv + activities/synthetic.tcx. Use `-r` so the TCX
  # file *inside* the activities/ directory is actually included
  # (without -r, utils::zip writes the empty folder entry only).
  zip_path <- tempfile(fileext = ".zip")
  on.exit(unlink(zip_path), add = TRUE)
  old_wd <- getwd()
  setwd(staging)
  utils::zip(zip_path, files = c("activities.csv", "activities"), flags = "-rq")
  setwd(old_wd)

  activities <- suppressWarnings(load_local_activities(zip_path))
  skip_if(nrow(activities) == 0,
    "load_local_activities could not read synthetic csv; unrelated to PB fix"
  )

  result <- suppressWarnings(calculate_pbs(
    activities_data = activities,
    export_dir = zip_path,
    activity_type = "Run",
    distances_m = c(1000, 3000, 5000),
    start_date = as.Date("2024-01-01"),
    end_date = as.Date("2025-12-31")
  ))

  expect_s3_class(result, "data.frame")
  expect_gt(nrow(result), 0)
  # v1.0.5 output contract
  expect_true(all(
    c("distance_label", "time_basis", "time_seconds", "is_pb")
      %in% colnames(result)
  ))
  expect_true(all(result$time_basis == "moving"))
  # custom 3000m label must reach the output (tied to the zip path)
  expect_true("3000m" %in% as.character(result$distance_label))
})