# ── clean_tags ─────────────────────────────────────────────────────────────────

# NOTE(review): `make_valid_subtitles()` is a fixture helper defined outside
# this file (presumably a testthat helper) -- confirm it is loaded before these
# tests run. Most tests overwrite the first row's text so that the assertions do
# not depend on the fixture's original content.

test_that("clean_tags removes srt HTML-style tags", {
  s <- make_valid_subtitles()
  # The input must really contain markup; otherwise the `<` check below would
  # pass even if clean_tags() did nothing.
  s$Text_content[1] <- "<i>Hello</i> world"
  result <- clean_tags(s, format = "srt")
  expect_false(grepl("<", result$Text_content[1]))
  # The visible text must survive the tag removal.
  expect_true(grepl("Hello", result$Text_content[1]))
})

test_that("clean_tags removes ASS/SSA curly-brace tags", {
  # Start from the example SubStation file shipped with the package.
  f <- system.file("extdata", "ex_substation.ass", package = "subtools")
  s <- read_subtitles(f)
  s$Text_content[1] <- "{\\an8}Some text"
  result <- clean_tags(s, format = "ass")
  expect_false(grepl("\\{", result$Text_content[1]))
  expect_true(grepl("Some text", result$Text_content[1]))
})

test_that("clean_tags format = 'all' removes both srt and ass tags", {
  s <- make_valid_subtitles()
  # Mix both tag families in one line so that each removal path is exercised.
  s$Text_content[1] <- "{\\an8}<b>Mixed</b>"
  result <- clean_tags(s, format = "all")
  expect_false(grepl("<|\\{", result$Text_content[1]))
})

test_that("clean_tags accepts webvtt as an alias for srt tag removal", {
  s <- make_valid_subtitles()
  s$Text_content[1] <- "<b>Bold</b>"
  result <- clean_tags(s, format = "vtt")
  expect_false(grepl("<", result$Text_content[1]))
})

test_that("clean_tags clean.empty = FALSE keeps empty rows", {
  s <- make_valid_subtitles()
  # A tag-only line is expected to become empty once the tags are stripped.
  s$Text_content[1] <- "<i></i>"

  result_keep <- clean_tags(s, format = "srt", clean.empty = FALSE)
  expect_equal(nrow(result_keep), nrow(s))

  result_drop <- clean_tags(s, format = "srt", clean.empty = TRUE)
  expect_lt(nrow(result_drop), nrow(s))
})

test_that("clean_tags works on a multisubtitles object", {
  s <- make_valid_subtitles()
  # collapse = FALSE keeps the two inputs as separate elements.
  multi <- bind_subtitles(s, s, collapse = FALSE)
  result <- clean_tags(multi, format = "srt")
  expect_s3_class(result, "multisubtitles")
  expect_length(result, 2L)
})

test_that("clean_tags errors on non-subtitles input", {
  expect_error(clean_tags(data.frame()), "subtitles")
})

# ── clean_captions ─────────────────────────────────────────────────────────────

test_that("clean_captions removes parenthesised text", {
  s <- make_valid_subtitles()
  s$Text_content[1] <- "Hello (background noise) world"
  result <- clean_captions(s)
  expect_false(grepl("\\(", result$Text_content[1]))
  expect_true(grepl("Hello", result$Text_content[1]))
})

test_that("clean_captions removes square-bracket text", {
  s <- make_valid_subtitles()
  s$Text_content[1] <- "Hello [MUSIC] world"
  result <- clean_captions(s)
  expect_false(grepl("\\[", result$Text_content[1]))
})

test_that("clean_captions clean.empty = FALSE keeps rows that become empty", {
  s <- make_valid_subtitles()
  # A caption-only line leaves nothing behind once the caption is removed.
  s$Text_content[1] <- "(caption only)"

  result_keep <- clean_captions(s, clean.empty = FALSE)
  expect_equal(nrow(result_keep), nrow(s))

  result_drop <- clean_captions(s, clean.empty = TRUE)
  expect_lt(nrow(result_drop), nrow(s))
})

test_that("clean_captions works on a multisubtitles object", {
  s <- make_valid_subtitles()
  multi <- bind_subtitles(s, s, collapse = FALSE)
  result <- clean_captions(multi)
  expect_s3_class(result, "multisubtitles")
  expect_length(result, 2L)
})

test_that("clean_captions errors on non-subtitles input", {
  expect_error(clean_captions(list()), "subtitles")
})

# ── clean_patterns ─────────────────────────────────────────────────────────────

test_that("clean_patterns removes text matching a regex", {
  s <- make_valid_subtitles()
  s$Text_content[1] <- "SPEAKER: Hello world"
  result <- clean_patterns(s, pattern = "^SPEAKER: ")
  expect_false(grepl("SPEAKER", result$Text_content[1]))
  expect_true(grepl("Hello", result$Text_content[1]))
})

test_that("clean_patterns clean.empty = FALSE keeps rows that become empty", {
  s <- make_valid_subtitles()
  # The pattern matches the whole line, so the row is empty after cleaning.
  s$Text_content[1] <- "DELETE_ME"

  result_keep <- clean_patterns(s, pattern = "DELETE_ME", clean.empty = FALSE)
  expect_equal(nrow(result_keep), nrow(s))

  result_drop <- clean_patterns(s, pattern = "DELETE_ME", clean.empty = TRUE)
  expect_lt(nrow(result_drop), nrow(s))
})

test_that("clean_patterns works on a multisubtitles object", {
  s <- make_valid_subtitles()
  multi <- bind_subtitles(s, s, collapse = FALSE)
  result <- clean_patterns(multi, pattern = "^>>")
  expect_s3_class(result, "multisubtitles")
  expect_length(result, 2L)
})

test_that("clean_patterns errors on non-subtitles input", {
  expect_error(clean_patterns(42, pattern = "x"), "subtitles")
})