# ============================================================================= # Backward-compatibility & new-feature tests for FFdownload v1.2.0 # # Strategy # -------- # All tests that require network access are guarded by skip_if_offline() and # skip_on_cran() so they run only in a connected dev environment. # # "Same output as master" is verified by: # 1. Running FFdownload() with the EXACT same parameters used in the v1.1.x # vignettes (old API, no new params). # 2. Checking that the output structure (list names, sub-list names, column # names, class, dimensions) matches the documented expectations. # 3. Additionally verifying that the new convenience wrappers (FFget, FFlist, # FFmatch) produce results that are structurally identical to the # corresponding slice of the FFdownload() output. # ============================================================================= # ── helpers ────────────────────────────────────────────────────────────────── small_dataset <- "F-F_Research_Data_Factors" # ~small, always available small_inputlist <- c("F-F_Research_Data_Factors") # Helper: run FFdownload() exactly as v1.1.x vignettes did, return FFdata list run_old_api <- function(tempd, output_file, format = "xts") { FFdownload( output_file = output_file, tempd = tempd, exclude_daily = TRUE, download = TRUE, download_only = FALSE, inputlist = small_inputlist, format = format ) e <- new.env(parent = emptyenv()) load(output_file, envir = e) e$FFdata } # Helper: run FFdownload() with new params but equivalent semantics run_new_api <- function(tempd, output_file, format = "xts") { FFdownload( output_file = output_file, tempd = tempd, exclude_daily = TRUE, action = "all", # new param: replaces download=T/download_only=F inputlist = small_inputlist, format = format, na_values = NULL, # NULL = original behaviour (no replacement) return_data = TRUE # new param: also return the list ) } # ============================================================================= # 1. FFlist() — dataset inventory # ============================================================================= test_that("FFlist() returns a data frame / tibble with expected columns", { skip_on_cran() skip_if_offline() fl <- FFlist(exclude_daily = TRUE) expect_true(is.data.frame(fl)) expect_true(all(c("name", "file_url") %in% names(fl))) expect_gt(nrow(fl), 50) # French has 100+ non-daily datasets expect_false(any(grepl("daily", fl$name, ignore.case = TRUE))) expect_true(all(grepl("^https://", fl$file_url))) }) test_that("FFlist(exclude_daily = FALSE) includes daily datasets", { skip_on_cran() skip_if_offline() fl_all <- FFlist(exclude_daily = FALSE) fl_nodaily <- FFlist(exclude_daily = TRUE) expect_true("is_daily" %in% names(fl_all)) expect_gt(nrow(fl_all), nrow(fl_nodaily)) expect_true(any(fl_all$is_daily)) }) test_that("FFlist() includes the canonical 3-factor dataset", { skip_on_cran() skip_if_offline() fl <- FFlist() expect_true(any(grepl("F-F_Research_Data_Factors", fl$name))) }) # ============================================================================= # 2. FFmatch() — fuzzy match preview # ============================================================================= test_that("FFmatch() returns one row per requested name", { skip_on_cran() skip_if_offline() queries <- c("Research_Data_Factors", "Momentum_Factor", "ST_Reversal") fm <- FFmatch(queries) expect_equal(nrow(fm), length(queries)) expect_true(all(c("requested", "matched", "edit_distance", "similarity") %in% names(fm))) expect_true(all(fm$similarity >= 0 & fm$similarity <= 1)) }) test_that("FFmatch() exact name gives similarity = 1", { skip_on_cran() skip_if_offline() fl <- FFlist() exact <- fl$name[1] fm <- FFmatch(exact) expect_equal(fm$matched[1], exact) expect_equal(fm$edit_distance[1], 0L) expect_equal(fm$similarity[1], 1) }) # ============================================================================= # 3. FFdownload() backward-compatibility (old API parameters) # ============================================================================= test_that("FFdownload() old API produces a named list with expected structure (xts)", { skip_on_cran() skip_if_offline() outd <- file.path(tempdir(), paste0("fftest_old_", format(Sys.time(), "%H%M%S"))) outf <- tempfile(fileext = ".RData") FFdata <- run_old_api(outd, outf, format = "xts") # Top-level list expect_type(FFdata, "list") expect_true(length(FFdata) >= 1) key <- grep("F-F_Research_Data_Factors", names(FFdata), value = TRUE)[1] expect_false(is.na(key)) ds <- FFdata[[key]] expect_true(all(c("annual", "monthly", "daily") %in% names(ds))) # Monthly sub-list must contain at least one sub-table expect_gt(length(ds$monthly), 0) # The primary sub-table is named Temp2 in factor files expect_true("Temp2" %in% names(ds$monthly)) # Temp2 should be an xts with the canonical FF columns tbl <- ds$monthly$Temp2 expect_s3_class(tbl, "xts") expect_true(all(c("Mkt.RF", "SMB", "HML", "RF") %in% colnames(tbl))) expect_gt(nrow(tbl), 100) # FF factors go back to 1926 }) test_that("FFdownload() old API produces a named list with expected structure (tbl)", { skip_on_cran() skip_if_offline() outd <- file.path(tempdir(), paste0("fftest_tbl_", format(Sys.time(), "%H%M%S"))) outf <- tempfile(fileext = ".RData") FFdata <- run_old_api(outd, outf, format = "tbl") key <- grep("F-F_Research_Data_Factors", names(FFdata), value = TRUE)[1] tbl <- FFdata[[key]]$monthly$Temp2 expect_true(is.data.frame(tbl)) expect_true("date" %in% names(tbl)) expect_true(all(c("Mkt.RF", "SMB", "HML", "RF") %in% names(tbl))) }) test_that("FFdownload() listsave writes a readable CSV", { skip_on_cran() skip_if_offline() listfile <- tempfile(fileext = ".txt") FFdownload(exclude_daily = TRUE, download = FALSE, download_only = TRUE, listsave = listfile) expect_true(file.exists(listfile)) df <- read.csv(listfile) expect_gt(nrow(df), 50) }) # ============================================================================= # 4. New API: action parameter equivalence with old flags # ============================================================================= test_that("action='all' gives identical output to download=TRUE/download_only=FALSE", { skip_on_cran() skip_if_offline() outd_old <- file.path(tempdir(), paste0("fftest_act_old_", format(Sys.time(), "%H%M%S"))) outd_new <- file.path(tempdir(), paste0("fftest_act_new_", format(Sys.time(), "%H%M%S"))) outf_old <- tempfile(fileext = ".RData") outf_new <- tempfile(fileext = ".RData") old_data <- run_old_api(outd_old, outf_old, format = "xts") new_data <- run_new_api(outd_new, outf_new, format = "xts") # Same top-level keys expect_equal(sort(names(old_data)), sort(names(new_data))) key <- grep("F-F_Research_Data_Factors", names(old_data), value = TRUE)[1] # Same sub-list names expect_equal(names(old_data[[key]]), names(new_data[[key]])) expect_equal(names(old_data[[key]]$monthly), names(new_data[[key]]$monthly)) # Same dimensions for Temp2 old_tbl <- old_data[[key]]$monthly$Temp2 new_tbl <- new_data[[key]]$monthly$Temp2 expect_equal(dim(old_tbl), dim(new_tbl)) expect_equal(colnames(old_tbl), colnames(new_tbl)) }) # ============================================================================= # 5. New API: return_data = TRUE # ============================================================================= test_that("return_data=TRUE returns the same object as loading from file", { skip_on_cran() skip_if_offline() outd <- file.path(tempdir(), paste0("fftest_ret_", format(Sys.time(), "%H%M%S"))) outf <- tempfile(fileext = ".RData") returned <- FFdownload( output_file = outf, tempd = outd, exclude_daily = TRUE, download = TRUE, download_only = FALSE, inputlist = small_inputlist, format = "xts", return_data = TRUE ) expect_false(is.null(returned)) e <- new.env(parent = emptyenv()) load(outf, envir = e) from_file <- e$FFdata expect_equal(names(returned), names(from_file)) key <- names(returned)[1] expect_equal( coredata(returned[[key]]$monthly$Temp2), coredata(from_file[[key]]$monthly$Temp2) ) }) # ============================================================================= # 6. New API: na_values sentinel replacement # ============================================================================= test_that("na_values=NULL (default) leaves sentinel values as-is", { skip_on_cran() skip_if_offline() outd <- file.path(tempdir(), paste0("fftest_na_null_", format(Sys.time(), "%H%M%S"))) outf <- tempfile(fileext = ".RData") FFdata <- FFdownload( output_file = outf, tempd = outd, exclude_daily = TRUE, download = TRUE, download_only = FALSE, inputlist = small_inputlist, format = "tbl", na_values = NULL, return_data = TRUE ) key <- names(FFdata)[1] tbl <- FFdata[[key]]$monthly$Temp2 # With na_values=NULL no NAs are introduced artificially; # the dataset should have very few or no NAs in its core columns n_na <- sum(is.na(tbl[, c("Mkt.RF", "SMB", "HML")])) expect_lte(n_na, 5) # tiny tolerance for any genuine missings }) test_that("na_values replaces sentinels with NA", { skip_on_cran() skip_if_offline() # Use an obviously absurd sentinel that matches all values → everything becomes NA sentinel <- -99999 outd_s <- file.path(tempdir(), paste0("fftest_na_sent_", format(Sys.time(), "%H%M%S"))) outf_s <- tempfile(fileext = ".RData") FFdata_sent <- FFdownload( output_file = outf_s, tempd = outd_s, exclude_daily = TRUE, download = TRUE, download_only = FALSE, inputlist = small_inputlist, format = "tbl", na_values = sentinel, return_data = TRUE ) key <- names(FFdata_sent)[1] tbl <- FFdata_sent[[key]]$monthly$Temp2 # sentinel -99999 should not appear in the real FF data, so no replacement expected expect_false(any(tbl[, c("Mkt.RF","SMB","HML","RF")] == sentinel, na.rm = TRUE)) # Now use a value that IS present in FF data (very large return like 99.99 is rare; # use a value we know is in the data: RF values are small but Mkt.RF can be large) # Instead, test by directly calling converter_tbl on a fixture (see test-converter.R) }) # ============================================================================= # 7. FFget() convenience wrapper # ============================================================================= test_that("FFget() returns a tibble with the expected columns for monthly data", { skip_on_cran() skip_if_offline() tbl <- FFget(small_dataset, frequency = "monthly", subtable = "Temp2", format = "tbl", na_values = NULL) expect_true(is.data.frame(tbl)) expect_true("date" %in% names(tbl)) expect_true(all(c("Mkt.RF", "SMB", "HML", "RF") %in% names(tbl))) expect_gt(nrow(tbl), 100) }) test_that("FFget() with format='xts' returns an xts object", { skip_on_cran() skip_if_offline() obj <- FFget(small_dataset, frequency = "monthly", subtable = "Temp2", format = "xts", na_values = NULL) expect_s3_class(obj, "xts") expect_true(all(c("Mkt.RF", "SMB", "HML", "RF") %in% colnames(obj))) }) test_that("FFget() subtable=NULL returns all sub-tables as named list", { skip_on_cran() skip_if_offline() all_tbls <- FFget(small_dataset, frequency = "monthly", subtable = NULL, format = "tbl", na_values = NULL) expect_type(all_tbls, "list") expect_gt(length(all_tbls), 0) expect_true("Temp2" %in% names(all_tbls)) }) test_that("FFget() frequency=NULL returns all frequencies as named list", { skip_on_cran() skip_if_offline() all_freqs <- FFget(small_dataset, frequency = NULL, format = "tbl", na_values = NULL) expect_type(all_freqs, "list") expect_true(all(c("annual", "monthly", "daily") %in% names(all_freqs))) }) test_that("FFget() result matches corresponding FFdownload() slice", { skip_on_cran() skip_if_offline() outd <- file.path(tempdir(), paste0("fftest_get_cmp_", format(Sys.time(), "%H%M%S"))) outf <- tempfile(fileext = ".RData") FFdata <- run_old_api(outd, outf, format = "tbl") key <- grep("F-F_Research_Data_Factors", names(FFdata), value = TRUE)[1] from_dl <- FFdata[[key]]$monthly$Temp2 from_get <- FFget(small_dataset, frequency = "monthly", subtable = "Temp2", format = "tbl", na_values = NULL) expect_equal(names(from_dl), names(from_get)) expect_equal(nrow(from_dl), nrow(from_get)) expect_equal(from_dl$Mkt.RF, from_get$Mkt.RF) }) # ============================================================================= # 8. cache_days: file not re-downloaded when cache is fresh # ============================================================================= test_that("cache_days prevents re-download of a fresh cached file", { skip_on_cran() skip_if_offline() outd <- file.path(tempdir(), paste0("fftest_cache_", format(Sys.time(), "%H%M%S"))) outf <- tempfile(fileext = ".RData") # First download FFdownload(output_file = outf, tempd = outd, exclude_daily = TRUE, download = TRUE, download_only = TRUE, inputlist = small_inputlist) zip_before <- list.files(outd, pattern = "\\.zip$", full.names = TRUE) expect_gt(length(zip_before), 0) mtime_before <- file.info(zip_before[1])$mtime Sys.sleep(1) # ensure at least 1 second passes # Second call with cache_days = 1 — should NOT re-download (file is < 1 day old) outf2 <- tempfile(fileext = ".RData") outd2 <- file.path(tempdir(), paste0("fftest_cache2_", format(Sys.time(), "%H%M%S"))) # Copy cached zips to outd2 to simulate having a local cache dir.create(outd2, showWarnings = FALSE) file.copy(zip_before, outd2) mtime_cached <- file.info(file.path(outd2, basename(zip_before[1])))$mtime # With cache_days=1 the fresh copy should be reused (mtime should not change) FFdownload(output_file = outf2, tempd = outd2, exclude_daily = TRUE, download = TRUE, download_only = TRUE, inputlist = small_inputlist, cache_days = 1) mtime_after <- file.info(file.path(outd2, basename(zip_before[1])))$mtime expect_equal(mtime_cached, mtime_after) }) # ============================================================================= # 9. match_threshold warning # ============================================================================= test_that("FFdownload() warns when fuzzy match similarity is below threshold", { skip_on_cran() skip_if_offline() # "zzzzz" should match nothing well (similarity will be near 0) expect_warning( FFdownload( output_file = tempfile(fileext = ".RData"), tempd = tempfile(), exclude_daily = TRUE, download = FALSE, download_only = TRUE, inputlist = "zzzzzzzzzzz", match_threshold = 0.99 # extremely strict → almost any match triggers warning ), regexp = "Low-confidence fuzzy match" ) }) test_that("FFdownload() does not warn when match is exact", { skip_on_cran() skip_if_offline() fl <- FFlist() exact <- fl$name[grep("F-F_Research_Data_Factors$", fl$name)][1] expect_no_warning( FFdownload( output_file = tempfile(fileext = ".RData"), tempd = tempfile(), exclude_daily = TRUE, download = FALSE, download_only = TRUE, inputlist = exact, match_threshold = 0.3 ) ) })