test_that("spod_convert works for duckdb output", { skip_if_not(requireNamespace("duckdb", quietly = TRUE)) # Setup fixture env test_dir <- setup_test_data_dir() on.exit(unlink(test_dir, recursive = TRUE)) spod_set_data_dir(test_dir, quiet = TRUE) local_mock_download() # Mock valid dates to ensure version inference works local_mock_valid_dates <- function(env = parent.frame()) { mock_dates <- seq.Date(as.Date("2022-02-01"), as.Date("2022-02-10"), by = "day") mock_func <- function(ver = NULL) { if (ver == 2) return(mock_dates) if (ver == 1) return(as.Date(character(0))) # V1 empty stop("Invalid version") } orig <- get("spod_get_valid_dates", envir = asNamespace("spanishoddata")) assignInNamespace("spod_get_valid_dates", mock_func, ns = "spanishoddata") withr::defer(assignInNamespace("spod_get_valid_dates", orig, ns = "spanishoddata"), envir = env) } local_mock_valid_dates() # Use a date range covered by our mocked fixtures # Fixtures: v2/zones/distritos_mitma.gpkg and v2/.../viajes_distrito_2022-02-01.csv.gz # Note: The fixture CSV is 2022-02-01 in Hive structure. # Define output path out_file <- file.path(test_dir, "output.duckdb") # Note: spod_convert calls spod_get which triggers the SQL logic. # We know spod_get currently fails on SQL view ordering in v2-od-dist-clean-csv-view-en.sql # However, let's write the test assuming it might work or to confirm failure mode directly. # If it fails, that confirms we really need to fix the internal SQL bug. expect_error( spod_convert( type = "od", zones = "distr", dates = "2022-02-01", save_format = "duckdb", save_path = out_file, overwrite = TRUE, quiet = TRUE ), NA # Expect no error ) expect_true(file.exists(out_file)) con <- spod_connect(out_file) expect_s3_class(con, "tbl_duckdb_connection") spod_disconnect(con) }) test_that("spod_convert works for parquet output", { skip_if_not(requireNamespace("duckdb", quietly = TRUE)) test_dir <- setup_test_data_dir() on.exit(unlink(test_dir, recursive = TRUE)) spod_set_data_dir(test_dir, quiet = TRUE) local_mock_download() # Mock valid dates here too local_mock_valid_dates <- function(env = parent.frame()) { mock_dates <- seq.Date(as.Date("2022-02-01"), as.Date("2022-02-10"), by = "day") mock_func <- function(ver = NULL) { if (ver == 2) return(mock_dates) if (ver == 1) return(as.Date(character(0))) stop("Invalid version") } orig <- get("spod_get_valid_dates", envir = asNamespace("spanishoddata")) assignInNamespace("spod_get_valid_dates", mock_func, ns = "spanishoddata") withr::defer(assignInNamespace("spod_get_valid_dates", orig, ns = "spanishoddata"), envir = env) } local_mock_valid_dates() out_dir <- file.path(test_dir, "output_parquet") expect_error( spod_convert( type = "od", zones = "distr", dates = "2022-02-01", save_format = "parquet", save_path = out_dir, overwrite = TRUE, quiet = TRUE ), NA ) if (dir.exists(out_dir)) { # check for partition structure (year=2022...) files <- list.files(out_dir, recursive = TRUE) expect_true(any(grepl("year=2022", files))) } }) test_that("spod_convert infers format from save_path (.duckdb)", { skip_if_not(requireNamespace("duckdb", quietly = TRUE)) test_dir <- withr::local_tempdir() local_mocked_bindings( spod_get = function(...) { # Return a mock duckdb connection with src$con con <- DBI::dbConnect(duckdb::duckdb(), ":memory:") DBI::dbExecute(con, "CREATE VIEW od_csv_clean_filtered AS SELECT 1::INTEGER as id") tbl <- dplyr::tbl(con, "od_csv_clean_filtered") return(tbl) } ) local_mocked_bindings( spod_dates_argument_to_dates_seq = function(...) as.Date("2022-01-01"), spod_infer_data_v_from_dates = function(...) 2 ) out_file <- file.path(test_dir, "output.duckdb") # Call with save_format=NULL (will default), should infer from path not used here result <- spod_convert( type = "od", zones = "dist", dates = "2022-01-01", save_format = NULL, # Will default to duckdb save_path = out_file, overwrite = TRUE, quiet = TRUE ) expect_equal(result, out_file) # expect_true(file.exists(out_file)) # Skipped because mock uses in-memory DB }) test_that("spod_convert infers format from save_path (parquet folder)", { skip_if_not(requireNamespace("duckdb", quietly = TRUE)) test_dir <- withr::local_tempdir() local_mocked_bindings( spod_get = function(...) { con <- DBI::dbConnect(duckdb::duckdb(), ":memory:") DBI::dbExecute(con, "CREATE VIEW od_csv_clean_filtered AS SELECT 1::INTEGER as id, 2022::INTEGER as year, 1::INTEGER as month, 1::INTEGER as day") tbl <- dplyr::tbl(con, "od_csv_clean_filtered") return(tbl) } ) local_mocked_bindings( spod_dates_argument_to_dates_seq = function(...) as.Date("2022-01-01"), spod_infer_data_v_from_dates = function(...) 2 ) out_dir <- file.path(test_dir, "output.parquet") result <- spod_convert( type = "od", zones = "dist", dates = "2022-01-01", save_format = NULL, # Will default to duckdb save_path = out_dir, overwrite = TRUE, quiet = TRUE ) expect_equal(result, out_dir) }) test_that("spod_convert prompts for duckdb overwrite when file exists", { skip_if_not(requireNamespace("duckdb", quietly = TRUE)) test_dir <- withr::local_tempdir() out_file <- file.path(test_dir, "existing.duckdb") # Create an existing file writeLines("existing", out_file) local_mocked_bindings( spod_get = function(...) { con <- DBI::dbConnect(duckdb::duckdb(), ":memory:") DBI::dbExecute(con, "CREATE TABLE od_csv_clean_filtered (id INTEGER)") tbl <- dplyr::tbl(con, "od_csv_clean_filtered") return(tbl) } ) local_mocked_bindings( spod_dates_argument_to_dates_seq = function(...) as.Date("2022-01-01"), spod_infer_data_v_from_dates = function(...) 2 ) # Mock spod_readline to say "no" local_mocked_bindings( spod_readline = function(...) "no" ) result <- NULL msgs <- capture_messages( result <- spod_convert( type = "od", zones = "dist", dates = "2022-01-01", save_path = out_file, overwrite = FALSE, quiet = FALSE ) ) expect_match(msgs, "Exiting without overwriting", all = FALSE) expect_null(result) }) test_that("spod_convert overwrites duckdb when confirmed", { skip_if_not(requireNamespace("duckdb", quietly = TRUE)) test_dir <- withr::local_tempdir() out_file <- file.path(test_dir, "existing.duckdb") writeLines("existing", out_file) local_mocked_bindings( spod_get = function(...) { con <- DBI::dbConnect(duckdb::duckdb(), ":memory:") DBI::dbExecute(con, "CREATE VIEW od_csv_clean_filtered AS SELECT 1::INTEGER as id") tbl <- dplyr::tbl(con, "od_csv_clean_filtered") return(tbl) } ) local_mocked_bindings( spod_dates_argument_to_dates_seq = function(...) as.Date("2022-01-01"), spod_infer_data_v_from_dates = function(...) 2 ) # Mock spod_readline to say "yes" local_mocked_bindings( spod_readline = function(...) "yes" ) result <- spod_convert( type = "od", zones = "dist", dates = "2022-01-01", save_path = out_file, overwrite = FALSE, quiet = TRUE ) expect_equal(result, out_file) }) test_that("spod_convert errors on update mode for duckdb", { # Mock to prevent reaching real functions local_mocked_bindings( spod_dates_argument_to_dates_seq = function(...) as.Date("2022-01-01"), spod_infer_data_v_from_dates = function(...) 2 ) # Test the early validation that happens before spod_get is called expect_error( spod_convert( type = "od", zones = "dist", dates = "2022-01-01", save_format = "duckdb", save_path = tempfile(fileext = ".duckdb"), overwrite = "update", quiet = TRUE ), "not supported" ) }) test_that("spod_convert handles parquet update mode (cancel)", { skip_if_not(requireNamespace("duckdb", quietly = TRUE)) test_dir <- withr::local_tempdir() out_dir <- file.path(test_dir, "existing_parquet") dir.create(out_dir, recursive = TRUE) # Create existing parquet file dir.create(file.path(out_dir, "year=2022/month=1/day=1"), recursive = TRUE) writeLines("existing", file.path(out_dir, "year=2022/month=1/day=1/data_0.parquet")) local_mocked_bindings( spod_get = function(...) { con <- DBI::dbConnect(duckdb::duckdb(), ":memory:") DBI::dbExecute(con, "CREATE VIEW od_csv_clean_filtered AS SELECT 1::INTEGER as id, 2022::INTEGER as year, 1::INTEGER as month, 1::INTEGER as day") tbl <- dplyr::tbl(con, "od_csv_clean_filtered") return(tbl) }, spod_dates_argument_to_dates_seq = function(...) as.Date("2022-01-01"), spod_infer_data_v_from_dates = function(...) 2 ) # Mock spod_readline to say "cancel" local_mocked_bindings( spod_readline = function(...) "c" ) result <- NULL msgs <- capture_messages( result <- spod_convert( type = "od", zones = "dist", dates = "2022-01-01", save_path = out_dir, save_format = "parquet", overwrite = FALSE, quiet = FALSE ) ) expect_match(msgs, "Cancelled", all = FALSE) expect_null(result) }) test_that("spod_convert handles parquet update mode (delete)", { skip_if_not(requireNamespace("duckdb", quietly = TRUE)) test_dir <- withr::local_tempdir() out_dir <- file.path(test_dir, "existing_parquet") dir.create(out_dir, recursive = TRUE) # Create existing parquet file dir.create(file.path(out_dir, "year=2022/month=1/day=1"), recursive = TRUE) writeLines("existing", file.path(out_dir, "year=2022/month=1/day=1/data_0.parquet")) local_mocked_bindings( spod_get = function(...) { con <- DBI::dbConnect(duckdb::duckdb(), ":memory:") DBI::dbExecute(con, "CREATE VIEW od_csv_clean_filtered AS SELECT 1::INTEGER as id, 2022::INTEGER as year, 1::INTEGER as month, 1::INTEGER as day") tbl <- dplyr::tbl(con, "od_csv_clean_filtered") return(tbl) }, spod_dates_argument_to_dates_seq = function(...) as.Date("2022-01-01"), spod_infer_data_v_from_dates = function(...) 2 ) # Mock spod_readline to say "delete" local_mocked_bindings( spod_readline = function(...) "d" ) result <- NULL capture_messages( result <- spod_convert( type = "od", zones = "dist", dates = "2022-01-01", save_path = out_dir, save_format = "parquet", overwrite = FALSE, quiet = FALSE ) ) expect_equal(result, out_dir) }) test_that("spod_convert validates inconsistent save_format and save_path", { # Mock to prevent reaching real functions local_mocked_bindings( spod_dates_argument_to_dates_seq = function(...) as.Date("2022-01-01"), spod_infer_data_v_from_dates = function(...) 2 ) expect_error( spod_convert( type = "od", zones = "dist", dates = "2022-01-01", save_format = "duckdb", save_path = "/path/to/folder", # No .duckdb extension quiet = TRUE ), "folder.*duckdb" ) })