# Tests for load.project() and related user commands.
#
# Every test creates a throw-away project in a temporary directory, switches
# the working directory into it, and registers on.exit() handlers that restore
# the previous working directory and delete the temporary project again.

context('Load project')

# After loading a fresh project, every entry of `config` is a scalar.
test_that('All elements have length 1', {
  test_project <- tempfile('test_project')
  suppressMessages(create.project(test_project))
  on.exit(unlink(test_project, recursive = TRUE), add = TRUE)

  oldwd <- setwd(test_project)
  on.exit(setwd(oldwd), add = TRUE)

  suppressMessages(load.project())

  expect_equal(
    unname(vapply(config, length, integer(1))),
    rep(1L, length(config))
  )
})

# The directory used here is created with dir.create(), not create.project(),
# so it is an empty directory and every user command is expected to error.
test_that('user commands fail when not in ProjectTemplate directory', {
  test_project <- tempfile('test_project')
  dir.create(test_project)
  on.exit(unlink(test_project, recursive = TRUE), add = TRUE)

  oldwd <- setwd(test_project)
  on.exit(setwd(oldwd), add = TRUE)

  # Check load.project()
  expect_error(load.project())

  # Check clear.cache()
  expect_error(clear.cache())

  # Check cache()
  expect_error(cache())

  # Check reload.project()
  expect_error(reload.project())

  # Check test.project()
  expect_error(test.project())

  # Check stub.tests()
  expect_error(stub.tests())

  # Check project.config()
  expect_error(project.config())
})

# A csv file in data/ is loaded and a matching cache/test.RData is written.
test_that('auto loaded data is cached by default', {
  test_project <- tempfile('test_project')
  suppressMessages(create.project(test_project))
  on.exit(unlink(test_project, recursive = TRUE), add = TRUE)

  oldwd <- setwd(test_project)
  on.exit(setwd(oldwd), add = TRUE)

  # clear the global environment
  rm(list = ls(envir = .TargetEnv), envir = .TargetEnv)

  test_data <- tibble::as_tibble(
    data.frame(Names = c("a", "b", "c"), Ages = c(20, 30, 40))
  )

  # save test data as a csv in the data directory
  write.csv(test_data, file = "data/test.csv", row.names = FALSE)

  suppressMessages(load.project())

  # check that the cached file loads without error
  expect_error(load("cache/test.RData", envir = environment()), NA)

  # and check that the loaded data from the cache is what we saved
  expect_equal(test, test_data)
})

# With cache_loaded_data switched off in the config, no cache file is written.
test_that('auto loaded data is not cached when cache_loaded_data is FALSE', {
  test_project <- tempfile('test_project')
  suppressMessages(create.project(test_project))
  on.exit(unlink(test_project, recursive = TRUE), add = TRUE)

  oldwd <- setwd(test_project)
  on.exit(setwd(oldwd), add = TRUE)

  # clear the global environment
  rm(list = ls(envir = .TargetEnv), envir = .TargetEnv)

  test_data <- data.frame(Names = c("a", "b", "c"), Ages = c(20, 30, 40))

  # save test data as a csv in the data directory
  write.csv(test_data, file = "data/test.csv", row.names = FALSE)

  # Read the config data and set cache_loaded_data to FALSE
  config <- .read.config()
  expect_error(config$cache_loaded_data <- FALSE, NA)
  .save.config(config)

  suppressMessages(load.project())

  # check that the test variable has not been cached
  expect_error(
    suppressWarnings(load("cache/test.RData", envir = environment())),
    "cannot open the connection"
  )
})

# Variables created by an R script in data/ are cached; variables that only
# exist locally in this test are not.
test_that('auto loaded data from an R script is cached correctly', {
  test_project <- tempfile('test_project')
  suppressMessages(create.project(test_project))
  on.exit(unlink(test_project, recursive = TRUE), add = TRUE)

  oldwd <- setwd(test_project)
  on.exit(setwd(oldwd), add = TRUE)

  # clear the global environment
  rm(list = ls(envir = .TargetEnv), envir = .TargetEnv)

  # create some variables in the global env that shouldn't be cached
  test_data11 <- data.frame(Names = c("a", "b", "c"), Ages = c(20, 30, 40))
  test_data21 <- data.frame(Names = c("a1", "b1", "c1"), Ages = c(20, 30, 40))

  # Create some R code and put in data directory
  CODE <- paste0(deparse(substitute({
    test_data12 <- data.frame(Names = c("a", "b", "c"), Ages = c(20, 30, 40))
    test_data22 <- data.frame(Names = c("a1", "b1", "c1"), Ages = c(20, 30, 40))
  })), collapse = "\n")

  # save R code in the data directory
  writeLines(CODE, "data/test.R")

  # load the project and R code
  suppressMessages(load.project())

  # check that the test variables have been cached correctly
  expect_error(load("cache/test_data12.RData", envir = environment()), NA)
  expect_error(load("cache/test_data22.RData", envir = environment()), NA)

  # check that the other test variables have not been cached
  expect_error(
    suppressWarnings(load("cache/test_data11.RData", envir = environment())),
    "cannot open the connection"
  )
  expect_error(
    suppressWarnings(load("cache/test_data21.RData", envir = environment())),
    "cannot open the connection"
  )
})

# Exercises the data_ignore setting with literal names, globs, directories,
# a regular expression and a cached variable name.
test_that('ignored data files are not loaded', {
  test_project <- tempfile('test_project')
  suppressMessages(create.project(test_project))
  on.exit(unlink(test_project, recursive = TRUE), add = TRUE)

  oldwd <- setwd(test_project)
  on.exit(setwd(oldwd), add = TRUE)

  # clear the global environment
  rm(list = ls(envir = .TargetEnv), envir = .TargetEnv)

  # Read the config data and set cache_loaded_data to FALSE
  config <- .read.config()
  expect_error(config$cache_loaded_data <- FALSE, NA)
  .save.config(config)

  # create some test data so the file can be loaded if not ignored
  test_data <- tibble::as_tibble(
    data.frame(Names = c("a", "b", "c"), Ages = c(20, 30, 40))
  )

  # write test data to files
  write.csv(test_data, file = 'data/test.csv', row.names = FALSE)
  dir.create('data/test')
  write.csv(test_data, file = 'data/test/test.csv', row.names = FALSE)

  # load the project and data with default settings,
  # check that data/test.csv is loaded
  suppressMessages(load.project())
  expect_equal(test, test_data)

  # reload the project, now with recursive_loading
  rm(list = ls(envir = .TargetEnv), envir = .TargetEnv)
  suppressMessages(load.project(recursive_loading = TRUE))
  expect_equal(test, test_data)
  expect_equal(test.test, test_data)

  writeLines('\n', 'data/Thumbs.db') # Should trigger error

  # reload the project, now with an illegal Thumbs.db
  rm(list = ls(envir = .TargetEnv), envir = .TargetEnv)
  # The Thumbs.db is not a valid SQLite database so should raise an error
  expect_error(
    suppressWarnings(load.project(override.config = list(data_ignore = '')))
  )

  # reload the project, ignore *.csv
  rm(list = ls(envir = .TargetEnv), envir = .TargetEnv)
  suppressMessages(load.project(data_ignore = 'Thumbs.db, *.csv'))
  expect_false(exists("test", envir = .TargetEnv))
  expect_false(exists("test.test", envir = .TargetEnv))

  # reload the project, ignore *.csv with recursive loading
  rm(list = ls(envir = .TargetEnv), envir = .TargetEnv)
  suppressMessages(load.project(data_ignore = 'Thumbs.db, *.csv',
                                recursive_loading = TRUE))
  expect_false(exists("test", envir = .TargetEnv))
  expect_false(exists("test.test", envir = .TargetEnv))

  # reload the project, ignore test/*.csv with recursive loading
  rm(list = ls(envir = .TargetEnv), envir = .TargetEnv)
  suppressMessages(load.project(data_ignore = 'Thumbs.db, test/*.csv',
                                recursive_loading = TRUE))
  expect_equal(test, test_data)
  expect_false(exists("test.test", envir = .TargetEnv))

  # reload the project, ignore test/ with recursive loading
  rm(list = ls(envir = .TargetEnv), envir = .TargetEnv)
  suppressMessages(load.project(data_ignore = 'Thumbs.db, test/',
                                recursive_loading = TRUE))
  expect_equal(test, test_data)
  expect_false(exists("test.test", envir = .TargetEnv))

  # reload the project, ignore test/*.csv as a regular expression with
  # recursive loading
  rm(list = ls(envir = .TargetEnv), envir = .TargetEnv)
  suppressMessages(load.project(data_ignore = 'Thumbs.db, /test/.*\\.csv/',
                                recursive_loading = TRUE))
  expect_equal(test, test_data)
  expect_false(exists("test.test", envir = .TargetEnv))

  # reload the project, ignore cached var_to_cache
  rm(list = ls(envir = .TargetEnv), envir = .TargetEnv)
  assign("var_to_cache", test_data, envir = .TargetEnv)
  cache("var_to_cache")
  rm(var_to_cache, envir = .TargetEnv)
  suppressMessages(load.project(data_ignore = 'Thumbs.db, var_to_cache'))
  expect_false(exists("var_to_cache", envir = .TargetEnv))
})

# With tables_type set to "data_frame" the loaded csv compares equal to the
# plain data.frame that was written.
test_that('data is loaded as data_frame', {
  test_project <- tempfile('test_project')
  suppressMessages(create.project(test_project))
  on.exit(unlink(test_project, recursive = TRUE), add = TRUE)

  oldwd <- setwd(test_project)
  on.exit(setwd(oldwd), add = TRUE)

  # clear the global environment
  rm(list = ls(envir = .TargetEnv), envir = .TargetEnv)

  test_data <- data.frame(Names = c("a", "b", "c"), Ages = c(20, 30, 40))

  # save test data as a csv in the data directory
  write.csv(test_data, file = "data/test.csv", row.names = FALSE)

  config <- .new.config
  config$tables_type <- "data_frame"
  write.dcf(config, 'config/global.dcf')

  suppressMessages(load.project())

  # and check that the loaded data is what we saved
  expect_equal(test, test_data)
})

# Same as above, but with tables_type set to "data_table".
test_that('data is loaded as data_table', {
  skip_if_not_installed("data.table")

  test_project <- tempfile('test_project')
  suppressMessages(create.project(test_project))
  on.exit(unlink(test_project, recursive = TRUE), add = TRUE)

  oldwd <- setwd(test_project)
  on.exit(setwd(oldwd), add = TRUE)

  # clear the global environment
  rm(list = ls(envir = .TargetEnv), envir = .TargetEnv)

  # Attach data.table as before; library() errors instead of silently
  # returning FALSE, and the skip above guarantees the package is installed.
  library('data.table')

  test_data <- data.table::data.table(
    data.frame(Names = c("a", "b", "c"), Ages = c(20, 30, 40))
  )

  # save test data as a csv in the data directory
  write.csv(test_data, file = "data/test.csv", row.names = FALSE)

  config <- .new.config
  config$tables_type <- "data_table"
  write.dcf(config, 'config/global.dcf')

  suppressMessages(load.project())

  # and check that the loaded data is what we saved
  expect_equal(test, test_data)
})

# With logs_sub_dir given, project.log is expected under logs/test_logs and
# not directly under logs.
test_that('logs are written to a logs subdirectory', {
  skip_if_not_installed("log4r")

  test_project <- tempfile('test_project')
  suppressMessages(create.project(test_project))
  on.exit(unlink(test_project, recursive = TRUE), add = TRUE)

  oldwd <- setwd(test_project)
  on.exit(setwd(oldwd), add = TRUE)

  config <- .new.config
  config$logging <- TRUE
  write.dcf(config, 'config/global.dcf')
  dir.create("logs/test_logs")

  # Don't want to trigger warning - causes problems with CRAN

  # Create some R code and put in munge directory
  CODE <- paste0(deparse(substitute({
    require.package('log4r')
    info(logger, "this is a test file")
  })), collapse = "\n")

  # save R code in the munge directory
  writeLines(CODE, "munge/test.R")

  # load the project and R code
  suppressMessages(load.project(logs_sub_dir = "test_logs"))

  expect_false(file.exists(file.path("logs", "project.log")))
  expect_true(file.exists(file.path("logs", "test_logs", "project.log")))
})

# A munge script placed in munge/test_munge is run when munge_sub_dir is given.
test_that('read from munge subdirectory', {
  test_project <- tempfile('test_project')
  suppressMessages(create.project(test_project))
  on.exit(unlink(test_project, recursive = TRUE), add = TRUE)

  oldwd <- setwd(test_project)
  on.exit(setwd(oldwd), add = TRUE)

  # Create some R code and put in munge subdirectory
  CODE <- paste0(deparse(substitute({
    test_data <- tibble::as_tibble(
      data.frame(Names = c("a", "b", "c"), Ages = c(20, 30, 40))
    )
  })), collapse = "\n")

  dir.create(file.path("munge", "test_munge"))
  writeLines(CODE, file.path("munge", "test_munge", "02-test_data.R"))

  # load the project and R code
  suppressMessages(load.project(munge_sub_dir = "test_munge"))

  expect_true(exists("test_data"))
})

# Selected munge files are run via munge_files; afterwards Python munge
# scripts are added and their file output is checked.
test_that('pass munge files to run', {
  test_project <- tempfile('test_project')
  suppressMessages(create.project(test_project))
  on.exit(unlink(test_project, recursive = TRUE), add = TRUE)

  oldwd <- setwd(test_project)
  on.exit(setwd(oldwd), add = TRUE)

  # Create some R code and put in munge directory
  CODE <- paste0(deparse(substitute({
    test_data_1 <- tibble::as_tibble(
      data.frame(Names = c("a", "b", "c"), Ages = c(20, 30, 40))
    )
  })), collapse = "\n")
  writeLines(CODE, file.path("munge", "01-test_data.R"))

  CODE <- paste0(deparse(substitute({
    test_data_2 <- tibble::as_tibble(
      data.frame(Names = c("d", "e", "f"), Ages = c(50, 60, 70))
    )
  })), collapse = "\n")
  writeLines(CODE, file.path("munge", "02-test_data.R"))

  CODE <- paste0(deparse(substitute({
    test_data_3 <- tibble::as_tibble(
      data.frame(Names = c("d", "e", "f"), Ages = c(50, 60, 70))
    )
  })), collapse = "\n")
  writeLines(CODE, file.path("munge", "03-test_data.R"))

  # load the project and R code
  suppressMessages(load.project(munge_files = c("02-test_data.R")))

  # expect_false(exists("test_data_1"))
  expect_true(exists("test_data_2"))
  # expect_false(exists("test_data_3"))

  suppressMessages(
    load.project(munge_files = c("03-test_data.R", "02-test_data.R"))
  )

  # expect_false(exists("test_data_1"))
  expect_true(exists("test_data_2"))
  expect_true(exists("test_data_3"))

  # ------------------------------------------------------------------------------
  # Define a Python script and put in munge directory
  # ------------------------------------------------------------------------------
  # NOTE(review): both scripts below import pandas, and the expectations on
  # their output files are not guarded by any skip. Presumably they fail on
  # machines without Python/pandas - confirm how load.project() locates
  # Python before adding a skip condition.
  python_code <- c(
    "print(' ')",
    "print('01-test_data.py start')",
    "", # Empty line for readability
    "import pandas as pd",
    "import os",
    "data = pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']})",
    "# Write data to CSV file in munge directory (adjust path if needed)",
    "data.to_csv('munge/test_data_py.csv', index=False)",
    "", # Empty line for readability
    "# Print data sum for testing purposes",
    "print(data.sum())",
    "print('01-test_data.py finish')"
  )
  # ------------------------------------------------------------------------------
  # Write the Python code to a .py file
  # ------------------------------------------------------------------------------
  writeLines(python_code, file.path("munge", "01-test_data.py"))
  # ------------------------------------------------------------------------------
  # Second script: re-reads the csv written by the first one, and writes
  # y.csv or n.csv depending on whether its own variable is visible in
  # Python's globals()
  # ------------------------------------------------------------------------------
  check_py_data <- c(
    "print(' ')",
    "print('02-test_data.py start')",
    "import os",
    "import pandas as pd",
    "import sys",
    # ------------------------------------------------------------------------------
    "df_csv = pd.read_csv('munge/test_data_py.csv')",
    "df_csv.to_csv('munge/write_test_data_py.csv', index=False)",
    # ------------------------------------------------------------------------------
    "",
    "py_data = pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']})",
    "subdirectory = 'munge'",
    # ------------------------------------------------------------------------------
    "",
    "if 'subdirectory' in globals():",
    " data = 'y'",
    " print('Python data exists in the environment')",
    "else:",
    " data = 'n'",
    " print('Python data NOT in the environment')",
    # ------------------------------------------------------------------------------
    "",
    "df = pd.DataFrame([[data]], columns=['text'])",
    "print(df)",
    "",
    "full_file_path = os.path.join(subdirectory, f'{data}.csv')",
    "df.to_csv(full_file_path, sep=',', index=False)",
    # ------------------------------------------------------------------------------
    "",
    "print('02-test_data.py finish')"
    # ------------------------------------------------------------------------------
  )
  # ------------------------------------------------------------------------------
  writeLines(check_py_data, file.path("munge", "02-test_data.py"))
  # ------------------------------------------------------------------------------
  suppressMessages(load.project())
  # ------------------------------------------------------------------------------
  # Check if python and R source file exists
  # ------------------------------------------------------------------------------
  expect_true(file.exists(file.path("munge", "01-test_data.py")))
  expect_true(file.exists(file.path("munge", "01-test_data.R")))
  expect_true(file.exists(file.path("munge", "02-test_data.py")))
  expect_true(file.exists(file.path("munge", "02-test_data.R")))
  # ------------------------------------------------------------------------------
  # Check if CSV file exists (created by 01-test_data.py)
  # ------------------------------------------------------------------------------
  expect_true(file.exists(file.path("munge", "test_data_py.csv")))
  expect_true(file.exists(file.path("munge", "write_test_data_py.csv")))
  # ------------------------------------------------------------------------------
  # validate if entry defined in python environment (created by 02-test_data.py)
  # ------------------------------------------------------------------------------
  expect_true(file.exists(file.path("munge", "y.csv")))
  expect_false(file.exists(file.path("munge", "n.csv")))
  # ------------------------------------------------------------------------------
  # Check that the Python objects did not leak into R. ls() alone only lists
  # the local variables of this test, so .TargetEnv is checked as well;
  # inherits = FALSE keeps utils::data() from being picked up.
  # ------------------------------------------------------------------------------
  expect_false("data" %in% ls(),
               "Python dataframe exists in the R environment")
  expect_false(exists("data", envir = .TargetEnv, inherits = FALSE),
               "Python dataframe exists in the R environment")
  expect_false(exists("py_data", envir = .TargetEnv, inherits = FALSE),
               "Python dataframe exists in the R environment")
  # ------------------------------------------------------------------------------
})