# Tests for read_NPX().
#
# Every test follows the same outline:
#   1. locate an example input file and the reference results;
#   2. copy the input into a temporary file created by withr::with_tempfile();
#   3. read the copy with read_NPX() (as "tibble" and, where applicable, as
#      "arrow"), asserting the conditions that are expected to be signalled;
#   4. check dimensions, column names and agreement with the reference results.
#
# file.copy() reports failure through its logical return value, so each copy
# is checked both for signalled conditions and for a TRUE result.

test_that(
  "data loads correctly - long - parquet",
  {
    # get data if available, otherwise skip the test
    ref_res <- get_example_data("reference_results.rds")
    npx_file <- get_inst_extdata_file(filename = "npx_data_ext.parquet")

    withr::with_tempfile(
      new = "tmp_long_parquet",
      pattern = "parquet-long-",
      fileext = ".parquet",
      code = {

        # check that the parquet file can be copied without issues
        expect_no_condition(
          object = copied <- file.copy(npx_file, tmp_long_parquet)
        )
        expect_true(object = copied)

        # check that data can be loaded
        expect_no_condition(
          object = npx_df <- read_NPX(filename = tmp_long_parquet,
                                      out_df = "tibble")
        )
        expect_no_condition(
          object = npx_arrow <- read_NPX(filename = tmp_long_parquet,
                                         out_df = "arrow")
        )

        # check that data frame exists
        expect(ok = exists("npx_df"),
               failure_message = "failed to read long parquet in tibble")
        expect(ok = exists("npx_arrow"),
               failure_message = "failed to read long parquet in arrow")

        # check that data set has correct number of rows and columns
        expected_rows <- 1L
        expected_cols <- 19L
        expect_equal(object = nrow(npx_df), expected = expected_rows)
        expect_equal(object = ncol(npx_df), expected = expected_cols)
        expect_equal(object = nrow(npx_arrow), expected = expected_rows)
        expect_equal(object = ncol(npx_arrow), expected = expected_cols)

        # check that dataset has the correct column names
        expected_colnames <- c("SampleID", "SampleType", "WellID", "PlateID",
                               "DataAnalysisRefID", "OlinkID", "UniProt",
                               "Assay", "AssayType", "Panel", "Block",
                               "Count", "ExtNPX", "NPX", "Normalization",
                               "PCNormalizedNPX", "AssayQC", "SampleQC",
                               "ExploreVersion")
        expect_identical(
          object = colnames(npx_df),
          expected = expected_colnames
        )
        expect_identical(
          object = names(npx_arrow),
          expected = expected_colnames
        )

        # check identical to reference
        expect_equal(
          object = npx_df,
          expected = ref_res$npx_data_parquet,
          tolerance = 1e-4
        )

      }
    )
  }
)

test_that(
  "data loads correctly - long - csv",
  {
    # get data if available, otherwise skip the test
    ref_res <- get_example_data("reference_results.rds")
    npx_file <- get_inst_extdata_file(filename = "npx_data_long_csv.csv")

    withr::with_tempfile(
      new = "tmp_long_csv",
      pattern = "csv-long-",
      fileext = ".csv",
      code = {

        # check that the csv file can be copied without issues
        expect_no_condition(
          object = copied <- file.copy(npx_file, tmp_long_csv)
        )
        expect_true(object = copied)

        # check that data can be loaded
        expect_no_condition(
          object = npx_df <- read_NPX(filename = tmp_long_csv,
                                      out_df = "tibble")
        )
        expect_no_condition(
          object = npx_arrow <- read_NPX(filename = tmp_long_csv,
                                         out_df = "arrow")
        )

        # check that data frame exists
        expect(ok = exists("npx_df"),
               failure_message = "failed to read long csv in tibble")
        expect(ok = exists("npx_arrow"),
               failure_message = "failed to read long csv in arrow")

        # check that data set has correct number of rows and columns
        expected_rows <- 1000L
        expected_cols <- 16L
        expect_equal(object = nrow(npx_df), expected = expected_rows)
        expect_equal(object = ncol(npx_df), expected = expected_cols)
        expect_equal(object = nrow(npx_arrow), expected = expected_rows)
        expect_equal(object = ncol(npx_arrow), expected = expected_cols)

        # check that dataset has the correct column names
        expected_colnames <- c("SampleID", "Index", "OlinkID", "UniProt",
                               "Assay", "MissingFreq", "Panel",
                               "Panel_Lot_Nr", "PlateID", "QC_Warning",
                               "LOD", "NPX", "Normalization",
                               "Assay_Warning", "Sample_Type",
                               "ExploreVersion")
        expect_identical(
          object = colnames(npx_df),
          expected = expected_colnames
        )
        expect_identical(
          object = names(npx_arrow),
          expected = expected_colnames
        )

        # check identical to reference
        expect_equal(
          object = npx_df,
          expected = ref_res$npx_data_long_csv,
          tolerance = 1e-4
        )

      }
    )
  }
)

test_that(
  "data loads correctly - long - zip",
  {
    skip_if_not_installed("zip")

    # get data if available, otherwise skip the test
    ref_res <- get_example_data("reference_results.rds")
    npx_file <- get_inst_extdata_file(filename = "npx_data_long_zip.zip")

    withr::with_tempfile(
      new = "tmp_long_csv_zip",
      pattern = "csv-zip-long-",
      fileext = ".zip",
      code = {

        # check that the zip file can be copied without issues
        expect_no_condition(
          object = copied <- file.copy(npx_file, tmp_long_csv_zip)
        )
        expect_true(object = copied)

        # check that data can be loaded
        expect_no_condition(
          object = npx_df <- read_NPX(filename = tmp_long_csv_zip,
                                      out_df = "tibble")
        )
        expect_no_condition(
          object = npx_arrow <- read_NPX(filename = tmp_long_csv_zip,
                                         out_df = "arrow")
        )

        # check that data frame exists
        expect(ok = exists("npx_df"),
               failure_message = "failed to read long zip csv in tibble")
        expect(ok = exists("npx_arrow"),
               failure_message = "failed to read long zip csv in arrow")

        # check that data set has correct number of rows and columns
        expected_rows <- 1000L
        expected_cols <- 16L
        expect_equal(object = nrow(npx_df), expected = expected_rows)
        expect_equal(object = ncol(npx_df), expected = expected_cols)
        expect_equal(object = nrow(npx_arrow), expected = expected_rows)
        expect_equal(object = ncol(npx_arrow), expected = expected_cols)

        # check that dataset has the correct column names
        expected_colnames <- c("SampleID", "Index", "OlinkID", "UniProt",
                               "Assay", "MissingFreq", "Panel",
                               "Panel_Lot_Nr", "PlateID", "QC_Warning",
                               "LOD", "NPX", "Normalization",
                               "Assay_Warning", "Sample_Type",
                               "ExploreVersion")
        expect_identical(
          object = colnames(npx_df),
          expected = expected_colnames
        )
        expect_identical(
          object = names(npx_arrow),
          expected = expected_colnames
        )

        # check identical to reference
        expect_equal(
          object = npx_df,
          expected = ref_res$npx_data_long_zip,
          tolerance = 1e-4
        )

      }
    )
  }
)

test_that(
  "data loads correctly - wide - npx_data2 - xlsx",
  {
    skip_if_not_installed(pkg = "readxl")

    # get data if available, otherwise skip the test
    ref_res <- get_example_data("reference_results.rds")
    npx_file <- get_inst_extdata_file(filename = "npx_data2.xlsx")

    withr::with_tempfile(
      new = "tmp_wide_xlsx",
      pattern = "xlsx-wide-",
      fileext = ".xlsx",
      code = {

        # check that the xlsx file can be copied without issues
        expect_no_condition(
          object = copied <- file.copy(npx_file, tmp_wide_xlsx)
        )
        expect_true(object = copied)

        # check that data can be loaded
        expect_message(
          object = npx_df <- read_NPX(filename = tmp_wide_xlsx,
                                      out_df = "tibble"),
          regexp = "Identified 2 duplicates!"
        )
        expect_message(
          object = npx_arrow <- read_NPX(filename = tmp_wide_xlsx,
                                         out_df = "arrow"),
          regexp = "Identified 2 duplicates!"
        )

        # check that data frame exists
        expect(ok = exists("npx_df"),
               failure_message = "failed to read wide xlsx in tibble")
        expect(ok = exists("npx_arrow"),
               failure_message = "failed to read wide xlsx in arrow")

        # check that data set has correct number of rows and columns
        expected_rows <- 32384L
        expected_cols <- 12L
        expect_equal(object = nrow(npx_df), expected = expected_rows)
        expect_equal(object = ncol(npx_df), expected = expected_cols)
        expect_equal(object = nrow(npx_arrow), expected = expected_rows)
        expect_equal(object = ncol(npx_arrow), expected = expected_cols)

        # check that dataset has the correct column names
        expected_colnames <- c("SampleID", "NPX", "Panel", "Assay",
                               "UniProt", "OlinkID", "Panel_Version",
                               "PlateID", "QC_Warning", "LOD",
                               "MissingFreq", "Olink NPX Signature Version")
        expect_identical(
          object = colnames(npx_df),
          expected = expected_colnames
        )
        expect_identical(
          object = names(npx_arrow),
          expected = expected_colnames
        )

        # prepare the data read here and the legacy reference for comparison
        lst_df <- expected_vs_legacy_df_prep(
          long_expected = npx_df,
          long_legacy = ref_res$npx_data2,
          olink_platform = "Target 96"
        )

        # check identical to reference
        # making some harmless minor modifications to enable the match
        expect_equal(
          object = lst_df$df_expected |>
            dplyr::mutate(
              Panel = toupper(.data[["Panel"]])
            ),
          expected = lst_df$df_legacy |>
            dplyr::mutate(
              Panel_Version = NA_character_,
              Panel = toupper(.data[["Panel"]])
            ),
          tolerance = 1e-4
        )

      }
    )
  }
)

test_that(
  "data loads correctly - wide - npx_data1 - xlsx",
  {
    skip_if_not_installed(pkg = "readxl")

    # get data if available, otherwise skip the test
    ref_res <- get_example_data("reference_results.rds")
    npx_file <- get_inst_extdata_file(filename = "npx_data1.xlsx")

    withr::with_tempfile(
      new = "tmp_wide_xlsx",
      pattern = "xlsx-wide-",
      fileext = ".xlsx",
      code = {

        # check that the xlsx file can be copied without issues
        expect_no_condition(
          object = copied <- file.copy(npx_file, tmp_wide_xlsx)
        )
        expect_true(object = copied)

        # check that data load fails because we cannot determine platform
        expect_error(
          object = read_NPX(filename = tmp_wide_xlsx,
                            out_df = "tibble"),
          regexp = "Unable to recognize the Olink platform from the input file"
        )

        # check that data can be loaded when the platform is provided
        expect_message(
          object = expect_warning(
            object = npx_df <- read_NPX(filename = tmp_wide_xlsx,
                                        olink_platform = "Target 96",
                                        out_df = "tibble"),
            regexp = "Unable to recognize the Olink platform from the input"
          ),
          regexp = "Identified 2 duplicates!"
        )
        expect_message(
          object = expect_warning(
            object = npx_arrow <- read_NPX(filename = tmp_wide_xlsx,
                                           olink_platform = "Target 96",
                                           out_df = "arrow"),
            regexp = "Unable to recognize the Olink platform from the input"
          ),
          regexp = "Identified 2 duplicates!"
        )

        # check that data frame exists
        expect(ok = exists("npx_df"),
               failure_message = "failed to read wide xlsx in tibble")
        expect(ok = exists("npx_arrow"),
               failure_message = "failed to read wide xlsx in arrow")

        # check that data set has correct number of rows and columns
        expected_rows <- 29440L
        expected_cols <- 12L
        expect_equal(object = nrow(npx_df), expected = expected_rows)
        expect_equal(object = ncol(npx_df), expected = expected_cols)
        expect_equal(object = nrow(npx_arrow), expected = expected_rows)
        expect_equal(object = ncol(npx_arrow), expected = expected_cols)

        # check that dataset has the correct column names
        expected_colnames <- c("SampleID", "NPX", "Panel", "Assay",
                               "UniProt", "OlinkID", "Panel_Version",
                               "PlateID", "QC_Warning", "LOD",
                               "MissingFreq", "Olink NPX Signature Version")
        expect_identical(
          object = colnames(npx_df),
          expected = expected_colnames
        )
        expect_identical(
          object = names(npx_arrow),
          expected = expected_colnames
        )

        # prepare the data read here and the legacy reference for comparison
        lst_df <- expected_vs_legacy_df_prep(
          long_expected = npx_df,
          long_legacy = ref_res$npx_data1,
          olink_platform = "Target 96"
        )

        # check identical to reference
        # making some harmless minor modifications to enable the match
        expect_equal(
          object = lst_df$df_expected |>
            dplyr::mutate(
              Panel = toupper(.data[["Panel"]])
            ),
          expected = lst_df$df_legacy |>
            dplyr::mutate(
              Panel_Version = NA_character_,
              Panel = toupper(.data[["Panel"]])
            ),
          tolerance = 1e-4
        )

      }
    )
  }
)

test_that(
  "data loads correctly - wide - csv",
  {
    # get data if available, otherwise skip the test
    ref_res <- get_example_data("reference_results.rds")
    npx_file <- get_inst_extdata_file(filename = "npx_data_wide_csv.csv")

    withr::with_tempfile(
      new = "tmp_wide_csv",
      pattern = "csv-wide-",
      fileext = ".csv",
      code = {

        # check that the csv file can be copied without issues
        expect_no_condition(
          object = copied <- file.copy(npx_file, tmp_wide_csv)
        )
        expect_true(object = copied)

        # check that data load fails because we cannot determine platform
        expect_error(
          object = read_NPX(filename = tmp_wide_csv,
                            out_df = "tibble"),
          regexp = "Unable to recognize the Olink platform from the input file"
        )

        # check that data can be loaded when the platform is provided
        expect_message(
          object = expect_warning(
            object = npx_df <- read_NPX(filename = tmp_wide_csv,
                                        olink_platform = "Target 96",
                                        out_df = "tibble"),
            regexp = "Unable to recognize the Olink platform from the input"
          ),
          regexp = "Identified 2 duplicates!"
        )
        expect_message(
          object = expect_warning(
            object = npx_arrow <- read_NPX(filename = tmp_wide_csv,
                                           olink_platform = "Target 96",
                                           out_df = "arrow"),
            regexp = "Unable to recognize the Olink platform from the input"
          ),
          regexp = "Identified 2 duplicates!"
        )

        # check that data frame exists
        expect(ok = exists("npx_df"),
               failure_message = "failed to read wide csv in tibble")
        expect(ok = exists("npx_arrow"),
               failure_message = "failed to read wide csv in arrow")

        # check that data set has correct number of rows and columns
        expected_rows <- 29440L
        expected_cols <- 12L
        expect_equal(object = nrow(npx_df), expected = expected_rows)
        expect_equal(object = ncol(npx_df), expected = expected_cols)
        expect_equal(object = nrow(npx_arrow), expected = expected_rows)
        expect_equal(object = ncol(npx_arrow), expected = expected_cols)

        # check that dataset has the correct column names
        expected_colnames <- c("SampleID", "NPX", "Panel", "Assay",
                               "UniProt", "OlinkID", "Panel_Version",
                               "PlateID", "QC_Warning", "LOD",
                               "MissingFreq", "Olink NPX Signature Version")
        expect_identical(
          object = colnames(npx_df),
          expected = expected_colnames
        )
        expect_identical(
          object = names(npx_arrow),
          expected = expected_colnames
        )

        # check that the dimensions match the reference results
        expect_identical(
          object = dim(npx_df),
          expected = dim(ref_res$npx_data1)
        )

        # prepare the data read here and the legacy reference for comparison
        lst_df <- expected_vs_legacy_df_prep(
          long_expected = npx_df,
          long_legacy = ref_res$npx_data1,
          olink_platform = "Target 96"
        )

        # check identical to reference
        # making some harmless minor modifications to enable the match
        expect_equal(
          object = lst_df$df_expected |>
            dplyr::mutate(
              Panel = toupper(.data[["Panel"]])
            ),
          expected = lst_df$df_legacy |>
            dplyr::mutate(
              Panel_Version = NA_character_,
              Panel = toupper(.data[["Panel"]])
            ),
          tolerance = 1e-4
        )

      }
    )
  }
)

test_that(
  "data loads correctly - legacy - wide - npx_data1 - xlsx",
  {
    skip_if_not_installed(pkg = "readxl")

    # get data if available, otherwise skip the test
    ref_res <- get_example_data("reference_results.rds")
    npx_file <- get_inst_extdata_file(filename = "npx_data1.xlsx")

    withr::with_tempfile(
      new = "tmp_wide_xlsx",
      pattern = "xlsx-wide-",
      fileext = ".xlsx",
      code = {

        # check that the xlsx file can be copied without issues
        expect_no_condition(
          object = copied <- file.copy(npx_file, tmp_wide_xlsx)
        )
        expect_true(object = copied)

        # check that data can be loaded; the nested expectations require both
        # the legacy-function warning and the unrecognized-platform warning
        expect_warning(
          object = expect_warning(
            object = npx_df <- read_NPX(filename = tmp_wide_xlsx,
                                        olink_platform = "Target 96",
                                        out_df = "tibble",
                                        long_format = FALSE,
                                        data_type = "NPX",
                                        legacy = TRUE,
                                        quiet = TRUE),
            regexp = "You are using the function read_npx_legacy"
          ),
          regexp = "Unable to recognize the Olink platform from the input"
        )

        # check that data frame exists
        expect(ok = exists("npx_df"),
               failure_message = "failed to read wide xlsx in tibble")

        # check that data set has correct number of rows and columns
        expected_rows <- 29440L
        expected_cols <- 12L
        expect_equal(object = nrow(npx_df), expected = expected_rows)
        expect_equal(object = ncol(npx_df), expected = expected_cols)

        # check that dataset has the correct column names
        expected_colnames <- c("SampleID", "Index", "OlinkID", "UniProt",
                               "Assay", "MissingFreq", "Panel",
                               "Panel_Version", "PlateID", "QC_Warning",
                               "LOD", "NPX")
        expect_identical(
          object = colnames(npx_df),
          expected = expected_colnames
        )

        # check that the dimensions match the reference results
        expect_identical(
          object = dim(npx_df),
          expected = dim(ref_res$npx_data1)
        )

        # check identical to reference; both sides are sorted the same way and
        # the reference columns are put in the order of the data read here
        expect_equal(
          object = npx_df |>
            dplyr::arrange(
              .data[["OlinkID"]],
              .data[["Assay"]],
              .data[["SampleID"]]
            ),
          expected = ref_res$npx_data1 |>
            dplyr::select(
              dplyr::all_of(colnames(npx_df))
            ) |>
            dplyr::arrange(
              .data[["OlinkID"]],
              .data[["Assay"]],
              .data[["SampleID"]]
            ),
          tolerance = 1e-4
        )

      }
    )
  }
)