test_that(
  "data loads correctly - long - parquet",
  {
    # get data if available, otherwise skip the test
    ref_res <- get_example_data("reference_results.rds")
    npx_file <- get_inst_extdata_file(filename = "npx_data_ext.parquet")

    withr::with_tempfile(
      new = "tmp_long_parquet",
      pattern = "parquet-long-",
      fileext = ".parquet",
      code = {
        # check that the parquet file can be copied without issues
        expect_no_condition(
          object = file.copy(npx_file, tmp_long_parquet)
        )
        # file.copy() returns FALSE on failure, which expect_no_condition()
        # alone does not detect, so confirm that the copy is on disk
        expect_true(object = file.exists(tmp_long_parquet))

        # check that data can be loaded as tibble and as arrow
        expect_no_condition(
          object = npx_df <- read_NPX(filename = tmp_long_parquet,
                                      out_df = "tibble")
        )
        expect_no_condition(
          object = npx_arrow <- read_NPX(filename = tmp_long_parquet,
                                         out_df = "arrow")
        )

        # check that both objects were created
        expect(ok = exists("npx_df"),
               failure_message = "failed to read long parquet in tibble")
        expect(ok = exists("npx_arrow"),
               failure_message = "failed to read long parquet in arrow")

        # check that data set has correct number of rows and columns
        expected_rows <- 1L
        expected_cols <- 19L
        expect_equal(object = nrow(npx_df), expected = expected_rows)
        expect_equal(object = ncol(npx_df), expected = expected_cols)
        expect_equal(object = nrow(npx_arrow), expected = expected_rows)
        expect_equal(object = ncol(npx_arrow), expected = expected_cols)

        # check that dataset has the correct column names; the same names, in
        # the same order, are expected from both output types
        expected_colnames <- c("SampleID", "SampleType", "WellID", "PlateID",
                               "DataAnalysisRefID", "OlinkID", "UniProt",
                               "Assay", "AssayType", "Panel", "Block", "Count",
                               "ExtNPX", "NPX", "Normalization",
                               "PCNormalizedNPX", "AssayQC", "SampleQC",
                               "ExploreVersion")
        expect_identical(
          object = colnames(npx_df),
          expected = expected_colnames
        )
        expect_identical(
          object = names(npx_arrow),
          expected = expected_colnames
        )

        # check that the tibble matches the reference within tolerance
        expect_equal(
          object = npx_df,
          expected = ref_res$npx_data_parquet,
          tolerance = 1e-4
        )
      }
    )
  }
)

test_that(
  "data loads correctly - long - csv",
  {
    # get data if available, otherwise skip the test
    ref_res <- get_example_data("reference_results.rds")
    npx_file <- get_inst_extdata_file(filename = "npx_data_long_csv.csv")

    withr::with_tempfile(
      new = "tmp_long_csv",
      pattern = "csv-long-",
      fileext = ".csv",
      code = {
        # check that the csv file can be copied without issues
        expect_no_condition(
          object = file.copy(npx_file, tmp_long_csv)
        )
        # file.copy() returns FALSE on failure, which expect_no_condition()
        # alone does not detect, so confirm that the copy is on disk
        expect_true(object = file.exists(tmp_long_csv))

        # check that data can be loaded as tibble and as arrow
        expect_no_condition(
          object = npx_df <- read_NPX(filename = tmp_long_csv,
                                      out_df = "tibble")
        )
        expect_no_condition(
          object = npx_arrow <- read_NPX(filename = tmp_long_csv,
                                         out_df = "arrow")
        )

        # check that both objects were created
        expect(ok = exists("npx_df"),
               failure_message = "failed to read long csv in tibble")
        expect(ok = exists("npx_arrow"),
               failure_message = "failed to read long csv in arrow")

        # check that data set has correct number of rows and columns
        expected_rows <- 1000L
        expected_cols <- 16L
        expect_equal(object = nrow(npx_df), expected = expected_rows)
        expect_equal(object = ncol(npx_df), expected = expected_cols)
        expect_equal(object = nrow(npx_arrow), expected = expected_rows)
        expect_equal(object = ncol(npx_arrow), expected = expected_cols)

        # check that dataset has the correct column names; the same names, in
        # the same order, are expected from both output types
        expected_colnames <- c("SampleID", "Index", "OlinkID", "UniProt",
                               "Assay", "MissingFreq", "Panel", "Panel_Lot_Nr",
                               "PlateID", "QC_Warning", "LOD", "NPX",
                               "Normalization", "Assay_Warning", "Sample_Type",
                               "ExploreVersion")
        expect_identical(
          object = colnames(npx_df),
          expected = expected_colnames
        )
        expect_identical(
          object = names(npx_arrow),
          expected = expected_colnames
        )

        # check that the tibble matches the reference within tolerance
        expect_equal(
          object = npx_df,
          expected = ref_res$npx_data_long_csv,
          tolerance = 1e-4
        )
      }
    )
  }
)

test_that(
  "data loads correctly - long - zip",
  {
    skip_if_not_installed("zip")

    # get data if available, otherwise skip the test
    ref_res <- get_example_data("reference_results.rds")
    npx_file <- get_inst_extdata_file(filename = "npx_data_long_zip.zip")

    withr::with_tempfile(
      new = "tmp_long_csv_zip",
      pattern = "csv-zip-long-",
      fileext = ".zip",
      code = {
        # check that the zip file can be copied without issues
        expect_no_condition(
          object = file.copy(npx_file, tmp_long_csv_zip)
        )
        # file.copy() returns FALSE on failure, which expect_no_condition()
        # alone does not detect, so confirm that the copy is on disk
        expect_true(object = file.exists(tmp_long_csv_zip))

        # check that data can be loaded as tibble and as arrow
        expect_no_condition(
          object = npx_df <- read_NPX(filename = tmp_long_csv_zip,
                                      out_df = "tibble")
        )
        expect_no_condition(
          object = npx_arrow <- read_NPX(filename = tmp_long_csv_zip,
                                         out_df = "arrow")
        )

        # check that both objects were created
        expect(ok = exists("npx_df"),
               failure_message = "failed to read long zip csv in tibble")
        expect(ok = exists("npx_arrow"),
               failure_message = "failed to read long zip csv in arrow")

        # check that data set has correct number of rows and columns
        expected_rows <- 1000L
        expected_cols <- 16L
        expect_equal(object = nrow(npx_df), expected = expected_rows)
        expect_equal(object = ncol(npx_df), expected = expected_cols)
        expect_equal(object = nrow(npx_arrow), expected = expected_rows)
        expect_equal(object = ncol(npx_arrow), expected = expected_cols)

        # check that dataset has the correct column names; the same names, in
        # the same order, are expected from both output types
        expected_colnames <- c("SampleID", "Index", "OlinkID", "UniProt",
                               "Assay", "MissingFreq", "Panel", "Panel_Lot_Nr",
                               "PlateID", "QC_Warning", "LOD", "NPX",
                               "Normalization", "Assay_Warning", "Sample_Type",
                               "ExploreVersion")
        expect_identical(
          object = colnames(npx_df),
          expected = expected_colnames
        )
        expect_identical(
          object = names(npx_arrow),
          expected = expected_colnames
        )

        # check that the tibble matches the reference within tolerance
        expect_equal(
          object = npx_df,
          expected = ref_res$npx_data_long_zip,
          tolerance = 1e-4
        )
      }
    )
  }
)

test_that(
  "data loads correctly - wide - npx_data2 - xlsx",
  {
    skip_if_not_installed(pkg = "readxl")

    # get data if available, otherwise skip the test
    ref_res <- get_example_data("reference_results.rds")
    npx_file <- get_inst_extdata_file(filename = "npx_data2.xlsx")

    withr::with_tempfile(
      new = "tmp_wide_xlsx",
      pattern = "xlsx-wide-",
      fileext = ".xlsx",
      code = {
        # check that the xlsx file can be copied without issues
        expect_no_condition(
          object = file.copy(npx_file, tmp_wide_xlsx)
        )
        # file.copy() returns FALSE on failure, which expect_no_condition()
        # alone does not detect, so confirm that the copy is on disk
        expect_true(object = file.exists(tmp_wide_xlsx))

        # check that data can be loaded; reading this file is expected to
        # emit a message about duplicates for both output types
        expect_message(
          object = npx_df <- read_NPX(filename = tmp_wide_xlsx,
                                      out_df = "tibble"),
          regexp = "Identified 2 duplicates!"
        )
        expect_message(
          object = npx_arrow <- read_NPX(filename = tmp_wide_xlsx,
                                         out_df = "arrow"),
          regexp = "Identified 2 duplicates!"
        )

        # check that both objects were created
        expect(ok = exists("npx_df"),
               failure_message = "failed to read wide xlsx in tibble")
        expect(ok = exists("npx_arrow"),
               failure_message = "failed to read wide xlsx in arrow")

        # check that data set has correct number of rows and columns
        expected_rows <- 32384L
        expected_cols <- 12L
        expect_equal(object = nrow(npx_df), expected = expected_rows)
        expect_equal(object = ncol(npx_df), expected = expected_cols)
        expect_equal(object = nrow(npx_arrow), expected = expected_rows)
        expect_equal(object = ncol(npx_arrow), expected = expected_cols)

        # check that dataset has the correct column names; the same names, in
        # the same order, are expected from both output types
        expected_colnames <- c("SampleID", "NPX", "Panel", "Assay", "UniProt",
                               "OlinkID", "Panel_Version", "PlateID",
                               "QC_Warning", "LOD", "MissingFreq",
                               "Olink NPX Signature Version")
        expect_identical(
          object = colnames(npx_df),
          expected = expected_colnames
        )
        expect_identical(
          object = names(npx_arrow),
          expected = expected_colnames
        )

        # prepare the freshly read data and the legacy reference
        # (ref_res$npx_data2) for comparison
        lst_df <- expected_vs_legacy_df_prep(
          long_expected = npx_df,
          long_legacy = ref_res$npx_data2,
          olink_platform = "Target 96"
        )

        # harmless minor modifications to enable the match: Panel is compared
        # in upper case on both sides, and Panel_Version is blanked out in the
        # legacy reference
        df_expected <- lst_df$df_expected |>
          dplyr::mutate(
            Panel = toupper(.data[["Panel"]])
          )
        df_legacy <- lst_df$df_legacy |>
          dplyr::mutate(
            Panel_Version = NA_character_,
            Panel = toupper(.data[["Panel"]])
          )

        # check that the values match the reference within tolerance
        expect_equal(
          object = df_expected,
          expected = df_legacy,
          tolerance = 1e-4
        )
      }
    )
  }
)

test_that(
  "data loads correctly - wide - npx_data1 - xlsx",
  {
    skip_if_not_installed(pkg = "readxl")

    # get data if available, otherwise skip the test
    ref_res <- get_example_data("reference_results.rds")
    npx_file <- get_inst_extdata_file(filename = "npx_data1.xlsx")

    withr::with_tempfile(
      new = "tmp_wide_xlsx",
      pattern = "xlsx-wide-",
      fileext = ".xlsx",
      code = {
        # check that the xlsx file can be copied without issues
        expect_no_condition(
          object = file.copy(npx_file, tmp_wide_xlsx)
        )
        # file.copy() returns FALSE on failure, which expect_no_condition()
        # alone does not detect, so confirm that the copy is on disk
        expect_true(object = file.exists(tmp_wide_xlsx))

        # check that data load fails because we cannot determine platform
        expect_error(
          object = read_NPX(filename = tmp_wide_xlsx,
                            out_df = "tibble"),
          regexp = "Unable to recognize the Olink platform from the input file"
        )

        # check that data can be loaded once the platform is supplied; a
        # warning about the unrecognized platform and a message about
        # duplicates are both expected
        expect_message(
          object = expect_warning(
            object = npx_df <- read_NPX(filename = tmp_wide_xlsx,
                                        olink_platform = "Target 96",
                                        out_df = "tibble"),
            regexp = "Unable to recognize the Olink platform from the input"
          ),
          regexp = "Identified 2 duplicates!"
        )
        expect_message(
          object = expect_warning(
            object = npx_arrow <- read_NPX(filename = tmp_wide_xlsx,
                                           olink_platform = "Target 96",
                                           out_df = "arrow"),
            regexp = "Unable to recognize the Olink platform from the input"
          ),
          regexp = "Identified 2 duplicates!"
        )

        # check that both objects were created
        expect(ok = exists("npx_df"),
               failure_message = "failed to read wide xlsx in tibble")
        expect(ok = exists("npx_arrow"),
               failure_message = "failed to read wide xlsx in arrow")

        # check that data set has correct number of rows and columns
        expected_rows <- 29440L
        expected_cols <- 12L
        expect_equal(object = nrow(npx_df), expected = expected_rows)
        expect_equal(object = ncol(npx_df), expected = expected_cols)
        expect_equal(object = nrow(npx_arrow), expected = expected_rows)
        expect_equal(object = ncol(npx_arrow), expected = expected_cols)

        # check that dataset has the correct column names; the same names, in
        # the same order, are expected from both output types
        expected_colnames <- c("SampleID", "NPX", "Panel", "Assay", "UniProt",
                               "OlinkID", "Panel_Version", "PlateID",
                               "QC_Warning", "LOD", "MissingFreq",
                               "Olink NPX Signature Version")
        expect_identical(
          object = colnames(npx_df),
          expected = expected_colnames
        )
        expect_identical(
          object = names(npx_arrow),
          expected = expected_colnames
        )

        # prepare the freshly read data and the legacy reference
        # (ref_res$npx_data1) for comparison
        lst_df <- expected_vs_legacy_df_prep(
          long_expected = npx_df,
          long_legacy = ref_res$npx_data1,
          olink_platform = "Target 96"
        )

        # harmless minor modifications to enable the match: Panel is compared
        # in upper case on both sides, and Panel_Version is blanked out in the
        # legacy reference
        df_expected <- lst_df$df_expected |>
          dplyr::mutate(
            Panel = toupper(.data[["Panel"]])
          )
        df_legacy <- lst_df$df_legacy |>
          dplyr::mutate(
            Panel_Version = NA_character_,
            Panel = toupper(.data[["Panel"]])
          )

        # check that the values match the reference within tolerance
        expect_equal(
          object = df_expected,
          expected = df_legacy,
          tolerance = 1e-4
        )
      }
    )
  }
)

test_that(
  "data loads correctly - wide - csv",
  {
    # get data if available, otherwise skip the test
    ref_res <- get_example_data("reference_results.rds")
    npx_file <- get_inst_extdata_file(filename = "npx_data_wide_csv.csv")

    withr::with_tempfile(
      new = "tmp_wide_csv",
      pattern = "csv-wide-",
      fileext = ".csv",
      code = {
        # check that the csv file can be copied without issues
        expect_no_condition(
          object = file.copy(npx_file, tmp_wide_csv)
        )
        # file.copy() returns FALSE on failure, which expect_no_condition()
        # alone does not detect, so confirm that the copy is on disk
        expect_true(object = file.exists(tmp_wide_csv))

        # check that data load fails because we cannot determine platform
        expect_error(
          object = read_NPX(filename = tmp_wide_csv,
                            out_df = "tibble"),
          regexp = "Unable to recognize the Olink platform from the input file"
        )

        # check that data can be loaded once the platform is supplied; a
        # warning about the unrecognized platform and a message about
        # duplicates are both expected
        expect_message(
          object = expect_warning(
            object = npx_df <- read_NPX(filename = tmp_wide_csv,
                                        olink_platform = "Target 96",
                                        out_df = "tibble"),
            regexp = "Unable to recognize the Olink platform from the input"
          ),
          regexp = "Identified 2 duplicates!"
        )
        expect_message(
          object = expect_warning(
            object = npx_arrow <- read_NPX(filename = tmp_wide_csv,
                                           olink_platform = "Target 96",
                                           out_df = "arrow"),
            regexp = "Unable to recognize the Olink platform from the input"
          ),
          regexp = "Identified 2 duplicates!"
        )

        # check that both objects were created
        expect(ok = exists("npx_df"),
               failure_message = "failed to read wide csv in tibble")
        expect(ok = exists("npx_arrow"),
               failure_message = "failed to read wide csv in arrow")

        # check that data set has correct number of rows and columns
        expected_rows <- 29440L
        expected_cols <- 12L
        expect_equal(object = nrow(npx_df), expected = expected_rows)
        expect_equal(object = ncol(npx_df), expected = expected_cols)
        expect_equal(object = nrow(npx_arrow), expected = expected_rows)
        expect_equal(object = ncol(npx_arrow), expected = expected_cols)

        # check that dataset has the correct column names; the same names, in
        # the same order, are expected from both output types
        expected_colnames <- c("SampleID", "NPX", "Panel", "Assay", "UniProt",
                               "OlinkID", "Panel_Version", "PlateID",
                               "QC_Warning", "LOD", "MissingFreq",
                               "Olink NPX Signature Version")
        expect_identical(
          object = colnames(npx_df),
          expected = expected_colnames
        )
        expect_identical(
          object = names(npx_arrow),
          expected = expected_colnames
        )

        # check that dimensions match the reference results
        expect_identical(
          object = dim(npx_df),
          expected = dim(ref_res$npx_data1)
        )

        # prepare the freshly read data and the legacy reference
        # (ref_res$npx_data1) for comparison
        lst_df <- expected_vs_legacy_df_prep(
          long_expected = npx_df,
          long_legacy = ref_res$npx_data1,
          olink_platform = "Target 96"
        )

        # harmless minor modifications to enable the match: Panel is compared
        # in upper case on both sides, and Panel_Version is blanked out in the
        # legacy reference
        df_expected <- lst_df$df_expected |>
          dplyr::mutate(
            Panel = toupper(.data[["Panel"]])
          )
        df_legacy <- lst_df$df_legacy |>
          dplyr::mutate(
            Panel_Version = NA_character_,
            Panel = toupper(.data[["Panel"]])
          )

        # check that the values match the reference within tolerance
        expect_equal(
          object = df_expected,
          expected = df_legacy,
          tolerance = 1e-4
        )
      }
    )
  }
)

test_that(
  "data loads correctly - legacy - wide - npx_data1 - xlsx",
  {
    skip_if_not_installed(pkg = "readxl")

    # get data if available, otherwise skip the test
    ref_res <- get_example_data("reference_results.rds")
    npx_file <- get_inst_extdata_file(filename = "npx_data1.xlsx")

    withr::with_tempfile(
      new = "tmp_wide_xlsx",
      pattern = "xlsx-wide-",
      fileext = ".xlsx",
      code = {
        # check that the xlsx file can be copied without issues
        expect_no_condition(
          object = file.copy(npx_file, tmp_wide_xlsx)
        )
        # file.copy() returns FALSE on failure, which expect_no_condition()
        # alone does not detect, so confirm that the copy is on disk
        expect_true(object = file.exists(tmp_wide_xlsx))

        # check that data can be loaded with legacy = TRUE; two warnings are
        # expected: one about using the legacy reader and one about the
        # unrecognized platform
        expect_warning(
          object = expect_warning(
            object = npx_df <- read_NPX(filename = tmp_wide_xlsx,
                                        olink_platform = "Target 96",
                                        out_df = "tibble",
                                        long_format = FALSE,
                                        data_type = "NPX",
                                        legacy = TRUE,
                                        quiet = TRUE),
            regexp = "You are using the function read_npx_legacy"
          ),
          regexp = "Unable to recognize the Olink platform from the input"
        )

        # check that the object was created
        expect(ok = exists("npx_df"),
               failure_message = "failed to read wide xlsx in tibble")

        # check that data set has correct number of rows and columns
        expected_rows <- 29440L
        expected_cols <- 12L
        expect_equal(object = nrow(npx_df), expected = expected_rows)
        expect_equal(object = ncol(npx_df), expected = expected_cols)

        # check that dataset has the correct column names
        expected_colnames <- c("SampleID", "Index", "OlinkID", "UniProt",
                               "Assay", "MissingFreq", "Panel", "Panel_Version",
                               "PlateID", "QC_Warning", "LOD", "NPX")
        expect_identical(
          object = colnames(npx_df),
          expected = expected_colnames
        )

        # check that dimensions match the reference results
        expect_identical(
          object = dim(npx_df),
          expected = dim(ref_res$npx_data1)
        )

        # bring both data frames to the same column order and row order so
        # that only the values are compared
        df_read <- npx_df |>
          dplyr::arrange(
            .data[["OlinkID"]], .data[["Assay"]], .data[["SampleID"]]
          )
        df_reference <- ref_res$npx_data1 |>
          dplyr::select(
            dplyr::all_of(colnames(npx_df))
          ) |>
          dplyr::arrange(
            .data[["OlinkID"]], .data[["Assay"]], .data[["SampleID"]]
          )

        # check that the values match the reference within tolerance
        expect_equal(
          object = df_read,
          expected = df_reference,
          tolerance = 1e-4
        )
      }
    )
  }
)