# External-oracle cross-check: prove artoo's xpt bytes are real SAS XPORT by
# reading them with pyreadstat (a third-party readstat-based reader), not just
# round-tripping against artoo itself. Skipped on CRAN and wherever python /
# pyreadstat is absent (haven is banned, so this is the only real-SAS oracle).
# py_with_pyreadstat() lives in helper-pyreadstat.R (shared with the fixture).

# Direction 1: artoo writes, pyreadstat reads. The Python side dumps a small
# JSON summary on stdout which is then compared against the R data frame.
test_that("a artoo-written xpt reads correctly in pyreadstat", {
  skip_on_cran()
  # The test calls jsonlite:: directly, so skip cleanly if it is missing.
  skip_if_not_installed("jsonlite")
  py <- py_with_pyreadstat()
  skip_if(py == "", "python3 + pyreadstat not available")

  spec <- artoo_spec(
    cdisc_sdtm_datasets,
    cdisc_sdtm_variables,
    codelists = cdisc_codelists
  )
  dm <- apply_spec(cdisc_dm, spec, "DM", conformance = "off")
  p <- withr::local_tempfile(fileext = ".xpt")
  write_xpt(dm, p, created = as.POSIXct("2020-01-01", tz = "UTC"))

  script <- withr::local_tempfile(fileext = ".py")
  writeLines(
    c(
      "import sys, json, pyreadstat",
      "df, meta = pyreadstat.read_xport(sys.argv[1])",
      "out = {",
      " 'nrow': int(df.shape[0]),",
      " 'cols': list(df.columns),",
      " 'usubjid': [str(x) for x in df['USUBJID']],",
      " 'age': [float(x) for x in df['AGE']],",
      " 'studyid_label': meta.column_names_to_labels.get('STUDYID', ''),",
      "}",
      "print(json.dumps(out))"
    ),
    script
  )

  # Keep stderr out of the captured stdout: any warning printed by Python
  # would otherwise be mixed into the JSON and break parsing. It goes to a
  # temp file instead so it can be reported if the script fails.
  err_file <- withr::local_tempfile(fileext = ".log")
  res <- system2(py, shQuote(c(script, p)), stdout = TRUE, stderr = err_file)

  # With stdout = TRUE, system2() attaches a "status" attribute only when the
  # command exits non-zero. Surface the Python error rather than letting
  # fromJSON() fail with an unrelated parse error.
  status <- attr(res, "status")
  if (!is.null(status)) {
    err <- if (file.exists(err_file)) {
      readLines(err_file, warn = FALSE)
    } else {
      character()
    }
    stop(
      "pyreadstat.read_xport failed (exit status ", status, "):\n",
      paste(err, collapse = "\n"),
      call. = FALSE
    )
  }

  parsed <- jsonlite::fromJSON(paste(res, collapse = "\n"))

  expect_identical(parsed$nrow, nrow(dm))
  expect_true(all(c("STUDYID", "USUBJID", "AGE") %in% parsed$cols))
  expect_identical(parsed$usubjid, as.character(dm$USUBJID))
  expect_equal(parsed$age, as.numeric(dm$AGE))
  expect_identical(parsed$studyid_label, "Study Identifier")
})

# Direction 2: pyreadstat writes, artoo reads. If the Python writer itself
# fails the test is skipped, since that is not an artoo defect.
test_that("a pyreadstat-written xpt reads correctly in artoo", {
  skip_on_cran()
  py <- py_with_pyreadstat()
  skip_if(py == "", "python3 + pyreadstat not available")

  xpt <- withr::local_tempfile(fileext = ".xpt")
  script <- withr::local_tempfile(fileext = ".py")
  writeLines(
    c(
      "import sys, pandas as pd, pyreadstat",
      "df = pd.DataFrame({",
      " 'SUBJID': ['001', '002', '003'],",
      " 'AVAL': [1.5, 2.5, 3.5],",
      "})",
      "pyreadstat.write_xport(df, sys.argv[1], column_labels={'SUBJID': 'Subject', 'AVAL': 'Value'})"
    ),
    script
  )

  # stdout/stderr are discarded, so system2() returns the exit status.
  status <- system2(
    py,
    shQuote(c(script, xpt)),
    stdout = FALSE,
    stderr = FALSE
  )
  skip_if(status != 0, "pyreadstat.write_xport failed")

  back <- read_xpt(xpt)
  expect_true(all(c("SUBJID", "AVAL") %in% names(back)))
  expect_identical(as.character(back$SUBJID), c("001", "002", "003"))
  expect_equal(as.numeric(back$AVAL), c(1.5, 2.5, 3.5))
})