context("ExportingCsvFiles")
library(dplyr)

tempFolder <- file.path(tempdir(),"exporting")
on.exit(unlink(tempFolder, recursive = TRUE), add = TRUE)

tempFolder2 <- file.path(tempdir(),"results")
on.exit(unlink(tempFolder2, recursive = TRUE), add = TRUE)

tempFolder3 <- file.path(tempdir(),"attrition")
on.exit(unlink(tempFolder3, recursive = TRUE), add = TRUE)

test_that("addDbAndSettings works for single table", {

  andromeda <- Andromeda::andromeda()
  andromeda$singleTable <- data.frame(
    column1 = 1:5,
    column2 = rep('empty', 5)
  )

  andromeda <- addDbAndSettings(
    andromeda = andromeda,
    databaseId = 1,
    settingId = 'madeup'
  )

  testthat::expect_true(names(andromeda) == 'singleTable')
  testthat::expect_true(sum(colnames(andromeda$singleTable) %in%
                          c("column1", "column2", "databaseId", "settingId")) == 4)
  testthat::expect_true(unique(as.data.frame(andromeda$singleTable)$databaseId) == 1)
  testthat::expect_true(unique(as.data.frame(andromeda$singleTable)$settingId) == 'madeup')

})


test_that("addDbAndSettings works for multiple tables including empty", {

  andromeda <- Andromeda::andromeda()
  andromeda$firstTable <- data.frame(
    column1 = 1:5,
    column2 = rep('empty', 5)
  )

  andromeda$secondTable <- data.frame(
    column3 = 1:5,
    column4 = rep('empty', 5)
  )

  # checking it works with empty
  andromeda$emptyTable <- data.frame(
    column5 = 1,
    column6 = 'fake'
  )[-1,]

  andromeda <-addDbAndSettings(
    andromeda = andromeda,
    databaseId = 1,
    settingId = 'madeup'
  )

  testthat::expect_true(sum(names(andromeda) %in% c('firstTable','secondTable', 'emptyTable')) == 3)
  testthat::expect_true(sum(colnames(andromeda$firstTable) %in%
                              c("column1", "column2", "databaseId", "settingId")) == 4)
  testthat::expect_true(sum(colnames(andromeda$secondTable) %in%
                              c("column3", "column4", "databaseId", "settingId")) == 4)
  testthat::expect_true(sum(colnames(andromeda$emptyTable) %in%
                              c("column5", "column6", "databaseId", "settingId")) == 4)

  testthat::expect_true(unique(as.data.frame(andromeda$firstTable)$databaseId) == 1)
  testthat::expect_true(unique(as.data.frame(andromeda$firstTable)$settingId) == 'madeup')
  testthat::expect_true(unique(as.data.frame(andromeda$secondTable)$databaseId) == 1)
  testthat::expect_true(unique(as.data.frame(andromeda$secondTable)$settingId) == 'madeup')

  testthat::expect_true(nrow(as.data.frame(andromeda$emptyTable)) == 0)

})

test_that("saveCharacterizationAndromeda", {

  andromeda <- Andromeda::andromeda()
  andromeda$firstTable <- data.frame(
    column1 = 1:5,
    column2 = rep('empty', 5)
  )

  saveCharacterizationAndromeda(
    andromeda = andromeda,
    outputFolder = tempFolder
  )

  testthat::expect_true(file.exists(file.path(tempFolder, 'result')))

  andromeda <- Andromeda::loadAndromeda(file.path(tempFolder, 'result'))

  testthat::expect_true(names(andromeda) == 'firstTable')

  testthat::expect_identical(
    as.data.frame(andromeda$firstTable),
    data.frame(
      column1 = 1:5,
      column2 = rep('empty', 5)
    )

  )

  file.remove(file.path(tempFolder, 'result'))

})


test_that("exportAndromedaSubfilesToCsv", {

  # inside tempFolder create folders with Andromeda result file

  dir.create(file.path(tempFolder, 'test_1'))
  on.exit(unlink(file.path(tempFolder, 'test_1'), recursive = TRUE), add = TRUE)
  dir.create(file.path(tempFolder, 'test_2'))
  on.exit(unlink(file.path(tempFolder, 'test_1'), recursive = TRUE), add = TRUE)

  andromeda1 <- Andromeda::andromeda()
  andromeda1$analysisRef <- data.frame(
    analysisId = 1:3,
    databaseId = 1,
    settingId = rep('hello', 3)
  )
  andromeda1$covariateRef <- data.frame(
    covariateId = 1:5,
    databaseId = 1,
    settingId = rep('none', 5)
  )
  andromeda1$targetCovariates <- data.frame(
    covariateId = 1:5,
    databaseId = 1,
    settingId = rep('none', 5),
    sumValue = c(4,20,100,2,25),
    averageValue = c(4,20,100,2,25)/200
  )

  andromeda2 <- Andromeda::andromeda()
  andromeda2$analysisRef <- data.frame(
    analysisId = 1,
    databaseId = 1,
    settingId = rep('hi', 1)
  )
  andromeda2$covariateRef <- data.frame(
    covariateId = 1,
    databaseId = 1,
    settingId = rep('none', 1)
  )

  Andromeda::saveAndromeda(
    andromeda = andromeda1,
    fileName = file.path(tempFolder, 'test_1', 'result')
      )
  Andromeda::saveAndromeda(
    andromeda = andromeda2,
    fileName = file.path(tempFolder, 'test_2', 'result')
  )

  # 1) General test

  exportAndromedaSubfilesToCsv(
    executionPath = tempFolder,
    outputFolder = tempFolder2,
    csvFilePrefix = '',
    batchSize = 100000,
    minCellCount = 0,
    tablesToExport = c("analysisRef", "covariateRef")
  )

  # make sure two csv files exist
  testthat::expect_true(sum(c("analysis_ref.csv", "covariate_ref.csv") %in% dir(tempFolder2)) == 2)

  # make sure nrows are correct
  analysisRef <- read.csv(file.path(tempFolder2, "analysis_ref.csv"))
  testthat::expect_true(nrow(analysisRef) == 4)

  covRef <- read.csv(file.path(tempFolder2, "covariate_ref.csv"))
  testthat::expect_true(nrow(covRef) == 5) # Not correct

  # check tracker
  #readRDS(file.path(tempFolder2, 'tracker.rds'))

  # make sure all columns are there
  testthat::expect_true(sum(colnames(analysisRef) %in% c('analysis_id', 'database_id', 'setting_id')) == 3)
  testthat::expect_true(sum(colnames(covRef) %in% c('covariate_id', 'database_id', 'setting_id')) == 3)


  # 2) Testing csv prefix
  exportAndromedaSubfilesToCsv(
    executionPath = tempFolder,
    outputFolder = tempFolder2,
    csvFilePrefix = 'c_',
    batchSize = 100000,
    minCellCount = 0,
    tablesToExport = c("analysisRef", "covariateRef")
  )

  testthat::expect_true(sum(c("c_analysis_ref.csv", "c_covariate_ref.csv") %in% dir(tempFolder2)) == 2)


  # 3) Testing batchSize
  exportAndromedaSubfilesToCsv(
    executionPath = tempFolder,
    outputFolder = tempFolder2,
    csvFilePrefix = 'c_',
    batchSize = 1,
    minCellCount = 0,
    tablesToExport = c("analysisRef", "covariateRef")
  )
  # make sure two csv files exist
  testthat::expect_true(sum(c("c_analysis_ref.csv", "c_covariate_ref.csv") %in% dir(tempFolder2)) == 2)

  # make sure nrows are correct
  analysisRef <- read.csv(file.path(tempFolder2, "c_analysis_ref.csv"))
  testthat::expect_true(nrow(analysisRef) == 4)

  covRef <- read.csv(file.path(tempFolder2, "c_covariate_ref.csv"))
  testthat::expect_true(nrow(covRef) == 5) # Not correct

  # check tracker
  #readRDS(file.path(tempFolder2, 'tracker.rds'))

  # make sure all columns are there
  testthat::expect_true(sum(colnames(analysisRef) %in% c('analysis_id', 'database_id', 'setting_id')) == 3)
  testthat::expect_true(sum(colnames(covRef) %in% c('covariate_id', 'database_id', 'setting_id')) == 3)


  # 4) Testing minCellCount - will test more extensively below in censorResults test
  exportAndromedaSubfilesToCsv(
    executionPath = tempFolder,
    outputFolder = tempFolder2,
    csvFilePrefix = 'c_',
    batchSize = 10,
    minCellCount = 10000,
    tablesToExport = c("analysisRef", "covariateRef", "targetCovariates")
  )

  testthat::expect_true(sum(c("c_analysis_ref.csv", "c_covariate_ref.csv", "c_target_covariates.csv") %in% dir(tempFolder2)) == 3)

  targetCovs <- read.csv(file.path(tempFolder2, "c_target_covariates.csv"))
  testthat::expect_true(nrow(targetCovs) == 5)

  testthat::expect_true(unique(targetCovs$sum_value) == -10000)
  testthat::expect_true(is.na(unique(targetCovs$average_value)))

})


test_that("removeRedundant", {
  dir.create(file.path(tempFolder, 'test_3'))
  on.exit(unlink(file.path(tempFolder, 'test_3'), recursive = TRUE), add = TRUE)

  andromeda1 <- Andromeda::andromeda()
  andromeda1$analysisRef <- data.frame(
    analysisId = 1:100,
    databaseId = 1,
    settingId = rep('hello', 100)
  )
  andromeda1$covariateRef <- data.frame(
    covariateId = 1:20,
    databaseId = 1,
    settingId = rep('none', 20)
  )


  anIds <- as.data.frame(
    andromeda1$analysisRef
  ) %>%
    dplyr::filter(.data$analysisId %in% c(1,99)) %>%
    dplyr::mutate(
      uniqueId = paste0(.data$settingId, "-", as.character(format(as.double(.data$analysisId), nsmall = 0,  scientific = FALSE, trim = TRUE )))
    ) %>%
    dplyr::select("uniqueId") %>%
    dplyr::pull()

  covIds <- as.data.frame(
    andromeda1$covariateRef
  ) %>%
    dplyr::filter(.data$covariateId %in% c(5,6,18)) %>%
    dplyr::mutate(
      uniqueId = paste0(.data$settingId, "-", as.character(format(as.double(.data$covariateId), nsmall = 0,  scientific = FALSE, trim = TRUE )))
    ) %>%
    dplyr::select("uniqueId") %>%
    dplyr::pull()

  tracker <- list(
    covariateRefTracker = covIds,
    analysisRefTracker = anIds
  )

  saveRDS(tracker, file.path(tempFolder2, 'tracker.rds'))


  # 1) covariateRef

  covref <- removeRedundant(
    andromeda = andromeda1,
    tableName = 'covariateRef',
    csvTrackerFile = file.path(tempFolder2, 'tracker.rds')
  )

  # 17 new ids should be added
  updatedTracker <- readRDS(file.path(tempFolder2, 'tracker.rds'))
  testthat::expect_true(length(updatedTracker$covariateRefTracker) == 20)
  # covariateIds c(5,6,18) should be gone
  testthat::expect_true(nrow(as.data.frame(covref)) == 17)
  testthat::expect_true(sum(as.data.frame(covref)$covariateId %in% c(5,6,18)) == 0)


  # 1) analysisRef

  anref <- removeRedundant(
    andromeda = andromeda1,
    tableName = 'analysisRef',
    csvTrackerFile = file.path(tempFolder2, 'tracker.rds')
  )

  # 17 new ids should be added
  updatedTracker <- readRDS(file.path(tempFolder2, 'tracker.rds'))
  testthat::expect_true(length(updatedTracker$analysisRefTracker) == 100)
  # analysisIds c(1,99) should be gone
  testthat::expect_true(nrow(as.data.frame(anref)) == 98)
  testthat::expect_true(sum(as.data.frame(anref)$analysisId %in% c(1,99)) == 0)


})

test_that("censorResults", {

  # test targetCovariates
  data <- data.frame(
    covariateId = 1:5,
    databaseId = '1',
    settingId = 'madeup',
    sumValue = c(4,1,11,14,150),
    averageValue = c(4,1,11,14,150)/200
  )

  newdata <- censorResults(
    data = data,
    tableName = 'targetCovariates',
    minCellCount = 0
  )

  # check minCellCount 0 does nothing
  testthat::expect_identical(data, newdata)

  newdata <- censorResults(
    data = data,
    tableName = 'targetCovariates',
    minCellCount = 10
  )
   censored <- data$sumValue < 10
   testthat::expect_true(unique(newdata$sumValue[censored]) == -10)
   testthat::expect_true(is.na(unique(newdata$averageValue[censored])))

   notcensored <- data$sumValue >= 10
   testthat::expect_identical(newdata$sumValue[notcensored], data$sumValue[notcensored])

  # test riskFactorCovariates
  data <- data.frame(
    covariateId = 1:5,
    databaseId = '1',
    settingId = 'madeup',
    caseSumValue = c(4,1,11,14,150),
    caseAverageValue = c(4,1,11,14,150)/200,
    nonCaseSumValue = c(1,0,100,90,50),
    nonCaseAverageValue = c(1,0,100,90,50)/100
  )

  newdata <- censorResults(
    data = data,
    tableName = 'riskFactorCovariates',
    minCellCount = 10
  )

  censored <- data$caseSumValue < 10 & data$caseSumValue !=0
  testthat::expect_true(unique(newdata$caseSumValue[censored]) == -10)
  testthat::expect_true(is.na(unique(newdata$caseAverageValue[censored])))
  notcensored <- data$caseSumValue >= 10 | data$caseSumValue == 0
  testthat::expect_identical(newdata$caseSumValue[notcensored], data$caseSumValue[notcensored])

  censored <- data$nonCaseSumValue < 10 & data$nonCaseSumValue !=0
  testthat::expect_true(unique(newdata$nonCaseSumValue[censored]) == -10)
  testthat::expect_true(is.na(unique(newdata$nonCaseAverageValue[censored])))
  notcensored <- data$nonCaseSumValue >= 10 | data$nonCaseSumValue == 0
  testthat::expect_identical(newdata$nonCaseSumValue[notcensored], data$nonCaseSumValue[notcensored])


  # test caseSeriesCovariates
  data <- data.frame(
    covariateId = 1:5,
    databaseId = '1',
    settingId = 'madeup',
    beforeSumValue = c(4,1,11,14,150),
    beforeAverageValue = c(4,1,11,14,150)/200,
    duringSumValue = c(1,0,100,90,50),
    duringAverageValue = c(1,0,100,90,50)/200,
    afterSumValue = c(1,0,200,9,50),
    afterAverageValue = c(1,0,200,9,50)/200
  )

  newdata <- censorResults(
    data = data,
    tableName = 'caseSeriesCovariates',
    minCellCount = 10
  )

  censored <- data$beforeSumValue < 10 & data$beforeSumValue !=0
  testthat::expect_true(unique(newdata$beforeSumValue[censored]) == -10)
  testthat::expect_true(is.na(unique(newdata$beforeAverageValue[censored])))
  notcensored <- data$beforeSumValue >= 10 | data$beforeSumValue == 0
  testthat::expect_identical(newdata$beforeSumValue[notcensored], data$beforeSumValue[notcensored])

  censored <- data$duringSumValue < 10 & data$duringSumValue !=0
  testthat::expect_true(unique(newdata$duringSumValue[censored]) == -10)
  testthat::expect_true(is.na(unique(newdata$duringAverageValue[censored])))
  notcensored <- data$duringSumValue >= 10 | data$duringSumValue == 0
  testthat::expect_identical(newdata$duringSumValue[notcensored], data$duringSumValue[notcensored])

  censored <- data$afterSumValue < 10 & data$afterSumValue !=0
  testthat::expect_true(unique(newdata$afterSumValue[censored]) == -10)
  testthat::expect_true(is.na(unique(newdata$afterAverageValue[censored])))
  notcensored <- data$afterumValue >= 10 | data$afterSumValue == 0
  testthat::expect_identical(newdata$afterSumValue[notcensored], data$afterSumValue[notcensored])


  # CONTINIOUS COVARIATES


  # Time to event
  data <- data.frame(
    databaseId = '1',
    targetCohortDefinitionId = 1,
    outcomeCohortDefinitionId = 3,
    outcomeType = 'first',
    targetOutcomeType = 'after last',
    timeToEvent = c(1,2,3,4,5,10),
    numEvents = c(10,8,12,100,0,5),
    timeScale = 'per 1-day'
  )

  newdata <- censorResults(
    data = data,
    tableName = 'timeToEvent',
    minCellCount = 10
  )
  censored <- data$numEvents < 10 & data$numEvents !=0
  testthat::expect_true(unique(newdata$numEvents[censored]) == -10)
  notcensored <- data$numEvents  >= 10 | data$numEvents == 0
  testthat::expect_identical(newdata$numEvents[notcensored], data$numEvents[notcensored])


  # Dechallenge- rechall
  data <- data.frame(
    databaseId = '1',
    dechallengeStopInterval = 30,
    dechallengeEvaluationWindow = 30,
    targetCohortDefinitionId = 1,
    outcomeCohortDefinitionId = 3,
    numExposureEras = c(100,10,3,5),
    numPersonsExposed = c(50,10,3,5),
    numCases = c(12,5,1,0),
    dechallengeAttempt = c(12,5,1,0),
    dechallengeFail = c(6,5,1,0),
    dechallengeSuccess = c(6,0,0,0),
    rechallengeAttempt = c(12,5,1,0),
    rechallengeFail = c(7,2,1,0),
    rechallengeSuccess = c(5,3,0,0),
    pctDechallengeAttempt = 0.2,
    pctDechallengeSuccess = 0.2,
    pctDechallengeFail = 0.2,
    pctRechallengeAttempt = 0.2,
    pctRechallengeSuccess = 0.2,
    pctRechallengeFail = 0.2
  )
  newdata <- censorResults(
    data = data,
    tableName = 'dechallengeRechallenge',
    minCellCount = 5
  )

  censored <- data$numExposureEras < 5 & data$numExposureEras !=0
  testthat::expect_true(unique(newdata$numExposureEras[censored]) == -5)
  notcensored <- data$numExposureEras  >= 5 | data$numExposureEras == 0
  testthat::expect_identical(newdata$numExposureEras[notcensored], data$numExposureEras[notcensored])

  #numPersonsExposed
  censored <- data$numExposureEras < 5 & data$numPersonsExposed !=0
  testthat::expect_true(unique(newdata$numPersonsExposed[censored]) == -5)
  notcensored <- data$numPersonsExposed  >= 5 | data$numPersonsExposed == 0
  testthat::expect_identical(newdata$numPersonsExposed[notcensored], data$numPersonsExposed[notcensored])

  # numCases
  censored <- data$numCases < 5 & data$numCases !=0
  testthat::expect_true(unique(newdata$numCases[censored]) == -5)
  notcensored <- data$numCases  >= 5 | data$numCases == 0
  testthat::expect_identical(newdata$numCases[notcensored], data$numCases[notcensored])

  #dechallengeAttempt - links to pctDechallengeAttempt
  censored <- data$dechallengeAttempt < 5 & data$dechallengeAttempt !=0
  testthat::expect_true(unique(newdata$dechallengeAttempt[censored]) == -5)
  testthat::expect_true(is.na(unique(newdata$pctDechallengeAttempt[censored])))
  notcensored <- data$dechallengeAttempt  >= 5 | data$dechallengeAttempt == 0
  testthat::expect_identical(newdata$dechallengeAttempt[notcensored], data$dechallengeAttempt[notcensored])

  #dechallengeFail - links to dechallengeSuccess, pctDechallengeFail, pctDechallengeSuccess
  censored <- data$dechallengeFail < 5 & data$dechallengeFail !=0
  testthat::expect_true(unique(newdata$dechallengeFail[censored]) == -5)
  testthat::expect_true(unique(newdata$dechallengeSuccess[censored]) == -5)
  testthat::expect_true(is.na(unique(newdata$pctDechallengeFail[censored])))
  testthat::expect_true(is.na(unique(newdata$pctDechallengeSuccess[censored])))
  notcensored <- data$dechallengeFail  >= 5 | data$dechallengeFail == 0
  testthat::expect_identical(newdata$dechallengeFail[notcensored], data$dechallengeFail[notcensored])

  #rechallengeAttempt - links to pctRechallengeAttempt
  censored <- data$rechallengeAttempt < 5 & data$rechallengeAttempt !=0
  testthat::expect_true(unique(newdata$rechallengeAttempt[censored]) == -5)
  testthat::expect_true(is.na(unique(newdata$pctRechallengeAttempt[censored])))
  notcensored <- data$rechallengeAttempt  >= 5 | data$rechallengeAttempt == 0
  testthat::expect_identical(newdata$rechallengeAttempt[notcensored], data$rechallengeAttempt[notcensored])

  #rechallengeFail - links to rechallengeSuccess, pctRechallengeFail, pctRechallengeSuccess
  censored <- data$rechallengeFail < 5 & data$rechallengeFail !=0
  testthat::expect_true(unique(newdata$rechallengeFail[censored]) == -5)
  testthat::expect_true(unique(newdata$rechallengeSuccess[censored]) == -5)
  testthat::expect_true(is.na(unique(newdata$pctRechallengeFail[censored])))
  testthat::expect_true(is.na(unique(newdata$pctRechallengeSuccess[censored])))
  notcensored <- data$rechallengeFail  >= 5 | data$rechallengeFail == 0
  testthat::expect_identical(newdata$rechallengeFail[notcensored], data$rechallengeFail[notcensored])


  # CONTINUOUS COVS
  #caseSeriesCovariatesContinuous
  data <- data.frame(
    covariateId = 1:5,
    databaseId = '1',
    settingId = 'madeup',
    beforeCountValue = c(4,1,11,14,150),
    beforeAverageValue = c(4,1,11,14,150)/200,
    beforeMinValue = rep(1,5),
    beforeMaxValue = rep(1,5),
    beforeStandardDeviation = rep(1,5),
    beforeMedianValue = rep(1,5),
    beforeP10Value = rep(1,5),
    beforeP25Value = rep(1,5),
    beforeP75Value = rep(1,5),
    beforeP90Value =rep(1,5),

    duringCountValue = c(1,0,100,90,50),
    duringAverageValue = c(1,0,100,90,50)/200,
    duringMinValue = rep(1,5),
    duringMaxValue = rep(1,5),
    duringStandardDeviation = rep(1,5),
    duringMedianValue = rep(1,5),
    duringP10Value = rep(1,5),
    duringP25Value = rep(1,5),
    duringP75Value = rep(1,5),
    duringP90Value =rep(1,5),

    afterCountValue = c(1,0,200,9,50),
    afterAverageValue = c(1,0,200,9,50)/200,
    afterMinValue = rep(1,5),
    afterMaxValue = rep(1,5),
    afterStandardDeviation = rep(1,5),
    afterMedianValue = rep(1,5),
    afterP10Value = rep(1,5),
    afterP25Value = rep(1,5),
    afterP75Value = rep(1,5),
    afterP90Value =rep(1,5)
  )

  newdata <- censorResults(
    data = data,
    tableName = 'caseSeriesCovariatesContinuous',
    minCellCount = 10
  )

  censored <- data$beforeCountValue < 10 & data$beforeCountValue !=0
  testthat::expect_true(unique(newdata$beforeCountValue[censored]) == -10)
  testthat::expect_true(is.na(unique(newdata$beforeAverageValue[censored])))
  testthat::expect_true(is.na(unique(newdata$beforeMinValue[censored])))
  testthat::expect_true(is.na(unique(newdata$beforeMaxValue[censored])))
  testthat::expect_true(is.na(unique(newdata$beforeMedianValue[censored])))
  testthat::expect_true(is.na(unique(newdata$beforeP10Value[censored])))
  testthat::expect_true(is.na(unique(newdata$beforeP25Value[censored])))
  testthat::expect_true(is.na(unique(newdata$beforeP75Value[censored])))
  testthat::expect_true(is.na(unique(newdata$beforeP90Value[censored])))
  testthat::expect_true(is.na(unique(newdata$beforeStandardDeviation[censored])))
  notcensored <- data$beforeCountValue >= 10 | data$beforeCountValue == 0
  testthat::expect_identical(newdata$beforeCountValue[notcensored], data$beforeCountValue[notcensored])

  censored <- data$duringCountValue < 10 & data$duringCountValue !=0
  testthat::expect_true(unique(newdata$duringCountValue[censored]) == -10)
  testthat::expect_true(is.na(unique(newdata$duringAverageValue[censored])))
  testthat::expect_true(is.na(unique(newdata$duringMinValue[censored])))
  testthat::expect_true(is.na(unique(newdata$duringMaxValue[censored])))
  testthat::expect_true(is.na(unique(newdata$duringMedianValue[censored])))
  testthat::expect_true(is.na(unique(newdata$duringP10Value[censored])))
  testthat::expect_true(is.na(unique(newdata$duringP25Value[censored])))
  testthat::expect_true(is.na(unique(newdata$duringP75Value[censored])))
  testthat::expect_true(is.na(unique(newdata$duringP90Value[censored])))
  testthat::expect_true(is.na(unique(newdata$duringStandardDeviation[censored])))
  notcensored <- data$duringCountValue >= 10 | data$duringCountValue == 0
  testthat::expect_identical(newdata$duringCountValue[notcensored], data$duringCountValue[notcensored])

  censored <- data$afterCountValue < 10 & data$afterCountValue !=0
  testthat::expect_true(unique(newdata$afterCountValue[censored]) == -10)
  testthat::expect_true(is.na(unique(newdata$afterAverageValue[censored])))
  testthat::expect_true(is.na(unique(newdata$afterMinValue[censored])))
  testthat::expect_true(is.na(unique(newdata$afterMaxValue[censored])))
  testthat::expect_true(is.na(unique(newdata$afterMedianValue[censored])))
  testthat::expect_true(is.na(unique(newdata$afterP10Value[censored])))
  testthat::expect_true(is.na(unique(newdata$afterP25Value[censored])))
  testthat::expect_true(is.na(unique(newdata$afterP75Value[censored])))
  testthat::expect_true(is.na(unique(newdata$afterP90Value[censored])))
  testthat::expect_true(is.na(unique(newdata$afterStandardDeviation[censored])))
  notcensored <- data$afterCountValue >= 10 | data$afterCountValue == 0
  testthat::expect_identical(newdata$afterCountValue[notcensored], data$afterCountValue[notcensored])


  # Risk Factor continuous
  data <- data.frame(
    covariateId = 1:5,
    databaseId = '1',
    settingId = 'madeup',

    caseCountValue = c(4,1,11,14,150),
    caseAverageValue = c(4,1,11,14,150)/200,
    caseMinValue = rep(1,5),
    caseMaxValue = rep(1,5),
    caseStandardDeviation = rep(1,5),
    caseMedianValue = rep(1,5),
    caseP10Value = rep(1,5),
    caseP25Value = rep(1,5),
    caseP75Value = rep(1,5),
    caseP90Value =rep(1,5),

    nonCaseCountValue = c(1,0,100,90,50),
    nonCaseAverageValue = c(1,0,100,90,50)/200,
    nonCaseMinValue = rep(1,5),
    nonCaseMaxValue = rep(1,5),
    nonCaseStandardDeviation = rep(1,5),
    nonCaseMedianValue = rep(1,5),
    nonCaseP10Value = rep(1,5),
    nonCaseP25Value = rep(1,5),
    nonCaseP75Value = rep(1,5),
    nonCaseP90Value =rep(1,5)
  )

  newdata <- censorResults(
    data = data,
    tableName = 'riskFactorCovariatesContinuous',
    minCellCount = 10
  )

  censored <- data$caseCountValue < 10 & data$caseCountValue !=0
  testthat::expect_true(unique(newdata$caseCountValue[censored]) == -10)
  testthat::expect_true(is.na(unique(newdata$caseAverageValue[censored])))
  testthat::expect_true(is.na(unique(newdata$caseMinValue[censored])))
  testthat::expect_true(is.na(unique(newdata$caseMaxValue[censored])))
  testthat::expect_true(is.na(unique(newdata$caseMedianValue[censored])))
  testthat::expect_true(is.na(unique(newdata$caseP10Value[censored])))
  testthat::expect_true(is.na(unique(newdata$caseP25Value[censored])))
  testthat::expect_true(is.na(unique(newdata$caseP75Value[censored])))
  testthat::expect_true(is.na(unique(newdata$caseP90Value[censored])))
  testthat::expect_true(is.na(unique(newdata$caseStandardDeviation[censored])))
  notcensored <- data$caseCountValue >= 10 | data$caseCountValue == 0
  testthat::expect_identical(newdata$caseCountValue[notcensored], data$caseCountValue[notcensored])

  censored <- data$nonCaseCountValue < 10 & data$nonCaseCountValue !=0
  testthat::expect_true(unique(newdata$nonCaseCountValue[censored]) == -10)
  testthat::expect_true(is.na(unique(newdata$nonCaseAverageValue[censored])))
  testthat::expect_true(is.na(unique(newdata$nonCaseMinValue[censored])))
  testthat::expect_true(is.na(unique(newdata$nonCaseMaxValue[censored])))
  testthat::expect_true(is.na(unique(newdata$nonCaseMedianValue[censored])))
  testthat::expect_true(is.na(unique(newdata$nonCaseP10Value[censored])))
  testthat::expect_true(is.na(unique(newdata$nonCaseP25Value[censored])))
  testthat::expect_true(is.na(unique(newdata$nonCaseP75Value[censored])))
  testthat::expect_true(is.na(unique(newdata$nonCaseP90Value[censored])))
  testthat::expect_true(is.na(unique(newdata$nonCaseStandardDeviation[censored])))
  notcensored <- data$nonCaseCountValue >= 10 | data$nonCaseCountValue == 0
  testthat::expect_identical(newdata$nonCaseCountValue[notcensored], data$nonCaseCountValue[notcensored])


  # target continuous
  data <- data.frame(
    covariateId = 1:5,
    databaseId = '1',
    settingId = 'madeup',
    countValue = c(4,1,11,14,150),
    averageValue = c(4,1,11,14,150)/200,
    minValue = rep(1,5),
    maxValue = rep(1,5),
    standardDeviation = rep(1,5),
    medianValue = rep(1,5),
    p10Value = rep(1,5),
    p25Value = rep(1,5),
    p75Value = rep(1,5),
    p90Value =rep(1,5)
  )

  newdata <- censorResults(
    data = data,
    tableName = 'targetCovariatesContinuous',
    minCellCount = 10
  )

  censored <- data$countValue < 10 & data$countValue !=0
  testthat::expect_true(unique(newdata$countValue[censored]) == -10)
  testthat::expect_true(is.na(unique(newdata$averageValue[censored])))
  testthat::expect_true(is.na(unique(newdata$minValue[censored])))
  testthat::expect_true(is.na(unique(newdata$maxValue[censored])))
  testthat::expect_true(is.na(unique(newdata$medianValue[censored])))
  testthat::expect_true(is.na(unique(newdata$p10Value[censored])))
  testthat::expect_true(is.na(unique(newdata$p25Value[censored])))
  testthat::expect_true(is.na(unique(newdata$p75Value[censored])))
  testthat::expect_true(is.na(unique(newdata$p90Value[censored])))
  testthat::expect_true(is.na(unique(newdata$standardDeviation[censored])))
  notcensored <- data$countValue >= 10 | data$countValue == 0
  testthat::expect_identical(newdata$countValue[notcensored], data$countValue[notcensored])

})


test_that("exportAttrition", {

  # create example attrition
  andromeda <- Andromeda::andromeda()
  andromeda$attrition <- data.frame(
    cohortDefinitionId = c(10,20,30,40, 11,21,31,12,22,32),
    attrReason = c('Target first in 365 - 365 prior obs',
                   'Target first in 365 - 365 prior obs',
                   'Target first in 365 - 365 prior obs',
                   'Target first in 365 - 365 prior obs',
                   'Cases','Cases','Cases',
                   '3. Has outcome during TAR',
                   '3. Has outcome during TAR',
                   '3. Has outcome during TAR'
                   ),
    n = c(1000,50,400,350,
          50,10,60,
          50,10,60
          ),
    databaseId = 'db',
    settingId = 'set1'
  )

  # save to temp folder
  saveCharacterizationAndromeda(
    andromeda = andromeda,
    outputFolder = file.path(tempFolder3,'attrition')
      )

  # now create target and case settings
  target_settings <- data.frame(
    target_id	= c(1,2,3,4),
    limit_to_first_in_n_days	= rep(365, 4),
    min_prior_observation	= rep(365, 4),
    setting_id	= 'set1',
    characterization_target_id	= c(10,20,30,40),
    database_id = 'db'
  )

  case_settings <- data.frame(
    outcome_id	= rep(3,3),
    outcome_washout_days	= rep(90,3),
    risk_window_start	= rep(1,3),
    start_anchor = rep('cohort_start',3),
    risk_window_end	= rep(365,3),
    end_anchor = rep('cohort_start',3),
    runtype = rep('PLP',3),
    characterization_case_id	= c(1,2,3),
    setting_id	= 'set1',
    characterization_target_id	= c(10,20,40),
    database_id = 'db'
  )

  utils::write.csv(
    x = target_settings,
    file = file.path(tempFolder3, 'c_target_settings.csv')
      )

  utils::write.csv(
    x = case_settings,
    file = file.path(tempFolder3, 'c_case_settings.csv')
  )

exportAttrition(
    executionPath = tempFolder3,
    outputFolder = tempFolder3,
    csvFilePrefix = 'c_',
    minCellCount = 0
)

# load attrition
testthat::expect_true(file.exists(file.path(tempFolder3, 'c_attrition.csv')))

attrition <- utils::read.csv(file.path(tempFolder3, 'c_attrition.csv'))
testthat::expect_true(nrow(attrition) == 16)
testthat::expect_true(sum(colnames(attrition) %in% c('cohort_definition_id', 'attr_reason', 'n', 'database_id', 'setting_id')) == 5)


# now test the minCellCount
exportAttrition(
  executionPath = tempFolder3,
  outputFolder = tempFolder3,
  csvFilePrefix = 'c_',
  minCellCount = 50
)
attrition <- utils::read.csv(file.path(tempFolder3, 'c_attrition.csv'))
testthat::expect_true(sum(attrition$n < 50 & attrition$n != -50) == 0)
testthat::expect_true(sum(colnames(attrition) %in% c('cohort_definition_id', 'attr_reason', 'n', 'database_id', 'setting_id')) == 5)

# now test csvFilePrefix
utils::write.csv(
  x = target_settings,
  file = file.path(tempFolder3, 'cccd_target_settings.csv')
)

utils::write.csv(
  x = case_settings,
  file = file.path(tempFolder3, 'cccd_case_settings.csv')
)
exportAttrition(
  executionPath = tempFolder3,
  outputFolder = tempFolder3,
  csvFilePrefix = 'cccd_',
  minCellCount = 12
)
attrition <- utils::read.csv(file.path(tempFolder3, 'cccd_attrition.csv'))
testthat::expect_true(sum(attrition$n < 12 & attrition$n != -12) == 0)
testthat::expect_true(sum(colnames(attrition) %in% c('cohort_definition_id', 'attr_reason', 'n', 'database_id', 'setting_id')) == 5)


# test when no case_settings
utils::write.csv(
  x = target_settings,
  file = file.path(tempFolder3, 'c2_target_settings.csv')
)
return <- exportAttrition(
  executionPath = tempFolder3,
  outputFolder = tempFolder3,
  csvFilePrefix = 'c2_',
  minCellCount = 12
)
attrition <- utils::read.csv(file.path(tempFolder3, 'c2_attrition.csv'))
testthat::expect_true(sum(attrition$n < 12 & attrition$n != -12) == 0)
testthat::expect_true(sum(colnames(attrition) %in% c('cohort_definition_id', 'attr_reason', 'n', 'database_id', 'setting_id')) == 5)
testthat::expect_true(nrow(attrition) == 4)

# test no target or case settings
return <- exportAttrition(
  executionPath = tempFolder3,
  outputFolder = tempFolder3,
  csvFilePrefix = 'c3_',
  minCellCount = 12
)
testthat::expect_false(return)
testthat::expect_true(!file.exists(file.path(tempFolder3, 'c3_attrition.csv')))

})