test_sql_cooc = function() { library(RSQLite) df_ehr = data.frame(Patient = c(1, 1, 2, 1, 2, 1, 1, 3, 4), Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4), Parent_Code = c('C1', 'C2', 'C2', 'C1', 'C1', 'C1', 'C2', 'C3', 'C4'), Count = 1:9) test_db_path = tempfile() test_db = dbConnect(SQLite(), test_db_path) dbWriteTable(test_db, 'df_monthly', df_ehr, overwrite = TRUE) dbExecute(test_db, "CREATE INDEX patient_idx ON df_monthly (Patient)") df_uniq_codes = unique(df_ehr['Parent_Code']) dbWriteTable(test_db, 'df_uniq_codes', df_uniq_codes, overwrite = TRUE) dbDisconnect(test_db) output_db_path = tempfile() sql_cooc(test_db_path, output_db_path) test_db = dbConnect(SQLite(), output_db_path) df_spm_cooc = dbGetQuery(test_db, 'select * from df_monthly;') dbDisconnect(test_db) expect_equal(df_spm_cooc, data.frame(V1 = c('C1', 'C1', 'C2', 'C3', 'C4'), V2 = c('C1', 'C2', 'C2', 'C3', 'C4'), value = c(16, 7, 12, 8, 9))) # exclude patterns, codes_dict, autoindex, overwrite_output df_ehr$Parent_Code %<>% ifelse(. == 'C1', 'C0000545', .) df_ehr$Parent_Code %<>% ifelse(. == 'C2', 'C0000578', .) file.remove(test_db_path) test_db = dbConnect(SQLite(), test_db_path) dbWriteTable(test_db, 'df_monthly', df_ehr) dbDisconnect(test_db) codes_dict_fpaths = list.files(system.file('dictionaries', package = 'nlpembeds'), full.names = TRUE) sql_cooc(test_db_path, output_db_path, exclude_code_pattern = 'C4', exclude_dict_pattern = 'C[0-9]', codes_dict_fpaths = codes_dict_fpaths, autoindex = TRUE, overwrite_output = TRUE) test_db = dbConnect(SQLite(), output_db_path) df_spm_cooc = dbGetQuery(test_db, 'select * from df_monthly;') dbDisconnect(test_db) expect_equal(df_spm_cooc, data.frame(V1 = c('C0000545', 'C0000545', 'C0000578'), V2 = c('C0000545', 'C0000578', 'C0000578'), value = c(16, 7, 12))) } test_that('sql_cooc', test_sql_cooc()) # debug test_cooc_batch = function() { df_ehr_batch1 = data.frame(Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4), Patient = c(1, 1, 2, 1, 2, 1, 1, 3, 4), Parent_Code = c('C1', 'C2', 'C2', 'C1', 'C1', 'C1', 'C2', 'C3', 'C4'), Count = 1:9) df_ehr_batch2 = data.frame(Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4), Patient = c(5, 5, 6, 5, 6, 5, 5, 7, 8), Parent_Code = c('C5', 'C6', 'C6', 'C5', 'C5', 'C1', 'C2', 'C3', 'C4'), Count = 1:9) df_ehr = df_ehr_batch1 df_ehr = df_ehr_batch2 l_pat_ids = nlpembeds:::batch_splits(1:8, n_batch = 3) ## df_cooc = data.frame(Var1 = c(rep('C1', 3), rep('C2', 4)), Var2 = c(rep(c('C1', 'C2', 'C2'), 2), 'C3'), value = 1:7) ## spm_cooc = spm_cooc_batch code_freqs = Matrix::diag(spm_cooc) expect_true(!any(is.na(code_freqs))) expect_true(!any(code_freqs == 0)) # only appears when adding uniq_codes, expected for batches but not at end } #test_that('cooc_batch', test_cooc_batch())