# Shared fixtures ----

# Toy EHR table used by the tests below: one row per Month / Patient /
# Parent_Code combination, with an associated Count.
df_ehr = data.frame(
  Month = c(1, 1, 1, 2, 2, 3, 3, 4, 4),
  Patient = c(1, 1, 2, 1, 2, 1, 1, 3, 4),
  Parent_Code = c('C1', 'C2', 'C2', 'C1', 'C1', 'C1', 'C2', 'C3', 'C4'),
  Count = 1:9
)
spm_cooc = build_df_cooc(df_ehr)

# Same records, but with codes that sort differently as numerics or chars
# (numeric order: 1, 2, 3, 10; character order: "1", "10", "2", "3").
# Position-wise correspondence with the labels above:
#   1 -> C1, 10 -> C2, 2 -> C3, 3 -> C4
df_ehr$Parent_Code = c(1, 10, 10, 1, 1, 1, 10, 2, 3)
spm_cooc_rev = build_df_cooc(df_ehr)

m_pmi = get_pmi(spm_cooc)
m_pmi_rev = get_pmi(spm_cooc_rev)

# Tests ----

# Checks get_pmi() on the toy data:
#   * three entries of the PMI matrix against hand-written
#     log(value * total / (marginal_i * marginal_j)) expressions,
#   * the ordering of the column names for both code sets,
#   * that the numeric-coded matrix, once relabelled to C1..C4 and
#     reordered, melts to the same long table as the character-coded one.
test_get_pmi = function() {

  # Grand total of the symmetrised co-occurrence matrix.
  sum_cooc = sum(as.matrix(
    build_spm_cooc_sym(nlpembeds:::spm_to_df(spm_cooc))
  ))

  # NOTE(review): 16, 7 and 12 are presumably the C1-C1, C1-C2 and C2-C2
  # entries of the symmetrised matrix for this fixture -- confirm against
  # build_spm_cooc_sym().
  expect_equal(m_pmi[1, 1], log(16 * sum_cooc / ((16 + 7) ^ 2)))
  expect_equal(m_pmi[1, 2], log(7 * sum_cooc / ((16 + 7) * (12 + 7))))
  expect_equal(m_pmi[2, 2], log(12 * sum_cooc / ((12 + 7) ^ 2)))

  expect_equal(colnames(m_pmi), paste0('C', 1:4))
  expect_equal(colnames(m_pmi_rev), as.character(c(1, 2, 3, 10)))

  # Map the numeric codes back to their 'C*' labels with an explicit lookup.
  # This replaces a chain of gsub() calls whose result depended on the order
  # of the substitutions ('10' had to be rewritten before '1').
  code_labels = c('1' = 'C1', '10' = 'C2', '2' = 'C3', '3' = 'C4')
  relabel = function(codes) factor(unname(code_labels[as.character(codes)]))

  df_pmi = reshape2::melt(m_pmi_rev)
  df_pmi$Var1 = relabel(df_pmi$Var1)
  df_pmi$Var2 = relabel(df_pmi$Var2)

  # Reorder to the column-major layout melt() produces for m_pmi and reset
  # the row names so the two data frames compare equal.
  df_pmi = df_pmi[order(df_pmi$Var2, df_pmi$Var1), ]
  rownames(df_pmi) = seq_len(nrow(df_pmi))

  expect_equal(reshape2::melt(m_pmi), df_pmi)
}
test_that('get_pmi', test_get_pmi())

# Smoke test for get_svd() on the scaled numeric columns of iris.
test_get_svd = function() {

  #m_svd = get_svd(m_pmi, 1)
  #expect_equal(round(m_svd[1, 1], 3), -0.466)

  m_data = scale(iris[1:4])

  # Only asserts that the call completes without error; the return value's
  # shape is not checked here. Without an expectation testthat would report
  # the test as empty.
  expect_error(get_svd(m_data, 2), NA)
}
# Previously this line re-registered test_get_pmi(), so test_get_svd() was
# defined but never run.
test_that('get_svd', test_get_svd())