# Number of pair of categories to test n_category_pair <- 3 # Number of samples of the condition to test n_sample_conditions <- 2 # Probability of a variable being in the condition prob_in_condition <- 0.5 #' Test the function `poLCA.table()` given a fitted model #' #' Test the function `poLCA.table()` given a fitted model (can be non-regression #' or regression model). #' #' This test will test all possible one-way and two-way tables by cycling #' through the column names. In addition, it will test a sample of conditions, #' randomly selected, to pass to the function, one of which is empty. #' #' The test compares the results with the original poLCA. The original code can #' produce NaN of Inf, these should be ignored as the poLCAParallel #' implementation should be more robust #' #' @param columns Vector of strings, names of the columns of the responses #' @param n_outcomes Vector of integers, number of outcomes for each category #' @param lc A model object estimated using the `poLCA` function (or a list #' which mocks it) test_table_given_model <- function(columns, n_outcomes, lc) { # sample pairs of categories to test # the first pair is [1, 1] to ensure a one way relationship is tested at least category_pairs <- matrix( sample( seq_len(length(n_outcomes)), 2 * (n_category_pair - 1), TRUE ), 2, n_category_pair - 1 ) category_pairs <- cbind(c(1, 1), category_pairs) # for each sampled pair for (i_pair in seq_len(n_category_pair)) { i_category <- category_pairs[1, i_pair] j_category <- category_pairs[2, i_pair] if (i_category == j_category) { # one way formula_ <- formula(paste0(columns[i_category], "~1")) } else { # two way formula_ <- formula( paste0(columns[i_category], "~", columns[j_category]) ) } for (i in seq_len(n_sample_conditions)) { condition <- list() # for the first iteration, the condition is empty if (i != 0) { # randomly sample a condition for (k_category in seq_len(length(n_outcomes))) { if (k_category != i_category && k_category != j_category) { if (stats::runif(1) < prob_in_condition) { condition[[columns[k_category]]] <- sample(seq_len(n_outcomes[k_category]), 1) } } } } # test function here table_polca <- poLCA::poLCA.table(formula_, condition, lc) table_polcaparallel <- poLCAParallel::poLCA.table( formula_, condition, lc ) # original poLCA::poLCA.table() can produce NaN or Inf, ignore them is_finite_index <- is.finite(table_polca) expect_equal( table_polcaparallel[is_finite_index], table_polca[is_finite_index] ) # test if all values are finite expect_identical(all(is.finite(table_polcaparallel)), TRUE) } } } #' Test the function `poLCA.table()`for the non-regression problem #' #' Test the function `poLCA.table()` for the non-regression problem. The model #' is fitted onto simulated data and then passed to the function. The test #' compares the results with the original poLCA code #' #' See test_table_given_model() for further details #' #' @param n_data Number of data points #' @param n_outcomes Vector of integers, number of outcomes for each category #' @param n_cluster Number of clusters fitted #' @param n_rep Number of different initial values to try #' @param na_rm Logical, if to remove NA responses #' @param n_thread Number of threads to use #' @param maxiter Number of iterations used in the EM algorithm #' @param tol Tolerance used in the EM algorithm #' @param prob_na_train Probability of missing data in the training data #' @param n_data_test Number of data points in the unseen test data #' @param prob_na_test Probability of missing data in the unseen test data test_non_regress_table <- function(n_data, n_outcomes, n_cluster, n_rep, na_rm, n_thread, maxiter, tol, prob_na_train, n_data_test, prob_na_test) { responses <- as.data.frame( random_response(n_data, n_outcomes, prob_na_train, NaN) ) formula_ <- get_non_regression_formula(responses) lc <- poLCAParallel::poLCA(formula_, responses, n_cluster, maxiter = maxiter, tol = tol, na.rm = na_rm, nrep = n_rep, verbose = FALSE, n.thread = n_thread ) test_table_given_model(colnames(responses), n_outcomes, lc) } #' Test the function `poLCA.table()` for the non-regression problem #' #' Test the function `poLCA.table()` for the non-regression problem. The model #' is fitted onto simulated data and then passed to the function. The test #' compares the results with the original poLCA code #' #' See test_table_given_model() for further details #' #' @param n_data Number of data points #' @param n_feature Number of features #' @param n_outcomes Vector of integers, number of outcomes for each category #' @param n_cluster Number of clusters fitted #' @param n_rep Number of different initial values to try #' @param na_rm Logical, if to remove NA responses #' @param n_thread Number of threads to use #' @param maxiter Number of iterations used in the EM algorithm #' @param tol Tolerance used in the EM algorithm #' @param prob_na_train Probability of missing data in the training data #' @param n_data_test Number of data points in the unseen test data #' @param prob_na_test Probability of missing data in the unseen test data test_regress_table <- function(n_data, n_feature, n_outcomes, n_cluster, n_rep, na_rm, n_thread, maxiter, tol, prob_na_train, n_data_test, prob_na_test) { features <- random_features(n_data, n_feature) responses <- random_response(n_data, n_outcomes, prob_na_train, NaN) formula_ <- get_regression_formula(responses, features) data <- cbind(responses, features) model <- poLCAParallel::poLCA(formula_, data, n_cluster, maxiter = maxiter, tol = tol, na.rm = na_rm, nrep = n_rep, verbose = FALSE, n.thread = n_thread ) test_table_given_model(colnames(responses), n_outcomes, model) } test_that("non-regression-full-data", { # test using na_rm = TRUE and FALSE set.seed(-507817496) seeds <- sample.int(.Machine$integer.max, N_REPEAT) for (i in seq_len(N_REPEAT)) { set.seed(seeds[i]) expect_no_error(test_non_regress_table( 100, c(2, 3, 5, 2, 2), 3, 4, TRUE, N_THREAD, DEFAULT_MAXITER, DEFAULT_TOL, 0, 50, 0.01 )) } set.seed(-2093133234) seeds <- sample.int(.Machine$integer.max, N_REPEAT) for (i in seq_len(N_REPEAT)) { set.seed(seeds[i]) expect_no_error(test_non_regress_table( 100, c(2, 3, 5, 2, 2), 3, 4, FALSE, N_THREAD, DEFAULT_MAXITER, DEFAULT_TOL, 0, 50, 0.01 )) } }) test_that("non-regression-missing-data", { # test using na_rm = TRUE and FALSE set.seed(1354513976) seeds <- sample.int(.Machine$integer.max, N_REPEAT) for (i in seq_len(N_REPEAT)) { set.seed(seeds[i]) expect_no_error(test_non_regress_table( 100, c(2, 3, 5, 2, 2), 3, 4, TRUE, N_THREAD, DEFAULT_MAXITER, DEFAULT_TOL, 0.1, 50, 0.01 )) } set.seed(-647551612) seeds <- sample.int(.Machine$integer.max, N_REPEAT) for (i in seq_len(N_REPEAT)) { set.seed(seeds[i]) expect_no_error(test_non_regress_table( 100, c(2, 3, 5, 2, 2), 3, 4, FALSE, N_THREAD, DEFAULT_MAXITER, DEFAULT_TOL, 0.1, 50, 0.01 )) } }) test_that("regression-full-data", { # test using na_rm = TRUE and FALSE set.seed(24029611) seeds <- sample.int(.Machine$integer.max, N_REPEAT) for (i in seq_len(N_REPEAT)) { set.seed(seeds[i]) expect_no_error(test_regress_table( 100, 4, c(2, 3, 5, 2, 2), 3, 4, TRUE, N_THREAD, DEFAULT_MAXITER, DEFAULT_TOL, 0, 50, 0.01 )) } set.seed(-1281069548) seeds <- sample.int(.Machine$integer.max, N_REPEAT) for (i in seq_len(N_REPEAT)) { set.seed(seeds[i]) expect_no_error(test_regress_table( 100, 4, c(2, 3, 5, 2, 2), 3, 4, FALSE, N_THREAD, DEFAULT_MAXITER, DEFAULT_TOL, 0, 50, 0.01 )) } }) test_that("regression-missing-data", { # test using na_rm = TRUE and FALSE set.seed(-749216122) seeds <- sample.int(.Machine$integer.max, N_REPEAT) for (i in seq_len(N_REPEAT)) { set.seed(seeds[i]) expect_no_error(test_regress_table( 100, 4, c(2, 3, 5, 2, 2), 3, 4, TRUE, N_THREAD, DEFAULT_MAXITER, DEFAULT_TOL, 0.1, 50, 0.01 )) } set.seed(-1284213522) seeds <- sample.int(.Machine$integer.max, N_REPEAT) for (i in seq_len(N_REPEAT)) { set.seed(seeds[i]) expect_no_error(test_regress_table( 100, 4, c(2, 3, 5, 2, 2), 3, 4, FALSE, N_THREAD, DEFAULT_MAXITER, DEFAULT_TOL, 0.1, 50, 0.01 )) } })