# test_simulate.R # Created by: Sajal Kumar # # Modified by : Ruby Sharma # Date : February 27 2017 # # Modified by : Ruby Sharma # Date : December 2 2018 # Modified Update : Dr. Joe Song and Ruby Sharma # Date : May 26 2020 # : Added new test cases to test marginal # : distribution # # Modified Update : Ruby Sharma # Date : May 19 2023 # : Modified to examine pattern.tables instead of sampled tables # : for the maintenance of table type properties. # # # Modified Update : Ruby Sharma # Date : June 21 2023 # : Set seed before simulating discrete tables to test the reproducible tables # : # library(testthat) library(FunChisq) # context("Testing simulate_tables()") #Attributes to test #1) y = f(x) #2) Not a constant function - All populated samples are not supposed to be on the same column. Test_Functional_table = function(iter) { func.flag = FALSE for(i in seq(iter)) { set.seed(i) Get.Stats = Construct_Table("functional") conti.table = Get.Stats$conti.table noise.table = Get.Stats$noise.table pattern.table = Get.Stats$pattern.table # check if all samples are populated check.all.samples = All.sample.check(conti.table,Get.Stats$sample.size) if(check.all.samples$flag) { func.flag = TRUE failure.summary = check.all.samples$failure.table break } # Check for non zero cells check.non.zero = No.Non.Zero.Check(conti.table) if(check.non.zero$flag) { func.flag = TRUE failure.summary = check.non.zero$failure.table break } # Check y = f(x) check.functional = Functional.check(pattern.table) if(check.functional$flag) { func.flag = TRUE failure.summary = check.functional$failure.table break } # check constant functions check.constant = Constant.check(pattern.table) if(check.constant$flag) { func.flag = TRUE failure.summary = check.constant$failure.table break } # check.margin = margin.check(conti.table, noise.table) # if(check.margin$flag) # { # func.flag = TRUE # failure.summary = check.margin$failure.table # break # } } #if any table was flagged, the test failed. expect_identical(func.flag, FALSE) } #Attributes to test #1) y = f(x) #2) Not a constant function - All populated samples are not supposed to be on the same column. Test_Functional_Discontinuous_table = function(iter) { func.flag = FALSE for(i in seq(iter)) { set.seed(i) Get.Stats = Construct_Table("discontinuous") conti.table = Get.Stats$conti.table pattern.table = Get.Stats$pattern.table # check if all samples are populated check.all.samples = All.sample.check(conti.table,Get.Stats$sample.size) if(check.all.samples$flag) { func.flag = TRUE failure.summary = check.all.samples$failure.table break } # Check for non zero cells check.non.zero = No.Non.Zero.Check(conti.table) if(check.non.zero$flag) { func.flag = TRUE failure.summary = check.non.zero$failure.table break } # Check y = f(x) check.functional = Functional.check(pattern.table) if(check.functional$flag) { func.flag = TRUE failure.summary = check.functional$failure.table break } # check constant functions check.constant = Constant.check(pattern.table) if(check.functional$flag) { func.flag = TRUE failure.summary = check.constant$failure.table break } } #if any table was flagged, the test failed. expect_identical(func.flag, FALSE) } #Attributes to test #1) y = f(x) #2) x ! = f(y) #2) Not a constant function - All populated samples are not supposed to be on the same column. Test_Functional_Many_to_one_table = function(iter) { non.mono.func.flag = FALSE for(i in seq(iter)) { set.seed(i) Get.Stats = Construct_Table("many.to.one") conti.table = Get.Stats$conti.table pattern.table = Get.Stats$pattern.table # check if all samples are populated check.all.samples = All.sample.check(conti.table,Get.Stats$sample.size) if(check.all.samples$flag) { non.mono.func.flag = TRUE failure.summary = check.all.samples$failure.table break } # Check for non zero cells check.non.zero = No.Non.Zero.Check(conti.table) if(check.non.zero$flag) { non.mono.func.flag = TRUE failure.summary = check.non.zero$failure.table break } # Check y = f(x) check.functional = Functional.check(pattern.table) if(check.functional$flag) { non.mono.func.flag = TRUE failure.summary = check.functional$failure.table break } # check x != f(y) check.non.functional.invert = Non.functional.check(t(pattern.table)) if(check.non.functional.invert$flag) { non.mono.func.flag = TRUE failure.summary = conti.table break } # check constant functions check.constant = Constant.check(pattern.table) if(check.constant$flag) { non.mono.func.flag = TRUE failure.summary = check.constant$failure.table break } } #if any table was flagged, the test failed. expect_identical(non.mono.func.flag, FALSE) } #Attributes to test #1) y ! = f(x) Test_Non_Functional_table = function(iter) { non.func.flag = FALSE for(i in seq(iter)) { set.seed(i) Get.Stats = Construct_Table("dependent.non.functional") conti.table = Get.Stats$conti.table pattern.table = Get.Stats$pattern.table # check if all samples are populated check.all.samples = All.sample.check(conti.table,Get.Stats$sample.size) if(check.all.samples$flag) { non.func.flag = TRUE failure.summary = check.all.samples$failure.table break } # Check for non zero cells check.non.zero = No.Non.Zero.Check(conti.table) if(check.non.zero$flag) { non.func.flag = TRUE failure.summary = check.non.zero$failure.table break } # check y != f(x) check.non.functional = Non.functional.check(pattern.table) if(check.non.functional$flag) { non.func.flag = TRUE failure.summary = conti.table break } } #if any table was flagged, the test failed. expect_identical(non.func.flag, FALSE) } #Attributes to test #1) No non zero cell #2) No dependency #3) y != f(x) AND x != f(y) Test_Independent_table = function(iter) { ind.flag = FALSE for(i in seq(iter)) { set.seed(i) Get.Stats = Construct_Table("independent") conti.table = Get.Stats$conti.table # check if all samples are populated check.all.samples = All.sample.check(conti.table,Get.Stats$sample.size) if(check.all.samples$flag) { ind.flag = TRUE failure.summary = check.all.samples$failure.table break } } #if any table was flagged, the test failed. expect_identical(ind.flag, FALSE) } #Setup random parameters and construct a table using simulate.tables Construct_Table = function(type) { row_col.equality = sample(c(TRUE,FALSE),1) if(row_col.equality) { nrows = sample(c(2:15),1) ncols = nrows } else { nrows = sample(c(2:15),1) ncols = sample(c(2:15),1) } if(type=="many.to.one" && nrows == 2) nrows = 3 sample.size = ((sample(c(1:100),1) * sample(c(1:100),1))) + nrows row.marginal = rep(0,nrows) if(type=="independent" && sample.size < (nrows * ncols)) sample.size = nrows * ncols if(type=="dependent.non.functional" && sample.size < (nrows * ncols)) sample.size = nrows * ncols row.marginal.set = sample(c(TRUE,FALSE),1) if(row.marginal.set){ row.marginal = runif(nrows) row.marginal = row.marginal/sum(row.marginal) conti.table = simulate_tables(n=sample.size, nrow = nrows, ncol = ncols, type = type, n.tables = 1, row.marginal = row.marginal, col.marginal = NULL, noise = 0.2) } else { conti.table = simulate_tables(n=sample.size, nrow = nrows, ncol = ncols, type = type, n.tables = 1, noise =0.2) } list(conti.table = conti.table$sample.list[[1]], nrows = nrows, ncols = ncols, sample.size = sample.size, row.marginal = row.marginal, noise.table = conti.table$noise.list[[1]], pattern.table = conti.table$pattern.list[[1]]) } # The table should contain all the samples specified in sample size All.sample.check = function(conti.table, sample.size) { flag=FALSE failure.summary = NULL if(sum(conti.table)!=sample.size){ flag = TRUE failure.summary = conti.table } list(flag=flag,failure.table = failure.summary) } # If there are no non-zero elements in the table then something's wrong. No.Non.Zero.Check = function(conti.table) { index = which(conti.table!=0, arr.ind=TRUE) flag=FALSE failure.summary = NULL if(length(index)==0){ flag = TRUE failure.summary = conti.table } list(flag=flag, failure.table = failure.summary) } # If there are any zero element in the table then something's wrong. All.Non.Zero.Check = function(conti.table) { index = which(conti.table==0, arr.ind=TRUE) flag=FALSE failure.summary = NULL if(length(index)!=0){ flag = TRUE failure.summary = conti.table } list(flag=flag, failure.table = failure.summary) } # Testing y = f(x) #If there exists a row such that it contains samples for more than one y then the table is invalid Functional.check = function(conti.table) { index = which(conti.table!=0, arr.ind=TRUE) flag=FALSE failure.summary = NULL if(length(index[,1])!=length(unique(index[,1]))){ flag = TRUE failure.summary = conti.table } list(flag=flag, failure.table = failure.summary) } # Testing y != f(X) Non.functional.check = function(conti.table) { status = Functional.check(conti.table) flag=FALSE failure.summary = NULL if(!status$flag){ flag = TRUE failure.summary = conti.table } list(flag=flag, failure.table = failure.summary) } # If there exists a column (or y) such that it contains all the samples then the table is invalid Constant.check = function(conti.table) { index = which(conti.table!=0, arr.ind=TRUE) flag=FALSE failure.summary = NULL if(length(unique(index[,2]))==1){ flag=TRUE failure.summary = conti.table } list(flag=flag, failure.table = failure.summary) } #If there exists a zero cell after removing all zero rows and columns then the table is invalid Dependency.check = function(conti.table) { index.zero = which(conti.table==0, arr.ind=TRUE) index.non.zero = which(conti.table!=0, arr.ind=TRUE) flag=FALSE failure.summary = NULL if(length(index.zero) == 0) { flag = TRUE failure.summary = conti.table } else { index.zero.copy = index.zero for(j in 1:length(index.zero.copy[,1])) { if(length(which(index.zero[,1]==index.zero.copy[j,1]))==ncol(conti.table)){ index.zero = index.zero[-which(index.zero[,1]==index.zero.copy[j,1]),,drop=FALSE] } if(length(which(index.zero[,2]==index.zero.copy[j,2]))==nrow(conti.table)){ index.zero = index.zero[-which(index.zero[,2]==index.zero.copy[j,2]),,drop=FALSE] } } if(nrow(index.zero)==0){ flag = TRUE failure.summary = conti.table } } list(flag=flag, failure.table = failure.summary) } margin.check = function(conti.table, noise.table) { flag=FALSE failure.summary = NULL if(any(rowSums(conti.table)!= rowSums(noise.table))) { flag = TRUE failure.summary = noise.table } list(flag=flag, failure.table = failure.summary) } test_that("Testing simulate_tables()", { Test_Functional_table(2) Test_Independent_table(2) Test_Non_Functional_table(2) Test_Functional_Many_to_one_table(2) Test_Functional_Discontinuous_table(2) }) test_that("Testing marginals in simulate_tables()", { col.mar <- c(4, 2, 15) col.mar <- col.mar / sum(col.mar) row.mar <- c(15, 1, 1) row.mar <- row.mar / sum(row.mar) # Testing for row marginals for functional tables set.seed(23) tables <- simulate_tables( 100, type="fun", n.tables = 10, row.marginal=row.mar ) p.values <- vector("numeric", 10) for(i in 1:10) { p.values[i] <- chisq.test(rowSums(tables$sample.list[[i]]), p=row.mar)$p.value } expect_equal(sum(p.values > 0.05) > 5, TRUE) # Testing for column marginals, this would fail everytime # because of zero column, so only comparing probabities where the observed # colsum is non-zero. set.seed(24) tables <- simulate_tables( 100, type="fun", n.tables = 10, col.marginal = col.mar ) for(i in 1:10) { sumcols = colSums(tables$sample.list[[i]]) nonzero.sum = which(sumcols!=0) sumcols = sumcols[nonzero.sum] scaled.mar = col.mar[nonzero.sum]/sum(col.mar[nonzero.sum]) p.values[i] <- chisq.test(sumcols, p=scaled.mar)$p.value } expect_equal(sum(p.values > 0.05) > 5, TRUE) # Testing for indepdendent tables, both row and column row marginal are begin tested col.mar <- c(10, 20, 15) col.mar <- col.mar / sum(col.mar) row.mar <- c(15, 1, 30) row.mar <- row.mar / sum(row.mar) set.seed(25) tables <- simulate_tables( 100, type="in", n.tables = 10, row.marginal=row.mar, col.marginal = col.mar ) p.values <- vector("numeric", 10) for(i in 1:10) { p.values[i] <- suppressWarnings( chisq.test( rowSums(tables$sample.list[[i]]), p=row.mar )$p.value ) } expect_equal(sum(p.values > 0.05) > 5, TRUE) for(i in 1:10) { p.values[i] <- chisq.test(colSums(tables$sample.list[[i]]), p=col.mar)$p.value } expect_equal(sum(p.values > 0.05) > 5, TRUE) # Testing for many.to.one tables, row marginal test row.mar <- c(15, 1, 30) row.mar <- row.mar / sum(row.mar) col.mar <- c(20, 40, 30) col.mar <- col.mar / sum(col.mar) set.seed(26) tables <- simulate_tables( 100, type="many.to.one", n.tables = 10, row.marginal=row.mar ) p.values <- vector("numeric", 10) for(i in 1:10) { p.values[i] <- suppressWarnings( chisq.test( rowSums(tables$sample.list[[i]]), p=row.mar )$p.value ) } expect_equal(sum(p.values > 0.05) > 5, TRUE) # Testing for many.to.one tables, column marginal test set.seed(27) tables <- simulate_tables( 100, type="many.to.one", n.tables = 10, col.marginal = col.mar ) for(i in 1:10) { sumcols = colSums(tables$sample.list[[i]]) nonzero.sum = which(sumcols!=0) sumcols = sumcols[ nonzero.sum] scaled.mar = col.mar[nonzero.sum]/sum(col.mar[nonzero.sum]) p.values[i] <- chisq.test(sumcols, p=scaled.mar)$p.value } expect_equal(sum(p.values > 0.05) > 5, TRUE) # Testing for discontinuous tables, row marginal test row.mar <- c(10, 25, 30) row.mar <- row.mar / sum(row.mar) col.mar <- c(20, 40, 30) col.mar <- col.mar / sum(col.mar) set.seed(28) tables <- simulate_tables( 100, type="discontinuous", n.tables = 10, row.marginal=row.mar ) p.values <- vector("numeric", 10) for(i in 1:10) { p.values[i] <- chisq.test(rowSums(tables$sample.list[[i]]), p=row.mar)$p.value } expect_equal(sum(p.values > 0.05) > 5, TRUE) # Testing for discontinuous tables, column marginal test set.seed(29) tables <- simulate_tables( 100, type="discontinuous", n.tables = 10, col.marginal = col.mar ) for(i in 1:10) { sumcols = colSums(tables$sample.list[[i]]) nonzero.sum = which(sumcols!=0) sumcols = sumcols[ nonzero.sum] scaled.mar = col.mar[nonzero.sum]/sum(col.mar[ nonzero.sum]) p.values[i] <- chisq.test(sumcols, p=scaled.mar)$p.value } expect_equal(sum(p.values > 0.05) > 5, TRUE) # Testing for dependent.non.functional tables, row marginal test row.mar <- c(10, 25, 30) row.mar <- row.mar / sum(row.mar) col.mar <- c(20, 40, 30) col.mar <- col.mar / sum(col.mar) set.seed(30) tables <- simulate_tables( 100, type="dep", n.tables = 10, row.marginal=row.mar ) p.values <- vector("numeric", 10) for(i in 1:10) { p.values[i] <- chisq.test(rowSums(tables$sample.list[[i]]), p=row.mar)$p.value } expect_equal(sum(p.values > 0.05) > 5, TRUE) })