R version 4.4.1 (2024-06-14) -- "Race for Your Life" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. [Previously saved workspace restored] > > ######## test.haplo.scan.q: test the haplo.scan function ########## > # Jason Sinnwell 3/2005 > # > # test haplo.scan under various settings > # sink all of the results into a file > # check them against known checked results by a diff command (unix) > # > ##################################################################### > > ## package: haplo.stats > ## test script: haplo.scan > > verbose=TRUE > Sys.setlocale("LC_ALL", "C") [1] "LC_CTYPE=C;LC_NUMERIC=C;LC_TIME=C;LC_COLLATE=C;LC_MONETARY=C;LC_MESSAGES=en_US.UTF-8;LC_PAPER=en_US.UTF-8;LC_NAME=C;LC_ADDRESS=C;LC_TELEPHONE=C;LC_MEASUREMENT=en_US.UTF-8;LC_IDENTIFICATION=C" > Sys.getlocale() [1] "LC_CTYPE=C;LC_NUMERIC=C;LC_TIME=C;LC_COLLATE=C;LC_MONETARY=C;LC_MESSAGES=en_US.UTF-8;LC_PAPER=en_US.UTF-8;LC_NAME=C;LC_ADDRESS=C;LC_TELEPHONE=C;LC_MEASUREMENT=en_US.UTF-8;LC_IDENTIFICATION=C" > > require(haplo.stats) Loading required package: haplo.stats Loading required package: arsenal > > > # test different options for haplo.scan, > # sink all of the results into a file > # check them against known checked results by a diff command (unix) > > # use check.haplo.scan.s for splus and check.haplo.scan.r for r versions > > > #if(update) sink(goldfile) else sink("sink.haplo.scan.out") > > seed <- c(45, 16, 22, 24, 15, 3, 11, 47, 24, 40, 18, 0) > > set.seed(seed) > runif(10) [1] 0.63337281 0.31753665 0.24092185 0.37841413 0.35214430 0.29775855 [7] 0.22781938 0.55484192 0.18456417 0.00528478 > > > cat("\n Using a random numeric dataset, 10 loci, width=4 sim=100 \n\n") Using a random numeric dataset, 10 loci, width=4 sim=100 > ## regular case, default parameters for 10-locus dataset > set.seed(seed) > tmp <- ifelse(runif(2000)>.3, 1, 2) > geno <- matrix(tmp, ncol=20) > y <- rep(c(0,1),c(50,50)) > ## show verbose on simulations. > > set.seed(seed) > scan1 <- haplo.scan(y = y, geno = geno, miss.val=c(0,NA), width=4, + em.control=haplo.em.control(loci.insert.order = NULL, + insert.batch.size = 6, min.posterior = + 1e-07, tol = 1e-05, max.iter = 5000, + random.start = 0, n.try = 10, iseed = + NULL, max.haps.limit = 2000000, verbose=0), + sim.control=score.sim.control(p.threshold = 0.25, min.sim = 50, + max.sim = 1000, verbose = FALSE)) > > print.haplo.scan(scan1,digits=5) Call: haplo.scan(y = y, geno = geno, width = 4, miss.val = c(0, NA), em.control = haplo.em.control(loci.insert.order = NULL, insert.batch.size = 6, min.posterior = 1e-07, tol = 1e-05, max.iter = 5000, random.start = 0, n.try = 10, iseed = NULL, max.haps.limit = 2e+06, verbose = 0), sim.control = score.sim.control(p.threshold = 0.25, min.sim = 50, max.sim = 1000, verbose = FALSE)) ================================================================================ Locus Scan-statistic Simulated P-values ================================================================================ loc-1 loc-2 loc-3 loc-4 loc-5 loc-6 loc-7 loc-8 loc-9 loc-10 sim.p-val 0.74 0.94 0.9 0.74 0.72 0.7 0.66 0.92 0.8 0.58 Loci with max scan statistic: 4 5 6 7 Max-Stat Simulated Global p-value: 0.86 Number of Simulations: 50 > > cat("\n Using an all character dataset, 7 loci, width=3 \n\n") Using an all character dataset, 7 loci, width=3 > > ## a character allele dataset > geno.char <- matrix(c('a','a','b','b','c','C','d','d','F','f','g','G','h1','h1', + 'a','A','B','B','c','c','d','d','F','F','g','G','H','h1', + 'a','a','b','b','c','c','d','d','F','f','g','G','h2','h2', + 'a','a','B','B','C','c','d','d','f','f','g','G','h2','h1', + 'a','A','B','B','c','c','d','d','F','F','g','G','H','h2', + 'a','a','b','B','C','C','D','d','f','f','G','G','h1','h2', + 'a','a','B','B','c','c','d','d','F','f','G','g','h2','h2', + 'a','a','z','z','c','c','d','d','F','F','g','G','h1','z', + 'a','A','B','B','c','z','z','z','F','f','z','z','h1','h1', + 'a','a','B','B','c','c','d','d','F','f','G','g','h1','h2'), + nrow=10,byrow=T) > > y.char<- c(0,1,0,0,1,0,0,1,0,0) > > set.seed(seed) > ## use all default parameters except for miss.val > scan.char <- haplo.scan(y=y.char, geno=geno.char, miss.val="z", width=3, + em.control=haplo.em.control(loci.insert.order = NULL, + insert.batch.size = 6, min.posterior = + 1e-07, tol = 1e-05, max.iter = 5000, + random.start = 0, n.try = 10, + max.haps.limit = 2000000., verbose = 0), + sim.control=score.sim.control(p.threshold = 0.25, + min.sim = 200, max.sim = 1000., verbose = FALSE)) > print.haplo.scan(scan.char, digits=5) Call: haplo.scan(y = y.char, geno = geno.char, width = 3, miss.val = "z", em.control = haplo.em.control(loci.insert.order = NULL, insert.batch.size = 6, min.posterior = 1e-07, tol = 1e-05, max.iter = 5000, random.start = 0, n.try = 10, max.haps.limit = 2e+06, verbose = 0), sim.control = score.sim.control(p.threshold = 0.25, min.sim = 200, max.sim = 1000, verbose = FALSE)) ================================================================================ Locus Scan-statistic Simulated P-values ================================================================================ loc-1 loc-2 loc-3 loc-4 loc-5 loc-6 loc-7 sim.p-val 0.66 0.825 0.515 0.5 0.4 0.345 0.335 Loci with max scan statistic: 5 6 7 Max-Stat Simulated Global p-value: 0.525 Number of Simulations: 200 > > cat("\n Using hla dataset, 8 loci, sim=10, width=3 \n\n") Using hla dataset, 8 loci, sim=10, width=3 > ## use the hla dataset, just the first 8 markers. slide=3, sim=10 > > data(hla.demo) > > geno.11 <- hla.demo[,-c(1:4)] > y.bin <- 1*(hla.demo$resp.cat=="low") > hla.summary <- summaryGeno(geno.11[,1:16], miss.val=c(0,NA)) > > ## track those subjects with too many possible haplotype pairs ( > 10,000) > many.haps <- (1:length(y.bin))[hla.summary[,4]>10000] > > length(many.haps) [1] 6 > many.haps [1] 11 33 124 137 167 181 > > ## For speed, or even just so it will finish, make y.bin and geno.scan > ## for genotypes that don't have too many ambigous haplotypes > geno.scan <- geno.11[-many.haps,] > y.scan <- y.bin[-many.haps] > > set.seed(seed) > > runif(10) [1] 0.63337281 0.31753665 0.24092185 0.37841413 0.35214430 0.29775855 [7] 0.22781938 0.55484192 0.18456417 0.00528478 > > set.seed(seed) > scan.hla <- haplo.scan(y.scan, geno.scan, width=3, + em.control=haplo.em.control(loci.insert.order = NULL, + insert.batch.size = 3, min.posterior = + 1e-07, tol = 1e-05, max.iter = 5000, + random.start = 0, n.try = 10, + max.haps.limit = 2000000, verbose = 0), + sim.control=score.sim.control(p.threshold = 0.25, + min.sim = 10, max.sim = 10, verbose = TRUE)) h.count: 0.94118 nsim: 1 glob.rej: 1 loc.rej: 1 1 1 1 1 1 0 1 1 1 1 h.count: 1.77778 nsim: 2 glob.rej: 1 loc.rej: 1 1 1 1 1 1 0 1 1 1 1 h.count: 2.52632 nsim: 3 glob.rej: 1 loc.rej: 1 1 1 1 1 1 0 1 1 1 1 h.count: 3.2 nsim: 4 glob.rej: 1 loc.rej: 1 1 1 1 1 1 0 1 1 1 1 h.count: 3.80952 nsim: 5 glob.rej: 1 loc.rej: 1 1 1 1 1 1 0 1 1 1 1 h.count: 4.36364 nsim: 6 glob.rej: 1 loc.rej: 1 1 1 1 1 1 0 1 1 1 1 h.count: 4.86957 nsim: 7 glob.rej: 1 loc.rej: 1 1 1 1 1 1 0 1 1 1 1 h.count: 5.33333 nsim: 8 glob.rej: 1 loc.rej: 1 1 1 1 1 1 0 1 1 1 1 h.count: 5.76 nsim: 9 glob.rej: 2 loc.rej: 2 2 2 2 2 2 1 2 2 2 2 h.count: 6.15385 nsim: 10 glob.rej: 2 loc.rej: 2 2 2 2 2 2 1 2 2 2 2 > > print.haplo.scan(scan.hla, digits=5) Call: haplo.scan(y = y.scan, geno = geno.scan, width = 3, em.control = haplo.em.control(loci.insert.order = NULL, insert.batch.size = 3, min.posterior = 1e-07, tol = 1e-05, max.iter = 5000, random.start = 0, n.try = 10, max.haps.limit = 2e+06, verbose = 0), sim.control = score.sim.control(p.threshold = 0.25, min.sim = 10, max.sim = 10, verbose = TRUE)) ================================================================================ Locus Scan-statistic Simulated P-values ================================================================================ loc-1 loc-2 loc-3 loc-4 loc-5 loc-6 loc-7 loc-8 loc-9 loc-10 loc-11 sim.p-val 0.2 0.2 0.2 0.2 0.2 0.2 0.1 0.2 0.2 0.2 0.2 Loci with max scan statistic: 2 Max-Stat Simulated Global p-value: 0.2 Number of Simulations: 10 > > > > > > > > proc.time() user system elapsed 20.536 0.267 21.222