R version 4.4.1 (2024-06-14) -- "Race for Your Life" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > ## package: haplo.stats > ## test script: haplo.em > > ## settings > verbose=TRUE > > require(haplo.stats) Loading required package: haplo.stats Loading required package: arsenal > packageVersion("haplo.stats") [1] ‘1.9.6’ > options(stringsAsFactors=FALSE) > tmp <- Sys.setlocale("LC_COLLATE", "C") > tmp <- Sys.getlocale("LC_COLLATE") > > if(verbose) cat("prepare two datasets, one with char alleles, the other 3 loci from hla data\n") prepare two datasets, one with char alleles, the other 3 loci from hla data > > # make ficticious data set with an intention of some trend in > # haplotypes having H-allele at locus-H with F-allele at locus-F > geno.char <- matrix(c('F','f','g','G','h1','h1', + 'F','F','g','G','H','h1', + 'F','f','g','G','h2','h2', + 'f','f','g','G','h2','h1', + 'F','F','g','G','H','h2', + 'f','f','G','G','h1','h2', + 'F','f','G','g','h2','h2', + 'F','F','g','G','h1','z', + 'F','f','z','z','h1','h1', + 'F','f','G','g','h1','h2', + 'F','f','G','G','h1','h2', + 'F','F','g','G','h1','z', + 'F','f','z','z','h1','h1', + 'f','f','G','g','h1','h2'), nrow=14,byrow=T) > char.label <- c("F","G","H") > > data(hla.demo) > > hla.sub <- hla.demo[,c(1,2,3,4,17,18,21:24)] > geno.hla <- hla.sub[,-c(1:4)] > hla.label=c("DQB","DRB","HLA.B") > > seed <- c(33, 10, 39, 6, 16, 0, 40, 24, 12, 60, 7, 1) > > if(verbose) cat("character alleles\n") character alleles > > geno.char.recode <- setupGeno(geno.char, miss.val="z") > > geno.char.recode loc-1.a1 loc-1.a2 loc-2.a1 loc-2.a2 loc-3.a1 loc-3.a2 [1,] 1 2 2 1 2 2 [2,] 1 1 2 1 1 2 [3,] 1 2 2 1 3 3 [4,] 2 2 2 1 3 2 [5,] 1 1 2 1 1 3 [6,] 2 2 1 1 2 3 [7,] 1 2 1 2 3 3 [8,] 1 1 2 1 2 NA [9,] 1 2 NA NA 2 2 [10,] 1 2 1 2 2 3 [11,] 1 2 1 1 2 3 [12,] 1 1 2 1 2 NA [13,] 1 2 NA NA 2 2 [14,] 2 2 1 2 2 3 attr(,"class") [1] "model.matrix" attr(,"unique.alleles") attr(,"unique.alleles")[[1]] [1] "F" "f" attr(,"unique.alleles")[[2]] [1] "G" "g" attr(,"unique.alleles")[[3]] [1] "H" "h1" "h2" > > set.seed(seed) > em.char <- haplo.em(geno.char, miss.val='z',locus.label=char.label, + control = haplo.em.control()) > > print.haplo.em(em.char, digits=3) ================================================================================ Haplotypes ================================================================================ F G H hap.freq 1 F G H 0.089 2 F G h1 0.143 3 F G h2 0.000 4 F g H 0.000 5 F g h1 0.163 6 F g h2 0.140 7 f G h1 0.087 8 f G h2 0.242 9 f g h1 0.135 10 f g h2 0.000 ================================================================================ Details ================================================================================ lnlike = -37.162 lr stat for no LD = 10.987 , df = 2 , p-val = 0.004 > summary(em.char, digits=2, show.haplo=TRUE) ================================================================================ Subjects: Haplotype Codes and Posterior Probabilities ================================================================================ subj.id hap1.F hap1.G hap1.H hap2.F hap2.G hap2.H posterior 1 1 F G h1 f g h1 0.58 2 1 F g h1 f G h1 0.42 3 2 F G H F g h1 1.00 4 2 F G h1 F g H 0.00 5 3 F g h2 f G h2 1.00 6 4 f G h2 f g h1 1.00 7 4 f G h1 f g h2 0.00 8 5 F G H F g h2 1.00 9 6 f G h1 f G h2 1.00 10 7 F g h2 f G h2 1.00 11 8 F G h1 F g h1 0.40 12 8 F G h1 F g h2 0.35 13 8 F G H F g h1 0.25 14 8 F G h1 F g H 0.00 15 8 F G h2 F g h1 0.00 16 9 F g h1 f g h1 0.32 17 9 F G h1 f g h1 0.28 18 9 F g h1 f G h1 0.21 19 9 F G h1 f G h1 0.18 20 10 F g h1 f G h2 0.76 21 10 F g h2 f G h1 0.24 22 10 F G h1 f g h2 0.00 23 10 F G h2 f g h1 0.00 24 11 F G h1 f G h2 1.00 25 11 F G h2 f G h1 0.00 26 12 F G h1 F g h1 0.40 27 12 F G h1 F g h2 0.35 28 12 F G H F g h1 0.25 29 12 F G h1 F g H 0.00 30 12 F G h2 F g h1 0.00 31 13 F g h1 f g h1 0.32 32 13 F G h1 f g h1 0.28 33 13 F g h1 f G h1 0.21 34 13 F G h1 f G h1 0.18 35 14 f G h2 f g h1 1.00 36 14 f G h1 f g h2 0.00 ================================================================================ Number of haplotype pairs: max vs used ================================================================================ x 1 2 4 5 1 1 0 0 0 2 3 5 0 0 4 0 0 3 0 5 0 0 0 2 > > summary.haplo.em(em.char, digits=2) ================================================================================ Subjects: Haplotype Codes and Posterior Probabilities ================================================================================ subj.id hap1 hap2 posterior 1 1 2 9 0.58 2 1 5 7 0.42 3 2 1 5 1.00 4 2 2 4 0.00 5 3 6 8 1.00 6 4 8 9 1.00 7 4 7 10 0.00 8 5 1 6 1.00 9 6 7 8 1.00 10 7 6 8 1.00 11 8 2 5 0.40 12 8 2 6 0.35 13 8 1 5 0.25 14 8 2 4 0.00 15 8 3 5 0.00 16 9 5 9 0.32 17 9 2 9 0.28 18 9 5 7 0.21 19 9 2 7 0.18 20 10 5 8 0.76 21 10 6 7 0.24 22 10 2 10 0.00 23 10 3 9 0.00 24 11 2 8 1.00 25 11 3 7 0.00 26 12 2 5 0.40 27 12 2 6 0.35 28 12 1 5 0.25 29 12 2 4 0.00 30 12 3 5 0.00 31 13 5 9 0.32 32 13 2 9 0.28 33 13 5 7 0.21 34 13 2 7 0.18 35 14 8 9 1.00 36 14 7 10 0.00 ================================================================================ Number of haplotype pairs: max vs used ================================================================================ x 1 2 4 5 1 1 0 0 0 2 3 5 0 0 4 0 0 3 0 5 0 0 0 2 > > > if(verbose) cat("hla data, 3 loci\n") hla data, 3 loci > set.seed(seed) > em.hla3 <- haplo.em(geno.hla, locus.label=hla.label, miss.val=0, + control = haplo.em.control()) > > print.haplo.em(em.hla3, digits=3) ================================================================================ Haplotypes ================================================================================ DQB DRB HLA.B hap.freq 1 21 1 8 0.002 2 21 2 7 0.002 3 21 2 18 0.002 4 21 3 8 0.104 5 21 3 18 0.002 6 21 3 35 0.006 7 21 3 44 0.004 8 21 3 45 0.002 9 21 3 49 0.002 10 21 3 57 0.002 11 21 3 70 0.002 12 21 4 62 0.005 13 21 7 7 0.014 14 21 7 13 0.011 15 21 7 18 0.005 16 21 7 35 0.002 17 21 7 44 0.021 18 21 7 50 0.005 19 21 7 57 0.002 20 21 7 62 0.008 21 21 8 18 0.002 22 21 8 63 0.002 23 21 9 51 0.002 24 21 10 8 0.002 25 31 1 27 0.005 26 31 2 44 0.002 27 31 2 57 0.002 28 31 3 8 0.005 29 31 4 13 0.005 30 31 4 14 0.002 31 31 4 27 0.005 32 31 4 35 0.005 33 31 4 41 0.002 34 31 4 44 0.028 35 31 4 60 0.007 36 31 7 44 0.005 37 31 7 61 0.002 38 31 8 35 0.002 39 31 8 39 0.002 40 31 8 45 0.002 41 31 8 52 0.002 42 31 8 60 0.005 43 31 11 7 0.002 44 31 11 18 0.005 45 31 11 27 0.006 46 31 11 35 0.018 47 31 11 37 0.007 48 31 11 38 0.007 49 31 11 44 0.010 50 31 11 49 0.005 51 31 11 51 0.011 52 31 11 56 0.002 53 31 11 60 0.002 54 31 11 61 0.005 55 31 11 62 0.006 56 31 13 8 0.005 57 31 13 14 0.002 58 31 13 41 0.005 59 31 13 57 0.002 60 31 14 58 0.002 61 31 14 63 0.002 62 32 1 7 0.002 63 32 1 35 0.002 64 32 1 62 0.000 65 32 2 44 0.003 66 32 3 35 0.002 67 32 3 51 0.002 68 32 4 7 0.020 69 32 4 8 0.005 70 32 4 14 0.005 71 32 4 35 0.000 72 32 4 44 0.002 73 32 4 45 0.002 74 32 4 51 0.002 75 32 4 55 0.002 76 32 4 60 0.031 77 32 4 62 0.023 78 32 7 39 0.005 79 32 7 57 0.005 80 32 8 7 0.007 81 32 9 51 0.002 82 32 10 39 0.002 83 33 7 7 0.005 84 33 7 57 0.007 85 33 9 8 0.002 86 33 9 46 0.002 87 33 9 48 0.002 88 33 9 60 0.007 89 33 10 51 0.002 90 42 4 35 0.002 91 42 8 18 0.002 92 42 8 55 0.002 93 42 8 60 0.002 94 51 1 8 0.005 95 51 1 14 0.002 96 51 1 18 0.002 97 51 1 27 0.014 98 51 1 35 0.030 99 51 1 39 0.002 100 51 1 44 0.018 101 51 1 47 0.002 102 51 1 48 0.002 103 51 1 51 0.008 104 51 1 58 0.002 105 51 1 60 0.002 106 51 2 51 0.002 107 51 4 27 0.002 108 51 8 7 0.005 109 51 10 7 0.003 110 51 10 14 0.002 111 51 10 18 0.002 112 51 10 35 0.002 113 51 10 37 0.005 114 51 14 56 0.002 115 52 2 27 0.002 116 52 2 42 0.002 117 52 2 56 0.005 118 52 2 62 0.005 119 52 4 39 0.002 120 52 8 60 0.002 121 53 8 57 0.002 122 53 9 51 0.002 123 53 14 35 0.002 124 53 14 38 0.005 125 53 14 44 0.002 126 53 14 51 0.002 127 53 14 55 0.005 128 61 2 44 0.002 129 61 2 60 0.002 130 61 2 61 0.005 131 61 2 62 0.002 132 61 4 52 0.002 133 61 8 44 0.002 134 61 13 39 0.002 135 61 13 44 0.002 136 62 2 7 0.049 137 62 2 8 0.005 138 62 2 18 0.015 139 62 2 27 0.005 140 62 2 35 0.011 141 62 2 44 0.014 142 62 2 60 0.005 143 62 4 7 0.002 144 62 4 45 0.003 145 62 7 57 0.002 146 62 8 35 0.002 147 62 8 37 0.002 148 62 10 51 0.002 149 62 11 55 0.002 150 62 13 51 0.002 151 62 14 51 0.002 152 63 2 7 0.014 153 63 2 44 0.002 154 63 4 48 0.002 155 63 8 18 0.002 156 63 8 58 0.002 157 63 10 44 0.002 158 63 13 7 0.013 159 63 13 35 0.003 160 63 13 38 0.007 161 63 13 44 0.017 162 63 13 55 0.005 163 63 13 60 0.006 164 63 13 62 0.009 165 64 2 51 0.002 166 64 11 38 0.002 167 64 13 7 0.010 168 64 13 14 0.005 169 64 13 35 0.007 170 64 13 44 0.002 171 64 13 51 0.002 172 64 13 57 0.002 173 64 13 60 0.004 174 64 13 61 0.002 175 64 13 63 0.007 ================================================================================ Details ================================================================================ lnlike = -1846.904 lr stat for no LD = 585.113 , df = 124 , p-val = 0 > > if(verbose) cat("snap SNP data, unphased\n") snap SNP data, unphased > snapDF <- read.table("snapData.csv",header=TRUE, sep=",", stringsAsFactors=FALSE) > > geno.snap <- setupGeno(geno=snapDF[,-1]) > set.seed(seed) > em.snap <- haplo.em(geno=geno.snap) > > print(em.snap, digits=3) ================================================================================ Haplotypes ================================================================================ loc-1 loc-2 loc-3 loc-4 loc-5 loc-6 loc-7 hap.freq 1 1 1 1 1 1 1 1 0.052 2 1 1 1 1 1 1 2 0.105 3 1 1 1 1 2 1 1 0.051 4 1 1 1 1 2 2 1 0.049 5 1 1 1 1 2 2 2 0.085 6 1 1 1 2 1 1 2 0.063 7 1 1 1 2 2 2 2 0.016 8 1 1 2 1 1 1 1 0.014 9 1 1 2 1 1 1 2 0.034 10 1 1 2 1 2 1 1 0.022 11 1 1 2 1 2 2 1 0.027 12 1 1 2 1 2 2 2 0.016 13 1 1 2 2 1 1 2 0.021 14 1 1 2 2 2 2 2 0.017 15 1 2 1 1 1 1 1 0.009 16 1 2 1 1 1 1 2 0.014 17 1 2 1 1 2 1 1 0.015 18 1 2 1 1 2 2 2 0.000 19 1 2 1 2 1 1 2 0.014 20 1 2 1 2 2 2 2 0.004 21 1 2 2 1 1 1 1 0.021 22 1 2 2 1 1 1 2 0.026 23 1 2 2 1 2 1 1 0.032 24 1 2 2 1 2 2 1 0.016 25 1 2 2 1 2 2 2 0.000 26 1 2 2 2 1 1 2 0.023 27 1 2 2 2 2 2 2 0.015 28 2 1 1 1 1 1 1 0.005 29 2 1 1 1 1 1 2 0.019 30 2 1 1 1 2 1 1 0.032 31 2 1 1 1 2 2 2 0.014 32 2 1 1 2 1 1 2 0.034 33 2 1 1 2 2 2 2 0.008 34 2 2 1 1 1 1 1 0.016 35 2 2 1 1 1 1 2 0.029 36 2 2 1 1 2 1 1 0.025 37 2 2 1 1 2 2 1 0.017 38 2 2 1 1 2 2 2 0.011 39 2 2 1 2 2 2 2 0.028 ================================================================================ Details ================================================================================ lnlike = -1210.329 lr stat for no LD = 139.722 , df = 30 , p-val = 0 > > > if(verbose) cat("Check Phase against SNaP data, phased\n") Check Phase against SNaP data, phased > snapfile <- "snap.sim.phased.dat" > source("snapFUN.s") > set.seed(seed) > block1.phase <- checkPhase(snapfile, blocknum=1) > set.seed(seed) > block2.phase <- checkPhase(snapfile, blocknum=2) > > > print(block1.phase[[2]], digits=4) haplotype em.freq snap.freq 1 11112211 5.516e-02 0.0545 2 11121112 3.348e-01 0.3355 3 11122122 1.565e-01 0.1565 4 12212211 1.553e-01 0.1560 5 12221112 9.816e-02 0.0975 6 21112212 1.120e-01 0.1120 7 21221112 8.800e-02 0.0880 8 22212212 1.644e-09 0.0000 > print(block1.phase[[1]][1:50,],digits=4) # long output 1120 lines subj emhap1 emhap2 post snaphap1 snaphap2 1 1 11121112 21221112 1.000e+00 21221112 11121112 2 2 11122122 12212211 1.000e+00 12212211 11122122 3 3 11121112 11121112 1.000e+00 11121112 11121112 4 4 12212211 12212211 1.000e+00 12212211 12212211 5 5 11121112 12212211 9.057e-01 12212211 11121112 6 5 11112211 12221112 9.427e-02 12212211 11121112 7 6 11121112 21221112 1.000e+00 21221112 11121112 8 7 12212211 21221112 1.000e+00 12212211 21221112 9 8 11112211 11122122 1.000e+00 11112211 11122122 10 9 11121112 11122122 1.000e+00 11121112 11122122 11 10 12212211 12221112 1.000e+00 12212211 12221112 12 11 11121112 11122122 1.000e+00 11121112 11122122 13 12 12221112 21221112 1.000e+00 21221112 12221112 14 13 11121112 12212211 9.057e-01 11121112 12212211 15 13 11112211 12221112 9.427e-02 11121112 12212211 16 14 11112211 11121112 1.000e+00 11121112 11112211 17 15 11112211 21112212 1.000e+00 21112212 11112211 18 16 12221112 21112212 1.000e+00 12221112 21112212 19 16 11121112 22212212 1.098e-07 12221112 21112212 20 17 11122122 12212211 1.000e+00 12212211 11122122 21 18 11121112 11121112 1.000e+00 11121112 11121112 22 19 11121112 12221112 1.000e+00 12221112 11121112 23 20 11121112 11121112 1.000e+00 11121112 11121112 24 21 21112212 21221112 1.000e+00 21221112 21112212 25 22 11121112 11122122 1.000e+00 11121112 11122122 26 23 11121112 12212211 9.057e-01 11121112 12212211 27 23 11112211 12221112 9.427e-02 11121112 12212211 28 24 11121112 11121112 1.000e+00 11121112 11121112 29 25 11122122 21112212 1.000e+00 11122122 21112212 30 26 11121112 11121112 1.000e+00 11121112 11121112 31 27 11122122 12212211 1.000e+00 11122122 12212211 32 28 12212211 21221112 1.000e+00 12212211 21221112 33 29 11122122 12221112 1.000e+00 11122122 12221112 34 30 11121112 21112212 1.000e+00 21112212 11121112 35 31 11121112 11122122 1.000e+00 11122122 11121112 36 32 11122122 21112212 1.000e+00 11122122 21112212 37 33 11122122 21221112 1.000e+00 11122122 21221112 38 34 11121112 11122122 1.000e+00 11122122 11121112 39 35 21112212 21221112 1.000e+00 21221112 21112212 40 36 11121112 12221112 1.000e+00 11121112 12221112 41 37 12212211 12212211 1.000e+00 12212211 12212211 42 38 11121112 11121112 1.000e+00 11121112 11121112 43 39 11121112 11121112 1.000e+00 11121112 11121112 44 40 12212211 21112212 1.000e+00 21112212 12212211 45 40 11112211 22212212 1.143e-08 21112212 12212211 46 41 11121112 12212211 9.057e-01 12212211 11121112 47 41 11112211 12221112 9.427e-02 12212211 11121112 48 42 12212211 21221112 1.000e+00 12212211 21221112 49 43 11121112 11122122 1.000e+00 11121112 11122122 50 44 12212211 21112212 1.000e+00 21112212 12212211 > print(block2.phase[[2]],digits=4) haplotype em.freq snap.freq 1 1111221221 5.200e-02 0.0520 2 1121221221 1.965e-01 0.1965 3 1221112211 9.650e-02 0.0965 4 2112211122 2.105e-01 0.2105 5 2112221122 5.400e-02 0.0540 6 2211122211 9.300e-02 0.0930 7 2221111222 1.010e-01 0.1010 8 2221121222 1.886e-10 0.0000 9 2221222221 1.965e-01 0.1965 > > > > > > proc.time() user system elapsed 6.754 0.236 7.384