R Under development (unstable) (2024-02-16 r85931 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ########################################################### > # This tests: > # - Bootstrapping for PairedPSCBS objects > ########################################################### > library("PSCBS") PSCBS v0.67.0 successfully loaded. See ?PSCBS for help. > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Load SNP microarray data > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > data <- PSCBS::exampleData("paired.chr01") > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Paired PSCBS segmentation > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Drop single-locus outliers > dataS <- dropSegmentationOutliers(data) > dataS <- dataS[seq(from=1, to=nrow(data), by=5),] > nSegs <- 4L > str(dataS) 'data.frame': 14670 obs. of 6 variables: $ chromosome: int 1 1 1 1 1 1 1 1 1 1 ... $ x : int 1145994 2941694 3710825 4240737 4276892 4464544 4714611 5095111 5034491 5158416 ... $ CT : num 1.63 1.4 1.41 1.17 1.16 ... $ betaT : num 0.7574 0.169 0.2357 0.2604 0.0576 ... $ CN : num 2.36 2.13 2.26 2.01 2.32 ... $ betaN : num 0.8274 0.5072 0.1671 0.1609 0.0421 ... > # Segment known regions > knownSegments <- data.frame( + chromosome = c( 1, 1, 1), + start = c( -Inf, NA, 141510003), + end = c(120992603, NA, +Inf) + ) > fit <- segmentByPairedPSCBS(dataS, knownSegments=knownSegments, avgDH="median", seed=0xBEEF) > print(fit) chromosome tcnId dhId start end tcnNbrOfLoci tcnMean tcnNbrOfSNPs 1 1 1 1 554484 120992603 7586 1.385258 2108 2 NA 2 1 NA NA NA NA 0 3 1 3 1 141510003 185449813 2681 2.068861 777 4 1 4 1 185449813 247137334 4391 2.634110 1311 tcnNbrOfHets dhNbrOfLoci dhMean c1Mean c2Mean 1 2108 2108 0.54551245 0.3147912 1.070467 2 0 0 NA NA NA 3 777 777 0.07132277 0.9606521 1.108209 4 1311 1311 0.21663871 1.0317300 1.602380 > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Bootstrap > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > B <- 1L > seed <- 0xBEEF > probs <- c(0.025, 0.05, 0.95, 0.975) > > sets <- getBootstrapLocusSets(fit, B=B, seed=seed) > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Subset by first segment > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > ss <- 1L > > fitT <- extractSegment(fit, ss) > dataT <- getLocusData(fitT) > segsT <- getSegments(fitT) > > # Truth > bootT <- bootstrapSegmentsAndChangepoints(fitT, B=B, seed=seed) > bootT1 <- bootT$segments[1L,,,drop=FALSE] > types <- dimnames(bootT1)[[3L]] > dim(bootT1) <- dim(bootT1)[-1L] > colnames(bootT1) <- types > sumsT <- apply(bootT1, MARGIN=2L, FUN=quantile, probs=probs) > print(sumsT) tcn dh c1 c2 2.5% 1.383213 0.5466788 0.3135198 1.069693 5% 1.383213 0.5466788 0.3135198 1.069693 95% 1.383213 0.5466788 0.3135198 1.069693 97.5% 1.383213 0.5466788 0.3135198 1.069693 > > fitTB <- bootstrapTCNandDHByRegion(fitT, B=B, seed=seed) > segsTB <- getSegments(fitTB) > segsTB <- unlist(segsTB[,grep("_", colnames(segsTB))]) > dim(segsTB) <- dim(sumsT) > dimnames(segsTB) <- dimnames(sumsT) > print(segsTB) tcn dh c1 c2 2.5% 1.383213 0.5466788 0.3135198 1.069693 5% 1.383213 0.5466788 0.3135198 1.069693 95% 1.383213 0.5466788 0.3135198 1.069693 97.5% 1.383213 0.5466788 0.3135198 1.069693 > > # Sanity check > stopifnot(all.equal(segsTB, sumsT)) > > # Calculate summaries using the existing bootstrap samples > fitTBp <- bootstrapTCNandDHByRegion(fitT, .boot=bootT) > # Sanity check > all.equal(fitTBp, fitTB) [1] "Component \"tcn_2.5%\": Mean relative difference: 0.003070405" [2] "Component \"tcn_5%\": Mean relative difference: 0.002241362" [3] "Component \"tcn_95%\": Mean relative difference: 0.005458479" [4] "Component \"tcn_97.5%\": Mean relative difference: 0.006030363" [5] "Component \"dh_2.5%\": Mean relative difference: 0.02683423" [6] "Component \"dh_5%\": Mean relative difference: 0.02409533" [7] "Component \"dh_95%\": Mean relative difference: 0.0150081" [8] "Component \"dh_97.5%\": Mean relative difference: 0.01826461" [9] "Component \"c1_2.5%\": Mean relative difference: 0.02397349" [10] "Component \"c1_5%\": Mean relative difference: 0.01800948" [11] "Component \"c1_95%\": Mean relative difference: 0.0303456" [12] "Component \"c1_97.5%\": Mean relative difference: 0.03420614" [13] "Component \"c2_2.5%\": Mean relative difference: 0.008723378" [14] "Component \"c2_5%\": Mean relative difference: 0.006834962" [15] "Component \"c2_95%\": Mean relative difference: 0.00741949" [16] "Component \"c2_97.5%\": Mean relative difference: 0.008743911" attr(,"what") [1] "getSegments()" > > > # Bootstrap from scratch > setsT <- getBootstrapLocusSets(fitT, B=B, seed=seed) > lociT <- setsT$locusSet[[1L]]$bootstrap$loci > idxs <- lociT$tcn > tcnT <- array(dataT$CT[idxs], dim=dim(idxs)) > tcnT <- apply(tcnT, MARGIN=2L, FUN=mean, na.rm=TRUE) > idxs <- lociT$dh > dhT <- array(dataT$rho[idxs], dim=dim(idxs)) > dhT <- apply(dhT, MARGIN=2L, FUN=median, na.rm=TRUE) > c1T <- (1-dhT) * tcnT / 2 > c2T <- tcnT - c1T > bootT2 <- array(c(tcnT, dhT, c1T, c2T), dim=c(1L, 4L)) > colnames(bootT2) <- colnames(bootT1) > print(bootT2) tcn dh c1 c2 [1,] 1.383213 0.5466788 0.3135198 1.069693 > > # This comparison is only valid if B == 1L > if (B == 1L) { + # Sanity check + stopifnot(all.equal(bootT2, bootT1)) + } > > proc.time() user system elapsed 2.73 0.20 3.48