R Under development (unstable) (2024-02-16 r85931 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> ###########################################################
> # This tests:
> # - Bootstrapping for PairedPSCBS objects
> ###########################################################
> library("PSCBS")
PSCBS v0.67.0 successfully loaded. See ?PSCBS for help.
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # Load SNP microarray data
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> data <- PSCBS::exampleData("paired.chr01")
> 
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # Paired PSCBS segmentation
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # Drop single-locus outliers
> dataS <- dropSegmentationOutliers(data)
> dataS <- dataS[seq(from=1, to=nrow(data), by=5),]
> nSegs <- 4L
> str(dataS)
'data.frame':	14670 obs. of  6 variables:
 $ chromosome: int  1 1 1 1 1 1 1 1 1 1 ...
 $ x         : int  1145994 2941694 3710825 4240737 4276892 4464544 4714611 5095111 5034491 5158416 ...
 $ CT        : num  1.63 1.4 1.41 1.17 1.16 ...
 $ betaT     : num  0.7574 0.169 0.2357 0.2604 0.0576 ...
 $ CN        : num  2.36 2.13 2.26 2.01 2.32 ...
 $ betaN     : num  0.8274 0.5072 0.1671 0.1609 0.0421 ...
> # Segment known regions
> knownSegments <- data.frame(
+   chromosome = c(        1,  1,         1),
+   start      = c(     -Inf, NA, 141510003),
+   end        = c(120992603, NA,      +Inf)
+ )
> fit <- segmentByPairedPSCBS(dataS, knownSegments=knownSegments, avgDH="median", seed=0xBEEF)
> print(fit)
  chromosome tcnId dhId     start       end tcnNbrOfLoci  tcnMean tcnNbrOfSNPs
1          1     1    1    554484 120992603         7586 1.385258         2108
2         NA     2    1        NA        NA           NA       NA            0
3          1     3    1 141510003 185449813         2681 2.068861          777
4          1     4    1 185449813 247137334         4391 2.634110         1311
  tcnNbrOfHets dhNbrOfLoci     dhMean    c1Mean   c2Mean
1         2108        2108 0.54551245 0.3147912 1.070467
2            0           0         NA        NA       NA
3          777         777 0.07132277 0.9606521 1.108209
4         1311        1311 0.21663871 1.0317300 1.602380
> 
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # Bootstrap
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> B <- 1L
> seed <- 0xBEEF
> probs <- c(0.025, 0.05, 0.95, 0.975)
> 
> sets <- getBootstrapLocusSets(fit, B=B, seed=seed)
> 
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> # Subset by first segment
> # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
> ss <- 1L
> 
> fitT <- extractSegment(fit, ss)
> dataT <- getLocusData(fitT)
> segsT <- getSegments(fitT)
> 
> # Truth
> bootT <- bootstrapSegmentsAndChangepoints(fitT, B=B, seed=seed)
> bootT1 <- bootT$segments[1L,,,drop=FALSE]
> types <- dimnames(bootT1)[[3L]]
> dim(bootT1) <- dim(bootT1)[-1L]
> colnames(bootT1) <- types
> sumsT <- apply(bootT1, MARGIN=2L, FUN=quantile, probs=probs)
> print(sumsT)
           tcn        dh        c1       c2
2.5%  1.383213 0.5466788 0.3135198 1.069693
5%    1.383213 0.5466788 0.3135198 1.069693
95%   1.383213 0.5466788 0.3135198 1.069693
97.5% 1.383213 0.5466788 0.3135198 1.069693
> 
> fitTB <- bootstrapTCNandDHByRegion(fitT, B=B, seed=seed)
> segsTB <- getSegments(fitTB)
> segsTB <- unlist(segsTB[,grep("_", colnames(segsTB))])
> dim(segsTB) <- dim(sumsT)
> dimnames(segsTB) <- dimnames(sumsT)
> print(segsTB)
           tcn        dh        c1       c2
2.5%  1.383213 0.5466788 0.3135198 1.069693
5%    1.383213 0.5466788 0.3135198 1.069693
95%   1.383213 0.5466788 0.3135198 1.069693
97.5% 1.383213 0.5466788 0.3135198 1.069693
> 
> # Sanity check
> stopifnot(all.equal(segsTB, sumsT))
> 
> # Calculate summaries using the existing bootstrap samples
> fitTBp <- bootstrapTCNandDHByRegion(fitT, .boot=bootT)
> # Sanity check
> all.equal(fitTBp, fitTB)
 [1] "Component \"tcn_2.5%\": Mean relative difference: 0.003070405" 
 [2] "Component \"tcn_5%\": Mean relative difference: 0.002241362"   
 [3] "Component \"tcn_95%\": Mean relative difference: 0.005458479"  
 [4] "Component \"tcn_97.5%\": Mean relative difference: 0.006030363"
 [5] "Component \"dh_2.5%\": Mean relative difference: 0.02683423"   
 [6] "Component \"dh_5%\": Mean relative difference: 0.02409533"     
 [7] "Component \"dh_95%\": Mean relative difference: 0.0150081"     
 [8] "Component \"dh_97.5%\": Mean relative difference: 0.01826461"  
 [9] "Component \"c1_2.5%\": Mean relative difference: 0.02397349"   
[10] "Component \"c1_5%\": Mean relative difference: 0.01800948"     
[11] "Component \"c1_95%\": Mean relative difference: 0.0303456"     
[12] "Component \"c1_97.5%\": Mean relative difference: 0.03420614"  
[13] "Component \"c2_2.5%\": Mean relative difference: 0.008723378"  
[14] "Component \"c2_5%\": Mean relative difference: 0.006834962"    
[15] "Component \"c2_95%\": Mean relative difference: 0.00741949"    
[16] "Component \"c2_97.5%\": Mean relative difference: 0.008743911" 
attr(,"what")
[1] "getSegments()"
> 
> 
> # Bootstrap from scratch
> setsT <- getBootstrapLocusSets(fitT, B=B, seed=seed)
> lociT <- setsT$locusSet[[1L]]$bootstrap$loci
> idxs <- lociT$tcn
> tcnT <- array(dataT$CT[idxs], dim=dim(idxs))
> tcnT <- apply(tcnT, MARGIN=2L, FUN=mean, na.rm=TRUE)
> idxs <- lociT$dh
> dhT <- array(dataT$rho[idxs], dim=dim(idxs))
> dhT <- apply(dhT, MARGIN=2L, FUN=median, na.rm=TRUE)
> c1T <- (1-dhT) * tcnT / 2
> c2T <- tcnT - c1T
> bootT2 <- array(c(tcnT, dhT, c1T, c2T), dim=c(1L, 4L))
> colnames(bootT2) <- colnames(bootT1)
> print(bootT2)
          tcn        dh        c1       c2
[1,] 1.383213 0.5466788 0.3135198 1.069693
> 
> # This comparison is only valid if B == 1L
> if (B == 1L) {
+   # Sanity check
+   stopifnot(all.equal(bootT2, bootT1))
+ }
> 
> proc.time()
   user  system elapsed 
   2.73    0.20    3.48