R Under development (unstable) (2024-02-16 r85931 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library("PSCBS") PSCBS v0.67.0 successfully loaded. See ?PSCBS for help. > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Simulating copy-number data > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > set.seed(0xBEEF) > > # Number of loci > J <- 1000 > > x <- sort(runif(J, max=J)) * 1e5 > > mu <- double(J) > mu[200:300] <- mu[200:300] + 1 > mu[350:400] <- NA # centromere > mu[650:800] <- mu[650:800] - 1 > eps <- rnorm(J, sd=1/2) > y <- mu + eps > > outliers <- seq(from=1L, to=J, length.out=0.2*J) > y[outliers] <- y[outliers] + 1.5 > > w <- rep(1.0, times=J) > w[outliers] <- 0.01 > > data <- data.frame(chromosome=1L, x=x, y=y) > dataW <- cbind(data, w=w) > > > par(mar=c(2,3,0.2,1)+0.1) > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Single-chromosome segmentation > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Segment without weights > fit <- segmentByCBS(data) > sampleName(fit) <- "CBS_Example" > print(fit) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example 1 136857.7 19138391 199 0.2712 2 CBS_Example 1 19138391.4 28682180 101 1.2168 3 CBS_Example 1 28682180.1 64690253 298 0.3027 4 CBS_Example 1 64690253.3 80738828 151 -0.7101 5 CBS_Example 1 80738828.3 99932904 200 0.3655 > plotTracks(fit) Warning message: In plotTracks.CBS(fit) : Setting default 'Clim' assuming the signal type is 'ratio' because signalType(fit) is unknown ('NA'). Use signalType(fit) <- 'ratio' to avoid this warning. > ## Highlight outliers (they pull up the mean levels) > points(x[outliers]/1e6, y[outliers], col="purple") > > # Segment with weights > fitW <- segmentByCBS(dataW) > sampleName(fitW) <- "CBS_Example (weighted)" > print(fitW) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example (weighted) 1 136857.7 19138391 199 -0.0041 2 CBS_Example (weighted) 1 19138391.4 28682180 101 0.8987 3 CBS_Example (weighted) 1 28682180.1 64690253 298 0.0159 4 CBS_Example (weighted) 1 64690253.3 80738828 151 -1.0215 5 CBS_Example (weighted) 1 80738828.3 99932904 200 0.0653 > drawLevels(fitW, col="red") NULL > > legend("topright", bg="white", legend=c("outliers", "non-weighted CBS", "weighted CBS"), col=c("purple", "purple", "red"), lwd=c(NA,3,3), pch=c(1,NA,NA)) > > ## Assert that weighted segment means are less biased > dmean <- getSegments(fit)$mean - getSegments(fitW)$mean > cat("Segment mean differences:\n") Segment mean differences: > print(dmean) [1] 0.2753 0.3181 0.2868 0.3114 0.3002 > stopifnot(all(dmean > 0, na.rm=TRUE)) > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Segmentation with some known change points > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > knownSegments <- data.frame( + chromosome=c( 1, 1), + start =x[c( 1, 401)], + end =x[c(349, J)] + ) > fit2 <- segmentByCBS(dataW, knownSegments=knownSegments, verbose=TRUE) Segmenting by CBS... Chromosome: 1 Segmenting by CBS...done > sampleName(fit2) <- "CBS_Example_2 (weighted)" > print(fit2) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example_2 (weighted) 1 136857.7 19138391 199 -0.0041 2 CBS_Example_2 (weighted) 1 19138391.4 28682180 101 0.8987 3 CBS_Example_2 (weighted) 1 28682180.1 34062461 49 -0.0552 4 CBS_Example_2 (weighted) 1 38343432.8 64690253 249 0.0298 5 CBS_Example_2 (weighted) 1 64690253.3 80738828 151 -1.0215 6 CBS_Example_2 (weighted) 1 80738828.3 99932904 200 0.0653 > plotTracks(fit2) Warning message: In plotTracks.CBS(fit2) : Setting default 'Clim' assuming the signal type is 'ratio' because signalType(fit2) is unknown ('NA'). Use signalType(fit2) <- 'ratio' to avoid this warning. > abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) > > > # Chromosome boundaries can be specified as -Inf and +Inf > knownSegments <- data.frame( + chromosome=c( 1, 1), + start =c( -Inf, x[401]), + end =c(x[349], +Inf) + ) > fit2b <- segmentByCBS(dataW, knownSegments=knownSegments, verbose=TRUE) Segmenting by CBS... Chromosome: 1 Segmenting by CBS...done > sampleName(fit2b) <- "CBS_Example_2b (weighted)" > print(fit2b) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example_2b (weighted) 1 136857.7 19138391 199 -0.0041 2 CBS_Example_2b (weighted) 1 19138391.4 28682180 101 0.8987 3 CBS_Example_2b (weighted) 1 28682180.1 34062461 49 -0.0552 4 CBS_Example_2b (weighted) 1 38343432.8 64690253 249 0.0298 5 CBS_Example_2b (weighted) 1 64690253.3 80738828 151 -1.0215 6 CBS_Example_2b (weighted) 1 80738828.3 99932904 200 0.0653 > plotTracks(fit2b) Warning message: In plotTracks.CBS(fit2b) : Setting default 'Clim' assuming the signal type is 'ratio' because signalType(fit2b) is unknown ('NA'). Use signalType(fit2b) <- 'ratio' to avoid this warning. > abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) > > > # As a proof of concept, it is possible to segment just the centromere, > # which contains no data. All statistics will be NAs. > knownSegments <- data.frame( + chromosome=c( 1), + start =x[c(350)], + end =x[c(400)] + ) > fit3 <- segmentByCBS(dataW, knownSegments=knownSegments, verbose=TRUE) Segmenting by CBS... Chromosome: 1 Segmenting by CBS...done > sampleName(fit3) <- "CBS_Example_3" > print(fit3) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example_3 1 34108010 38257409 0 NA > plotTracks(fit3, Clim=c(0,5), xlim=c(0,100)) > abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) > > > # If one specify the (empty) centromere as a segment, then its > # estimated statistics will be NAs, which becomes a natural > # separator between the two "independent" arms. > knownSegments <- data.frame( + chromosome=c( 1, 1, 1), + start =x[c( 1, 350, 401)], + end =x[c(349, 400, J)] + ) > fit4 <- segmentByCBS(dataW, knownSegments=knownSegments, verbose=TRUE) Segmenting by CBS... Chromosome: 1 Segmenting by CBS...done > sampleName(fit4) <- "CBS_Example_4" > print(fit4) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example_4 1 136857.7 19138391 199 -0.0041 2 CBS_Example_4 1 19138391.4 28682180 101 0.8987 3 CBS_Example_4 1 28682180.1 34062461 49 -0.0552 4 CBS_Example_4 1 34108009.8 38257409 0 NA 5 CBS_Example_4 1 38343432.8 64690253 249 0.0298 6 CBS_Example_4 1 64690253.3 80738828 151 -1.0215 7 CBS_Example_4 1 80738828.3 99932904 200 0.0653 > plotTracks(fit4) Warning message: In plotTracks.CBS(fit4) : Setting default 'Clim' assuming the signal type is 'ratio' because signalType(fit4) is unknown ('NA'). Use signalType(fit4) <- 'ratio' to avoid this warning. > abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) > > > fit5 <- segmentByCBS(dataW, knownSegments=knownSegments, undo=Inf, verbose=TRUE) Segmenting by CBS... Chromosome: 1 Segmenting by CBS...done > sampleName(fit5) <- "CBS_Example_5" > print(fit5) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example_5 1 136857.7 34062461 349 0.54781248 2 CBS_Example_5 1 34108009.8 38257409 0 NA 3 CBS_Example_5 1 38343432.8 99932904 600 0.06959745 > plotTracks(fit5) Warning message: In plotTracks.CBS(fit5) : Setting default 'Clim' assuming the signal type is 'ratio' because signalType(fit5) is unknown ('NA'). Use signalType(fit5) <- 'ratio' to avoid this warning. > abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) > stopifnot(nbrOfSegments(fit5) == nrow(knownSegments)) > > > # One can also force a separator between two segments by setting > # 'start' and 'end' to NAs ('chromosome' has to be given) > knownSegments <- data.frame( + chromosome=c( 1, 1, 1), + start =x[c( 1, NA, 401)], + end =x[c(349, NA, J)] + ) > fit6 <- segmentByCBS(dataW, knownSegments=knownSegments, verbose=TRUE) Segmenting by CBS... Chromosome: 1 Segmenting by CBS...done > sampleName(fit6) <- "CBS_Example_6" > print(fit6) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example_6 1 136857.7 19138391 199 -0.0041 2 CBS_Example_6 1 19138391.4 28682180 101 0.8987 3 CBS_Example_6 1 28682180.1 34062461 49 -0.0552 4 NA NA NA NA NA 5 CBS_Example_6 1 38343432.8 64690253 249 0.0298 6 CBS_Example_6 1 64690253.3 80738828 151 -1.0215 7 CBS_Example_6 1 80738828.3 99932904 200 0.0653 > plotTracks(fit6) Warning message: In plotTracks.CBS(fit6) : Setting default 'Clim' assuming the signal type is 'ratio' because signalType(fit6) is unknown ('NA'). Use signalType(fit6) <- 'ratio' to avoid this warning. > abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Multi-chromosome segmentation > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > data2 <- data > data2$chromosome <- 2L > data <- rbind(data, data2) > dataW <- cbind(data, w=w) > > par(mar=c(2,3,0.2,1)+0.1) > # Segment without weights > fit <- segmentByCBS(data) > sampleName(fit) <- "CBS_Example" > print(fit) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example 1 136857.7 19138391 199 0.2712 2 CBS_Example 1 19138391.4 28682180 101 1.2168 3 CBS_Example 1 28682180.1 64690253 298 0.3027 4 CBS_Example 1 64690253.3 80738828 151 -0.7101 5 CBS_Example 1 80738828.3 99932904 200 0.3655 6 NA NA NA NA NA 7 CBS_Example 2 136857.7 19138391 199 0.2712 8 CBS_Example 2 19138391.4 28682180 101 1.2168 9 CBS_Example 2 28682180.1 64690253 298 0.3027 10 CBS_Example 2 64690253.3 80738828 151 -0.7101 11 CBS_Example 2 80738828.3 99932904 200 0.3655 > plotTracks(fit, Clim=c(-3,3)) > > # Segment with weights > fitW <- segmentByCBS(dataW) > sampleName(fitW) <- "CBS_Example (weighted)" > print(fitW) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example (weighted) 1 136857.7 19138391 199 -0.0041 2 CBS_Example (weighted) 1 19138391.4 28682180 101 0.8987 3 CBS_Example (weighted) 1 28682180.1 64690253 298 0.0159 4 CBS_Example (weighted) 1 64690253.3 80738828 151 -1.0215 5 CBS_Example (weighted) 1 80738828.3 99932904 200 0.0653 6 NA NA NA NA NA 7 CBS_Example (weighted) 2 136857.7 19138391 199 -0.0041 8 CBS_Example (weighted) 2 19138391.4 28682180 101 0.8987 9 CBS_Example (weighted) 2 28682180.1 64690253 298 0.0159 10 CBS_Example (weighted) 2 64690253.3 80738828 151 -1.0215 11 CBS_Example (weighted) 2 80738828.3 99932904 200 0.0653 > drawLevels(fitW, col="red") NULL > > legend("topright", bg="white", legend=c("outliers", "non-weighted CBS", "weighted CBS"), col=c("purple", "purple", "red"), lwd=c(NA,3,3), pch=c(1,NA,NA)) > > ## Assert that weighted segment means are less biased > dmean <- getSegments(fit)$mean - getSegments(fitW)$mean > cat("Segment mean differences:\n") Segment mean differences: > print(dmean) [1] 0.2753 0.3181 0.2868 0.3114 0.3002 NA 0.2753 0.3181 0.2868 0.3114 [11] 0.3002 > stopifnot(all(dmean > 0, na.rm=TRUE)) > > proc.time() user system elapsed 2.34 0.15 2.73