R Under development (unstable) (2024-02-16 r85931 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ########################################################### > # This tests: > # - segmentByCBS(...) > # - segmentByCBS(..., knownSegments) > # - tileChromosomes() > # - plotTracks() > ########################################################### > library("PSCBS") PSCBS v0.67.0 successfully loaded. See ?PSCBS for help. > subplots <- R.utils::subplots > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Simulating copy-number data > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > set.seed(0xBEEF) > > # Number of loci > J <- 1000 > > mu <- double(J) > mu[200:300] <- mu[200:300] + 1 > mu[350:400] <- NA # centromere > mu[650:800] <- mu[650:800] - 1 > eps <- rnorm(J, sd=1/2) > y <- mu + eps > x <- sort(runif(length(y), max=length(y))) * 1e5 > w <- runif(J) > w[650:800] <- 0.001 > > > subplots(8, ncol=1L) > par(mar=c(1.7,1,0.2,1)+0.1) > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Segmentation > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > fit <- segmentByCBS(y, x=x) > sampleName(fit) <- "CBS_Example" > print(fit) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example 0 55167.82 20774251 201 0.0164 2 CBS_Example 0 20774250.85 29320105 99 1.0474 3 CBS_Example 0 29320104.86 65874675 298 -0.0203 4 CBS_Example 0 65874675.06 81348129 151 -1.0813 5 CBS_Example 0 81348129.20 99910827 200 -0.0612 > plotTracks(fit) Warning message: In plotTracks.CBS(fit) : Setting default 'Clim' assuming the signal type is 'ratio' because signalType(fit) is unknown ('NA'). Use signalType(fit) <- 'ratio' to avoid this warning. > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Segmentation with some known change points > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > knownSegments <- data.frame( + chromosome=c( 0, 0), + start =x[c( 1, 401)], + end =x[c(349, J)] + ) > fit2 <- segmentByCBS(y, x=x, knownSegments=knownSegments, verbose=TRUE) Segmenting by CBS... Chromosome: 0 Segmenting by CBS...done > sampleName(fit2) <- "CBS_Example_2" > print(fit2) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example_2 0 55167.82 20774251 201 0.0164 2 CBS_Example_2 0 20774250.85 29320105 99 1.0474 3 CBS_Example_2 0 29320104.86 34142178 49 -0.0193 4 CBS_Example_2 0 41080532.92 65874675 249 -0.0205 5 CBS_Example_2 0 65874675.06 81348129 151 -1.0813 6 CBS_Example_2 0 81348129.20 99910827 200 -0.0612 > plotTracks(fit2) Warning message: In plotTracks.CBS(fit2) : Setting default 'Clim' assuming the signal type is 'ratio' because signalType(fit2) is unknown ('NA'). Use signalType(fit2) <- 'ratio' to avoid this warning. > abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) > > > # Chromosome boundaries can be specified as -Inf and +Inf > knownSegments <- data.frame( + chromosome=c( 0, 0), + start =c( -Inf, x[401]), + end =c(x[349], +Inf) + ) > fit2b <- segmentByCBS(y, x=x, knownSegments=knownSegments, verbose=TRUE) Segmenting by CBS... Chromosome: 0 Segmenting by CBS...done > sampleName(fit2b) <- "CBS_Example_2b" > print(fit2b) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example_2b 0 55167.82 20774251 201 0.0164 2 CBS_Example_2b 0 20774250.85 29320105 99 1.0474 3 CBS_Example_2b 0 29320104.86 34142178 49 -0.0193 4 CBS_Example_2b 0 41080532.92 65874675 249 -0.0205 5 CBS_Example_2b 0 65874675.06 81348129 151 -1.0813 6 CBS_Example_2b 0 81348129.20 99910827 200 -0.0612 > plotTracks(fit2b) Warning message: In plotTracks.CBS(fit2b) : Setting default 'Clim' assuming the signal type is 'ratio' because signalType(fit2b) is unknown ('NA'). Use signalType(fit2b) <- 'ratio' to avoid this warning. > abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) > > > # As a proof of concept, it is possible to segment just the centromere, > # which contains no data. All statistics will be NAs. > knownSegments <- data.frame( + chromosome=c( 0), + start =x[c(350)], + end =x[c(400)] + ) > fit3 <- segmentByCBS(y, x=x, knownSegments=knownSegments, verbose=TRUE) Segmenting by CBS... Chromosome: 0 Segmenting by CBS...done > sampleName(fit3) <- "CBS_Example_3" > print(fit3) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example_3 0 34194740 41044125 0 NA > plotTracks(fit3, Clim=c(0,5), xlim=c(0,100)) > abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) > > > > # If one specify the (empty) centromere as a segment, then its > # estimated statistics will be NAs, which becomes a natural > # separator between the two "independent" arms. > knownSegments <- data.frame( + chromosome=c( 0, 0, 0), + start =x[c( 1, 350, 401)], + end =x[c(349, 400, J)] + ) > fit4 <- segmentByCBS(y, x=x, knownSegments=knownSegments, verbose=TRUE) Segmenting by CBS... Chromosome: 0 Segmenting by CBS...done > sampleName(fit4) <- "CBS_Example_4" > print(fit4) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example_4 0 55167.82 20774251 201 0.0164 2 CBS_Example_4 0 20774250.85 29320105 99 1.0474 3 CBS_Example_4 0 29320104.86 34142178 49 -0.0193 4 CBS_Example_4 0 34194739.81 41044125 0 NA 5 CBS_Example_4 0 41080532.92 65874675 249 -0.0205 6 CBS_Example_4 0 65874675.06 81348129 151 -1.0813 7 CBS_Example_4 0 81348129.20 99910827 200 -0.0612 > plotTracks(fit4) Warning message: In plotTracks.CBS(fit4) : Setting default 'Clim' assuming the signal type is 'ratio' because signalType(fit4) is unknown ('NA'). Use signalType(fit4) <- 'ratio' to avoid this warning. > abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) > > > > fit5 <- segmentByCBS(y, x=x, knownSegments=knownSegments, undo=Inf, verbose=TRUE) Segmenting by CBS... Chromosome: 0 Segmenting by CBS...done > sampleName(fit5) <- "CBS_Example_5" > print(fit5) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example_5 0 55167.82 34142178 349 0.3038785 2 CBS_Example_5 0 34194739.81 41044125 0 NA 3 CBS_Example_5 0 41080532.92 99910827 600 -0.3010285 > plotTracks(fit5) Warning message: In plotTracks.CBS(fit5) : Setting default 'Clim' assuming the signal type is 'ratio' because signalType(fit5) is unknown ('NA'). Use signalType(fit5) <- 'ratio' to avoid this warning. > abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) > stopifnot(nbrOfSegments(fit5) == nrow(knownSegments)) > > > # One can also force a separator between two segments by setting > # 'start' and 'end' to NAs ('chromosome' has to be given) > knownSegments <- data.frame( + chromosome=c( 0, 0, 0), + start =x[c( 1, NA, 401)], + end =x[c(349, NA, J)] + ) > fit6 <- segmentByCBS(y, x=x, knownSegments=knownSegments, verbose=TRUE) Segmenting by CBS... Chromosome: 0 Segmenting by CBS...done > sampleName(fit6) <- "CBS_Example_6" > print(fit6) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example_6 0 55167.82 20774251 201 0.0164 2 CBS_Example_6 0 20774250.85 29320105 99 1.0474 3 CBS_Example_6 0 29320104.86 34142178 49 -0.0193 4 NA NA NA NA NA 5 CBS_Example_6 0 41080532.92 65874675 249 -0.0205 6 CBS_Example_6 0 65874675.06 81348129 151 -1.0813 7 CBS_Example_6 0 81348129.20 99910827 200 -0.0612 > plotTracks(fit6) Warning message: In plotTracks.CBS(fit6) : Setting default 'Clim' assuming the signal type is 'ratio' because signalType(fit6) is unknown ('NA'). Use signalType(fit6) <- 'ratio' to avoid this warning. > abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Segment multiple chromosomes > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Simulate multiple chromosomes > fit1 <- renameChromosomes(fit, from=0, to=1) > fit2 <- renameChromosomes(fit, from=0, to=2) > fitM <- c(fit1, fit2) > fitM <- segmentByCBS(fitM) > sampleName(fitM) <- "CBS_Example_M" > print(fitM) sampleName chromosome start end nbrOfLoci mean 1 CBS_Example_M 1 55167.82 20774251 201 0.0164 2 CBS_Example_M 1 20774250.85 29320105 99 1.0474 3 CBS_Example_M 1 29320104.86 65874675 298 -0.0203 4 CBS_Example_M 1 65874675.06 81348129 151 -1.0813 5 CBS_Example_M 1 81348129.20 99910827 200 -0.0612 6 NA NA NA NA NA 7 CBS_Example_M 2 55167.82 20774251 201 0.0164 8 CBS_Example_M 2 20774250.85 29320105 99 1.0474 9 CBS_Example_M 2 29320104.86 65874675 298 -0.0203 10 CBS_Example_M 2 65874675.06 81348129 151 -1.0813 11 CBS_Example_M 2 81348129.20 99910827 200 -0.0612 > plotTracks(fitM, Clim=c(-3,3)) > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Tiling multiple chromosomes > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Tile chromosomes > fitT <- tileChromosomes(fitM) > fitTb <- tileChromosomes(fitT) > stopifnot(identical(fitTb, fitT)) > > > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > # Write segmentation to file > # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - > pathT <- tempdir() > > ## Tab-delimited file > pathname <- writeSegments(fitM, path=pathT) Warning message: In write.table(file = pathnameT, data, append = TRUE, quote = FALSE, : appending column names to file > print(pathname) [1] "D:/temp/Rtmpg7hoco/CBS_Example_M.tsv" > > ## WIG file > pathname <- writeWIG(fitM, path=pathT) > print(pathname) [1] "D:/temp/Rtmpg7hoco/CBS_Example_M.wig" > > unlink(pathT, recursive=TRUE) > > proc.time() user system elapsed 2.46 0.14 4.45