# Exercise PerturbationClusterTest from ClassDiscovery on simulated data:
# first on a matrix assembled from two separately simulated column groups,
# then on a matrix simulated in one piece, so the two results can be compared.
library(ClassDiscovery)

# Pin the RNG algorithm and the seed so the simulated data are reproducible.
# RNGversion() may warn on newer R releases; that warning is suppressed here.
suppressWarnings( RNGversion("3.5.3") )
set.seed(147676)

# simulate data from two different groups
# (100 rows each; 30 columns in the first group, 20 in the second; the inner
# rnorm() draws 100 means that are recycled down the rows of each matrix)
d1 <- matrix(rnorm(100*30, rnorm(100, 0.5)), nrow=100, ncol=30, byrow=FALSE)
d2 <- matrix(rnorm(100*20, rnorm(100, 0.5)), nrow=100, ncol=20, byrow=FALSE)
dd <- cbind(d1, d2)
# one color per column of dd: 30 'red' followed by 20 'green'
cols <- rep(c('red', 'green'), times=c(30,20))

# perform your basic hierarchical clustering...
hc <- hclust(distanceMatrix(dd, 'pearson'), method='complete')

# run the perturbation test on the clusters arising from hclust
bc <- PerturbationClusterTest(dd, cutHclust, nTimes=200, k=3, metric='pearson')
summary(bc)

# look at the distribution of agreement scores
hist(bc, breaks=101)

# let heatmap compute a new dendrogram from the agreement
image(bc, col=blueyellow(64), RowSideColors=cols, ColSideColors=cols)

# plot the agreement matrix with the original dendrogram
image(bc, dendrogram=hc, col=blueyellow(64), RowSideColors=cols, ColSideColors=cols)

# run the same test with cutPam as the clustering function
# (the result keeps its historical name 'kmc' although cutPam, not a
# K-means routine, is what is passed in)
kmc <- PerturbationClusterTest(dd, cutPam, nTimes=200, k=3)
image(kmc, dendrogram=hc, col=blueyellow(64), RowSideColors=cols, ColSideColors=cols)

# contrast the behavior when all the data comes from the same group
xx <- matrix(rnorm(100*50, rnorm(100, 0.5)), nrow=100, ncol=50, byrow=FALSE)
hct <- hclust(distanceMatrix(xx, 'pearson'), method='complete')
bct <- PerturbationClusterTest(xx, cutHclust, nTimes=200, k=4, metric='pearson')
summary(bct)
# NOTE(review): 'cols' still encodes the 30/20 split of dd, while xx was
# simulated as a single 50-column group; presumably the colors are reused
# only for visual comparison with the plots above -- confirm this is intended.
image(bct, dendrogram=hct, col=blueyellow(64), RowSideColors=cols, ColSideColors=cols)

# cleanup
rm(d1, d2, dd, cols, hc, bc, kmc, xx, hct, bct)