R Under development (unstable) (2024-01-11 r85801 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> #  fastcluster: Fast hierarchical clustering routines for R and Python
> #
> #  Copyright:
> #    * Until package version 1.1.23: © 2011 Daniel Müllner <https://danifold.net>
> #    * All changes from version 1.1.24 on: © Google Inc. <https://www.google.com>
> #
> # Test script for the R interface
> 
> seed = as.integer(runif(1, 0, 1e9))
> set.seed(seed)
> cat(sprintf("Random seed: %d\n",seed))
Random seed: 445602103
> 
> print_seed <- function() {
+   return(sprintf('
+ Please send a report to the author of the \'fastcluster\' package, Daniel Müllner.
+ For contact details, see <https://danifold.net>. To make the error
+ reproducible, you must include the following number (the random seed value) in
+ your error report: %d.\n\n', seed))
+ }
> 
> hasWardD2 = getRversion() >= '3.1.0'
> 
> # Compare two dendrograms and check whether they are equal, except that
> # ties may be resolved differently.
> compare <- function(dg1, dg2) {
+   h1 <- dg1$height
+   h2 <- dg2$height
+   # "height" vectors may have small numerical errors.
+   rdiffs <- abs(h1-h2)/pmax(abs(h1),abs(h2))
+   rdiffs = rdiffs[complete.cases(rdiffs)]
+   rel_error <-  max(rdiffs)
+   # We allow a relative error of 1e-13.
+   if (rel_error>1e-13) {
+     print(h1)
+     print(h2)
+     cat(sprintf('Height vectors differ! The maximum relative error is %e.\n', rel_error))
+     return(FALSE)
+   }
+   # Filter the indices where consecutive merging distances are distinct.
+   d = diff(dg1$height)
+   b = (c(d,1)!=0 & c(1,d)!=0)
+   #cat(sprintf("Percentage of indices where we can test: %g.\n",100.0*length(b[b])/length(b)))
+   if (any(b)) {
+     m1 = dg1$merge[b,]
+     m2 = dg2$merge[b,]
+ 
+     r = function(i) {
+       if (i<0) {
+         return(1)
+       }
+       else {
+         return(b[i])
+       }
+     }
+ 
+     f = sapply(m1,r)
+     fm1 = m1*f
+     fm2 = m2*f
+     # The "merge" matrices must be identical whereever indices are not ambiguous
+     # due to ties.
+     if (!identical(fm1,fm2)) {
+       cat('Merge matrices differ!\n')
+       return(FALSE)
+     }
+     # Compare the "order" vectors only if all merging distances were distinct.
+     if (all(b) && !identical(dg1$order,dg2$order)) {
+       cat('Order vectors differ!\n')
+       return(FALSE)
+     }
+   }
+   return(TRUE)
+ }
> 
> # Generate uniformly distributed random data
> generate.uniform <- function() {
+   n = sample(10:1000,1)
+   range_exp = runif(1,min=-10, max=10)
+   cat(sprintf("Number of sample points: %d\n",n))
+   cat(sprintf("Dissimilarity range: [0,%g]\n",10^range_exp))
+   d = runif(n*(n-1)/2, min=0, max=10^range_exp)
+   # Fake a compressed distance matrix
+   attributes(d) <- NULL
+   attr(d,"Size") <- n
+   attr(d, "call") <- 'N/A'
+   class(d) <- "dist"
+   return(d)
+ }
> 
> # Generate normally distributed random data
> generate.normal <- function() {
+   n = sample(10:1000,1)
+   dim = sample(2:20,1)
+ 
+   cat (sprintf("Number of sample points: %d\n",n))
+   cat (sprintf("Dimension: %d\n",dim))
+ 
+   pcd = matrix(rnorm(n*dim), c(n,dim))
+   d = dist(pcd)
+   return(d)
+ }
> 
> # Test the clustering functions when a distance matrix is given.
> test.dm <-  function(d) {
+   d2 = d
+   if (hasWardD2) {
+     methods = c('single','complete','average','mcquitty','ward.D','ward.D2','centroid','median')
+   }
+   else {
+     methods = c('single','complete','average','mcquitty','ward','centroid','median')
+   }
+   for (method in methods) {
+     cat(paste('Method :', method, '\n'))
+     dg_stats       = stats::hclust(d, method=method)
+     if (method == 'ward') {
+       method = 'ward.D'
+     }
+     dg_fastcluster = fastcluster::hclust(d, method=method)
+     if (!identical(d,d2)) {
+       cat('Input array was corrupted!\n')
+       stop(print_seed())
+     }
+     if (!compare(dg_stats, dg_fastcluster)) {
+       stop(print_seed())
+     }
+   }
+   cat('Passed.\n')
+ }
> 
> # Test the clustering functions for vector input in Euclidean space.
> test.vector <-  function() {
+   # generate test data
+   n = sample(10:1000,1)
+   dim = sample(2:20,1)
+   cat (sprintf("Number of sample points: %d\n",n))
+   cat (sprintf("Dimension: %d\n",dim))
+ 
+   range_exp = runif(1,min=-10, max=10)
+   pcd = matrix(rnorm(n*dim, sd=10^range_exp), c(n,dim))
+   pcd2 = pcd
+   # test
+   method='single'
+   cat(paste('Method:', method, '\n'))
+   for (metric in c('euclidean', 'maximum', 'manhattan', 'canberra', 'minkowski')) {
+     cat(paste('    Metric:', metric, '\n'))
+     if (metric=='minkowski') {
+       p = runif(1, min=1.0, max=10.0)
+       cat (sprintf("    p: %g\n",p));
+       dg_fastcluster = fastcluster::hclust.vector(pcd, method=method, metric=metric, p=p)
+       d = dist(pcd, method=metric, p=p)
+     }
+     else {
+       dg_fastcluster = fastcluster::hclust.vector(pcd, method=method, metric=metric)
+       d = dist(pcd, method=metric)
+     }
+     d2 = d
+     dg_fastcluster_dist = fastcluster::hclust(d, method=method)
+     if (!identical(d,d2) || !identical(pcd,pcd2)) {
+       cat('Input array was corrupted!\n')
+       stop(print_seed())
+     }
+     if (!compare(dg_fastcluster_dist, dg_fastcluster)) {
+       stop(print_seed())
+     }
+   }
+   for (method in c('ward','centroid','median') ) {
+     cat(paste('Method:', method, '\n'))
+     dg_fastcluster = fastcluster::hclust.vector(pcd, method=method)
+     if (!identical(pcd,pcd2)) {
+       cat('Input array was corrupted!\n')
+       stop(print_seed())
+     }
+     d = dist(pcd)
+     if(method == "ward" && hasWardD2) {
+       method = "ward.D2"
+     }
+     else
+     {
+       # Workaround: fastcluster::hclust expects _squared_ euclidean distances.
+       d = d^2
+     }
+     d2 = d
+     dg_fastcluster_dist = fastcluster::hclust(d, method=method)
+     if (!identical(d,d2)) {
+       cat('Input array was corrupted!\n')
+       stop(print_seed())
+     }
+     if(method != "ward.D2") {
+       dg_fastcluster_dist$height = sqrt(dg_fastcluster_dist$height)
+     }
+     # The Euclidean methods may have small numerical errors due to squaring/
+     # taking the root in the Euclidean distances.
+     if (!compare(dg_fastcluster_dist, dg_fastcluster)) {
+       stop(print_seed())
+     }
+   }
+   cat('Passed.\n')
+ }
> 
> # Test the single linkage function with the "binary" metric
> test.vector.binary <- function() {
+   # generate test data
+   cat (sprintf("Uniform sampling for the 'binary' metric:\n"))
+   n = sample(10:400,1)
+   dim = sample(n:(2*n),1)
+   cat (sprintf("Number of sample points: %d\n",n))
+   cat (sprintf("Dimension: %d\n",dim))
+   pcd = matrix(sample(-1:2, n*dim, replace=T), c(n,dim))
+   pcd2 = pcd
+   # test
+   method='single'
+   metric='binary'
+   cat(paste('Method:', method, '\n'))
+   cat(paste('    Metric:', metric, '\n'))
+   dg_fastcluster = fastcluster::hclust.vector(pcd, method=method, metric=metric)
+   d = dist(pcd, method=metric)
+   d2 = d
+   dg_fastcluster_dist       = fastcluster::hclust(d, method=method)
+   if (!identical(d,d2) || !identical(d,d2)) {
+     cat('Input array was corrupted!\n')
+     stop(print_seed())
+   }
+   if (!compare(dg_fastcluster_dist, dg_fastcluster)) {
+     stop(print_seed())
+   }
+   cat('Passed.\n')
+ }
> 
> 
> N = 15
> for (i in (1:N)) {
+   if (i%%2==1) {
+     cat(sprintf('Random test %d of %d (uniform distribution of distances):\n',i,2*N))
+     d = generate.uniform()
+   }
+   else {
+     cat(sprintf('Random test %d of %d (Gaussian density):\n',i,2*N))
+     d = generate.normal()
+   }
+   test.dm(d)
+ }
Random test 1 of 30 (uniform distribution of distances):
Number of sample points: 557
Dissimilarity range: [0,1.63267e-06]
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
Random test 2 of 30 (Gaussian density):
Number of sample points: 329
Dimension: 4
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
Random test 3 of 30 (uniform distribution of distances):
Number of sample points: 168
Dissimilarity range: [0,2.47854e+09]
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
Random test 4 of 30 (Gaussian density):
Number of sample points: 477
Dimension: 16
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
Random test 5 of 30 (uniform distribution of distances):
Number of sample points: 220
Dissimilarity range: [0,2.2738e+07]
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
Random test 6 of 30 (Gaussian density):
Number of sample points: 865
Dimension: 6
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
Random test 7 of 30 (uniform distribution of distances):
Number of sample points: 712
Dissimilarity range: [0,3.02168]
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
Random test 8 of 30 (Gaussian density):
Number of sample points: 185
Dimension: 3
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
Random test 9 of 30 (uniform distribution of distances):
Number of sample points: 763
Dissimilarity range: [0,168684]
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
Random test 10 of 30 (Gaussian density):
Number of sample points: 36
Dimension: 6
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
Random test 11 of 30 (uniform distribution of distances):
Number of sample points: 435
Dissimilarity range: [0,3.2354e-05]
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
Random test 12 of 30 (Gaussian density):
Number of sample points: 111
Dimension: 3
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
Random test 13 of 30 (uniform distribution of distances):
Number of sample points: 317
Dissimilarity range: [0,273697]
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
Random test 14 of 30 (Gaussian density):
Number of sample points: 112
Dimension: 16
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
Random test 15 of 30 (uniform distribution of distances):
Number of sample points: 290
Dissimilarity range: [0,1.77159e-05]
Method : single 
Method : complete 
Method : average 
Method : mcquitty 
Method : ward.D 
Method : ward.D2 
Method : centroid 
Method : median 
Passed.
> for (i in (N+1:N)) {
+   cat(sprintf('Random test %d of %d (Gaussian density):\n',i,2*N))
+   test.vector()
+   test.vector.binary()
+ }
Random test 16 of 30 (Gaussian density):
Number of sample points: 125
Dimension: 4
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 3.88649
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 40
Dimension: 76
Method: single 
    Metric: binary 
Passed.
Random test 17 of 30 (Gaussian density):
Number of sample points: 133
Dimension: 9
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 3.77223
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 288
Dimension: 405
Method: single 
    Metric: binary 
Passed.
Random test 18 of 30 (Gaussian density):
Number of sample points: 579
Dimension: 13
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 4.78537
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 68
Dimension: 115
Method: single 
    Metric: binary 
Passed.
Random test 19 of 30 (Gaussian density):
Number of sample points: 348
Dimension: 10
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 5.52154
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 307
Dimension: 545
Method: single 
    Metric: binary 
Passed.
Random test 20 of 30 (Gaussian density):
Number of sample points: 648
Dimension: 17
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 5.56664
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 225
Dimension: 226
Method: single 
    Metric: binary 
Passed.
Random test 21 of 30 (Gaussian density):
Number of sample points: 354
Dimension: 14
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 7.0146
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 353
Dimension: 691
Method: single 
    Metric: binary 
Passed.
Random test 22 of 30 (Gaussian density):
Number of sample points: 707
Dimension: 20
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 2.1132
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 190
Dimension: 217
Method: single 
    Metric: binary 
Passed.
Random test 23 of 30 (Gaussian density):
Number of sample points: 812
Dimension: 18
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 1.0137
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 21
Dimension: 33
Method: single 
    Metric: binary 
Passed.
Random test 24 of 30 (Gaussian density):
Number of sample points: 885
Dimension: 5
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 2.73422
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 281
Dimension: 549
Method: single 
    Metric: binary 
Passed.
Random test 25 of 30 (Gaussian density):
Number of sample points: 100
Dimension: 18
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 9.89781
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 370
Dimension: 534
Method: single 
    Metric: binary 
Passed.
Random test 26 of 30 (Gaussian density):
Number of sample points: 472
Dimension: 11
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 8.37118
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 40
Dimension: 46
Method: single 
    Metric: binary 
Passed.
Random test 27 of 30 (Gaussian density):
Number of sample points: 793
Dimension: 9
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 9.82664
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 354
Dimension: 582
Method: single 
    Metric: binary 
Passed.
Random test 28 of 30 (Gaussian density):
Number of sample points: 806
Dimension: 4
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 2.23315
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 184
Dimension: 204
Method: single 
    Metric: binary 
Passed.
Random test 29 of 30 (Gaussian density):
Number of sample points: 445
Dimension: 7
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 5.13641
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 25
Dimension: 31
Method: single 
    Metric: binary 
Passed.
Random test 30 of 30 (Gaussian density):
Number of sample points: 436
Dimension: 10
Method: single 
    Metric: euclidean 
    Metric: maximum 
    Metric: manhattan 
    Metric: canberra 
    Metric: minkowski 
    p: 3.19936
Method: ward 
Method: centroid 
Method: median 
Passed.
Uniform sampling for the 'binary' metric:
Number of sample points: 201
Dimension: 258
Method: single 
    Metric: binary 
Passed.
> 
> cat('Done.\n')
Done.
> 
> proc.time()
   user  system elapsed 
  19.42    0.14   19.56