R Under development (unstable) (2024-04-27 r86487 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(Thresher) Loading required package: ClassDiscovery Loading required package: cluster Loading required package: oompaBase Loading required package: PCDimension > > # randomly generate data (reproducibly) with labels > suppressWarnings( RNGversion("3.5.0") ) > set.seed(782345) > N <- 25 > fix <- sample(LETTERS[1:3], N, replace=TRUE) > vary <- sample(LETTERS[4:6], N, replace=TRUE) > > # make nuemeric versions of the same data > X <- as.numeric(factor(fix)) > names(X) <- fix > table(X, names(X)) X A B C 1 8 0 0 2 0 7 0 3 0 0 10 > Y <- as.numeric(factor(vary)) > names(Y) <- vary > table(Y, names(Y)) Y D E F 1 10 0 0 2 0 8 0 3 0 0 7 > > # check label matching (alphabetically) > chTable <- table(fix, vary) > chTable vary fix D E F A 1 4 3 B 1 3 3 C 8 1 1 > matchLabels(chTable) vary fix D E F C 8 1 1 A 1 4 3 B 1 3 3 > # final matching should be A <-> E (1 <- 2), B <-> F (2 <- 3), C <-> D (3 <- 1) > > # check numerics > tab <- table(X, Y) > tab Y X 1 2 3 1 1 4 3 2 1 3 3 3 8 1 1 > matchLabels(tab) Y X 1 2 3 3 8 1 1 1 1 4 3 2 1 3 3 > > # What we really want to is change the arbitrary labels > # in the "vary" assignments > # First, test the numeric version > S <- remap(X, Y) > table(Y, S) # map from Y to S is 1->3, 2->1, 3->2, as it shoul be S Y 1 2 3 1 0 0 10 2 8 0 0 3 0 7 0 > table(X, S) S X 1 2 3 1 4 3 1 2 3 3 1 3 1 1 8 > > # Next the character/factor version > R <- remap(fix, vary) > table(vary, R) # maps D->F, E->D, F->E R vary D E F D 0 0 10 E 8 0 0 F 0 7 0 > table(fix, R) # better diagnal than the original, shown next R fix D E F A 4 3 1 B 3 3 1 C 1 1 8 > table(fix, vary) vary fix D E F A 1 4 3 B 1 3 3 C 8 1 1 > > proc.time() user system elapsed 0.73 0.14 0.87