R Under development (unstable) (2025-07-11 r88405 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2025 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> #devtools::install_github("psolymos/opticut")
> #spelling::spell_check_package("~/repos/opticut")
> library(opticut)
Loading required package: pbapply
opticut 0.1-4 	 2025-07-12
> 
> ## --- run examples with \dontrun sections ---
> 
> help_pages <- c("opticut-package",
+     "dolina", "birdrec",
+     "opticut", "optilevels", "multicut",
+     "sindex", "lorenz",
+     "beta2i",
+     "allComb", "rankComb",
+     #"bestmodel",
+     "uncertainty",
+     "occolors", "ocoptions")
> 
> for (i in help_pages) {
+     cat("\n\n---------- opticut example:", i, "----------\n\n")
+     eval(parse(text=paste0("example(", i,
+         ", package = 'opticut', run.dontrun = TRUE)")))
+ }


---------- opticut example: opticut-package ----------



---------- opticut example: dolina ----------


dolina> data(dolina)

dolina> str(dolina)
List of 3
 $ xtab: num [1:512, 1:42] 0 0 0 0 0 0 0 0 0 0 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:512] "10A1Q" "10A1T" "10A2Q" "10A2T" ...
  .. ..$ : chr [1:42] "aacu" "amin" "apol" "apur" ...
 $ samp:'data.frame':	512 obs. of  9 variables:
  ..$ sample  : Factor w/ 256 levels "10A1","10A2",..: 1 1 2 2 3 3 4 4 5 5 ...
  ..$ dolina  : int [1:512] 10 10 10 10 10 10 10 10 10 10 ...
  ..$ microhab: Factor w/ 4 levels "dead.wood","litter",..: 2 2 2 2 2 2 2 2 2 2 ...
  ..$ mhab    : Factor w/ 4 levels "LI","DW","TL",..: 1 1 1 1 1 1 1 1 1 1 ...
  ..$ method  : Factor w/ 2 levels "Q","T": 1 2 1 2 1 2 1 2 1 2 ...
  ..$ aspect  : Factor w/ 5 levels "eastern","flat",..: 4 4 4 4 4 4 2 2 3 3 ...
  ..$ stratum : Factor w/ 4 levels "1bottom","2middle",..: 4 4 3 3 2 2 1 1 2 2 ...
  ..$ lmoist  : num [1:512] 1 1 1 1 1 1 1.5 1.5 1 1 ...
  ..$ lthick  : num [1:512] 2 2 2.5 2.5 3 3 0.5 0.5 1.5 1.5 ...
 $ taxa:'data.frame':	42 obs. of  2 variables:
  ..$ scientific.name: Factor w/ 42 levels "Acanthinula aculeata",..: 1 2 32 3 4 5 10 8 13 11 ...
  ..$ family         : Factor w/ 17 levels "Aciculidae","Carychiidae",..: 16 10 1 10 3 3 3 3 17 3 ...

dolina> ## species richness by microhabitat and method
dolina> Richness <- rowSums(dolina$xtab > 0)

dolina> boxplot(Richness ~ mhab + method, dolina$samp,
dolina+     ylab="Species richness", main="Dolina data set",
dolina+     col=rep(c("#2C7BB6", "#D7191C"), each=4))


---------- opticut example: birdrec ----------


birdrc> data(birdrec)

birdrc> str(birdrec)
List of 3
 $ xtab: int [1:3967, 1:156] 0 0 0 0 0 0 0 0 0 0 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3967] "T_IG_ABMI_388_2015_1_STATION_NW_89:0:0" "T_IG_ABMI_599_2015_1_STATION_NE_89:0:0" "T_IG_ABMI_855_2015_1_STATION_NW_89:0:0" "T_IG_ABMI_855_2015_1_STATION_SE_89:0:0" ...
  .. ..$ : chr [1:156] "AlderFlycatcher" "AmericanBittern" "AmericanCoot" "AmericanCrow" ...
 $ samp:'data.frame':	3967 obs. of  16 variables:
  ..$ PKEY      : Factor w/ 3967 levels "T_IG_ABMI_388_2015_1_STATION_NW_89:0:0",..: 1 2 3 4 5 6 7 8 9 10 ...
  ..$ POINT     : Factor w/ 367 levels "T_IG_ABMI_1_2015_1_STATION_NE",..: 120 197 258 259 81 142 200 230 30 57 ...
  ..$ SITE      : Factor w/ 124 levels "1","1022","1023",..: 31 51 67 67 21 37 51 59 8 15 ...
  ..$ YEAR      : int [1:3967] 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ...
  ..$ MONTH     : int [1:3967] 3 3 3 3 4 4 4 4 4 4 ...
  ..$ MDAY      : int [1:3967] 31 31 31 31 1 1 1 1 2 2 ...
  ..$ HOUR      : int [1:3967] 0 0 0 0 0 0 0 0 0 0 ...
  ..$ MINUTE    : int [1:3967] 0 0 0 0 0 0 0 0 0 0 ...
  ..$ YDAY      : int [1:3967] 89 89 89 89 90 90 90 90 91 91 ...
  ..$ RAIN      : int [1:3967] 0 0 0 0 0 0 0 0 0 0 ...
  ..$ WIND      : int [1:3967] 1 0 0 0 2 1 1 2 0 1 ...
  ..$ INDUSTRY  : int [1:3967] 2 1 0 0 0 2 1 2 1 0 ...
  ..$ NOISE     : int [1:3967] 2 0 0 0 0 0 0 0 0 0 ...
  ..$ MICROPHONE: int [1:3967] 0 0 0 0 0 7 0 0 0 0 ...
  ..$ TOY       : Factor w/ 8 levels "Early1","Early2",..: 1 1 1 1 1 1 1 1 1 1 ...
  ..$ TOD       : Factor w/ 2 levels "Morning","Midnight": 2 2 2 2 2 2 2 2 2 2 ...
 $ taxa:'data.frame':	156 obs. of  7 variables:
  ..$ Species           : Factor w/ 156 levels "AlderFlycatcher",..: 1 2 3 4 5 6 7 8 145 9 ...
  ..$ CommonName        : Factor w/ 156 levels "Alder Flycatcher",..: 1 2 3 4 5 6 7 8 145 9 ...
  ..$ ScientificName    : Factor w/ 156 levels "Acanthis flammea",..: 47 20 54 42 116 53 109 134 153 117 ...
  ..$ Family            : Factor w/ 43 levels " Vespertilionidae",..: 40 4 31 14 17 16 27 39 29 15 ...
  ..$ Order             : Factor w/ 18 levels "Anseriformes",..: 12 13 11 12 12 8 12 12 14 12 ...
  ..$ Class             : Factor w/ 3 levels "Amphibia","Aves",..: 2 2 2 2 2 2 2 2 2 2 ...
  ..$ MigratoryBehaviour: Factor w/ 3 levels "Neotropical migrant",..: 1 2 2 2 2 2 1 2 3 2 ...

birdrc> aggregate(rowSums(birdrec$xtab),
birdrc+     list(TOY=birdrec$samp$TOY, TOD=birdrec$samp$TOD), mean)
      TOY      TOD         x
1  Early1  Morning 0.7073171
2  Early2  Morning 1.7207207
3  Early3  Morning 4.7593052
4    Mid4  Morning 6.2938005
5    Mid5  Morning 6.8922652
6    Mid6  Morning 5.8924419
7    Mid7  Morning 6.0426136
8   Late8  Morning 4.0459364
9  Early1 Midnight 0.3896104
10 Early2 Midnight 0.8431373
11 Early3 Midnight 1.2451613
12   Mid4 Midnight 1.0800000
13   Mid5 Midnight 1.3566879
14   Mid6 Midnight 1.1585366
15   Mid7 Midnight 1.0639535
16  Late8 Midnight 0.5560748

birdrc> boxplot(rowSums(birdrec$xtab) ~ TOD + TOY, birdrec$samp,
birdrc+     col=c("gold", "tomato"), ylab="# detections")

birdrc> y <- ifelse(birdrec$xtab > 0, 1, 0)

birdrc> g <- paste0(gsub("[[:digit:]]", "", as.character(birdrec$samp$TOY)),
birdrc+     substr(as.character(birdrec$samp$TOD), 1, 4))

birdrc> g <- factor(g, levels=c("EarlyMorn", "MidMorn", "LateMorn",
birdrc+     "EarlyMidn", "MidMidn", "LateMidn"))

birdrc> ## binary response model
birdrc> oc <- opticut(y ~ 1, strata=g, dist="binomial")

birdrc> ## multi-level response model
birdrc> mc <- multicut(y ~ 1, strata=g, dist="binomial")

birdrc> ## testing equality of labels
birdrc> splito <- as.character(summary(oc)$summary$split)

birdrc> splitm <- as.character(summary(mc)$summary$split)

birdrc> table(splito == splitm)

FALSE  TRUE 
   38   118 

birdrc> ## seeing how much those differ
birdrc> bpo <- summary(oc)$bestpart

birdrc> bpm <- summary(mc)$bestpart

birdrc> rs <- rowSums(abs(bpo-bpm))

birdrc> table(rs)
rs
  0   1   2 
118  35   3 

birdrc> 10 * bpo[rs > 0,] + bpm[rs > 0,]
                        EarlyMorn MidMorn LateMorn EarlyMidn MidMidn LateMidn
AlderFlycatcher                 0      11       10         0       0        0
BlackbackedWoodpecker          10      11        0         0       0        0
BlackbilledMagpie              11      11       10         0       0        0
BlackcappedChickadee           11      10       11         0       0        0
BlackAndWhiteWarbler           10      11        0         0       0        0
BluewingedTeal                 10      11       11         0      11        0
BlueJay                        11      11       11         0       0       10
BonapartesGull                 10      11       11         0       0        0
BrewersBlackbird               11      10        0         0       0        0
BrownCreeper                   11      10        0        10       0        0
CommonRaven                    11      11       10         0       0        0
DarkeyedJunco                  10      11       11         0       0        0
ForstersTern                   10      11        0         0       0        0
FoxSparrow                     10      11        0         0       0        0
Gadwall                        11      11        0         0      10        0
GreatGrayOwl                    1       0        0        11       0        1
GreaterYellowlegs              10      11        0         0       0        0
GreenwingedTeal                11      10        0        11      11        0
Killdeer                       10      11        0         0      10        0
LeContesSparrow                 0      11       10         0      11       11
LesserYellowlegs               11      11       10         0       0        0
LincolnsSparrow                10      11       11         0       0        0
MagnoliaWarbler                 0      11       10         0       0        0
MourningDove                   11      11       10         0       0        0
NorthernFlicker                11      11       10         0       0        0
NorthernWaterthrush             0      11       10         0       0        0
OrangecrownedWarbler           10      11       11         0       0        0
RedbreastedNuthatch            10      11       11         0       0        0
RedwingedBlackbird             11      11       10         0       0        0
SandhillCrane                  11      11       10         0       0        0
SwampSparrow                   10      11       11         0       0        0
TreeSwallow                    10      11       11         0       0        0
WarblingVireo                  10      11       11         0       0        0
WesternKingbird                11      10        0         0      11        0
WesternMeadowlark              11      11       10         0       0        0
WinterWren                     10      11       11         0       0        0
YellowbelliedFlycatcher         0      11       11         0       0       10
YellowrumpedWarbler            10      11       11         0       0        0


---------- opticut example: opticut ----------


optict> ## --- Gaussian
optict> ## simple example from Legendre 2013
optict> ## Indicator Species: Computation, in
optict> ## Encyclopedia of Biodiversity, Volume 4
optict> ## https://dx.doi.org/10.1016/B978-0-12-384719-5.00430-5
optict> gr <- as.factor(paste0("X", rep(1:5, each=5)))

optict> spp <- cbind(Species1=rep(c(4,6,5,3,2), each=5),
optict+     Species2=c(rep(c(8,4,6), each=5), 4,4,2, rep(0,7)),
optict+     Species3=rep(c(18,2,0,0,0), each=5))

optict> rownames(spp) <- gr

optict> ## must add some noise to avoid perfect fit
optict> spp[6, "Species1"] <- 7

optict> spp[1, "Species3"] <- 17

optict> spp
   Species1 Species2 Species3
X1        4        8       17
X1        4        8       18
X1        4        8       18
X1        4        8       18
X1        4        8       18
X2        7        4        2
X2        6        4        2
X2        6        4        2
X2        6        4        2
X2        6        4        2
X3        5        6        0
X3        5        6        0
X3        5        6        0
X3        5        6        0
X3        5        6        0
X4        3        4        0
X4        3        4        0
X4        3        2        0
X4        3        0        0
X4        3        0        0
X5        2        0        0
X5        2        0        0
X5        2        0        0
X5        2        0        0
X5        2        0        0

optict> ## all partitions
optict> summary(ocall <- opticut(spp ~ 1, strata=gr, dist="gaussian", comb="all"))
Multivariate opticut results, comb = all, dist = gaussian

Call:
opticut.formula(formula = spp ~ 1, strata = gr, dist = "gaussian", 
    comb = "all")

Best supported models with logLR >= 2:
         split assoc      I mu0  mu1 logLR      w
Species2 X1+X3   +++ 0.9866 2.0  7.0 14.82 0.4995
Species1 X2+X3   +++ 0.8617 3.0  5.6 16.74 0.7035
Species3    X1   +++ 1.0000 0.5 17.8 54.26 1.0000
15 binary splits


optict> summary(opticut(spp, strata=gr, dist="gaussian", comb="all")) # alternative
Multivariate opticut results, comb = all, dist = gaussian

Call:
opticut.default(Y = spp, strata = gr, dist = "gaussian", comb = "all")

Best supported models with logLR >= 2:
         split assoc      I mu0  mu1 logLR      w
Species2 X1+X3   +++ 0.9866 2.0  7.0 14.82 0.4995
Species1 X2+X3   +++ 0.8617 3.0  5.6 16.74 0.7035
Species3    X1   +++ 1.0000 0.5 17.8 54.26 1.0000
15 binary splits


optict> ## rank based partitions
optict> summary(ocrank <- opticut(spp ~ 1, strata=gr, dist="gaussian", comb="rank"))
Multivariate opticut results, comb = rank, dist = gaussian

Call:
opticut.formula(formula = spp ~ 1, strata = gr, dist = "gaussian", 
    comb = "rank")

Best supported models with logLR >= 2:
         split assoc      I mu0  mu1 logLR      w
Species2 X1+X3   +++ 0.9866 2.0  7.0 14.82 0.4996
Species1 X2+X3   +++ 0.8617 3.0  5.6 16.74 0.7036
Species3    X1   +++ 1.0000 0.5 17.8 54.26 1.0000
4 binary splits


optict> summary(opticut(spp, strata=gr, dist="gaussian", comb="rank")) # alternative
Multivariate opticut results, comb = rank, dist = gaussian

Call:
opticut.default(Y = spp, strata = gr, dist = "gaussian", comb = "rank")

Best supported models with logLR >= 2:
         split assoc      I mu0  mu1 logLR      w
Species2 X1+X3   +++ 0.9866 2.0  7.0 14.82 0.4996
Species1 X2+X3   +++ 0.8617 3.0  5.6 16.74 0.7036
Species3    X1   +++ 1.0000 0.5 17.8 54.26 1.0000
4 binary splits


optict> ## --- Binomial
optict> ## simulated binary data
optict> set.seed(1234)

optict> n <- 200

optict> x0 <- sample(1:4, n, TRUE)

optict> x1 <- ifelse(x0 <= 2, 1, 0)

optict> x2 <- rnorm(n, 0.5, 1)

optict> p1 <- plogis(-0.5 + 2*x1 + -0.8*x2)

optict> Y1 <- rbinom(n, 1, p1)

optict> p2 <- plogis(-0.1 + 2*ifelse(x0==4,1,0) + -0.8*x2)

optict> Y2 <- rbinom(n, 1, p2)

optict> p3 <- plogis(-0.1 + -0.8*x2)

optict> Y3 <- rbinom(n, 1, p3)

optict> Y <- cbind(SPP1=Y1, SPP2=Y2, SPP3=Y3)

optict> X <- model.matrix(~x2)

optict> ## all partitions, single species
optict> Z <- allComb(x0)

optict> opticut1(Y1, X, Z, dist="binomial")
Univariate opticut results, comb = all, dist = binomial
I = 0.7139; w = 0.9989; H = 0.9978; logL_null = -132.3

Best supported models with logLR >= 2:
    assoc      I    mu0    mu1  logLR         w
1+2   +++ 0.7139 0.3389 0.7544 16.807 9.989e-01
4     --- 0.6557 0.6465 0.2755  9.764 8.724e-04
2     +++ 0.5952 0.4594 0.7700  8.336 2.091e-04
1      ++ 0.4260 0.4986 0.7118  3.165 1.188e-06
3      -- 0.3744 0.5914 0.3972  2.500 6.111e-07
7 binary splits (2 models not shown)


optict> ## rank based partitions, single species
optict> opticut1(Y1, X, as.factor(x0), dist="binomial")
Univariate opticut results, comb = rank, dist = binomial
I = 0.7139; w = 0.9989; H = 0.9978; logL_null = -132.3

Best supported models with logLR >= 2:
      assoc      I    mu0    mu1  logLR         w
1+2     +++ 0.7139 0.3389 0.7544 16.807 0.9989185
1+2+3   +++ 0.6557 0.2755 0.6465  9.764 0.0008724
2       +++ 0.5952 0.4594 0.7700  8.336 0.0002091
3 binary splits 


optict> ## all partitions, multiple species
optict> (m1 <- opticut(Y ~ x2, strata=x0, dist="poisson", comb="all"))
Multivariate opticut results, comb = all, dist = poisson

Call:
opticut.formula(formula = Y ~ x2, strata = x0, dist = "poisson", 
    comb = "all")

3 species, 7 binary splits


optict> summary(m1)
Multivariate opticut results, comb = all, dist = poisson

Call:
opticut.formula(formula = Y ~ x2, strata = x0, dist = "poisson", 
    comb = "all")

Best supported models with logLR >= 2:
     split assoc      I    mu0    mu1 logLR      w
SPP1   1+2   +++ 0.4137 0.3118 0.7518 8.340 0.9438
SPP2     4    ++ 0.3370 0.4550 0.9175 5.664 0.8333
7 binary splits
1 species not shown


optict> ## show all species
optict> summary(m1, cut=0)
Multivariate opticut results, comb = all, dist = poisson

Call:
opticut.formula(formula = Y ~ x2, strata = x0, dist = "poisson", 
    comb = "all")

Best supported models with logLR >= 2:
     split assoc      I    mu0    mu1 logLR      w
SPP1   1+2   +++ 0.4137 0.3118 0.7518 8.340 0.9438
SPP2     4    ++ 0.3370 0.4550 0.9175 5.664 0.8333
7 binary splits
1 species not shown


optict> ## plot best partitions and indicator values
optict> plot(m1)

optict> ## model weights for all species
optict> wplot(m1)

optict> ## different ways of plotting weights for single species
optict> wplot(m1$species[[1]])

optict> wplot(m1, which = 1)

optict> ## rank based partitions, multiple species
optict> summary(m2 <- opticut(Y ~ x2, strata=x0, dist="poisson", comb="rank"))
Multivariate opticut results, comb = rank, dist = poisson

Call:
opticut.formula(formula = Y ~ x2, strata = x0, dist = "poisson", 
    comb = "rank")

Best supported models with logLR >= 2:
     split assoc      I    mu0    mu1 logLR      w
SPP1   1+2   +++ 0.4137 0.3118 0.7518 8.340 0.9462
SPP2     4    ++ 0.3370 0.4550 0.9175 5.664 0.8687
3 binary splits
1 species not shown


optict> ## subset results
optict> summary(subset(m2, 1:2))
Multivariate opticut results, comb = rank, dist = poisson

Call:
opticut.formula(formula = Y ~ x2, strata = x0, dist = "poisson", 
    comb = "rank")

Best supported models with logLR >= 2:
     split assoc      I    mu0    mu1 logLR      w
SPP1   1+2   +++ 0.4137 0.3118 0.7518 8.340 0.9462
SPP2     4    ++ 0.3370 0.4550 0.9175 5.664 0.8687
3 binary splits


optict> ## best partition
optict> head(bestpart(m2))
  SPP1 SPP2 SPP3
4    0    1    1
4    0    1    1
2    1    0    0
2    1    0    0
1    1    0    1
4    0    1    1

optict> ## best model
optict> mods <- bestmodel(m2)

optict> mods
$SPP1

Call:  stats::glm(formula = Y ~ . - 1, family = Family, data = XX)

Coefficients:
`(Intercept)`             Z1             x2  
      -1.1653         0.8801        -0.2521  

Degrees of Freedom: 200 Total (i.e. Null);  197 Residual
Null Deviance:	    212 
Residual Deviance: 119.3 	AIC: 313.3

$SPP2

Call:  stats::glm(formula = Y ~ . - 1, family = Family, data = XX)

Coefficients:
`(Intercept)`             Z1             x2  
      -0.7875         0.7014        -0.3343  

Degrees of Freedom: 200 Total (i.e. Null);  197 Residual
Null Deviance:	    200 
Residual Deviance: 116.4 	AIC: 322.4

$SPP3

Call:  stats::glm(formula = Y ~ . - 1, family = Family, data = XX)

Coefficients:
`(Intercept)`             Z1             x2  
      -1.1635         0.3235        -0.5092  

Degrees of Freedom: 200 Total (i.e. Null);  197 Residual
Null Deviance:	    266 
Residual Deviance: 126.8 	AIC: 266.8


optict> ## explore further
optict> confint(mods[[1]])
Waiting for profiling to be done...
                   2.5 %      97.5 %
`(Intercept)` -1.5658931 -0.81050022
Z1             0.4493567  1.33725540
x2            -0.4472278 -0.05197309

optict> ## MLE and variance-covariance matrix (species 1)
optict> getMLE(m2, which=1, vcov=TRUE)
$coef
`(Intercept)`            Z1            x2 
   -1.1652948     0.8800652    -0.2520902 

$vcov
              `(Intercept)`            Z1            x2
`(Intercept)`   0.036867825 -0.0356672658 -0.0034330887
Z1             -0.035667266  0.0508649662 -0.0001321465
x2             -0.003433089 -0.0001321465  0.0101950604

$dist
[1] "poisson"


optict> ## fitted values
optict> head(fitted(m2))
       SPP1      SPP2      SPP3
1 0.2476256 0.6758558 0.2709819
2 0.3098497 0.9098139 0.4261917
3 0.6518694 0.3765488 0.2341692
4 0.7523114 0.4553558 0.3127900
5 0.8162386 0.5073633 0.5096759
6 0.2635700 0.7341598 0.3073866

optict> ## prediction for new data
optict> head(predict(m2, gnew=x0, xnew=data.frame(x2=x2)))
       SPP1      SPP2      SPP3
1 0.2476256 0.6758558 0.2709819
2 0.3098497 0.9098139 0.4261917
3 0.6518694 0.3765488 0.2341692
4 0.7523114 0.4553558 0.3127900
5 0.8162386 0.5073633 0.5096759
6 0.2635700 0.7341598 0.3073866

optict> ## --- Zero-inflated Negative Binomial
optict> ## dolina example
optict> data(dolina)

optict> ## stratum as ordinal
optict> dolina$samp$stratum <- as.integer(dolina$samp$stratum)

optict> ## filter species to speed up things a bit
optict> Y <- dolina$xtab[,colSums(dolina$xtab > 0) >= 20]

optict> ## opticut results, note the cloglog link function
optict> dol <- opticut(Y ~ stratum + lmoist + method, data=dolina$samp,
optict+     strata=dolina$samp$mhab, dist="zinb:cloglog")

optict> summary(dol)
Multivariate opticut results, comb = rank, dist = zinb:cloglog

Call:
opticut.formula(formula = Y ~ stratum + lmoist + method, data = dolina$samp, 
    strata = dolina$samp$mhab, dist = "zinb:cloglog")

Best supported models with logLR >= 2:
        split assoc      I       mu0      mu1  logLR      w
cort DW+TL+RO   +++ 1.0000 8.421e-10  0.20209 19.344 0.6425
hobv    DW+RO   +++ 0.9037 3.445e-02  0.68142 39.943 1.0000
bbip    DW+RO   +++ 0.7063 2.968e-01  1.72407 31.991 0.9995
vidi    DW+RO   +++ 0.4557 9.997e-01  2.67379  9.851 0.9955
clam    DW+RO    ++ 0.4209 1.414e-01  0.34702  3.556 0.4724
dper    DW+RO   +++ 0.4033 8.053e+00 18.93801 12.585 0.9897
ppyg    LI+RO    ++ 0.3438 4.854e+00  9.94144  5.172 0.9156
amin    LI+RO    ++ 0.2707 2.971e+00  5.17609  7.875 0.9581
mbor       DW   +++ 0.9350 1.666e-03  0.04959 19.852 0.9971
iiso       RO   +++ 0.8448 7.930e-02  0.94235 16.216 0.9994
ffau       RO   +++ 0.7776 7.499e-02  0.59934 25.852 1.0000
ctri       RO    ++ 0.6662 3.477e-01  1.73560  6.629 0.9111
estr       RO   +++ 0.6611 2.392e-01  1.17226 15.321 0.9952
pinc       RO   +++ 0.6255 3.555e-01  1.54316 14.571 0.5043
tuni       RO   +++ 0.5913 1.738e+00  6.76551 14.886 0.9997
pvic       RO    ++ 0.4202 4.646e-01  1.13801  6.200 0.5948
apur       RO    ++ 0.3766 5.196e+00 11.47426  4.975 0.6152
bcan       TL   +++ 0.7371 1.451e-01  0.95901 10.756 0.9647
3 binary splits
1 species not shown


optict> ## vertical plot orientation
optict> plot(dol, horizontal=FALSE, pos=1, upper=0.8)

optict> ## parallel computing comparisons
optict> library(parallel)

optict> cl <- makeCluster(2)

optict> ## sequential, all combinations (2^(K-1) - 1)
optict> system.time(opticut(Y ~ stratum + lmoist + method, data=dolina$samp,
optict+     strata=dolina$samp$mhab, dist="zinb", comb="all", cl=NULL))
   user  system elapsed 
  23.10    1.42   24.51 

optict> ## sequential, rank based combinations (K - 1)
optict> system.time(opticut(Y ~ stratum + lmoist + method, data=dolina$samp,
optict+     strata=dolina$samp$mhab, dist="zinb", comb="rank", cl=NULL))
   user  system elapsed 
  14.78    1.00   15.78 

optict> ## parallel, all combinations (2^(K-1) - 1)
optict> system.time(opticut(Y ~ stratum + lmoist + method, data=dolina$samp,
optict+     strata=dolina$samp$mhab, dist="zinb", comb="all", cl=cl))
   user  system elapsed 
   0.03    0.00   14.25 

optict> ## parallel, rank based combinations (K - 1)
optict> system.time(opticut(Y ~ stratum + lmoist + method, data=dolina$samp,
optict+     strata=dolina$samp$mhab, dist="zinb", comb="rank", cl=cl))
   user  system elapsed 
   0.00    0.00    7.98 

optict> stopCluster(cl)

optict> ## --- Customizing distributions
optict> ## we may want to expand the Zero-inflation component in a ZIP model
optict> ## see how the return value needs to be structured
optict> fun <- function(Y, X, linkinv, zi_term, ...) {
optict+     X <- as.matrix(X)
optict+     mod <- pscl::zeroinfl(Y ~ X-1 | zi_term, dist = "poisson", ...)
optict+     list(coef=coef(mod),
optict+         logLik=logLik(mod),
optict+         linkinv=mod$linkinv)
optict+ }

optict> Xdol <- model.matrix(~ stratum + lmoist + method, data=dolina$samp)

optict> ## this fits the null model (i.e. no partitions added)
optict> fun(Y[,"amin"], Xdol, zi_term=dolina$samp$method)
$coef
count_X(Intercept)     count_Xstratum      count_Xlmoist     count_XmethodT 
        1.30243481        -0.14266929         0.02460366        -0.64335270 
  zero_(Intercept)      zero_zi_termT 
       -0.56590398         0.61664601 

$logLik
'log Lik.' -788.9897 (df=6)

$linkinv
function (eta) 
.Call(C_logit_linkinv, eta)
<environment: namespace:stats>


optict> ## now we can use dist=fun
optict> opticut1(Y[,"amin"], Xdol, Z=dolina$samp$mhab,
optict+     dist=fun, zi_term=dolina$samp$method)
Univariate opticut results, comb = rank, dist = fun
I = 0.2652; w = 0.9849; H = 0.9703; logL_null = -789

Best supported models with logLR >= 2:
         assoc      I    mu0    mu1  logLR         w
LI+RO      +++ 0.2652 0.7754 0.8560 15.370 9.849e-01
RO         +++ 0.2358 0.7941 0.8618 11.189 1.505e-02
LI+DW+RO    ++ 0.2036 0.7409 0.8121  5.328 4.286e-05
3 binary splits 


optict> dol2 <- opticut(Y ~ stratum + lmoist + method, data=dolina$samp,
optict+     strata=dolina$samp$mhab, dist=fun, zi_term=dolina$samp$method)

optict> summary(dol2)
Multivariate opticut results, comb = rank, dist = fun

Call:
opticut.formula(formula = Y ~ stratum + lmoist + method, data = dolina$samp, 
    strata = dolina$samp$mhab, dist = fun, zi_term = dolina$samp$method)

Best supported models with logLR >= 2:
        split assoc      I       mu0    mu1  logLR      w
cort DW+TL+RO   +++ 1.0000 7.247e-10 0.1302 19.933 0.6502
hobv    DW+RO   +++ 0.9117 9.715e-02 0.6996 49.368 1.0000
bbip    DW+RO   +++ 0.7037 3.155e-01 0.7260 45.369 1.0000
vidi    DW+RO   +++ 0.4519 6.570e-01 0.8353 19.411 1.0000
ctri    LI+RO   +++ 0.3951 5.638e-01 0.7488 27.661 1.0000
amin    LI+RO   +++ 0.2652 7.754e-01 0.8560 15.370 0.9849
ppyg    LI+RO   +++ 0.2518 8.781e-01 0.9234 41.925 1.0000
aacu    TL+RO    ++ 0.3969 5.375e-01 0.7291  6.650 0.9738
mbor       DW   +++ 0.9423 4.190e-01 0.9605 23.691 0.9803
clam       DW    ++ 0.4736 3.755e-01 0.6273  5.582 0.7947
iiso       RO   +++ 0.8562 1.757e-01 0.7334 17.176 0.9998
ffau       RO   +++ 0.7118 1.149e-01 0.4355 20.143 0.9475
pinc       RO   +++ 0.6322 2.465e-01 0.5921 14.673 0.5292
estr       RO   +++ 0.6316 2.133e-01 0.5457 14.059 0.9903
tuni       RO   +++ 0.4617 8.441e-01 0.9363 13.350 0.9996
apur       RO   +++ 0.4326 7.859e-01 0.9026 37.612 0.9844
pvic       RO    ++ 0.4205 5.499e-01 0.7497  7.165 0.4985
dper       RO   +++ 0.3616 9.152e-01 0.9584 37.337 0.5118
bcan       TL   +++ 0.7402 3.854e-01 0.8077 11.543 0.9844
3 binary splits


optict> ## current collapse value
optict> getOption("ocoptions")$collapse
[1] "+"

optict> ## factor levels sometimes need to be manipulated
optict> ## before feeding it to opticut
optict> fix_levels(as.factor(c("A b", "C d")), sep=":")
[1] A b C d
Levels: A b C d

optict> fix_levels(as.factor(c("A b", "C d")), sep="")
[1] A b C d
Levels: A b C d


---------- opticut example: optilevels ----------


optlvl> ## --- Factor levels with Gaussian distribution
optlvl> ## simple example from Legendre 2013
optlvl> ## Indicator Species: Computation, in
optlvl> ## Encyclopedia of Biodiversity, Volume 4
optlvl> ## https://dx.doi.org/10.1016/B978-0-12-384719-5.00430-5
optlvl> gr <- as.factor(paste0("X", rep(1:5, each=5)))

optlvl> spp <- cbind(Species1=rep(c(4,6,5,3,2), each=5),
optlvl+     Species2=c(rep(c(8,4,6), each=5), 4,4,2, rep(0,7)),
optlvl+     Species3=rep(c(18,2,0,0,0), each=5))

optlvl> rownames(spp) <- gr

optlvl> ## must add some noise to avoid perfect fit
optlvl> spp[6, "Species1"] <- 7

optlvl> spp[1, "Species3"] <- 17

optlvl> spp
   Species1 Species2 Species3
X1        4        8       17
X1        4        8       18
X1        4        8       18
X1        4        8       18
X1        4        8       18
X2        7        4        2
X2        6        4        2
X2        6        4        2
X2        6        4        2
X2        6        4        2
X3        5        6        0
X3        5        6        0
X3        5        6        0
X3        5        6        0
X3        5        6        0
X4        3        4        0
X4        3        4        0
X4        3        2        0
X4        3        0        0
X4        3        0        0
X5        2        0        0
X5        2        0        0
X5        2        0        0
X5        2        0        0
X5        2        0        0

optlvl> ol <- optilevels(spp[,"Species3"], gr)

optlvl> ol[c("delta", "coef", "rank", "levels")]
$delta
[1]  0 -4 NA NA NA

$coef
       X1 X2 X3 X4 X5
[1,] 17.8  2  0  0  0
[2,] 17.8  2  0  0  0
[3,]   NA NA NA NA NA
[4,]   NA NA NA NA NA
[5,]   NA NA NA NA NA

$rank
     X1 X2 X3 X4 X5
[1,]  5  4  2  2  2
[2,]  3  2  1  1  1
[3,] NA NA NA NA NA
[4,] NA NA NA NA NA
[5,] NA NA NA NA NA

$levels
$levels[[1]]
        X1         X2         X3         X4         X5 
      "X1"       "X2" "X3+X4+X5" "X3+X4+X5" "X3+X4+X5" 

$levels[[2]]
        X1         X2         X3         X4         X5 
      "X1"       "X2" "X3+X4+X5" "X3+X4+X5" "X3+X4+X5" 



optlvl> ## get the final factor level
optlvl> gr1 <- gr

optlvl> levels(gr1) <- ol$level[[length(ol$level)]]

optlvl> table(gr, gr1)
    gr1
gr   X1 X2 X3+X4+X5
  X1  5  0        0
  X2  0  5        0
  X3  0  0        5
  X4  0  0        5
  X5  0  0        5

optlvl> ## compare the models
optlvl> o0 <- lm(spp[,"Species3"] ~ gr - 1)

optlvl> o1 <- lm(spp[,"Species3"] ~ gr1 - 1)

optlvl> data.frame(AIC(o0, o1), delta=AIC(o0, o1)$AIC - AIC(o0))
   df       AIC delta
o0  6 -3.103558     0
o1  4 -7.103558    -4

optlvl> ol$delta # should be identical
[1]  0 -4 NA NA NA

optlvl> ## --- Proportions with Poisson distribution
optlvl> ## simulation
optlvl> set.seed(123)

optlvl> n <- 500 # number of observations

optlvl> k <- 5 # number of habitat types

optlvl> b <- c(-1, -0.2, -0.2, 0.5, 1)

optlvl> names(b) <- LETTERS[1:k]

optlvl> x <- replicate(k, exp(rnorm(n)))

optlvl> x <- x / rowSums(x) # proportions

optlvl> X <- model.matrix(~.-1, data=data.frame(x))

optlvl> lam <- exp(drop(crossprod(t(X), b)))

optlvl> y <- rpois(n, lam)

optlvl> z <- optilevels(y, x, dist="poisson")

optlvl> ## best model refit
optlvl> bestmodel(z)

Call:  stats::glm(formula = Y ~ . - 1, family = Family, data = XX)

Coefficients:
     X1  `X2+X3`       X4       X5  
-0.8941  -0.3098   0.5044   1.0391  

Degrees of Freedom: 500 Total (i.e. Null);  496 Residual
Null Deviance:	    586.2 
Residual Deviance: 548.4 	AIC: 1297

optlvl> ## estimates
optlvl> plogis(z$coef)
            X1        X2        X3        X4       X5
[1,] 0.2880184 0.3761074 0.4665556 0.6289233 0.737896
[2,] 0.2902740 0.4231689 0.4231689 0.6234846 0.738678
[3,]        NA        NA        NA        NA       NA
[4,]        NA        NA        NA        NA       NA
[5,]        NA        NA        NA        NA       NA

optlvl> plogis(b)
        A         B         C         D         E 
0.2689414 0.4501660 0.4501660 0.6224593 0.7310586 

optlvl> ## optimal classification
optlvl> z$rank
     X1 X2 X3 X4 X5
[1,]  1  2  3  4  5
[2,]  1  2  2  3  4
[3,] NA NA NA NA NA
[4,] NA NA NA NA NA
[5,] NA NA NA NA NA

optlvl> ## get the final matrix
optlvl> x1 <- mefa4::groupSums(x, 2, z$levels[[length(z$levels)]])

optlvl> head(x)
          [,1]       [,2]       [,3]       [,4]       [,5]
[1,] 0.2258750 0.21671095 0.14615367 0.17407228 0.23718812
[2,] 0.2256225 0.10514521 0.10039234 0.20888497 0.35995493
[3,] 0.4995209 0.29345552 0.10323013 0.04264214 0.06115135
[4,] 0.1150706 0.22726090 0.09395935 0.20075919 0.36294998
[5,] 0.1998835 0.03883339 0.01372342 0.53850776 0.20905194
[6,] 0.3048449 0.04987853 0.15529267 0.46033369 0.02965023

optlvl> head(x1)
            X1      X2+X3         X4         X5
[1,] 0.2258750 0.36286462 0.17407228 0.23718812
[2,] 0.2256225 0.20553755 0.20888497 0.35995493
[3,] 0.4995209 0.39668566 0.04264214 0.06115135
[4,] 0.1150706 0.32122025 0.20075919 0.36294998
[5,] 0.1998835 0.05255681 0.53850776 0.20905194
[6,] 0.3048449 0.20517120 0.46033369 0.02965023

optlvl> ## compare the models
optlvl> m0 <- glm(y ~ x - 1, family="poisson")

optlvl> m1 <- glm(y ~ x1 - 1, family="poisson")

optlvl> data.frame(AIC(m0, m1), delta=AIC(m0, m1)$AIC - AIC(m0))
   df      AIC     delta
m0  5 1298.452  0.000000
m1  4 1297.381 -1.071501

optlvl> z$delta # should be identical
[1]  0.000000 -1.071501        NA        NA        NA

optlvl> ## dolina example with factor
optlvl> data(dolina)

optlvl> dolina$samp$stratum <- as.integer(dolina$samp$stratum)

optlvl> y <- dolina$xtab[dolina$samp$method == "Q", "ppyg"]

optlvl> x <- dolina$samp$mhab[dolina$samp$method == "Q"]

optlvl> z <- scale(model.matrix(~ stratum + lmoist - 1,
optlvl+     dolina$samp[dolina$samp$method == "Q",]))

optlvl> ## without additional covariates
optlvl> dol1 <- optilevels(y, x, z=NULL, dist="poisson")

optlvl> dol1$rank
     LI DW TL RO
[1,]  3  2  1  4
[2,]  2  1  1  3
[3,] NA NA NA NA
[4,] NA NA NA NA

optlvl> summary(bestmodel(dol1))

Call:
stats::glm(formula = Y ~ . - 1, family = Family, data = XX)

Coefficients:
        Estimate Std. Error z value Pr(>|z|)    
LI       2.16496    0.03201   67.64   <2e-16 ***
`DW+TL`  1.52470    0.04762   32.02   <2e-16 ***
RO       2.44596    0.04249   57.57   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 8332.5  on 256  degrees of freedom
Residual deviance: 3481.6  on 253  degrees of freedom
AIC: 4110

Number of Fisher Scoring iterations: 6


optlvl> ## with additional covariates
optlvl> dol2 <- optilevels(y, x, z, dist="poisson")

optlvl> dol2$rank
     LI DW TL RO
[1,]  4  1  2  3
[2,]  3  1  1  2
[3,]  2  1  1  2
[4,] NA NA NA NA

optlvl> summary(bestmodel(dol2))

Call:
stats::glm(formula = Y ~ . - 1, family = Family, data = XX)

Coefficients:
        Estimate Std. Error z value Pr(>|z|)    
`LI+RO`  2.24842    0.02698  83.324  < 2e-16 ***
`DW+TL`  1.46044    0.04997  29.227  < 2e-16 ***
stratum -0.08785    0.02514  -3.495 0.000475 ***
lmoist   0.20390    0.02031  10.038  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 8332.5  on 256  degrees of freedom
Residual deviance: 3377.2  on 252  degrees of freedom
AIC: 4007.6

Number of Fisher Scoring iterations: 6


optlvl> ## compare the two models
optlvl> AIC(bestmodel(dol1), bestmodel(dol2))
                df      AIC
bestmodel(dol1)  3 4110.004
bestmodel(dol2)  4 4007.575


---------- opticut example: multicut ----------


multct> ## --- Gaussian
multct> ## simple example from Legendre 2013
multct> ## Indicator Species: Computation, in
multct> ## Encyclopedia of Biodiversity, Volume 4
multct> ## https://dx.doi.org/10.1016/B978-0-12-384719-5.00430-5
multct> gr <- as.factor(paste0("X", rep(1:5, each=5)))

multct> spp <- cbind(Species1=rep(c(4,6,5,3,2), each=5),
multct+     Species2=c(rep(c(8,4,6), each=5), 4,4,2, rep(0,7)),
multct+     Species3=rep(c(18,2,0,0,0), each=5))

multct> rownames(spp) <- gr

multct> ## must add some noise to avoid perfect fit
multct> spp[6, "Species1"] <- 7

multct> spp[1, "Species3"] <- 17

multct> spp
   Species1 Species2 Species3
X1        4        8       17
X1        4        8       18
X1        4        8       18
X1        4        8       18
X1        4        8       18
X2        7        4        2
X2        6        4        2
X2        6        4        2
X2        6        4        2
X2        6        4        2
X3        5        6        0
X3        5        6        0
X3        5        6        0
X3        5        6        0
X3        5        6        0
X4        3        4        0
X4        3        4        0
X4        3        2        0
X4        3        0        0
X4        3        0        0
X5        2        0        0
X5        2        0        0
X5        2        0        0
X5        2        0        0
X5        2        0        0

multct> ## negative expected values are not good
multct> oco <- ocoptions(fix_fitted=TRUE)

multct> summary(ocall <- multicut(spp ~ 1, strata=gr, dist="gaussian"))
Multivariate multticut results, dist = gaussian

Call:
multicut.formula(formula = spp ~ 1, strata = gr, dist = "gaussian")

Species models with logLR >= 2:
         split assoc      I logLR
Species2 X1+X3   +++ 0.9993 32.53
Species1 X2+X3   +++ 0.9705 52.87
Species3    X1   +++ 1.0000 91.55


multct> summary(multicut(spp, strata=gr, dist="gaussian")) # alternative
Multivariate multticut results, dist = gaussian

Call:
multicut.default(Y = spp, strata = gr, dist = "gaussian")

Species models with logLR >= 2:
         split assoc      I logLR
Species2 X1+X3   +++ 0.9993 32.53
Species1 X2+X3   +++ 0.9705 52.87
Species3    X1   +++ 1.0000 91.55


multct> ocoptions(oco) # reset options

multct> ## --- Binomial
multct> ## simulated binary data
multct> set.seed(1234)

multct> n <- 200

multct> x0 <- sample(1:4, n, TRUE)

multct> x1 <- ifelse(x0 <= 2, 1, 0)

multct> x2 <- rnorm(n, 0.5, 1)

multct> p1 <- plogis(-0.5 + 2*x1 + -0.8*x2)

multct> Y1 <- rbinom(n, 1, p1)

multct> p2 <- plogis(-0.1 + 2*ifelse(x0==4,1,0) + -0.8*x2)

multct> Y2 <- rbinom(n, 1, p2)

multct> p3 <- plogis(-0.1 + -0.8*x2)

multct> Y3 <- rbinom(n, 1, p3)

multct> Y <- cbind(SPP1=Y1, SPP2=Y2, SPP3=Y3)

multct> X <- model.matrix(~x2)

multct> (m0 <- multicut1(Y1, X, as.factor(x0), dist="binomial"))
Univariate multicut results, dist = binomial
logLR = 17.77 (logL_null = -132.3)

Expected values:
     1      2      3      4 
0.7260 0.7771 0.4076 0.2789 


multct> lcplot(m0)

multct> summary(m1 <- multicut(Y ~ x2, strata=x0, dist="poisson"))
Multivariate multticut results, dist = poisson

Call:
multicut.formula(formula = Y ~ x2, strata = x0, dist = "poisson")

Species models with logLR >= 2:
     split assoc      I logLR
SPP1   1+2   +++ 0.5048 8.894
SPP2     4    ++ 0.3644 5.902
1 species not shown


multct> plot(m1)

multct> ## subset results
multct> summary(subset(m1, 1:2))
Multivariate multticut results, dist = poisson

Call:
multicut.formula(formula = Y ~ x2, strata = x0, dist = "poisson")

Species models with logLR >= 2:
     split assoc      I logLR
SPP1   1+2   +++ 0.5048 8.894
SPP2     4    ++ 0.3644 5.902


multct> ## best partition
multct> head(bestpart(m1))
  SPP1 SPP2 SPP3
4    0    1    1
4    0    1    1
2    1    0    0
2    1    0    0
1    1    0    1
4    0    1    1

multct> ## best model
multct> mods <- bestmodel(m1)

multct> mods
$SPP1

Call:  stats::glm(formula = Y ~ . - 1, family = Family, data = XX)

Coefficients:
`(Intercept)`             Z2             Z3             Z4             x2  
     -0.32787        0.07209       -0.65520       -1.03924       -0.25016  

Degrees of Freedom: 200 Total (i.e. Null);  195 Residual
Null Deviance:	    212 
Residual Deviance: 118.2 	AIC: 316.2

$SPP2

Call:  stats::glm(formula = Y ~ . - 1, family = Family, data = XX)

Coefficients:
`(Intercept)`             Z2             Z3             Z4             x2  
      -0.6518        -0.1977        -0.1922         0.5662        -0.3358  

Degrees of Freedom: 200 Total (i.e. Null);  195 Residual
Null Deviance:	    200 
Residual Deviance: 115.9 	AIC: 325.9

$SPP3

Call:  stats::glm(formula = Y ~ . - 1, family = Family, data = XX)

Coefficients:
`(Intercept)`             Z2             Z3             Z4             x2  
     -0.85213       -0.31133       -0.03570        0.06478       -0.51151  

Degrees of Freedom: 200 Total (i.e. Null);  195 Residual
Null Deviance:	    266 
Residual Deviance: 126.7 	AIC: 270.7


multct> ## explore further
multct> confint(mods[[1]])
Waiting for profiling to be done...
                   2.5 %      97.5 %
`(Intercept)` -0.7365077  0.03360653
Z2            -0.4137503  0.57263652
Z3            -1.2951680 -0.04816757
Z4            -1.7561319 -0.38232824
x2            -0.4438465 -0.05168407

multct> ## MLE and variance-covariance matrix (species 1)
multct> getMLE(m1, which = 1, vcov=TRUE)
$coef
`(Intercept)`            Z2            Z3            Z4            x2 
  -0.32787491    0.07209324   -0.65519918   -1.03924068   -0.25016397 

$vcov
              `(Intercept)`            Z2          Z3            Z4
`(Intercept)`   0.038311276 -3.706629e-02 -0.03722583 -0.0369859922
Z2             -0.037066290  6.267864e-02  0.03704133  0.0370358101
Z3             -0.037225827  3.704133e-02  0.09956289  0.0370294262
Z4             -0.036985992  3.703581e-02  0.03702943  0.1203610295
x2             -0.003575346  8.222807e-05  0.00052985 -0.0001430668
                         x2
`(Intercept)` -3.575346e-03
Z2             8.222807e-05
Z3             5.298500e-04
Z4            -1.430668e-04
x2             1.003152e-02

$dist
[1] "poisson"


multct> ## fitted values
multct> head(fitted(m1))
       SPP1      SPP2      SPP3
1 0.2027267 0.6752329 0.2850377
2 0.2532343 0.9101916 0.4492035
3 0.6720834 0.3536149 0.2338745
4 0.7747911 0.4279878 0.3127995
5 0.7816706 0.5813804 0.5038994
6 0.2156772 0.7337562 0.3235123

multct> ## prediction for new data
multct> head(predict(m1, gnew=x0, xnew=data.frame(x2=x2)))
       SPP1      SPP2      SPP3
1 0.2027267 0.6752329 0.2850377
2 0.2532343 0.9101916 0.4492035
3 0.6720834 0.3536149 0.2338745
4 0.7747911 0.4279878 0.3127995
5 0.7816706 0.5813804 0.5038994
6 0.2156772 0.7337562 0.3235123

multct> ## --- Zero-inflated Negative Binomial
multct> ## dolina example
multct> data(dolina)

multct> ## stratum as ordinal
multct> dolina$samp$stratum <- as.integer(dolina$samp$stratum)

multct> ## filter species to speed up things a bit
multct> Y <- dolina$xtab[,colSums(dolina$xtab > 0) >= 20]

multct> ## opticut results, note the cloglog link function
multct> dol <- multicut(Y ~ stratum + lmoist + method, data=dolina$samp,
multct+     strata=dolina$samp$mhab, dist="zinb:cloglog")

multct> summary(dol)
Multivariate multticut results, dist = zinb:cloglog

Call:
multicut.formula(formula = Y ~ stratum + lmoist + method, data = dolina$samp, 
    strata = dolina$samp$mhab, dist = "zinb:cloglog")

Species models with logLR >= 2:
     split assoc      I  logLR
hobv DW+RO   +++ 0.9302 41.753
bbip DW+RO   +++ 0.8204 38.877
pinc DW+RO   +++ 0.7474 18.724
clam DW+RO    ++ 0.5769  4.589
vidi DW+RO   +++ 0.5371 10.329
pvic DW+RO    ++ 0.5128  7.786
dper DW+RO   +++ 0.4695 13.555
ppyg LI+RO    ++ 0.3695  5.209
amin LI+RO   +++ 0.3264  8.692
cort TL+RO   +++ 1.0000 26.996
mbor    DW   +++ 1.0000 25.139
iiso    RO   +++ 0.8720 16.352
ffau    RO   +++ 0.8698 28.280
estr    RO   +++ 0.7096 15.917
ctri    RO    ++ 0.7088  6.916
tuni    RO   +++ 0.6401 15.430
apur    RO    ++ 0.5724  7.090
bcan    TL   +++ 0.8914 12.414
1 species not shown


multct> ## vertical plot orientation
multct> plot(dol, horizontal=FALSE, pos=1, upper=0.8)

multct> ## parallel
multct> library(parallel)

multct> cl <- makeCluster(2)

multct> multicut(Y ~ stratum + lmoist + method, data=dolina$samp,
multct+     strata=dolina$samp$mhab, dist="zip",cl=cl)
Multivariate multicut results, dist = zip

Call:
multicut.formula(formula = Y ~ stratum + lmoist + method, data = dolina$samp, 
    strata = dolina$samp$mhab, dist = "zip", cl = cl)

19 species


multct> stopCluster(cl)

multct> ## --- Customizing distributions
multct> ## we may want to expand the Zero-inflation component in a ZIP model
multct> ## see how the return value needs to be structured
multct> fun <- function(Y, X, linkinv, zi_term, ...) {
multct+     X <- as.matrix(X)
multct+     mod <- pscl::zeroinfl(Y ~ X-1 | zi_term, dist = "poisson", ...)
multct+     list(coef=coef(mod),
multct+         logLik=logLik(mod),
multct+         linkinv=mod$linkinv)
multct+ }

multct> Xdol <- model.matrix(~ stratum + lmoist + method, data=dolina$samp)

multct> ## this fits the null model (i.e. no partitions added)
multct> fun(Y[,"amin"], Xdol, zi_term=dolina$samp$method)
$coef
count_X(Intercept)     count_Xstratum      count_Xlmoist     count_XmethodT 
        1.30243481        -0.14266929         0.02460366        -0.64335270 
  zero_(Intercept)      zero_zi_termT 
       -0.56590398         0.61664601 

$logLik
'log Lik.' -788.9897 (df=6)

$linkinv
function (eta) 
.Call(C_logit_linkinv, eta)
<environment: namespace:stats>


multct> ## now we can use dist=fun
multct> multicut1(Y[,"amin"], Xdol, Z=dolina$samp$mhab,
multct+     dist=fun, zi_term=dolina$samp$method)
Univariate multicut results, dist = fun
logLR = 17.9 (logL_null = -789)

Expected values:
    LI     DW     TL     RO 
0.8460 0.7864 0.7782 0.8767 


multct> dol2 <- multicut(Y ~ stratum + lmoist + method, data=dolina$samp,
multct+     strata=dolina$samp$mhab, dist=fun, zi_term=dolina$samp$method)

multct> summary(dol2)
Multivariate multticut results, dist = fun

Call:
multicut.formula(formula = Y ~ stratum + lmoist + method, data = dolina$samp, 
    strata = dolina$samp$mhab, dist = fun, zi_term = dolina$samp$method)

Species models with logLR >= 2:
        split assoc      I  logLR
mbor DW+TL+RO   +++ 1.0000 28.969
bcan DW+TL+RO   +++ 0.8922 13.148
hobv    DW+RO   +++ 0.9351 51.488
ffau    DW+RO   +++ 0.8436 24.161
bbip    DW+RO   +++ 0.8080 54.198
pinc    DW+RO   +++ 0.7532 18.820
clam    DW+RO    ++ 0.6069  6.393
vidi    DW+RO   +++ 0.5604 20.579
pvic    DW+RO   +++ 0.5245  9.294
dper    DW+RO   +++ 0.5006 50.815
apur    LI+RO   +++ 0.6789 49.794
ctri    LI+RO   +++ 0.6475 32.208
amin    LI+RO   +++ 0.3393 17.897
ppyg    LI+RO   +++ 0.2812 43.916
cort    TL+RO   +++ 1.0000 27.399
aacu    TL+RO    ++ 0.4452  6.758
iiso       RO   +++ 0.8738 17.276
estr       RO   +++ 0.6762 14.571
tuni       RO   +++ 0.5201 13.684



---------- opticut example: sindex ----------



---------- opticut example: lorenz ----------


lorenz> set.seed(1)

lorenz> x <- c(rexp(100, 10), rexp(200, 1))

lorenz> l <- lorenz(x)

lorenz> head(l)
               p            L           x
[1,] 0.000000000 0.000000e+00 0.000000000
[2,] 0.003333333 8.256055e-06 0.001700975
[3,] 0.006666667 2.634516e-05 0.003726853
[4,] 0.010000000 4.819482e-05 0.004501631
[5,] 0.013333333 7.346108e-05 0.005205545
[6,] 0.016666667 1.008080e-04 0.005634216

lorenz> tail(l)
               p         L        x
[296,] 0.9833333 0.8954549 3.173340
[297,] 0.9866667 0.9112748 3.259337
[298,] 0.9900000 0.9279782 3.441357
[299,] 0.9933333 0.9470105 3.921174
[300,] 0.9966667 0.9692697 4.586020
[301,] 1.0000000 1.0000000 6.331284

lorenz> summary(l)
Lorenz curve summary

  x[t]   p[t]   L[t]      G      S      J 
0.6827 0.6433 0.1934 0.5909 0.8367 0.4500 


lorenz> summary(unclass(l))
       p              L                 x          
 Min.   :0.00   Min.   :0.00000   Min.   :0.00000  
 1st Qu.:0.25   1st Qu.:0.01708   1st Qu.:0.09968  
 Median :0.50   Median :0.08771   Median :0.36917  
 Mean   :0.50   Mean   :0.20551   Mean   :0.68448  
 3rd Qu.:0.75   3rd Qu.:0.32198   3rd Qu.:1.01262  
 Max.   :1.00   Max.   :1.00000   Max.   :6.33128  

lorenz> (q <- c(0.05, 0.5, 0.95))
[1] 0.05 0.50 0.95

lorenz> (p_i <- quantile(l, probs=q, type="p"))
        5%        50%        95% 
0.02035104 0.36917103 2.25115270 

lorenz> iquantile(l, values=p_i, type="p")
0.02035104 0.36917103 2.25115270 
      0.05       0.50       0.95 

lorenz> (p_i <- quantile(l, probs=q, type="L"))
       5%       50%       95% 
0.2198279 1.4773854 4.5860202 

lorenz> iquantile(l, values=p_i, type="L")
 0.2198279  1.4773854  4.5860202 
0.05039184 0.50430635 0.96926972 

lorenz> op <- par(mfrow=c(2,1))

lorenz> plot(l, lwd=2, tangent=2, h=3, v=4)

lorenz> abline(0, 1, lty=2, col="grey")

lorenz> abline(1, -1, lty=2, col="grey")

lorenz> plot(l, type="x", lwd=2, h=3, v=4)

lorenz> par(op)

lorenz> ## Lorenz-tangent approach to binarize a multi-level problem
lorenz> n <- 100

lorenz> g <- as.factor(sort(sample(LETTERS[1:4], n, replace=TRUE, prob=4:1)))

lorenz> x <- rpois(n, exp(as.integer(g)))

lorenz> mu <- aggregate(x, list(g), mean)

lorenz> (l <- lorenz(mu$x, table(g)))
     p          L         x
  0.00 0.00000000  0.000000
A 0.41 0.07932011  2.731707
B 0.70 0.22592068  7.137931
C 0.86 0.45467422 20.187500
D 1.00 1.00000000 55.000000
attr(,"summary")
        t      x[t]      p[t]      L[t]         G         S         J 
3.0000000 7.1379310 0.7000000 0.2259207 0.3700425 0.9259207 0.4740793 
attr(,"class")
[1] "lorenz" "matrix"

lorenz> (s <- summary(l))
Lorenz curve summary

  x[t]   p[t]   L[t]      G      S      J 
7.1379 0.7000 0.2259 0.3700 0.9259 0.4741 


lorenz> plot(l)

lorenz> abline(0, 1, lty=2)

lorenz> lines(rep(s["p[t]"], 2), c(s["p[t]"], s["L[t]"]), col=2)


---------- opticut example: beta2i ----------


beta2i> x <- seq(-5, 5, 0.1)

beta2i> Col <- occolors(c("red", "blue"))(10)

beta2i> plot(x, beta2i(x), type = "n")

beta2i> s <- seq(1, 0.1, -0.1)

beta2i> for (i in 1:10) {
beta2i+     lines(x, beta2i(x, scale = s[i]), col = Col[i])
beta2i+     text(1.5 - 0.2, beta2i(1.5, scale = s[i]), s[i], col = Col[i])
beta2i+ }


---------- opticut example: allComb ----------


allCmb> kComb(k = 2)
     [,1]
[1,]    1
[2,]    0

allCmb> kComb(k = 3)
     [,1] [,2] [,3]
[1,]    1    0    0
[2,]    0    1    0
[3,]    0    0    1

allCmb> kComb(k = 4)
     [,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,]    1    0    0    0    1    1    1
[2,]    0    1    0    0    1    0    0
[3,]    0    0    1    0    0    1    0
[4,]    0    0    0    1    0    0    1

allCmb> ## finding all combinations
allCmb> (f <- rep(LETTERS[1:4], each=2))
[1] "A" "A" "B" "B" "C" "C" "D" "D"

allCmb> (mc <- allComb(f, collapse = "_"))
  A B C D A_B A_C A_D
A 1 0 0 0   1   1   1
A 1 0 0 0   1   1   1
B 0 1 0 0   1   0   0
B 0 1 0 0   1   0   0
C 0 0 1 0   0   1   0
C 0 0 1 0   0   1   0
D 0 0 0 1   0   0   1
D 0 0 0 1   0   0   1
attr(,"collapse")
[1] "_"
attr(,"comb")
[1] "all"

allCmb> ## checking for complementary entries
allCmb> checkComb(mc) # TRUE
[1] TRUE
attr(,"comp")
     i j
attr(,"same")
     i j

allCmb> ## adding complementary entries to the matrix
allCmb> mc2 <- cbind(z = 1 - mc[,1], mc[,c(1:ncol(mc), 1)])

allCmb> colnames(mc2) <- 1:ncol(mc2)

allCmb> mc2
  1 2 3 4 5 6 7 8 9
A 0 1 0 0 0 1 1 1 1
A 0 1 0 0 0 1 1 1 1
B 1 0 1 0 0 1 0 0 0
B 1 0 1 0 0 1 0 0 0
C 1 0 0 1 0 0 1 0 0
C 1 0 0 1 0 0 1 0 0
D 1 0 0 0 1 0 0 1 0
D 1 0 0 0 1 0 0 1 0

allCmb> checkComb(mc2) # FALSE
[1] FALSE
attr(,"comp")
     i j
[1,] 1 2
[2,] 1 9
attr(,"same")
     i j
[1,] 9 2


---------- opticut example: rankComb ----------


rnkCmb> ## simulate some data
rnkCmb> set.seed(1234)

rnkCmb> n <- 200

rnkCmb> x0 <- sample(1:4, n, TRUE)

rnkCmb> x1 <- ifelse(x0 %in% 1:2, 1, 0)

rnkCmb> x2 <- rnorm(n, 0.5, 1)

rnkCmb> lam <- exp(0.5 + 0.5*x1 + -0.2*x2)

rnkCmb> Y <- rpois(n, lam)

rnkCmb> ## binary partitions
rnkCmb> head(rc <- rankComb(Y, model.matrix(~x2), as.factor(x0), dist="poisson"))
  2 1+2 1+2+4
4 0   0     1
4 0   0     1
2 1   1     1
2 1   1     1
1 0   1     1
4 0   0     1

rnkCmb> attr(rc, "est") # expected values in factor levels
       1        2        3        4 
2.767840 2.792267 1.563285 1.757870 

rnkCmb> aggregate(exp(0.5 + 0.5*x1), list(x0=x0), mean) # true values
  x0        x
1  1 2.718282
2  2 2.718282
3  3 1.648721
4  4 1.648721

rnkCmb> ## simple example
rnkCmb> oComb(1:4, "+")
  1 1+2 1+2+3
1 1   1     1
2 0   1     1
3 0   0     1
4 0   0     0

rnkCmb> ## using estimates
rnkCmb> oComb(attr(rc, "est"))
  3 3+4 1+3+4
1 0   0     1
2 0   0     0
3 1   1     1
4 0   1     1


---------- opticut example: uncertainty ----------


uncrtn> set.seed(2345)

uncrtn> n <- 50

uncrtn> x0 <- sample(1:4, n, TRUE)

uncrtn> x1 <- ifelse(x0 %in% 1:2, 1, 0)

uncrtn> x2 <- rnorm(n, 0.5, 1)

uncrtn> x3 <- ifelse(x0 %in% 2:4, 1, 0)

uncrtn> lam1 <- exp(0.5 + 1*x1 + -0.2*x2)

uncrtn> Y1 <- rpois(n, lam1)

uncrtn> lam2 <- exp(1 + 0.5*x3)

uncrtn> Y2 <- rpois(n, lam2)

uncrtn> Y3 <- rpois(n, exp(0))

uncrtn> Y <- cbind(Spp1=Y1, Spp2=Y2, Spp3=Y3)

uncrtn> oc <- opticut(Y ~ x2, strata=x0, dist="poisson", comb="rank")

uncrtn> ## asymptotic confidence intervals
uncrtn> (uc1 <- uncertainty(oc, type="asymp", B=999))
Multivariate multicut uncertainty results, type = asymp, B = 999


uncrtn> summary(uc1)
Multivariate multicut uncertainty results
type = asymp, B = 999, level = 0.95

     split R      I    Lower  Upper
Spp1   1+2 1 0.3756 0.205644 0.5150
Spp3 1+2+3 1 0.3438 0.016874 0.6984
Spp2 2+3+4 1 0.1354 0.007452 0.2944

uncrtn> ## bootstrap-based confidence intervals
uncrtn> (uc2 <- uncertainty(oc, type="boot", B=19))
Multivariate multicut uncertainty results, type = boot, B = 19


uncrtn> summary(uc2)
Multivariate multicut uncertainty results
type = boot, B = 19, level = 0.95

     split R      I    Lower  Upper
Spp1   1+2 1 0.3688 0.266965 0.4710
Spp3 1+2+3 1 0.3689 0.023399 0.8796
Spp2 2+3+4 1 0.1354 0.003183 0.2706

uncrtn> ## use user-supplied indices
uncrtn> ## multi-model bootstrap based uncertainties
uncrtn> B <- replicate(25, sample.int(n, replace=TRUE))

uncrtn> check_strata(oc, B) # check representation
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
attr(,"nx")
[1] 4
attr(,"nmat")
 [1] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4

uncrtn> (uc3 <- uncertainty(oc, type="multi", B=B))
Multivariate multicut uncertainty results, type = multi, B = 25


uncrtn> summary(uc3)
Multivariate multicut uncertainty results
type = multi, B = 25, level = 0.95

     split      R      I   Lower Upper
Spp1   1+2 0.7308 0.3980 0.27604 0.552
Spp3 1+2+3 0.3846 0.5624 0.21287 1.000
Spp2 2+3+4 0.4615 0.1760 0.09648 0.266

uncrtn> ## best partitions:
uncrtn> ## selection frequencies for strata and species
uncrtn> bestpart(uc3)
       Spp1       Spp2      Spp3
1 1.0000000 0.03846154 0.6538462
2 1.0000000 0.73076923 0.6538462
3 0.0000000 0.92307692 0.8461538
4 0.2692308 0.65384615 0.0000000

uncrtn> heatmap(bestpart(uc3), scale="none", col=occolors()(25))

uncrtn> ## bootstrap smoothed predictions per strata
uncrtn> bsmooth(uc3)
      Spp1     Spp2      Spp3
1 4.242728 3.413736 0.6399826
2 4.242728 4.330780 0.6884190
3 1.808303 4.583842 0.7642625
4 2.429689 4.188160 0.3545554

uncrtn> heatmap(bestpart(uc3), scale="none", col=occolors()(25))

uncrtn> ## individual species results
uncrtn> uc3$uncertainty
$Spp1
Univariate multicut uncertainty results, type = multi, B = 25

     best                 I               mu0             mu1       
 Length:26          Min.   :0.2675   Min.   :1.159   Min.   :3.436  
 Class :character   1st Qu.:0.3360   1st Qu.:1.502   1st Qu.:3.974  
 Mode  :character   Median :0.4088   Median :1.777   Median :4.184  
                    Mean   :0.4065   Mean   :1.808   Mean   :4.243  
                    3rd Qu.:0.4719   3rd Qu.:2.108   3rd Qu.:4.405  
                    Max.   :0.5892   Max.   :2.764   Max.   :5.502  

$Spp2
Univariate multicut uncertainty results, type = multi, B = 25

     best                 I               mu0             mu1       
 Length:26          Min.   :0.0258   Min.   :2.312   Min.   :3.844  
 Class :character   1st Qu.:0.1105   1st Qu.:3.097   1st Qu.:4.400  
 Mode  :character   Median :0.1344   Median :3.359   Median :4.659  
                    Mean   :0.1559   Mean   :3.394   Mean   :4.627  
                    3rd Qu.:0.2209   3rd Qu.:3.724   3rd Qu.:4.944  
                    Max.   :0.2813   Max.   :4.225   Max.   :5.321  

$Spp3
Univariate multicut uncertainty results, type = multi, B = 25

     best                 I               mu0                 mu1        
 Length:26          Min.   :0.1554   Min.   :1.000e-08   Min.   :0.6147  
 Class :character   1st Qu.:0.2951   1st Qu.:2.790e-01   1st Qu.:0.6926  
 Mode  :character   Median :0.3702   Median :3.674e-01   Median :0.7865  
                    Mean   :0.4280   Mean   :3.546e-01   Mean   :0.8334  
                    3rd Qu.:0.4884   3rd Qu.:4.624e-01   3rd Qu.:0.9216  
                    Max.   :1.0000   Max.   :6.937e-01   Max.   :1.7098  


uncrtn> bestpart(uc3$uncertainty[[1]])
  [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
1    1    1    1    1    1    1    1    1    1     1     1     1     1     1
2    1    1    1    1    1    1    1    1    1     1     1     1     1     1
3    0    0    0    0    0    0    0    0    0     0     0     0     0     0
4    0    0    0    0    0    0    0    0    0     0     1     0     0     1
  [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
1     1     1     1     1     1     1     1     1     1     1     1     1
2     1     1     1     1     1     1     1     1     1     1     1     1
3     0     0     0     0     0     0     0     0     0     0     0     0
4     0     1     0     1     0     1     1     0     0     0     1     0

uncrtn> bsmooth(uc3$uncertainty[[1]])
      [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
1 4.353309 4.123784 4.796411 5.502443 4.966979 3.956581 4.263636 4.066393
2 4.353309 4.123784 4.796411 5.502443 4.966979 3.956581 4.263636 4.066393
3 1.968847 1.752355 2.048311 1.802075 2.764274 1.307073 2.327590 2.026677
4 1.968847 1.752355 2.048311 1.802075 2.764274 1.307073 2.327590 2.026677
      [,9]    [,10]    [,11]    [,12]    [,13]    [,14]    [,15]    [,16]
1 4.547626 4.154616 3.436177 4.320181 4.421835 3.510415 4.258540 3.983873
2 4.547626 4.154616 3.436177 4.320181 4.421835 3.510415 4.258540 3.983873
3 2.368197 1.727303 1.399548 2.128446 1.869369 1.312151 1.990268 1.277240
4 2.368197 1.727303 3.436177 2.128446 1.869369 3.510415 1.990268 3.983873
     [,17]    [,18]    [,19]    [,20]    [,21]    [,22]    [,23]    [,24]
1 3.906009 3.714836 3.921941 4.242182 4.214182 4.605661 3.970598 4.998947
2 3.906009 3.714836 3.921941 4.242182 4.214182 4.605661 3.970598 4.998947
3 2.251073 2.147011 1.628562 2.208800 1.491471 1.532036 1.559550 1.292377
4 2.251073 3.714836 1.628562 4.242182 4.214182 1.532036 1.559550 1.292377
     [,25]    [,26]
1 4.049467 4.024312
2 4.049467 4.024312
3 1.158853 1.676410
4 4.049467 1.676410

uncrtn> ## block bootstrap
uncrtn> block_fun <- function()
uncrtn+     unlist(lapply(unique(x0), function(z) if (sum(x0==z) < 2)
uncrtn+         which(x0==z) else sample(which(x0==z), sum(x0==z), replace=TRUE)))

uncrtn> B <- replicate(25, block_fun())

uncrtn> check_strata(oc, B) # check representation
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
attr(,"nx")
[1] 4
attr(,"nmat")
 [1] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4

uncrtn> summary(uncertainty(oc, type="multi", B=B))
Multivariate multicut uncertainty results
type = multi, B = 25, level = 0.95

     split      R      I  Lower  Upper
Spp1   1+2 0.8077 0.3841 0.2339 0.5036
Spp3 1+2+3 0.4231 0.4046 0.2604 0.5733
Spp2 2+3+4 0.5000 0.2080 0.1272 0.3285

uncrtn> ## jackknife
uncrtn> B <- sapply(1:n, function(i) which((1:n) != i))

uncrtn> check_strata(oc, B) # check representation
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[46] TRUE TRUE TRUE TRUE TRUE
attr(,"nx")
[1] 4
attr(,"nmat")
 [1] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
[39] 4 4 4 4 4 4 4 4 4 4 4 4

uncrtn> summary(uncertainty(oc, type="multi", B=B))
Multivariate multicut uncertainty results
type = multi, B = 50, level = 0.95

     split      R      I  Lower  Upper
Spp1   1+2 1.0000 0.3772 0.3604 0.3995
Spp3 1+2+3 0.6471 0.3526 0.3227 0.4232
Spp2 2+3+4 1.0000 0.1328 0.1009 0.1714

uncrtn> ## multicut based uncertainty
uncrtn> mc <- multicut(Y ~ x2, strata=x0, dist="poisson")

uncrtn> ## asymptotic confidence intervals
uncrtn> (muc1 <- uncertainty(mc, type="asymp", B=999))
Multivariate multicut uncertainty results, type = asymp, B = 999


uncrtn> summary(muc1)
Multivariate multicut uncertainty results
type = asymp, B = 999, level = 0.95

     split     R      I   Lower  Upper
Spp1   1+2 0.916 0.4743 0.30552 0.6334
Spp3   2+3 0.303 0.4930 0.19709 0.7854
Spp2   2+3 0.212 0.2195 0.08524 0.3912

uncrtn> bestpart(muc1)
   Spp1  Spp2  Spp3
1 0.987 0.085 0.376
2 0.999 0.580 0.469
3 0.001 0.776 0.888
4 0.073 0.561 0.070

uncrtn> ## bootstrap-based confidence intervals
uncrtn> (muc2 <- uncertainty(mc, type="boot", B=19))
Multivariate multicut uncertainty results, type = boot, B = 19


uncrtn> summary(muc2)
Multivariate multicut uncertainty results
type = boot, B = 19, level = 0.95

     split   R      I  Lower  Upper
Spp1   1+2 0.9 0.4879 0.3619 0.6358
Spp3   2+3 0.4 0.5490 0.4102 0.9272
Spp2 2+3+4 0.4 0.2456 0.1542 0.3829

uncrtn> bestpart(muc2)
  Spp1 Spp2 Spp3
1 0.95 0.05 0.25
2 1.00 0.80 0.45
3 0.00 0.85 0.90
4 0.05 0.55 0.05

uncrtn> ## dolina example
uncrtn> data(dolina)

uncrtn> ## stratum as ordinal
uncrtn> dolina$samp$stratum <- as.integer(dolina$samp$stratum)

uncrtn> ## filter species to speed up things a bit
uncrtn> Y <- ifelse(dolina$xtab[,colSums(dolina$xtab > 0) >= 20] > 0, 1, 0)

uncrtn> ## opticut results, note the cloglog link function
uncrtn> dol <- opticut(Y ~ stratum + lmoist + method, data=dolina$samp,
uncrtn+     strata=dolina$samp$mhab, dist="binomial:cloglog")

uncrtn> ## parallel computing for uncertainty
uncrtn> library(parallel)

uncrtn> cl <- makeCluster(2)

uncrtn> ucdol <- uncertainty(dol, type="multi", B=25, cl=cl)

uncrtn> stopCluster(cl)

uncrtn> bestpart(ucdol)
         aacu       amin       apur      bbip      bcan       clam      cort
LI 0.92307692 0.50000000 0.03846154 0.0000000 0.0000000 0.07692308 0.0000000
DW 0.46153846 0.07692308 0.92307692 0.6923077 0.4615385 0.80769231 0.5384615
TL 0.03846154 0.03846154 0.07692308 0.0000000 1.0000000 0.50000000 1.0000000
RO 0.88461538 1.00000000 0.96153846 1.0000000 0.1923077 0.73076923 0.8076923
         ctri       dper      estr ffau hobv       iiso       mbor       pinc
LI 0.07692308 0.00000000 0.0000000    0    0 0.00000000 0.00000000 0.00000000
DW 0.61538462 0.92307692 0.0000000    0    1 0.03846154 1.00000000 0.42307692
TL 0.03846154 0.07692308 0.1153846    0    0 0.03846154 0.03846154 0.03846154
RO 1.00000000 0.96153846 1.0000000    1    1 1.00000000 0.30769231 1.00000000
         ppyg       pvic       tuni      vidi
LI 0.92307692 0.03846154 0.03846154 0.2692308
DW 0.07692308 0.61538462 0.00000000 1.0000000
TL 0.19230769 0.15384615 0.00000000 0.0000000
RO 1.00000000 0.96153846 1.00000000 1.0000000

uncrtn> heatmap(t(bestpart(ucdol)), scale="none", col=occolors()(25),
uncrtn+     distfun=function(x) dist(x, "manhattan"))

uncrtn> ## See how indicator value changes with different partitions
uncrtn> ## (and why it is the wrong metric to use in this calse)
uncrtn> with(ucdol$uncertainty[["pvic"]],
uncrtn+     boxplot(I ~ best, col="gold", ylab="Indicator value"))

uncrtn> ## What we should calculate is the bootstrap smoothed mean of the
uncrtn> ## expected value and its confidence intervals
uncrtn> bs <- bsmooth(ucdol$uncertainty[["pvic"]])

uncrtn> boxplot(t(bs), ylab="Expected value")

uncrtn> cbind(Mean=rowMeans(bs), t(apply(bs, 1, quantile, probs=c(0.025, 0.975))))
        Mean       2.5%     97.5%
LI 0.2001777 0.09004651 0.3267507
DW 0.2759182 0.15710556 0.3714238
TL 0.2080179 0.11342479 0.3181963
RO 0.3635844 0.14240696 0.6655275

uncrtn> ## A more interesting simulated example for bootstrap smoothing
uncrtn> ## and comparing opticut vs. multicut
uncrtn> set.seed(1)

uncrtn> n <- 2000

uncrtn> x <- sort(runif(n, -8, 8))

uncrtn> p <- plogis(0.5 + -0.1 * x + -0.2 * x^2)

uncrtn> y <- rbinom(n, 1, p)

uncrtn> d <- diff(range(x))/10

uncrtn> br <- seq(min(x), max(x), by=d)

uncrtn> g <- cut(x, br, include.lowest=TRUE)

uncrtn> levels(g) <- LETTERS[1:nlevels(g)]

uncrtn> o <- opticut(y ~ 1, strata=g, dist="binomial")

uncrtn> m <- multicut(y ~ 1, strata=g, dist="binomial")

uncrtn> library(parallel)

uncrtn> cl <- makeCluster(2)

uncrtn> uo <- uncertainty(o, type="multi", B=99, cl=cl)

uncrtn> um <- uncertainty(m, type="boot", B=99, cl=cl)

uncrtn> stopCluster(cl)

uncrtn> ## bootstrap average for opticut
uncrtn> bs <- bsmooth(uo$uncertainty[[1]])

uncrtn> stat <- cbind(Mean=rowMeans(bs),
uncrtn+     t(apply(bs, 1, quantile, probs=c(0.025, 0.975))))

uncrtn> ## bootstrap average for multicut
uncrtn> bsm <- as.matrix(um$uncertainty[[1]][,-(1:2)])

uncrtn> statm <- cbind(Mean=colMeans(bsm),
uncrtn+     t(apply(bsm, 2, quantile, probs=c(0.025, 0.975))))

uncrtn> op <- par(mfrow=c(2,1))

uncrtn> plot(p ~ x, type="l", ylim=c(0,1), main="Binary partitions (opticut)")

uncrtn> abline(v=br, col="grey", lty=3)

uncrtn> lines(br[-1]-0.5*d, stat[,1], col=4)

uncrtn> lines(br[-1]-0.5*d, stat[,2], col=4, lty=2)

uncrtn> lines(br[-1]-0.5*d, stat[,3], col=4, lty=2)

uncrtn> lines(br[-1]-0.5*d, bs[,1], col=2)

uncrtn> legend("topright", bty="n", lty=c(1,1,2,1), col=c(1,4,4,2),
uncrtn+     legend=c("True response","bsmooth","0.95 CI","Best partition"))

uncrtn> plot(p ~ x, type="l", ylim=c(0,1), main="Multi-level model (multicut)")

uncrtn> abline(v=br, col="grey", lty=3)

uncrtn> lines(br[-1]-0.5*d, statm[,1], col=4)

uncrtn> lines(br[-1]-0.5*d, statm[,2], col=4, lty=2)

uncrtn> lines(br[-1]-0.5*d, statm[,3], col=4, lty=2)

uncrtn> legend("topright", bty="n", lty=c(1,1,2), col=c(1,4,4),
uncrtn+     legend=c("True response","bsmooth","0.95 CI"))

uncrtn> par(op)


---------- opticut example: occolors ----------


occlrs> ## using palettes
occlrs> plot(1:100, rep(2, 100), pch = 15,
occlrs+     ylim = c(0, 21), axes = FALSE, ann = FALSE,
occlrs+     col = occolors()(100)) # default 'bg'

occlrs> text(50, 1, "theme = 'br'")

occlrs> points(1:100, rep(5, 100), pch = 15,
occlrs+     col=occolors("gr")(100))

occlrs> text(50, 4, "theme = 'gr'")

occlrs> points(1:100, rep(8, 100), pch = 15,
occlrs+     col=occolors("bw")(100))

occlrs> text(50, 7, "theme = 'bw'")

occlrs> points(1:100, rep(11, 100), pch = 15,
occlrs+     col=occolors(terrain.colors)(100))

occlrs> text(50, 10, "theme = terrain.colors")

occlrs> points(1:100, rep(14, 100), pch = 15,
occlrs+     col=occolors(c("purple", "pink", "orange"))(100))

occlrs> text(50, 13, "theme = c('purple', 'pink', 'orange')")

occlrs> points(1:100, rep(17, 100), pch = 15,
occlrs+     col=occolors(c("#a6611a", "#ffffbf", "#018571"))(100))

occlrs> text(50, 16, "theme = c('#a6611a', '#ffffbf', '#018571')")

occlrs> points(1:100, rep(20, 100), pch = 15,
occlrs+     col=occolors(c("#7b3294", "#ffffbf", "#008837"))(100))

occlrs> text(50, 19, "theme = c('#7b3294', '#ffffbf', '#008837')")

occlrs> ## grayscale conversions
occlrs> n <- 25

occlrs> col <- occolors("br")(n)

occlrs> method <- c("BT.709", "BT.601",
occlrs+     "desaturate", "average", "maximum", "minimum",
occlrs+     "red", "green", "blue")

occlrs> plot(0, type="n", ann=FALSE, axes=FALSE,
occlrs+     xlim=c(0, n), ylim=c(3*length(method), 0))

occlrs> for (j in 1:length(method)) {
occlrs+     for (i in 1:n) {
occlrs+         polygon(c(i-1, i, i, i-1), c(0, 0, 1, 1)+((j-1)*3),
occlrs+             col=col[i], border=col[i])
occlrs+         polygon(c(i-1, i, i, i-1), c(1, 1, 2, 2)+((j-1)*3),
occlrs+             col=col2gray(col[i], method=method[j]),
occlrs+             border=col2gray(col[i], method=method[j]))
occlrs+         text(n/2, 1+((j-1)*3), method[j])
occlrs+     }
occlrs+ }


---------- opticut example: ocoptions ----------


ocptns> ## simple example from Legendre 2013
ocptns> ## Indicator Species: Computation, in
ocptns> ## Encyclopedia of Biodiversity, Volume 4
ocptns> ## https://dx.doi.org/10.1016/B978-0-12-384719-5.00430-5
ocptns> gr <- as.factor(paste0("X", rep(1:5, each=5)))

ocptns> spp <- cbind(Species1=rep(c(4,6,5,3,2), each=5),
ocptns+     Species2=c(rep(c(8,4,6), each=5), 4,4,2, rep(0,7)),
ocptns+     Species3=rep(c(18,2,0,0,0), each=5))

ocptns> rownames(spp) <- gr

ocptns> ## must add some noise to avoid perfect fit
ocptns> spp[6, "Species1"] <- 7

ocptns> spp[1, "Species3"] <- 17

ocptns> spp
   Species1 Species2 Species3
X1        4        8       17
X1        4        8       18
X1        4        8       18
X1        4        8       18
X1        4        8       18
X2        7        4        2
X2        6        4        2
X2        6        4        2
X2        6        4        2
X2        6        4        2
X3        5        6        0
X3        5        6        0
X3        5        6        0
X3        5        6        0
X3        5        6        0
X4        3        4        0
X4        3        4        0
X4        3        2        0
X4        3        0        0
X4        3        0        0
X5        2        0        0
X5        2        0        0
X5        2        0        0
X5        2        0        0
X5        2        0        0

ocptns> ## current settings
ocptns> print(unlist(ocoptions())) # these give identical answers
     collapse           cut          sort         theme    check_comb 
          "+"           "2"        "TRUE"          "br"        "TRUE" 
    try_error         scale    fix_fitted robust_loglik 
      "FALSE"         "0.5"       "FALSE"        "TRUE" 

ocptns> unlist(getOption("ocoptions"))
     collapse           cut          sort         theme    check_comb 
          "+"           "2"        "TRUE"          "br"        "TRUE" 
    try_error         scale    fix_fitted robust_loglik 
      "FALSE"         "0.5"       "FALSE"        "TRUE" 

ocptns> summary(ocall <- opticut(spp ~ 1, strata=gr, dist="gaussian", comb="all"))
Multivariate opticut results, comb = all, dist = gaussian

Call:
opticut.formula(formula = spp ~ 1, strata = gr, dist = "gaussian", 
    comb = "all")

Best supported models with logLR >= 2:
         split assoc      I mu0  mu1 logLR      w
Species2 X1+X3   +++ 0.9866 2.0  7.0 14.82 0.4995
Species1 X2+X3   +++ 0.8617 3.0  5.6 16.74 0.7035
Species3    X1   +++ 1.0000 0.5 17.8 54.26 1.0000
15 binary splits


ocptns> ## resetting pboptions and checking new settings
ocptns> ocop <- ocoptions(collapse="&", sort=FALSE)

ocptns> unlist(getOption("ocoptions"))
     collapse           cut          sort         theme    check_comb 
          "&"           "2"       "FALSE"          "br"        "TRUE" 
    try_error         scale    fix_fitted robust_loglik 
      "FALSE"         "0.5"       "FALSE"        "TRUE" 

ocptns> ## running again with new settings
ocptns> summary(ocall <- opticut(spp ~ 1, strata=gr, dist="gaussian", comb="all"))
Multivariate opticut results, comb = all, dist = gaussian

Call:
opticut.formula(formula = spp ~ 1, strata = gr, dist = "gaussian", 
    comb = "all")

Best supported models with logLR >= 2:
         split assoc      I mu0  mu1 logLR      w
Species1 X2&X3   +++ 0.8617 3.0  5.6 16.74 0.7035
Species2 X1&X3   +++ 0.9866 2.0  7.0 14.82 0.4995
Species3    X1   +++ 1.0000 0.5 17.8 54.26 1.0000
15 binary splits


ocptns> ## resetting original
ocptns> ocoptions(ocop)

ocptns> unlist(getOption("ocoptions"))
     collapse           cut          sort         theme    check_comb 
          "+"           "2"        "TRUE"          "br"        "TRUE" 
    try_error         scale    fix_fitted robust_loglik 
      "FALSE"         "0.5"       "FALSE"        "TRUE" 
Warning messages:
1: In example(opticut - package, package = "opticut", run.dontrun = TRUE) :
  no help found for 'opticut - package'
2: In sqrt(diag(vc)[np]) : NaNs produced
3: In multicut.default(Y = Y, X = X, strata = strata, dist = dist,  :
  Negative fitted values found for 1 species.
4: In multicut.default(spp, strata = gr, dist = "gaussian") :
  Negative fitted values found for 1 species.
5: In example(sindex, package = "opticut", run.dontrun = TRUE) :
  no help found for 'sindex'
> 
> proc.time()
   user  system elapsed 
 127.04    9.87  198.75