R Under development (unstable) (2025-07-11 r88405 ucrt) -- "Unsuffered Consequences" Copyright (C) 2025 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > #devtools::install_github("psolymos/opticut") > #spelling::spell_check_package("~/repos/opticut") > library(opticut) Loading required package: pbapply opticut 0.1-4 2025-07-12 > > ## --- run examples with \dontrun sections --- > > help_pages <- c("opticut-package", + "dolina", "birdrec", + "opticut", "optilevels", "multicut", + "sindex", "lorenz", + "beta2i", + "allComb", "rankComb", + #"bestmodel", + "uncertainty", + "occolors", "ocoptions") > > for (i in help_pages) { + cat("\n\n---------- opticut example:", i, "----------\n\n") + eval(parse(text=paste0("example(", i, + ", package = 'opticut', run.dontrun = TRUE)"))) + } ---------- opticut example: opticut-package ---------- ---------- opticut example: dolina ---------- dolina> data(dolina) dolina> str(dolina) List of 3 $ xtab: num [1:512, 1:42] 0 0 0 0 0 0 0 0 0 0 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:512] "10A1Q" "10A1T" "10A2Q" "10A2T" ... .. ..$ : chr [1:42] "aacu" "amin" "apol" "apur" ... $ samp:'data.frame': 512 obs. of 9 variables: ..$ sample : Factor w/ 256 levels "10A1","10A2",..: 1 1 2 2 3 3 4 4 5 5 ... ..$ dolina : int [1:512] 10 10 10 10 10 10 10 10 10 10 ... ..$ microhab: Factor w/ 4 levels "dead.wood","litter",..: 2 2 2 2 2 2 2 2 2 2 ... ..$ mhab : Factor w/ 4 levels "LI","DW","TL",..: 1 1 1 1 1 1 1 1 1 1 ... ..$ method : Factor w/ 2 levels "Q","T": 1 2 1 2 1 2 1 2 1 2 ... ..$ aspect : Factor w/ 5 levels "eastern","flat",..: 4 4 4 4 4 4 2 2 3 3 ... ..$ stratum : Factor w/ 4 levels "1bottom","2middle",..: 4 4 3 3 2 2 1 1 2 2 ... ..$ lmoist : num [1:512] 1 1 1 1 1 1 1.5 1.5 1 1 ... ..$ lthick : num [1:512] 2 2 2.5 2.5 3 3 0.5 0.5 1.5 1.5 ... $ taxa:'data.frame': 42 obs. of 2 variables: ..$ scientific.name: Factor w/ 42 levels "Acanthinula aculeata",..: 1 2 32 3 4 5 10 8 13 11 ... ..$ family : Factor w/ 17 levels "Aciculidae","Carychiidae",..: 16 10 1 10 3 3 3 3 17 3 ... dolina> ## species richness by microhabitat and method dolina> Richness <- rowSums(dolina$xtab > 0) dolina> boxplot(Richness ~ mhab + method, dolina$samp, dolina+ ylab="Species richness", main="Dolina data set", dolina+ col=rep(c("#2C7BB6", "#D7191C"), each=4)) ---------- opticut example: birdrec ---------- birdrc> data(birdrec) birdrc> str(birdrec) List of 3 $ xtab: int [1:3967, 1:156] 0 0 0 0 0 0 0 0 0 0 ... ..- attr(*, "dimnames")=List of 2 .. ..$ : chr [1:3967] "T_IG_ABMI_388_2015_1_STATION_NW_89:0:0" "T_IG_ABMI_599_2015_1_STATION_NE_89:0:0" "T_IG_ABMI_855_2015_1_STATION_NW_89:0:0" "T_IG_ABMI_855_2015_1_STATION_SE_89:0:0" ... .. ..$ : chr [1:156] "AlderFlycatcher" "AmericanBittern" "AmericanCoot" "AmericanCrow" ... $ samp:'data.frame': 3967 obs. of 16 variables: ..$ PKEY : Factor w/ 3967 levels "T_IG_ABMI_388_2015_1_STATION_NW_89:0:0",..: 1 2 3 4 5 6 7 8 9 10 ... ..$ POINT : Factor w/ 367 levels "T_IG_ABMI_1_2015_1_STATION_NE",..: 120 197 258 259 81 142 200 230 30 57 ... ..$ SITE : Factor w/ 124 levels "1","1022","1023",..: 31 51 67 67 21 37 51 59 8 15 ... ..$ YEAR : int [1:3967] 2015 2015 2015 2015 2015 2015 2015 2015 2015 2015 ... ..$ MONTH : int [1:3967] 3 3 3 3 4 4 4 4 4 4 ... ..$ MDAY : int [1:3967] 31 31 31 31 1 1 1 1 2 2 ... ..$ HOUR : int [1:3967] 0 0 0 0 0 0 0 0 0 0 ... ..$ MINUTE : int [1:3967] 0 0 0 0 0 0 0 0 0 0 ... ..$ YDAY : int [1:3967] 89 89 89 89 90 90 90 90 91 91 ... ..$ RAIN : int [1:3967] 0 0 0 0 0 0 0 0 0 0 ... ..$ WIND : int [1:3967] 1 0 0 0 2 1 1 2 0 1 ... ..$ INDUSTRY : int [1:3967] 2 1 0 0 0 2 1 2 1 0 ... ..$ NOISE : int [1:3967] 2 0 0 0 0 0 0 0 0 0 ... ..$ MICROPHONE: int [1:3967] 0 0 0 0 0 7 0 0 0 0 ... ..$ TOY : Factor w/ 8 levels "Early1","Early2",..: 1 1 1 1 1 1 1 1 1 1 ... ..$ TOD : Factor w/ 2 levels "Morning","Midnight": 2 2 2 2 2 2 2 2 2 2 ... $ taxa:'data.frame': 156 obs. of 7 variables: ..$ Species : Factor w/ 156 levels "AlderFlycatcher",..: 1 2 3 4 5 6 7 8 145 9 ... ..$ CommonName : Factor w/ 156 levels "Alder Flycatcher",..: 1 2 3 4 5 6 7 8 145 9 ... ..$ ScientificName : Factor w/ 156 levels "Acanthis flammea",..: 47 20 54 42 116 53 109 134 153 117 ... ..$ Family : Factor w/ 43 levels " Vespertilionidae",..: 40 4 31 14 17 16 27 39 29 15 ... ..$ Order : Factor w/ 18 levels "Anseriformes",..: 12 13 11 12 12 8 12 12 14 12 ... ..$ Class : Factor w/ 3 levels "Amphibia","Aves",..: 2 2 2 2 2 2 2 2 2 2 ... ..$ MigratoryBehaviour: Factor w/ 3 levels "Neotropical migrant",..: 1 2 2 2 2 2 1 2 3 2 ... birdrc> aggregate(rowSums(birdrec$xtab), birdrc+ list(TOY=birdrec$samp$TOY, TOD=birdrec$samp$TOD), mean) TOY TOD x 1 Early1 Morning 0.7073171 2 Early2 Morning 1.7207207 3 Early3 Morning 4.7593052 4 Mid4 Morning 6.2938005 5 Mid5 Morning 6.8922652 6 Mid6 Morning 5.8924419 7 Mid7 Morning 6.0426136 8 Late8 Morning 4.0459364 9 Early1 Midnight 0.3896104 10 Early2 Midnight 0.8431373 11 Early3 Midnight 1.2451613 12 Mid4 Midnight 1.0800000 13 Mid5 Midnight 1.3566879 14 Mid6 Midnight 1.1585366 15 Mid7 Midnight 1.0639535 16 Late8 Midnight 0.5560748 birdrc> boxplot(rowSums(birdrec$xtab) ~ TOD + TOY, birdrec$samp, birdrc+ col=c("gold", "tomato"), ylab="# detections") birdrc> y <- ifelse(birdrec$xtab > 0, 1, 0) birdrc> g <- paste0(gsub("[[:digit:]]", "", as.character(birdrec$samp$TOY)), birdrc+ substr(as.character(birdrec$samp$TOD), 1, 4)) birdrc> g <- factor(g, levels=c("EarlyMorn", "MidMorn", "LateMorn", birdrc+ "EarlyMidn", "MidMidn", "LateMidn")) birdrc> ## binary response model birdrc> oc <- opticut(y ~ 1, strata=g, dist="binomial") birdrc> ## multi-level response model birdrc> mc <- multicut(y ~ 1, strata=g, dist="binomial") birdrc> ## testing equality of labels birdrc> splito <- as.character(summary(oc)$summary$split) birdrc> splitm <- as.character(summary(mc)$summary$split) birdrc> table(splito == splitm) FALSE TRUE 38 118 birdrc> ## seeing how much those differ birdrc> bpo <- summary(oc)$bestpart birdrc> bpm <- summary(mc)$bestpart birdrc> rs <- rowSums(abs(bpo-bpm)) birdrc> table(rs) rs 0 1 2 118 35 3 birdrc> 10 * bpo[rs > 0,] + bpm[rs > 0,] EarlyMorn MidMorn LateMorn EarlyMidn MidMidn LateMidn AlderFlycatcher 0 11 10 0 0 0 BlackbackedWoodpecker 10 11 0 0 0 0 BlackbilledMagpie 11 11 10 0 0 0 BlackcappedChickadee 11 10 11 0 0 0 BlackAndWhiteWarbler 10 11 0 0 0 0 BluewingedTeal 10 11 11 0 11 0 BlueJay 11 11 11 0 0 10 BonapartesGull 10 11 11 0 0 0 BrewersBlackbird 11 10 0 0 0 0 BrownCreeper 11 10 0 10 0 0 CommonRaven 11 11 10 0 0 0 DarkeyedJunco 10 11 11 0 0 0 ForstersTern 10 11 0 0 0 0 FoxSparrow 10 11 0 0 0 0 Gadwall 11 11 0 0 10 0 GreatGrayOwl 1 0 0 11 0 1 GreaterYellowlegs 10 11 0 0 0 0 GreenwingedTeal 11 10 0 11 11 0 Killdeer 10 11 0 0 10 0 LeContesSparrow 0 11 10 0 11 11 LesserYellowlegs 11 11 10 0 0 0 LincolnsSparrow 10 11 11 0 0 0 MagnoliaWarbler 0 11 10 0 0 0 MourningDove 11 11 10 0 0 0 NorthernFlicker 11 11 10 0 0 0 NorthernWaterthrush 0 11 10 0 0 0 OrangecrownedWarbler 10 11 11 0 0 0 RedbreastedNuthatch 10 11 11 0 0 0 RedwingedBlackbird 11 11 10 0 0 0 SandhillCrane 11 11 10 0 0 0 SwampSparrow 10 11 11 0 0 0 TreeSwallow 10 11 11 0 0 0 WarblingVireo 10 11 11 0 0 0 WesternKingbird 11 10 0 0 11 0 WesternMeadowlark 11 11 10 0 0 0 WinterWren 10 11 11 0 0 0 YellowbelliedFlycatcher 0 11 11 0 0 10 YellowrumpedWarbler 10 11 11 0 0 0 ---------- opticut example: opticut ---------- optict> ## --- Gaussian optict> ## simple example from Legendre 2013 optict> ## Indicator Species: Computation, in optict> ## Encyclopedia of Biodiversity, Volume 4 optict> ## https://dx.doi.org/10.1016/B978-0-12-384719-5.00430-5 optict> gr <- as.factor(paste0("X", rep(1:5, each=5))) optict> spp <- cbind(Species1=rep(c(4,6,5,3,2), each=5), optict+ Species2=c(rep(c(8,4,6), each=5), 4,4,2, rep(0,7)), optict+ Species3=rep(c(18,2,0,0,0), each=5)) optict> rownames(spp) <- gr optict> ## must add some noise to avoid perfect fit optict> spp[6, "Species1"] <- 7 optict> spp[1, "Species3"] <- 17 optict> spp Species1 Species2 Species3 X1 4 8 17 X1 4 8 18 X1 4 8 18 X1 4 8 18 X1 4 8 18 X2 7 4 2 X2 6 4 2 X2 6 4 2 X2 6 4 2 X2 6 4 2 X3 5 6 0 X3 5 6 0 X3 5 6 0 X3 5 6 0 X3 5 6 0 X4 3 4 0 X4 3 4 0 X4 3 2 0 X4 3 0 0 X4 3 0 0 X5 2 0 0 X5 2 0 0 X5 2 0 0 X5 2 0 0 X5 2 0 0 optict> ## all partitions optict> summary(ocall <- opticut(spp ~ 1, strata=gr, dist="gaussian", comb="all")) Multivariate opticut results, comb = all, dist = gaussian Call: opticut.formula(formula = spp ~ 1, strata = gr, dist = "gaussian", comb = "all") Best supported models with logLR >= 2: split assoc I mu0 mu1 logLR w Species2 X1+X3 +++ 0.9866 2.0 7.0 14.82 0.4995 Species1 X2+X3 +++ 0.8617 3.0 5.6 16.74 0.7035 Species3 X1 +++ 1.0000 0.5 17.8 54.26 1.0000 15 binary splits optict> summary(opticut(spp, strata=gr, dist="gaussian", comb="all")) # alternative Multivariate opticut results, comb = all, dist = gaussian Call: opticut.default(Y = spp, strata = gr, dist = "gaussian", comb = "all") Best supported models with logLR >= 2: split assoc I mu0 mu1 logLR w Species2 X1+X3 +++ 0.9866 2.0 7.0 14.82 0.4995 Species1 X2+X3 +++ 0.8617 3.0 5.6 16.74 0.7035 Species3 X1 +++ 1.0000 0.5 17.8 54.26 1.0000 15 binary splits optict> ## rank based partitions optict> summary(ocrank <- opticut(spp ~ 1, strata=gr, dist="gaussian", comb="rank")) Multivariate opticut results, comb = rank, dist = gaussian Call: opticut.formula(formula = spp ~ 1, strata = gr, dist = "gaussian", comb = "rank") Best supported models with logLR >= 2: split assoc I mu0 mu1 logLR w Species2 X1+X3 +++ 0.9866 2.0 7.0 14.82 0.4996 Species1 X2+X3 +++ 0.8617 3.0 5.6 16.74 0.7036 Species3 X1 +++ 1.0000 0.5 17.8 54.26 1.0000 4 binary splits optict> summary(opticut(spp, strata=gr, dist="gaussian", comb="rank")) # alternative Multivariate opticut results, comb = rank, dist = gaussian Call: opticut.default(Y = spp, strata = gr, dist = "gaussian", comb = "rank") Best supported models with logLR >= 2: split assoc I mu0 mu1 logLR w Species2 X1+X3 +++ 0.9866 2.0 7.0 14.82 0.4996 Species1 X2+X3 +++ 0.8617 3.0 5.6 16.74 0.7036 Species3 X1 +++ 1.0000 0.5 17.8 54.26 1.0000 4 binary splits optict> ## --- Binomial optict> ## simulated binary data optict> set.seed(1234) optict> n <- 200 optict> x0 <- sample(1:4, n, TRUE) optict> x1 <- ifelse(x0 <= 2, 1, 0) optict> x2 <- rnorm(n, 0.5, 1) optict> p1 <- plogis(-0.5 + 2*x1 + -0.8*x2) optict> Y1 <- rbinom(n, 1, p1) optict> p2 <- plogis(-0.1 + 2*ifelse(x0==4,1,0) + -0.8*x2) optict> Y2 <- rbinom(n, 1, p2) optict> p3 <- plogis(-0.1 + -0.8*x2) optict> Y3 <- rbinom(n, 1, p3) optict> Y <- cbind(SPP1=Y1, SPP2=Y2, SPP3=Y3) optict> X <- model.matrix(~x2) optict> ## all partitions, single species optict> Z <- allComb(x0) optict> opticut1(Y1, X, Z, dist="binomial") Univariate opticut results, comb = all, dist = binomial I = 0.7139; w = 0.9989; H = 0.9978; logL_null = -132.3 Best supported models with logLR >= 2: assoc I mu0 mu1 logLR w 1+2 +++ 0.7139 0.3389 0.7544 16.807 9.989e-01 4 --- 0.6557 0.6465 0.2755 9.764 8.724e-04 2 +++ 0.5952 0.4594 0.7700 8.336 2.091e-04 1 ++ 0.4260 0.4986 0.7118 3.165 1.188e-06 3 -- 0.3744 0.5914 0.3972 2.500 6.111e-07 7 binary splits (2 models not shown) optict> ## rank based partitions, single species optict> opticut1(Y1, X, as.factor(x0), dist="binomial") Univariate opticut results, comb = rank, dist = binomial I = 0.7139; w = 0.9989; H = 0.9978; logL_null = -132.3 Best supported models with logLR >= 2: assoc I mu0 mu1 logLR w 1+2 +++ 0.7139 0.3389 0.7544 16.807 0.9989185 1+2+3 +++ 0.6557 0.2755 0.6465 9.764 0.0008724 2 +++ 0.5952 0.4594 0.7700 8.336 0.0002091 3 binary splits optict> ## all partitions, multiple species optict> (m1 <- opticut(Y ~ x2, strata=x0, dist="poisson", comb="all")) Multivariate opticut results, comb = all, dist = poisson Call: opticut.formula(formula = Y ~ x2, strata = x0, dist = "poisson", comb = "all") 3 species, 7 binary splits optict> summary(m1) Multivariate opticut results, comb = all, dist = poisson Call: opticut.formula(formula = Y ~ x2, strata = x0, dist = "poisson", comb = "all") Best supported models with logLR >= 2: split assoc I mu0 mu1 logLR w SPP1 1+2 +++ 0.4137 0.3118 0.7518 8.340 0.9438 SPP2 4 ++ 0.3370 0.4550 0.9175 5.664 0.8333 7 binary splits 1 species not shown optict> ## show all species optict> summary(m1, cut=0) Multivariate opticut results, comb = all, dist = poisson Call: opticut.formula(formula = Y ~ x2, strata = x0, dist = "poisson", comb = "all") Best supported models with logLR >= 2: split assoc I mu0 mu1 logLR w SPP1 1+2 +++ 0.4137 0.3118 0.7518 8.340 0.9438 SPP2 4 ++ 0.3370 0.4550 0.9175 5.664 0.8333 7 binary splits 1 species not shown optict> ## plot best partitions and indicator values optict> plot(m1) optict> ## model weights for all species optict> wplot(m1) optict> ## different ways of plotting weights for single species optict> wplot(m1$species[[1]]) optict> wplot(m1, which = 1) optict> ## rank based partitions, multiple species optict> summary(m2 <- opticut(Y ~ x2, strata=x0, dist="poisson", comb="rank")) Multivariate opticut results, comb = rank, dist = poisson Call: opticut.formula(formula = Y ~ x2, strata = x0, dist = "poisson", comb = "rank") Best supported models with logLR >= 2: split assoc I mu0 mu1 logLR w SPP1 1+2 +++ 0.4137 0.3118 0.7518 8.340 0.9462 SPP2 4 ++ 0.3370 0.4550 0.9175 5.664 0.8687 3 binary splits 1 species not shown optict> ## subset results optict> summary(subset(m2, 1:2)) Multivariate opticut results, comb = rank, dist = poisson Call: opticut.formula(formula = Y ~ x2, strata = x0, dist = "poisson", comb = "rank") Best supported models with logLR >= 2: split assoc I mu0 mu1 logLR w SPP1 1+2 +++ 0.4137 0.3118 0.7518 8.340 0.9462 SPP2 4 ++ 0.3370 0.4550 0.9175 5.664 0.8687 3 binary splits optict> ## best partition optict> head(bestpart(m2)) SPP1 SPP2 SPP3 4 0 1 1 4 0 1 1 2 1 0 0 2 1 0 0 1 1 0 1 4 0 1 1 optict> ## best model optict> mods <- bestmodel(m2) optict> mods $SPP1 Call: stats::glm(formula = Y ~ . - 1, family = Family, data = XX) Coefficients: `(Intercept)` Z1 x2 -1.1653 0.8801 -0.2521 Degrees of Freedom: 200 Total (i.e. Null); 197 Residual Null Deviance: 212 Residual Deviance: 119.3 AIC: 313.3 $SPP2 Call: stats::glm(formula = Y ~ . - 1, family = Family, data = XX) Coefficients: `(Intercept)` Z1 x2 -0.7875 0.7014 -0.3343 Degrees of Freedom: 200 Total (i.e. Null); 197 Residual Null Deviance: 200 Residual Deviance: 116.4 AIC: 322.4 $SPP3 Call: stats::glm(formula = Y ~ . - 1, family = Family, data = XX) Coefficients: `(Intercept)` Z1 x2 -1.1635 0.3235 -0.5092 Degrees of Freedom: 200 Total (i.e. Null); 197 Residual Null Deviance: 266 Residual Deviance: 126.8 AIC: 266.8 optict> ## explore further optict> confint(mods[[1]]) Waiting for profiling to be done... 2.5 % 97.5 % `(Intercept)` -1.5658931 -0.81050022 Z1 0.4493567 1.33725540 x2 -0.4472278 -0.05197309 optict> ## MLE and variance-covariance matrix (species 1) optict> getMLE(m2, which=1, vcov=TRUE) $coef `(Intercept)` Z1 x2 -1.1652948 0.8800652 -0.2520902 $vcov `(Intercept)` Z1 x2 `(Intercept)` 0.036867825 -0.0356672658 -0.0034330887 Z1 -0.035667266 0.0508649662 -0.0001321465 x2 -0.003433089 -0.0001321465 0.0101950604 $dist [1] "poisson" optict> ## fitted values optict> head(fitted(m2)) SPP1 SPP2 SPP3 1 0.2476256 0.6758558 0.2709819 2 0.3098497 0.9098139 0.4261917 3 0.6518694 0.3765488 0.2341692 4 0.7523114 0.4553558 0.3127900 5 0.8162386 0.5073633 0.5096759 6 0.2635700 0.7341598 0.3073866 optict> ## prediction for new data optict> head(predict(m2, gnew=x0, xnew=data.frame(x2=x2))) SPP1 SPP2 SPP3 1 0.2476256 0.6758558 0.2709819 2 0.3098497 0.9098139 0.4261917 3 0.6518694 0.3765488 0.2341692 4 0.7523114 0.4553558 0.3127900 5 0.8162386 0.5073633 0.5096759 6 0.2635700 0.7341598 0.3073866 optict> ## --- Zero-inflated Negative Binomial optict> ## dolina example optict> data(dolina) optict> ## stratum as ordinal optict> dolina$samp$stratum <- as.integer(dolina$samp$stratum) optict> ## filter species to speed up things a bit optict> Y <- dolina$xtab[,colSums(dolina$xtab > 0) >= 20] optict> ## opticut results, note the cloglog link function optict> dol <- opticut(Y ~ stratum + lmoist + method, data=dolina$samp, optict+ strata=dolina$samp$mhab, dist="zinb:cloglog") optict> summary(dol) Multivariate opticut results, comb = rank, dist = zinb:cloglog Call: opticut.formula(formula = Y ~ stratum + lmoist + method, data = dolina$samp, strata = dolina$samp$mhab, dist = "zinb:cloglog") Best supported models with logLR >= 2: split assoc I mu0 mu1 logLR w cort DW+TL+RO +++ 1.0000 8.421e-10 0.20209 19.344 0.6425 hobv DW+RO +++ 0.9037 3.445e-02 0.68142 39.943 1.0000 bbip DW+RO +++ 0.7063 2.968e-01 1.72407 31.991 0.9995 vidi DW+RO +++ 0.4557 9.997e-01 2.67379 9.851 0.9955 clam DW+RO ++ 0.4209 1.414e-01 0.34702 3.556 0.4724 dper DW+RO +++ 0.4033 8.053e+00 18.93801 12.585 0.9897 ppyg LI+RO ++ 0.3438 4.854e+00 9.94144 5.172 0.9156 amin LI+RO ++ 0.2707 2.971e+00 5.17609 7.875 0.9581 mbor DW +++ 0.9350 1.666e-03 0.04959 19.852 0.9971 iiso RO +++ 0.8448 7.930e-02 0.94235 16.216 0.9994 ffau RO +++ 0.7776 7.499e-02 0.59934 25.852 1.0000 ctri RO ++ 0.6662 3.477e-01 1.73560 6.629 0.9111 estr RO +++ 0.6611 2.392e-01 1.17226 15.321 0.9952 pinc RO +++ 0.6255 3.555e-01 1.54316 14.571 0.5043 tuni RO +++ 0.5913 1.738e+00 6.76551 14.886 0.9997 pvic RO ++ 0.4202 4.646e-01 1.13801 6.200 0.5948 apur RO ++ 0.3766 5.196e+00 11.47426 4.975 0.6152 bcan TL +++ 0.7371 1.451e-01 0.95901 10.756 0.9647 3 binary splits 1 species not shown optict> ## vertical plot orientation optict> plot(dol, horizontal=FALSE, pos=1, upper=0.8) optict> ## parallel computing comparisons optict> library(parallel) optict> cl <- makeCluster(2) optict> ## sequential, all combinations (2^(K-1) - 1) optict> system.time(opticut(Y ~ stratum + lmoist + method, data=dolina$samp, optict+ strata=dolina$samp$mhab, dist="zinb", comb="all", cl=NULL)) user system elapsed 23.10 1.42 24.51 optict> ## sequential, rank based combinations (K - 1) optict> system.time(opticut(Y ~ stratum + lmoist + method, data=dolina$samp, optict+ strata=dolina$samp$mhab, dist="zinb", comb="rank", cl=NULL)) user system elapsed 14.78 1.00 15.78 optict> ## parallel, all combinations (2^(K-1) - 1) optict> system.time(opticut(Y ~ stratum + lmoist + method, data=dolina$samp, optict+ strata=dolina$samp$mhab, dist="zinb", comb="all", cl=cl)) user system elapsed 0.03 0.00 14.25 optict> ## parallel, rank based combinations (K - 1) optict> system.time(opticut(Y ~ stratum + lmoist + method, data=dolina$samp, optict+ strata=dolina$samp$mhab, dist="zinb", comb="rank", cl=cl)) user system elapsed 0.00 0.00 7.98 optict> stopCluster(cl) optict> ## --- Customizing distributions optict> ## we may want to expand the Zero-inflation component in a ZIP model optict> ## see how the return value needs to be structured optict> fun <- function(Y, X, linkinv, zi_term, ...) { optict+ X <- as.matrix(X) optict+ mod <- pscl::zeroinfl(Y ~ X-1 | zi_term, dist = "poisson", ...) optict+ list(coef=coef(mod), optict+ logLik=logLik(mod), optict+ linkinv=mod$linkinv) optict+ } optict> Xdol <- model.matrix(~ stratum + lmoist + method, data=dolina$samp) optict> ## this fits the null model (i.e. no partitions added) optict> fun(Y[,"amin"], Xdol, zi_term=dolina$samp$method) $coef count_X(Intercept) count_Xstratum count_Xlmoist count_XmethodT 1.30243481 -0.14266929 0.02460366 -0.64335270 zero_(Intercept) zero_zi_termT -0.56590398 0.61664601 $logLik 'log Lik.' -788.9897 (df=6) $linkinv function (eta) .Call(C_logit_linkinv, eta) optict> ## now we can use dist=fun optict> opticut1(Y[,"amin"], Xdol, Z=dolina$samp$mhab, optict+ dist=fun, zi_term=dolina$samp$method) Univariate opticut results, comb = rank, dist = fun I = 0.2652; w = 0.9849; H = 0.9703; logL_null = -789 Best supported models with logLR >= 2: assoc I mu0 mu1 logLR w LI+RO +++ 0.2652 0.7754 0.8560 15.370 9.849e-01 RO +++ 0.2358 0.7941 0.8618 11.189 1.505e-02 LI+DW+RO ++ 0.2036 0.7409 0.8121 5.328 4.286e-05 3 binary splits optict> dol2 <- opticut(Y ~ stratum + lmoist + method, data=dolina$samp, optict+ strata=dolina$samp$mhab, dist=fun, zi_term=dolina$samp$method) optict> summary(dol2) Multivariate opticut results, comb = rank, dist = fun Call: opticut.formula(formula = Y ~ stratum + lmoist + method, data = dolina$samp, strata = dolina$samp$mhab, dist = fun, zi_term = dolina$samp$method) Best supported models with logLR >= 2: split assoc I mu0 mu1 logLR w cort DW+TL+RO +++ 1.0000 7.247e-10 0.1302 19.933 0.6502 hobv DW+RO +++ 0.9117 9.715e-02 0.6996 49.368 1.0000 bbip DW+RO +++ 0.7037 3.155e-01 0.7260 45.369 1.0000 vidi DW+RO +++ 0.4519 6.570e-01 0.8353 19.411 1.0000 ctri LI+RO +++ 0.3951 5.638e-01 0.7488 27.661 1.0000 amin LI+RO +++ 0.2652 7.754e-01 0.8560 15.370 0.9849 ppyg LI+RO +++ 0.2518 8.781e-01 0.9234 41.925 1.0000 aacu TL+RO ++ 0.3969 5.375e-01 0.7291 6.650 0.9738 mbor DW +++ 0.9423 4.190e-01 0.9605 23.691 0.9803 clam DW ++ 0.4736 3.755e-01 0.6273 5.582 0.7947 iiso RO +++ 0.8562 1.757e-01 0.7334 17.176 0.9998 ffau RO +++ 0.7118 1.149e-01 0.4355 20.143 0.9475 pinc RO +++ 0.6322 2.465e-01 0.5921 14.673 0.5292 estr RO +++ 0.6316 2.133e-01 0.5457 14.059 0.9903 tuni RO +++ 0.4617 8.441e-01 0.9363 13.350 0.9996 apur RO +++ 0.4326 7.859e-01 0.9026 37.612 0.9844 pvic RO ++ 0.4205 5.499e-01 0.7497 7.165 0.4985 dper RO +++ 0.3616 9.152e-01 0.9584 37.337 0.5118 bcan TL +++ 0.7402 3.854e-01 0.8077 11.543 0.9844 3 binary splits optict> ## current collapse value optict> getOption("ocoptions")$collapse [1] "+" optict> ## factor levels sometimes need to be manipulated optict> ## before feeding it to opticut optict> fix_levels(as.factor(c("A b", "C d")), sep=":") [1] A b C d Levels: A b C d optict> fix_levels(as.factor(c("A b", "C d")), sep="") [1] A b C d Levels: A b C d ---------- opticut example: optilevels ---------- optlvl> ## --- Factor levels with Gaussian distribution optlvl> ## simple example from Legendre 2013 optlvl> ## Indicator Species: Computation, in optlvl> ## Encyclopedia of Biodiversity, Volume 4 optlvl> ## https://dx.doi.org/10.1016/B978-0-12-384719-5.00430-5 optlvl> gr <- as.factor(paste0("X", rep(1:5, each=5))) optlvl> spp <- cbind(Species1=rep(c(4,6,5,3,2), each=5), optlvl+ Species2=c(rep(c(8,4,6), each=5), 4,4,2, rep(0,7)), optlvl+ Species3=rep(c(18,2,0,0,0), each=5)) optlvl> rownames(spp) <- gr optlvl> ## must add some noise to avoid perfect fit optlvl> spp[6, "Species1"] <- 7 optlvl> spp[1, "Species3"] <- 17 optlvl> spp Species1 Species2 Species3 X1 4 8 17 X1 4 8 18 X1 4 8 18 X1 4 8 18 X1 4 8 18 X2 7 4 2 X2 6 4 2 X2 6 4 2 X2 6 4 2 X2 6 4 2 X3 5 6 0 X3 5 6 0 X3 5 6 0 X3 5 6 0 X3 5 6 0 X4 3 4 0 X4 3 4 0 X4 3 2 0 X4 3 0 0 X4 3 0 0 X5 2 0 0 X5 2 0 0 X5 2 0 0 X5 2 0 0 X5 2 0 0 optlvl> ol <- optilevels(spp[,"Species3"], gr) optlvl> ol[c("delta", "coef", "rank", "levels")] $delta [1] 0 -4 NA NA NA $coef X1 X2 X3 X4 X5 [1,] 17.8 2 0 0 0 [2,] 17.8 2 0 0 0 [3,] NA NA NA NA NA [4,] NA NA NA NA NA [5,] NA NA NA NA NA $rank X1 X2 X3 X4 X5 [1,] 5 4 2 2 2 [2,] 3 2 1 1 1 [3,] NA NA NA NA NA [4,] NA NA NA NA NA [5,] NA NA NA NA NA $levels $levels[[1]] X1 X2 X3 X4 X5 "X1" "X2" "X3+X4+X5" "X3+X4+X5" "X3+X4+X5" $levels[[2]] X1 X2 X3 X4 X5 "X1" "X2" "X3+X4+X5" "X3+X4+X5" "X3+X4+X5" optlvl> ## get the final factor level optlvl> gr1 <- gr optlvl> levels(gr1) <- ol$level[[length(ol$level)]] optlvl> table(gr, gr1) gr1 gr X1 X2 X3+X4+X5 X1 5 0 0 X2 0 5 0 X3 0 0 5 X4 0 0 5 X5 0 0 5 optlvl> ## compare the models optlvl> o0 <- lm(spp[,"Species3"] ~ gr - 1) optlvl> o1 <- lm(spp[,"Species3"] ~ gr1 - 1) optlvl> data.frame(AIC(o0, o1), delta=AIC(o0, o1)$AIC - AIC(o0)) df AIC delta o0 6 -3.103558 0 o1 4 -7.103558 -4 optlvl> ol$delta # should be identical [1] 0 -4 NA NA NA optlvl> ## --- Proportions with Poisson distribution optlvl> ## simulation optlvl> set.seed(123) optlvl> n <- 500 # number of observations optlvl> k <- 5 # number of habitat types optlvl> b <- c(-1, -0.2, -0.2, 0.5, 1) optlvl> names(b) <- LETTERS[1:k] optlvl> x <- replicate(k, exp(rnorm(n))) optlvl> x <- x / rowSums(x) # proportions optlvl> X <- model.matrix(~.-1, data=data.frame(x)) optlvl> lam <- exp(drop(crossprod(t(X), b))) optlvl> y <- rpois(n, lam) optlvl> z <- optilevels(y, x, dist="poisson") optlvl> ## best model refit optlvl> bestmodel(z) Call: stats::glm(formula = Y ~ . - 1, family = Family, data = XX) Coefficients: X1 `X2+X3` X4 X5 -0.8941 -0.3098 0.5044 1.0391 Degrees of Freedom: 500 Total (i.e. Null); 496 Residual Null Deviance: 586.2 Residual Deviance: 548.4 AIC: 1297 optlvl> ## estimates optlvl> plogis(z$coef) X1 X2 X3 X4 X5 [1,] 0.2880184 0.3761074 0.4665556 0.6289233 0.737896 [2,] 0.2902740 0.4231689 0.4231689 0.6234846 0.738678 [3,] NA NA NA NA NA [4,] NA NA NA NA NA [5,] NA NA NA NA NA optlvl> plogis(b) A B C D E 0.2689414 0.4501660 0.4501660 0.6224593 0.7310586 optlvl> ## optimal classification optlvl> z$rank X1 X2 X3 X4 X5 [1,] 1 2 3 4 5 [2,] 1 2 2 3 4 [3,] NA NA NA NA NA [4,] NA NA NA NA NA [5,] NA NA NA NA NA optlvl> ## get the final matrix optlvl> x1 <- mefa4::groupSums(x, 2, z$levels[[length(z$levels)]]) optlvl> head(x) [,1] [,2] [,3] [,4] [,5] [1,] 0.2258750 0.21671095 0.14615367 0.17407228 0.23718812 [2,] 0.2256225 0.10514521 0.10039234 0.20888497 0.35995493 [3,] 0.4995209 0.29345552 0.10323013 0.04264214 0.06115135 [4,] 0.1150706 0.22726090 0.09395935 0.20075919 0.36294998 [5,] 0.1998835 0.03883339 0.01372342 0.53850776 0.20905194 [6,] 0.3048449 0.04987853 0.15529267 0.46033369 0.02965023 optlvl> head(x1) X1 X2+X3 X4 X5 [1,] 0.2258750 0.36286462 0.17407228 0.23718812 [2,] 0.2256225 0.20553755 0.20888497 0.35995493 [3,] 0.4995209 0.39668566 0.04264214 0.06115135 [4,] 0.1150706 0.32122025 0.20075919 0.36294998 [5,] 0.1998835 0.05255681 0.53850776 0.20905194 [6,] 0.3048449 0.20517120 0.46033369 0.02965023 optlvl> ## compare the models optlvl> m0 <- glm(y ~ x - 1, family="poisson") optlvl> m1 <- glm(y ~ x1 - 1, family="poisson") optlvl> data.frame(AIC(m0, m1), delta=AIC(m0, m1)$AIC - AIC(m0)) df AIC delta m0 5 1298.452 0.000000 m1 4 1297.381 -1.071501 optlvl> z$delta # should be identical [1] 0.000000 -1.071501 NA NA NA optlvl> ## dolina example with factor optlvl> data(dolina) optlvl> dolina$samp$stratum <- as.integer(dolina$samp$stratum) optlvl> y <- dolina$xtab[dolina$samp$method == "Q", "ppyg"] optlvl> x <- dolina$samp$mhab[dolina$samp$method == "Q"] optlvl> z <- scale(model.matrix(~ stratum + lmoist - 1, optlvl+ dolina$samp[dolina$samp$method == "Q",])) optlvl> ## without additional covariates optlvl> dol1 <- optilevels(y, x, z=NULL, dist="poisson") optlvl> dol1$rank LI DW TL RO [1,] 3 2 1 4 [2,] 2 1 1 3 [3,] NA NA NA NA [4,] NA NA NA NA optlvl> summary(bestmodel(dol1)) Call: stats::glm(formula = Y ~ . - 1, family = Family, data = XX) Coefficients: Estimate Std. Error z value Pr(>|z|) LI 2.16496 0.03201 67.64 <2e-16 *** `DW+TL` 1.52470 0.04762 32.02 <2e-16 *** RO 2.44596 0.04249 57.57 <2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 (Dispersion parameter for poisson family taken to be 1) Null deviance: 8332.5 on 256 degrees of freedom Residual deviance: 3481.6 on 253 degrees of freedom AIC: 4110 Number of Fisher Scoring iterations: 6 optlvl> ## with additional covariates optlvl> dol2 <- optilevels(y, x, z, dist="poisson") optlvl> dol2$rank LI DW TL RO [1,] 4 1 2 3 [2,] 3 1 1 2 [3,] 2 1 1 2 [4,] NA NA NA NA optlvl> summary(bestmodel(dol2)) Call: stats::glm(formula = Y ~ . - 1, family = Family, data = XX) Coefficients: Estimate Std. Error z value Pr(>|z|) `LI+RO` 2.24842 0.02698 83.324 < 2e-16 *** `DW+TL` 1.46044 0.04997 29.227 < 2e-16 *** stratum -0.08785 0.02514 -3.495 0.000475 *** lmoist 0.20390 0.02031 10.038 < 2e-16 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 (Dispersion parameter for poisson family taken to be 1) Null deviance: 8332.5 on 256 degrees of freedom Residual deviance: 3377.2 on 252 degrees of freedom AIC: 4007.6 Number of Fisher Scoring iterations: 6 optlvl> ## compare the two models optlvl> AIC(bestmodel(dol1), bestmodel(dol2)) df AIC bestmodel(dol1) 3 4110.004 bestmodel(dol2) 4 4007.575 ---------- opticut example: multicut ---------- multct> ## --- Gaussian multct> ## simple example from Legendre 2013 multct> ## Indicator Species: Computation, in multct> ## Encyclopedia of Biodiversity, Volume 4 multct> ## https://dx.doi.org/10.1016/B978-0-12-384719-5.00430-5 multct> gr <- as.factor(paste0("X", rep(1:5, each=5))) multct> spp <- cbind(Species1=rep(c(4,6,5,3,2), each=5), multct+ Species2=c(rep(c(8,4,6), each=5), 4,4,2, rep(0,7)), multct+ Species3=rep(c(18,2,0,0,0), each=5)) multct> rownames(spp) <- gr multct> ## must add some noise to avoid perfect fit multct> spp[6, "Species1"] <- 7 multct> spp[1, "Species3"] <- 17 multct> spp Species1 Species2 Species3 X1 4 8 17 X1 4 8 18 X1 4 8 18 X1 4 8 18 X1 4 8 18 X2 7 4 2 X2 6 4 2 X2 6 4 2 X2 6 4 2 X2 6 4 2 X3 5 6 0 X3 5 6 0 X3 5 6 0 X3 5 6 0 X3 5 6 0 X4 3 4 0 X4 3 4 0 X4 3 2 0 X4 3 0 0 X4 3 0 0 X5 2 0 0 X5 2 0 0 X5 2 0 0 X5 2 0 0 X5 2 0 0 multct> ## negative expected values are not good multct> oco <- ocoptions(fix_fitted=TRUE) multct> summary(ocall <- multicut(spp ~ 1, strata=gr, dist="gaussian")) Multivariate multticut results, dist = gaussian Call: multicut.formula(formula = spp ~ 1, strata = gr, dist = "gaussian") Species models with logLR >= 2: split assoc I logLR Species2 X1+X3 +++ 0.9993 32.53 Species1 X2+X3 +++ 0.9705 52.87 Species3 X1 +++ 1.0000 91.55 multct> summary(multicut(spp, strata=gr, dist="gaussian")) # alternative Multivariate multticut results, dist = gaussian Call: multicut.default(Y = spp, strata = gr, dist = "gaussian") Species models with logLR >= 2: split assoc I logLR Species2 X1+X3 +++ 0.9993 32.53 Species1 X2+X3 +++ 0.9705 52.87 Species3 X1 +++ 1.0000 91.55 multct> ocoptions(oco) # reset options multct> ## --- Binomial multct> ## simulated binary data multct> set.seed(1234) multct> n <- 200 multct> x0 <- sample(1:4, n, TRUE) multct> x1 <- ifelse(x0 <= 2, 1, 0) multct> x2 <- rnorm(n, 0.5, 1) multct> p1 <- plogis(-0.5 + 2*x1 + -0.8*x2) multct> Y1 <- rbinom(n, 1, p1) multct> p2 <- plogis(-0.1 + 2*ifelse(x0==4,1,0) + -0.8*x2) multct> Y2 <- rbinom(n, 1, p2) multct> p3 <- plogis(-0.1 + -0.8*x2) multct> Y3 <- rbinom(n, 1, p3) multct> Y <- cbind(SPP1=Y1, SPP2=Y2, SPP3=Y3) multct> X <- model.matrix(~x2) multct> (m0 <- multicut1(Y1, X, as.factor(x0), dist="binomial")) Univariate multicut results, dist = binomial logLR = 17.77 (logL_null = -132.3) Expected values: 1 2 3 4 0.7260 0.7771 0.4076 0.2789 multct> lcplot(m0) multct> summary(m1 <- multicut(Y ~ x2, strata=x0, dist="poisson")) Multivariate multticut results, dist = poisson Call: multicut.formula(formula = Y ~ x2, strata = x0, dist = "poisson") Species models with logLR >= 2: split assoc I logLR SPP1 1+2 +++ 0.5048 8.894 SPP2 4 ++ 0.3644 5.902 1 species not shown multct> plot(m1) multct> ## subset results multct> summary(subset(m1, 1:2)) Multivariate multticut results, dist = poisson Call: multicut.formula(formula = Y ~ x2, strata = x0, dist = "poisson") Species models with logLR >= 2: split assoc I logLR SPP1 1+2 +++ 0.5048 8.894 SPP2 4 ++ 0.3644 5.902 multct> ## best partition multct> head(bestpart(m1)) SPP1 SPP2 SPP3 4 0 1 1 4 0 1 1 2 1 0 0 2 1 0 0 1 1 0 1 4 0 1 1 multct> ## best model multct> mods <- bestmodel(m1) multct> mods $SPP1 Call: stats::glm(formula = Y ~ . - 1, family = Family, data = XX) Coefficients: `(Intercept)` Z2 Z3 Z4 x2 -0.32787 0.07209 -0.65520 -1.03924 -0.25016 Degrees of Freedom: 200 Total (i.e. Null); 195 Residual Null Deviance: 212 Residual Deviance: 118.2 AIC: 316.2 $SPP2 Call: stats::glm(formula = Y ~ . - 1, family = Family, data = XX) Coefficients: `(Intercept)` Z2 Z3 Z4 x2 -0.6518 -0.1977 -0.1922 0.5662 -0.3358 Degrees of Freedom: 200 Total (i.e. Null); 195 Residual Null Deviance: 200 Residual Deviance: 115.9 AIC: 325.9 $SPP3 Call: stats::glm(formula = Y ~ . - 1, family = Family, data = XX) Coefficients: `(Intercept)` Z2 Z3 Z4 x2 -0.85213 -0.31133 -0.03570 0.06478 -0.51151 Degrees of Freedom: 200 Total (i.e. Null); 195 Residual Null Deviance: 266 Residual Deviance: 126.7 AIC: 270.7 multct> ## explore further multct> confint(mods[[1]]) Waiting for profiling to be done... 2.5 % 97.5 % `(Intercept)` -0.7365077 0.03360653 Z2 -0.4137503 0.57263652 Z3 -1.2951680 -0.04816757 Z4 -1.7561319 -0.38232824 x2 -0.4438465 -0.05168407 multct> ## MLE and variance-covariance matrix (species 1) multct> getMLE(m1, which = 1, vcov=TRUE) $coef `(Intercept)` Z2 Z3 Z4 x2 -0.32787491 0.07209324 -0.65519918 -1.03924068 -0.25016397 $vcov `(Intercept)` Z2 Z3 Z4 `(Intercept)` 0.038311276 -3.706629e-02 -0.03722583 -0.0369859922 Z2 -0.037066290 6.267864e-02 0.03704133 0.0370358101 Z3 -0.037225827 3.704133e-02 0.09956289 0.0370294262 Z4 -0.036985992 3.703581e-02 0.03702943 0.1203610295 x2 -0.003575346 8.222807e-05 0.00052985 -0.0001430668 x2 `(Intercept)` -3.575346e-03 Z2 8.222807e-05 Z3 5.298500e-04 Z4 -1.430668e-04 x2 1.003152e-02 $dist [1] "poisson" multct> ## fitted values multct> head(fitted(m1)) SPP1 SPP2 SPP3 1 0.2027267 0.6752329 0.2850377 2 0.2532343 0.9101916 0.4492035 3 0.6720834 0.3536149 0.2338745 4 0.7747911 0.4279878 0.3127995 5 0.7816706 0.5813804 0.5038994 6 0.2156772 0.7337562 0.3235123 multct> ## prediction for new data multct> head(predict(m1, gnew=x0, xnew=data.frame(x2=x2))) SPP1 SPP2 SPP3 1 0.2027267 0.6752329 0.2850377 2 0.2532343 0.9101916 0.4492035 3 0.6720834 0.3536149 0.2338745 4 0.7747911 0.4279878 0.3127995 5 0.7816706 0.5813804 0.5038994 6 0.2156772 0.7337562 0.3235123 multct> ## --- Zero-inflated Negative Binomial multct> ## dolina example multct> data(dolina) multct> ## stratum as ordinal multct> dolina$samp$stratum <- as.integer(dolina$samp$stratum) multct> ## filter species to speed up things a bit multct> Y <- dolina$xtab[,colSums(dolina$xtab > 0) >= 20] multct> ## opticut results, note the cloglog link function multct> dol <- multicut(Y ~ stratum + lmoist + method, data=dolina$samp, multct+ strata=dolina$samp$mhab, dist="zinb:cloglog") multct> summary(dol) Multivariate multticut results, dist = zinb:cloglog Call: multicut.formula(formula = Y ~ stratum + lmoist + method, data = dolina$samp, strata = dolina$samp$mhab, dist = "zinb:cloglog") Species models with logLR >= 2: split assoc I logLR hobv DW+RO +++ 0.9302 41.753 bbip DW+RO +++ 0.8204 38.877 pinc DW+RO +++ 0.7474 18.724 clam DW+RO ++ 0.5769 4.589 vidi DW+RO +++ 0.5371 10.329 pvic DW+RO ++ 0.5128 7.786 dper DW+RO +++ 0.4695 13.555 ppyg LI+RO ++ 0.3695 5.209 amin LI+RO +++ 0.3264 8.692 cort TL+RO +++ 1.0000 26.996 mbor DW +++ 1.0000 25.139 iiso RO +++ 0.8720 16.352 ffau RO +++ 0.8698 28.280 estr RO +++ 0.7096 15.917 ctri RO ++ 0.7088 6.916 tuni RO +++ 0.6401 15.430 apur RO ++ 0.5724 7.090 bcan TL +++ 0.8914 12.414 1 species not shown multct> ## vertical plot orientation multct> plot(dol, horizontal=FALSE, pos=1, upper=0.8) multct> ## parallel multct> library(parallel) multct> cl <- makeCluster(2) multct> multicut(Y ~ stratum + lmoist + method, data=dolina$samp, multct+ strata=dolina$samp$mhab, dist="zip",cl=cl) Multivariate multicut results, dist = zip Call: multicut.formula(formula = Y ~ stratum + lmoist + method, data = dolina$samp, strata = dolina$samp$mhab, dist = "zip", cl = cl) 19 species multct> stopCluster(cl) multct> ## --- Customizing distributions multct> ## we may want to expand the Zero-inflation component in a ZIP model multct> ## see how the return value needs to be structured multct> fun <- function(Y, X, linkinv, zi_term, ...) { multct+ X <- as.matrix(X) multct+ mod <- pscl::zeroinfl(Y ~ X-1 | zi_term, dist = "poisson", ...) multct+ list(coef=coef(mod), multct+ logLik=logLik(mod), multct+ linkinv=mod$linkinv) multct+ } multct> Xdol <- model.matrix(~ stratum + lmoist + method, data=dolina$samp) multct> ## this fits the null model (i.e. no partitions added) multct> fun(Y[,"amin"], Xdol, zi_term=dolina$samp$method) $coef count_X(Intercept) count_Xstratum count_Xlmoist count_XmethodT 1.30243481 -0.14266929 0.02460366 -0.64335270 zero_(Intercept) zero_zi_termT -0.56590398 0.61664601 $logLik 'log Lik.' -788.9897 (df=6) $linkinv function (eta) .Call(C_logit_linkinv, eta) multct> ## now we can use dist=fun multct> multicut1(Y[,"amin"], Xdol, Z=dolina$samp$mhab, multct+ dist=fun, zi_term=dolina$samp$method) Univariate multicut results, dist = fun logLR = 17.9 (logL_null = -789) Expected values: LI DW TL RO 0.8460 0.7864 0.7782 0.8767 multct> dol2 <- multicut(Y ~ stratum + lmoist + method, data=dolina$samp, multct+ strata=dolina$samp$mhab, dist=fun, zi_term=dolina$samp$method) multct> summary(dol2) Multivariate multticut results, dist = fun Call: multicut.formula(formula = Y ~ stratum + lmoist + method, data = dolina$samp, strata = dolina$samp$mhab, dist = fun, zi_term = dolina$samp$method) Species models with logLR >= 2: split assoc I logLR mbor DW+TL+RO +++ 1.0000 28.969 bcan DW+TL+RO +++ 0.8922 13.148 hobv DW+RO +++ 0.9351 51.488 ffau DW+RO +++ 0.8436 24.161 bbip DW+RO +++ 0.8080 54.198 pinc DW+RO +++ 0.7532 18.820 clam DW+RO ++ 0.6069 6.393 vidi DW+RO +++ 0.5604 20.579 pvic DW+RO +++ 0.5245 9.294 dper DW+RO +++ 0.5006 50.815 apur LI+RO +++ 0.6789 49.794 ctri LI+RO +++ 0.6475 32.208 amin LI+RO +++ 0.3393 17.897 ppyg LI+RO +++ 0.2812 43.916 cort TL+RO +++ 1.0000 27.399 aacu TL+RO ++ 0.4452 6.758 iiso RO +++ 0.8738 17.276 estr RO +++ 0.6762 14.571 tuni RO +++ 0.5201 13.684 ---------- opticut example: sindex ---------- ---------- opticut example: lorenz ---------- lorenz> set.seed(1) lorenz> x <- c(rexp(100, 10), rexp(200, 1)) lorenz> l <- lorenz(x) lorenz> head(l) p L x [1,] 0.000000000 0.000000e+00 0.000000000 [2,] 0.003333333 8.256055e-06 0.001700975 [3,] 0.006666667 2.634516e-05 0.003726853 [4,] 0.010000000 4.819482e-05 0.004501631 [5,] 0.013333333 7.346108e-05 0.005205545 [6,] 0.016666667 1.008080e-04 0.005634216 lorenz> tail(l) p L x [296,] 0.9833333 0.8954549 3.173340 [297,] 0.9866667 0.9112748 3.259337 [298,] 0.9900000 0.9279782 3.441357 [299,] 0.9933333 0.9470105 3.921174 [300,] 0.9966667 0.9692697 4.586020 [301,] 1.0000000 1.0000000 6.331284 lorenz> summary(l) Lorenz curve summary x[t] p[t] L[t] G S J 0.6827 0.6433 0.1934 0.5909 0.8367 0.4500 lorenz> summary(unclass(l)) p L x Min. :0.00 Min. :0.00000 Min. :0.00000 1st Qu.:0.25 1st Qu.:0.01708 1st Qu.:0.09968 Median :0.50 Median :0.08771 Median :0.36917 Mean :0.50 Mean :0.20551 Mean :0.68448 3rd Qu.:0.75 3rd Qu.:0.32198 3rd Qu.:1.01262 Max. :1.00 Max. :1.00000 Max. :6.33128 lorenz> (q <- c(0.05, 0.5, 0.95)) [1] 0.05 0.50 0.95 lorenz> (p_i <- quantile(l, probs=q, type="p")) 5% 50% 95% 0.02035104 0.36917103 2.25115270 lorenz> iquantile(l, values=p_i, type="p") 0.02035104 0.36917103 2.25115270 0.05 0.50 0.95 lorenz> (p_i <- quantile(l, probs=q, type="L")) 5% 50% 95% 0.2198279 1.4773854 4.5860202 lorenz> iquantile(l, values=p_i, type="L") 0.2198279 1.4773854 4.5860202 0.05039184 0.50430635 0.96926972 lorenz> op <- par(mfrow=c(2,1)) lorenz> plot(l, lwd=2, tangent=2, h=3, v=4) lorenz> abline(0, 1, lty=2, col="grey") lorenz> abline(1, -1, lty=2, col="grey") lorenz> plot(l, type="x", lwd=2, h=3, v=4) lorenz> par(op) lorenz> ## Lorenz-tangent approach to binarize a multi-level problem lorenz> n <- 100 lorenz> g <- as.factor(sort(sample(LETTERS[1:4], n, replace=TRUE, prob=4:1))) lorenz> x <- rpois(n, exp(as.integer(g))) lorenz> mu <- aggregate(x, list(g), mean) lorenz> (l <- lorenz(mu$x, table(g))) p L x 0.00 0.00000000 0.000000 A 0.41 0.07932011 2.731707 B 0.70 0.22592068 7.137931 C 0.86 0.45467422 20.187500 D 1.00 1.00000000 55.000000 attr(,"summary") t x[t] p[t] L[t] G S J 3.0000000 7.1379310 0.7000000 0.2259207 0.3700425 0.9259207 0.4740793 attr(,"class") [1] "lorenz" "matrix" lorenz> (s <- summary(l)) Lorenz curve summary x[t] p[t] L[t] G S J 7.1379 0.7000 0.2259 0.3700 0.9259 0.4741 lorenz> plot(l) lorenz> abline(0, 1, lty=2) lorenz> lines(rep(s["p[t]"], 2), c(s["p[t]"], s["L[t]"]), col=2) ---------- opticut example: beta2i ---------- beta2i> x <- seq(-5, 5, 0.1) beta2i> Col <- occolors(c("red", "blue"))(10) beta2i> plot(x, beta2i(x), type = "n") beta2i> s <- seq(1, 0.1, -0.1) beta2i> for (i in 1:10) { beta2i+ lines(x, beta2i(x, scale = s[i]), col = Col[i]) beta2i+ text(1.5 - 0.2, beta2i(1.5, scale = s[i]), s[i], col = Col[i]) beta2i+ } ---------- opticut example: allComb ---------- allCmb> kComb(k = 2) [,1] [1,] 1 [2,] 0 allCmb> kComb(k = 3) [,1] [,2] [,3] [1,] 1 0 0 [2,] 0 1 0 [3,] 0 0 1 allCmb> kComb(k = 4) [,1] [,2] [,3] [,4] [,5] [,6] [,7] [1,] 1 0 0 0 1 1 1 [2,] 0 1 0 0 1 0 0 [3,] 0 0 1 0 0 1 0 [4,] 0 0 0 1 0 0 1 allCmb> ## finding all combinations allCmb> (f <- rep(LETTERS[1:4], each=2)) [1] "A" "A" "B" "B" "C" "C" "D" "D" allCmb> (mc <- allComb(f, collapse = "_")) A B C D A_B A_C A_D A 1 0 0 0 1 1 1 A 1 0 0 0 1 1 1 B 0 1 0 0 1 0 0 B 0 1 0 0 1 0 0 C 0 0 1 0 0 1 0 C 0 0 1 0 0 1 0 D 0 0 0 1 0 0 1 D 0 0 0 1 0 0 1 attr(,"collapse") [1] "_" attr(,"comb") [1] "all" allCmb> ## checking for complementary entries allCmb> checkComb(mc) # TRUE [1] TRUE attr(,"comp") i j attr(,"same") i j allCmb> ## adding complementary entries to the matrix allCmb> mc2 <- cbind(z = 1 - mc[,1], mc[,c(1:ncol(mc), 1)]) allCmb> colnames(mc2) <- 1:ncol(mc2) allCmb> mc2 1 2 3 4 5 6 7 8 9 A 0 1 0 0 0 1 1 1 1 A 0 1 0 0 0 1 1 1 1 B 1 0 1 0 0 1 0 0 0 B 1 0 1 0 0 1 0 0 0 C 1 0 0 1 0 0 1 0 0 C 1 0 0 1 0 0 1 0 0 D 1 0 0 0 1 0 0 1 0 D 1 0 0 0 1 0 0 1 0 allCmb> checkComb(mc2) # FALSE [1] FALSE attr(,"comp") i j [1,] 1 2 [2,] 1 9 attr(,"same") i j [1,] 9 2 ---------- opticut example: rankComb ---------- rnkCmb> ## simulate some data rnkCmb> set.seed(1234) rnkCmb> n <- 200 rnkCmb> x0 <- sample(1:4, n, TRUE) rnkCmb> x1 <- ifelse(x0 %in% 1:2, 1, 0) rnkCmb> x2 <- rnorm(n, 0.5, 1) rnkCmb> lam <- exp(0.5 + 0.5*x1 + -0.2*x2) rnkCmb> Y <- rpois(n, lam) rnkCmb> ## binary partitions rnkCmb> head(rc <- rankComb(Y, model.matrix(~x2), as.factor(x0), dist="poisson")) 2 1+2 1+2+4 4 0 0 1 4 0 0 1 2 1 1 1 2 1 1 1 1 0 1 1 4 0 0 1 rnkCmb> attr(rc, "est") # expected values in factor levels 1 2 3 4 2.767840 2.792267 1.563285 1.757870 rnkCmb> aggregate(exp(0.5 + 0.5*x1), list(x0=x0), mean) # true values x0 x 1 1 2.718282 2 2 2.718282 3 3 1.648721 4 4 1.648721 rnkCmb> ## simple example rnkCmb> oComb(1:4, "+") 1 1+2 1+2+3 1 1 1 1 2 0 1 1 3 0 0 1 4 0 0 0 rnkCmb> ## using estimates rnkCmb> oComb(attr(rc, "est")) 3 3+4 1+3+4 1 0 0 1 2 0 0 0 3 1 1 1 4 0 1 1 ---------- opticut example: uncertainty ---------- uncrtn> set.seed(2345) uncrtn> n <- 50 uncrtn> x0 <- sample(1:4, n, TRUE) uncrtn> x1 <- ifelse(x0 %in% 1:2, 1, 0) uncrtn> x2 <- rnorm(n, 0.5, 1) uncrtn> x3 <- ifelse(x0 %in% 2:4, 1, 0) uncrtn> lam1 <- exp(0.5 + 1*x1 + -0.2*x2) uncrtn> Y1 <- rpois(n, lam1) uncrtn> lam2 <- exp(1 + 0.5*x3) uncrtn> Y2 <- rpois(n, lam2) uncrtn> Y3 <- rpois(n, exp(0)) uncrtn> Y <- cbind(Spp1=Y1, Spp2=Y2, Spp3=Y3) uncrtn> oc <- opticut(Y ~ x2, strata=x0, dist="poisson", comb="rank") uncrtn> ## asymptotic confidence intervals uncrtn> (uc1 <- uncertainty(oc, type="asymp", B=999)) Multivariate multicut uncertainty results, type = asymp, B = 999 uncrtn> summary(uc1) Multivariate multicut uncertainty results type = asymp, B = 999, level = 0.95 split R I Lower Upper Spp1 1+2 1 0.3756 0.205644 0.5150 Spp3 1+2+3 1 0.3438 0.016874 0.6984 Spp2 2+3+4 1 0.1354 0.007452 0.2944 uncrtn> ## bootstrap-based confidence intervals uncrtn> (uc2 <- uncertainty(oc, type="boot", B=19)) Multivariate multicut uncertainty results, type = boot, B = 19 uncrtn> summary(uc2) Multivariate multicut uncertainty results type = boot, B = 19, level = 0.95 split R I Lower Upper Spp1 1+2 1 0.3688 0.266965 0.4710 Spp3 1+2+3 1 0.3689 0.023399 0.8796 Spp2 2+3+4 1 0.1354 0.003183 0.2706 uncrtn> ## use user-supplied indices uncrtn> ## multi-model bootstrap based uncertainties uncrtn> B <- replicate(25, sample.int(n, replace=TRUE)) uncrtn> check_strata(oc, B) # check representation [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE attr(,"nx") [1] 4 attr(,"nmat") [1] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 uncrtn> (uc3 <- uncertainty(oc, type="multi", B=B)) Multivariate multicut uncertainty results, type = multi, B = 25 uncrtn> summary(uc3) Multivariate multicut uncertainty results type = multi, B = 25, level = 0.95 split R I Lower Upper Spp1 1+2 0.7308 0.3980 0.27604 0.552 Spp3 1+2+3 0.3846 0.5624 0.21287 1.000 Spp2 2+3+4 0.4615 0.1760 0.09648 0.266 uncrtn> ## best partitions: uncrtn> ## selection frequencies for strata and species uncrtn> bestpart(uc3) Spp1 Spp2 Spp3 1 1.0000000 0.03846154 0.6538462 2 1.0000000 0.73076923 0.6538462 3 0.0000000 0.92307692 0.8461538 4 0.2692308 0.65384615 0.0000000 uncrtn> heatmap(bestpart(uc3), scale="none", col=occolors()(25)) uncrtn> ## bootstrap smoothed predictions per strata uncrtn> bsmooth(uc3) Spp1 Spp2 Spp3 1 4.242728 3.413736 0.6399826 2 4.242728 4.330780 0.6884190 3 1.808303 4.583842 0.7642625 4 2.429689 4.188160 0.3545554 uncrtn> heatmap(bestpart(uc3), scale="none", col=occolors()(25)) uncrtn> ## individual species results uncrtn> uc3$uncertainty $Spp1 Univariate multicut uncertainty results, type = multi, B = 25 best I mu0 mu1 Length:26 Min. :0.2675 Min. :1.159 Min. :3.436 Class :character 1st Qu.:0.3360 1st Qu.:1.502 1st Qu.:3.974 Mode :character Median :0.4088 Median :1.777 Median :4.184 Mean :0.4065 Mean :1.808 Mean :4.243 3rd Qu.:0.4719 3rd Qu.:2.108 3rd Qu.:4.405 Max. :0.5892 Max. :2.764 Max. :5.502 $Spp2 Univariate multicut uncertainty results, type = multi, B = 25 best I mu0 mu1 Length:26 Min. :0.0258 Min. :2.312 Min. :3.844 Class :character 1st Qu.:0.1105 1st Qu.:3.097 1st Qu.:4.400 Mode :character Median :0.1344 Median :3.359 Median :4.659 Mean :0.1559 Mean :3.394 Mean :4.627 3rd Qu.:0.2209 3rd Qu.:3.724 3rd Qu.:4.944 Max. :0.2813 Max. :4.225 Max. :5.321 $Spp3 Univariate multicut uncertainty results, type = multi, B = 25 best I mu0 mu1 Length:26 Min. :0.1554 Min. :1.000e-08 Min. :0.6147 Class :character 1st Qu.:0.2951 1st Qu.:2.790e-01 1st Qu.:0.6926 Mode :character Median :0.3702 Median :3.674e-01 Median :0.7865 Mean :0.4280 Mean :3.546e-01 Mean :0.8334 3rd Qu.:0.4884 3rd Qu.:4.624e-01 3rd Qu.:0.9216 Max. :1.0000 Max. :6.937e-01 Max. :1.7098 uncrtn> bestpart(uc3$uncertainty[[1]]) [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 0 0 0 0 0 0 0 0 0 0 1 0 0 1 [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 3 0 0 0 0 0 0 0 0 0 0 0 0 4 0 1 0 1 0 1 1 0 0 0 1 0 uncrtn> bsmooth(uc3$uncertainty[[1]]) [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] 1 4.353309 4.123784 4.796411 5.502443 4.966979 3.956581 4.263636 4.066393 2 4.353309 4.123784 4.796411 5.502443 4.966979 3.956581 4.263636 4.066393 3 1.968847 1.752355 2.048311 1.802075 2.764274 1.307073 2.327590 2.026677 4 1.968847 1.752355 2.048311 1.802075 2.764274 1.307073 2.327590 2.026677 [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] 1 4.547626 4.154616 3.436177 4.320181 4.421835 3.510415 4.258540 3.983873 2 4.547626 4.154616 3.436177 4.320181 4.421835 3.510415 4.258540 3.983873 3 2.368197 1.727303 1.399548 2.128446 1.869369 1.312151 1.990268 1.277240 4 2.368197 1.727303 3.436177 2.128446 1.869369 3.510415 1.990268 3.983873 [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] 1 3.906009 3.714836 3.921941 4.242182 4.214182 4.605661 3.970598 4.998947 2 3.906009 3.714836 3.921941 4.242182 4.214182 4.605661 3.970598 4.998947 3 2.251073 2.147011 1.628562 2.208800 1.491471 1.532036 1.559550 1.292377 4 2.251073 3.714836 1.628562 4.242182 4.214182 1.532036 1.559550 1.292377 [,25] [,26] 1 4.049467 4.024312 2 4.049467 4.024312 3 1.158853 1.676410 4 4.049467 1.676410 uncrtn> ## block bootstrap uncrtn> block_fun <- function() uncrtn+ unlist(lapply(unique(x0), function(z) if (sum(x0==z) < 2) uncrtn+ which(x0==z) else sample(which(x0==z), sum(x0==z), replace=TRUE))) uncrtn> B <- replicate(25, block_fun()) uncrtn> check_strata(oc, B) # check representation [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE attr(,"nx") [1] 4 attr(,"nmat") [1] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 uncrtn> summary(uncertainty(oc, type="multi", B=B)) Multivariate multicut uncertainty results type = multi, B = 25, level = 0.95 split R I Lower Upper Spp1 1+2 0.8077 0.3841 0.2339 0.5036 Spp3 1+2+3 0.4231 0.4046 0.2604 0.5733 Spp2 2+3+4 0.5000 0.2080 0.1272 0.3285 uncrtn> ## jackknife uncrtn> B <- sapply(1:n, function(i) which((1:n) != i)) uncrtn> check_strata(oc, B) # check representation [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE [46] TRUE TRUE TRUE TRUE TRUE attr(,"nx") [1] 4 attr(,"nmat") [1] 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 [39] 4 4 4 4 4 4 4 4 4 4 4 4 uncrtn> summary(uncertainty(oc, type="multi", B=B)) Multivariate multicut uncertainty results type = multi, B = 50, level = 0.95 split R I Lower Upper Spp1 1+2 1.0000 0.3772 0.3604 0.3995 Spp3 1+2+3 0.6471 0.3526 0.3227 0.4232 Spp2 2+3+4 1.0000 0.1328 0.1009 0.1714 uncrtn> ## multicut based uncertainty uncrtn> mc <- multicut(Y ~ x2, strata=x0, dist="poisson") uncrtn> ## asymptotic confidence intervals uncrtn> (muc1 <- uncertainty(mc, type="asymp", B=999)) Multivariate multicut uncertainty results, type = asymp, B = 999 uncrtn> summary(muc1) Multivariate multicut uncertainty results type = asymp, B = 999, level = 0.95 split R I Lower Upper Spp1 1+2 0.916 0.4743 0.30552 0.6334 Spp3 2+3 0.303 0.4930 0.19709 0.7854 Spp2 2+3 0.212 0.2195 0.08524 0.3912 uncrtn> bestpart(muc1) Spp1 Spp2 Spp3 1 0.987 0.085 0.376 2 0.999 0.580 0.469 3 0.001 0.776 0.888 4 0.073 0.561 0.070 uncrtn> ## bootstrap-based confidence intervals uncrtn> (muc2 <- uncertainty(mc, type="boot", B=19)) Multivariate multicut uncertainty results, type = boot, B = 19 uncrtn> summary(muc2) Multivariate multicut uncertainty results type = boot, B = 19, level = 0.95 split R I Lower Upper Spp1 1+2 0.9 0.4879 0.3619 0.6358 Spp3 2+3 0.4 0.5490 0.4102 0.9272 Spp2 2+3+4 0.4 0.2456 0.1542 0.3829 uncrtn> bestpart(muc2) Spp1 Spp2 Spp3 1 0.95 0.05 0.25 2 1.00 0.80 0.45 3 0.00 0.85 0.90 4 0.05 0.55 0.05 uncrtn> ## dolina example uncrtn> data(dolina) uncrtn> ## stratum as ordinal uncrtn> dolina$samp$stratum <- as.integer(dolina$samp$stratum) uncrtn> ## filter species to speed up things a bit uncrtn> Y <- ifelse(dolina$xtab[,colSums(dolina$xtab > 0) >= 20] > 0, 1, 0) uncrtn> ## opticut results, note the cloglog link function uncrtn> dol <- opticut(Y ~ stratum + lmoist + method, data=dolina$samp, uncrtn+ strata=dolina$samp$mhab, dist="binomial:cloglog") uncrtn> ## parallel computing for uncertainty uncrtn> library(parallel) uncrtn> cl <- makeCluster(2) uncrtn> ucdol <- uncertainty(dol, type="multi", B=25, cl=cl) uncrtn> stopCluster(cl) uncrtn> bestpart(ucdol) aacu amin apur bbip bcan clam cort LI 0.92307692 0.50000000 0.03846154 0.0000000 0.0000000 0.07692308 0.0000000 DW 0.46153846 0.07692308 0.92307692 0.6923077 0.4615385 0.80769231 0.5384615 TL 0.03846154 0.03846154 0.07692308 0.0000000 1.0000000 0.50000000 1.0000000 RO 0.88461538 1.00000000 0.96153846 1.0000000 0.1923077 0.73076923 0.8076923 ctri dper estr ffau hobv iiso mbor pinc LI 0.07692308 0.00000000 0.0000000 0 0 0.00000000 0.00000000 0.00000000 DW 0.61538462 0.92307692 0.0000000 0 1 0.03846154 1.00000000 0.42307692 TL 0.03846154 0.07692308 0.1153846 0 0 0.03846154 0.03846154 0.03846154 RO 1.00000000 0.96153846 1.0000000 1 1 1.00000000 0.30769231 1.00000000 ppyg pvic tuni vidi LI 0.92307692 0.03846154 0.03846154 0.2692308 DW 0.07692308 0.61538462 0.00000000 1.0000000 TL 0.19230769 0.15384615 0.00000000 0.0000000 RO 1.00000000 0.96153846 1.00000000 1.0000000 uncrtn> heatmap(t(bestpart(ucdol)), scale="none", col=occolors()(25), uncrtn+ distfun=function(x) dist(x, "manhattan")) uncrtn> ## See how indicator value changes with different partitions uncrtn> ## (and why it is the wrong metric to use in this calse) uncrtn> with(ucdol$uncertainty[["pvic"]], uncrtn+ boxplot(I ~ best, col="gold", ylab="Indicator value")) uncrtn> ## What we should calculate is the bootstrap smoothed mean of the uncrtn> ## expected value and its confidence intervals uncrtn> bs <- bsmooth(ucdol$uncertainty[["pvic"]]) uncrtn> boxplot(t(bs), ylab="Expected value") uncrtn> cbind(Mean=rowMeans(bs), t(apply(bs, 1, quantile, probs=c(0.025, 0.975)))) Mean 2.5% 97.5% LI 0.2001777 0.09004651 0.3267507 DW 0.2759182 0.15710556 0.3714238 TL 0.2080179 0.11342479 0.3181963 RO 0.3635844 0.14240696 0.6655275 uncrtn> ## A more interesting simulated example for bootstrap smoothing uncrtn> ## and comparing opticut vs. multicut uncrtn> set.seed(1) uncrtn> n <- 2000 uncrtn> x <- sort(runif(n, -8, 8)) uncrtn> p <- plogis(0.5 + -0.1 * x + -0.2 * x^2) uncrtn> y <- rbinom(n, 1, p) uncrtn> d <- diff(range(x))/10 uncrtn> br <- seq(min(x), max(x), by=d) uncrtn> g <- cut(x, br, include.lowest=TRUE) uncrtn> levels(g) <- LETTERS[1:nlevels(g)] uncrtn> o <- opticut(y ~ 1, strata=g, dist="binomial") uncrtn> m <- multicut(y ~ 1, strata=g, dist="binomial") uncrtn> library(parallel) uncrtn> cl <- makeCluster(2) uncrtn> uo <- uncertainty(o, type="multi", B=99, cl=cl) uncrtn> um <- uncertainty(m, type="boot", B=99, cl=cl) uncrtn> stopCluster(cl) uncrtn> ## bootstrap average for opticut uncrtn> bs <- bsmooth(uo$uncertainty[[1]]) uncrtn> stat <- cbind(Mean=rowMeans(bs), uncrtn+ t(apply(bs, 1, quantile, probs=c(0.025, 0.975)))) uncrtn> ## bootstrap average for multicut uncrtn> bsm <- as.matrix(um$uncertainty[[1]][,-(1:2)]) uncrtn> statm <- cbind(Mean=colMeans(bsm), uncrtn+ t(apply(bsm, 2, quantile, probs=c(0.025, 0.975)))) uncrtn> op <- par(mfrow=c(2,1)) uncrtn> plot(p ~ x, type="l", ylim=c(0,1), main="Binary partitions (opticut)") uncrtn> abline(v=br, col="grey", lty=3) uncrtn> lines(br[-1]-0.5*d, stat[,1], col=4) uncrtn> lines(br[-1]-0.5*d, stat[,2], col=4, lty=2) uncrtn> lines(br[-1]-0.5*d, stat[,3], col=4, lty=2) uncrtn> lines(br[-1]-0.5*d, bs[,1], col=2) uncrtn> legend("topright", bty="n", lty=c(1,1,2,1), col=c(1,4,4,2), uncrtn+ legend=c("True response","bsmooth","0.95 CI","Best partition")) uncrtn> plot(p ~ x, type="l", ylim=c(0,1), main="Multi-level model (multicut)") uncrtn> abline(v=br, col="grey", lty=3) uncrtn> lines(br[-1]-0.5*d, statm[,1], col=4) uncrtn> lines(br[-1]-0.5*d, statm[,2], col=4, lty=2) uncrtn> lines(br[-1]-0.5*d, statm[,3], col=4, lty=2) uncrtn> legend("topright", bty="n", lty=c(1,1,2), col=c(1,4,4), uncrtn+ legend=c("True response","bsmooth","0.95 CI")) uncrtn> par(op) ---------- opticut example: occolors ---------- occlrs> ## using palettes occlrs> plot(1:100, rep(2, 100), pch = 15, occlrs+ ylim = c(0, 21), axes = FALSE, ann = FALSE, occlrs+ col = occolors()(100)) # default 'bg' occlrs> text(50, 1, "theme = 'br'") occlrs> points(1:100, rep(5, 100), pch = 15, occlrs+ col=occolors("gr")(100)) occlrs> text(50, 4, "theme = 'gr'") occlrs> points(1:100, rep(8, 100), pch = 15, occlrs+ col=occolors("bw")(100)) occlrs> text(50, 7, "theme = 'bw'") occlrs> points(1:100, rep(11, 100), pch = 15, occlrs+ col=occolors(terrain.colors)(100)) occlrs> text(50, 10, "theme = terrain.colors") occlrs> points(1:100, rep(14, 100), pch = 15, occlrs+ col=occolors(c("purple", "pink", "orange"))(100)) occlrs> text(50, 13, "theme = c('purple', 'pink', 'orange')") occlrs> points(1:100, rep(17, 100), pch = 15, occlrs+ col=occolors(c("#a6611a", "#ffffbf", "#018571"))(100)) occlrs> text(50, 16, "theme = c('#a6611a', '#ffffbf', '#018571')") occlrs> points(1:100, rep(20, 100), pch = 15, occlrs+ col=occolors(c("#7b3294", "#ffffbf", "#008837"))(100)) occlrs> text(50, 19, "theme = c('#7b3294', '#ffffbf', '#008837')") occlrs> ## grayscale conversions occlrs> n <- 25 occlrs> col <- occolors("br")(n) occlrs> method <- c("BT.709", "BT.601", occlrs+ "desaturate", "average", "maximum", "minimum", occlrs+ "red", "green", "blue") occlrs> plot(0, type="n", ann=FALSE, axes=FALSE, occlrs+ xlim=c(0, n), ylim=c(3*length(method), 0)) occlrs> for (j in 1:length(method)) { occlrs+ for (i in 1:n) { occlrs+ polygon(c(i-1, i, i, i-1), c(0, 0, 1, 1)+((j-1)*3), occlrs+ col=col[i], border=col[i]) occlrs+ polygon(c(i-1, i, i, i-1), c(1, 1, 2, 2)+((j-1)*3), occlrs+ col=col2gray(col[i], method=method[j]), occlrs+ border=col2gray(col[i], method=method[j])) occlrs+ text(n/2, 1+((j-1)*3), method[j]) occlrs+ } occlrs+ } ---------- opticut example: ocoptions ---------- ocptns> ## simple example from Legendre 2013 ocptns> ## Indicator Species: Computation, in ocptns> ## Encyclopedia of Biodiversity, Volume 4 ocptns> ## https://dx.doi.org/10.1016/B978-0-12-384719-5.00430-5 ocptns> gr <- as.factor(paste0("X", rep(1:5, each=5))) ocptns> spp <- cbind(Species1=rep(c(4,6,5,3,2), each=5), ocptns+ Species2=c(rep(c(8,4,6), each=5), 4,4,2, rep(0,7)), ocptns+ Species3=rep(c(18,2,0,0,0), each=5)) ocptns> rownames(spp) <- gr ocptns> ## must add some noise to avoid perfect fit ocptns> spp[6, "Species1"] <- 7 ocptns> spp[1, "Species3"] <- 17 ocptns> spp Species1 Species2 Species3 X1 4 8 17 X1 4 8 18 X1 4 8 18 X1 4 8 18 X1 4 8 18 X2 7 4 2 X2 6 4 2 X2 6 4 2 X2 6 4 2 X2 6 4 2 X3 5 6 0 X3 5 6 0 X3 5 6 0 X3 5 6 0 X3 5 6 0 X4 3 4 0 X4 3 4 0 X4 3 2 0 X4 3 0 0 X4 3 0 0 X5 2 0 0 X5 2 0 0 X5 2 0 0 X5 2 0 0 X5 2 0 0 ocptns> ## current settings ocptns> print(unlist(ocoptions())) # these give identical answers collapse cut sort theme check_comb "+" "2" "TRUE" "br" "TRUE" try_error scale fix_fitted robust_loglik "FALSE" "0.5" "FALSE" "TRUE" ocptns> unlist(getOption("ocoptions")) collapse cut sort theme check_comb "+" "2" "TRUE" "br" "TRUE" try_error scale fix_fitted robust_loglik "FALSE" "0.5" "FALSE" "TRUE" ocptns> summary(ocall <- opticut(spp ~ 1, strata=gr, dist="gaussian", comb="all")) Multivariate opticut results, comb = all, dist = gaussian Call: opticut.formula(formula = spp ~ 1, strata = gr, dist = "gaussian", comb = "all") Best supported models with logLR >= 2: split assoc I mu0 mu1 logLR w Species2 X1+X3 +++ 0.9866 2.0 7.0 14.82 0.4995 Species1 X2+X3 +++ 0.8617 3.0 5.6 16.74 0.7035 Species3 X1 +++ 1.0000 0.5 17.8 54.26 1.0000 15 binary splits ocptns> ## resetting pboptions and checking new settings ocptns> ocop <- ocoptions(collapse="&", sort=FALSE) ocptns> unlist(getOption("ocoptions")) collapse cut sort theme check_comb "&" "2" "FALSE" "br" "TRUE" try_error scale fix_fitted robust_loglik "FALSE" "0.5" "FALSE" "TRUE" ocptns> ## running again with new settings ocptns> summary(ocall <- opticut(spp ~ 1, strata=gr, dist="gaussian", comb="all")) Multivariate opticut results, comb = all, dist = gaussian Call: opticut.formula(formula = spp ~ 1, strata = gr, dist = "gaussian", comb = "all") Best supported models with logLR >= 2: split assoc I mu0 mu1 logLR w Species1 X2&X3 +++ 0.8617 3.0 5.6 16.74 0.7035 Species2 X1&X3 +++ 0.9866 2.0 7.0 14.82 0.4995 Species3 X1 +++ 1.0000 0.5 17.8 54.26 1.0000 15 binary splits ocptns> ## resetting original ocptns> ocoptions(ocop) ocptns> unlist(getOption("ocoptions")) collapse cut sort theme check_comb "+" "2" "TRUE" "br" "TRUE" try_error scale fix_fitted robust_loglik "FALSE" "0.5" "FALSE" "TRUE" Warning messages: 1: In example(opticut - package, package = "opticut", run.dontrun = TRUE) : no help found for 'opticut - package' 2: In sqrt(diag(vc)[np]) : NaNs produced 3: In multicut.default(Y = Y, X = X, strata = strata, dist = dist, : Negative fitted values found for 1 species. 4: In multicut.default(spp, strata = gr, dist = "gaussian") : Negative fitted values found for 1 species. 5: In example(sindex, package = "opticut", run.dontrun = TRUE) : no help found for 'sindex' > > proc.time() user system elapsed 127.04 9.87 198.75