R Under development (unstable) (2025-01-06 r87534 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2025 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> require("fitdistrplus")
Loading required package: fitdistrplus
Loading required package: MASS
Loading required package: survival
> # We choose a low number of bootstrap replicates in order to satisfy the CRAN running time constraint.
> # For practical applications, we recommend using nbboot=501 or nbboot=1001.
> 
> nbboot <- 1001
> nbboot <- 11
> 
> nsample <- 100
> nsample <- 10
> 
> visualize <- FALSE # TRUE for manual tests with visualization of results
> 
> # (1) Fit of a gamma distribution to serving size data
> # using the default method (maximum likelihood estimation),
> # followed by parametric bootstrap
> #
> data(groundbeef)
> serving <- groundbeef$serving
> f1 <- fitdist(serving, "gamma")
> b1 <- bootdist(f1, niter=nbboot, silent=TRUE)
> b1 <- bootdist(f1, niter=nbboot, silent=FALSE)
> 
> print(lapply(b1, head))
$estim
     shape       rate
1 3.518018 0.04513514
2 3.870860 0.05274972
3 4.085907 0.05277592
4 4.044606 0.05533558
5 3.936531 0.05093270
6 3.667763 0.04974809

$converg
[1] 0 0 0 0 0 0

$method
[1] "param"

$nbboot
[1] 11

$CI
          Median       2.5%      97.5%
shape 3.93653069 3.53485812 4.68797900
rate  0.05274972 0.04551202 0.06430244

$fitpart
$fitpart$estimate
     shape       rate 
4.00955898 0.05443907 

$fitpart$method
[1] "mle"

$fitpart$sd
      shape        rate 
0.341451641 0.004937239 

$fitpart$cor
          shape      rate
shape 1.0000000 0.9384578
rate  0.9384578 1.0000000

$fitpart$vcov
            shape         rate
shape 0.116589223 1.582079e-03
rate  0.001582079 2.437633e-05

$fitpart$loglik
[1] -1253.625

> plot(b1)
> summary(b1)
Parametric bootstrap medians and 95% percentile CI
          Median       2.5%      97.5%
shape 3.93653069 3.53485812 4.68797900
rate  0.05274972 0.04551202 0.06430244
> 
> # (2) new plot arguments
> # for new graph functions
> f1 <- fitdist(rgamma(nsample, 2, 3), "gamma")
> b1 <- bootdist(f1, niter=nbboot, silent=TRUE)
> plot(b1)
> plot(b1, trueval = c(2, 3))
> plot(b1, enhance=TRUE)
> plot(b1, enhance=TRUE, trueval = c(2, 3))
> plot(b1, enhance=TRUE, rampcol=c("blue", "green"), nbgrid=15, nbcol=15)
> 
> if(any(installed.packages()[, "Package"] == "actuar") && visualize)
+ {
+   require("actuar")
+   set.seed(123)
+   f1 <- fitdist(rburr(nsample, 2, 3, 1), "burr", start=list(shape1=10, shape2=10, rate=1))
+   b1 <- bootdist(f1, niter=nbboot, silent=TRUE)
+   plot(b1)
+   plot(b1, trueval = c(2, 3, 1))
+   plot(b1, enhance=TRUE)
+   plot(b1, enhance=TRUE, trueval = c(2, 3, 1))
+ }
> 
> 
> # (3) estimation of the rate of a gamma distribution
> # by maximum likelihood with the shape fixed at 4 using the argument fix.arg,
> # followed by parametric bootstrap
> #
> f1c <- fitdist(serving, "gamma", start=list(rate=0.1), fix.arg=list(shape=4))
> b1c <- bootdist(f1c, niter=nbboot)
> summary(b1c)
Parametric bootstrap medians and 95% percentile CI
    Median       2.5%      97.5% 
0.05488348 0.05337624 0.05656264 
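# A possible extra check (sketch only, not part of the recorded run): the bootstrap
# samples stored in the $estim data.frame (see example (1)) can be reused to build a
# percentile CI for a derived quantity, here the gamma mean shape/rate with the shape
# fixed at 4; this assumes b1c$estim holds a single column of bootstrapped rate values.
if (visualize)
{
  rateboot <- b1c$estim[, 1]   # bootstrapped rate values
  meanboot <- 4 / rateboot     # implied mean serving size (shape/rate, shape fixed at 4)
  quantile(meanboot, probs = c(0.025, 0.5, 0.975))
}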
> 
> # (4) fit of a gamma distribution to serving size data
> # by quantile matching estimation (in this example matching
> # first and third quartiles) followed by parametric bootstrap
> #
> f1d <- fitdist(serving, "gamma", method="qme", probs=c(0.25, 0.75))
> b1d <- bootdist(f1d, niter=nbboot)
> summary(b1d)
Parametric bootstrap medians and 95% percentile CI
          Median       2.5%      97.5%
shape 4.31100756 3.26207478 4.86018255
rate  0.05665868 0.04148652 0.06415661
> 
> # (5) fit of a gamma distribution with control of the optimization
> # method, followed by parametric bootstrap
> #
> if(visualize) { # check ERROR on aarch64-apple-darwin20.4.0 (64-bit) (2021/05/12)
+   set.seed(1234)
+   f1e <- fitdist(serving, "gamma", optim.method="L-BFGS-B", lower=c(0, 0))
+   b1e <- bootdist(f1e, niter=nbboot)
+   summary(b1e)
+ }
> 
> # (6) fit of a discrete distribution by matching moment estimation
> # (using a closed formula) followed by parametric bootstrap
> #
> set.seed(1234)
> x2 <- rpois(nsample, lambda = 5)
> f2 <- fitdist(x2, "pois", method="mme")
> b2 <- bootdist(f2, niter=nbboot)
> plot(b2, pch=16)
> summary(b2)
Parametric bootstrap medians and 95% percentile CI
Median   2.5%  97.5% 
 4.400  2.975  4.900 
> 
> # (7) Fit of a uniform distribution using the Cramer-von Mises distance
> # followed by parametric bootstrap
> #
> if(visualize)
+ {
+   x3 <- runif(nsample, min=5, max=10)
+   f3 <- fitdist(x3, "unif", method="mge", gof="CvM")
+   b3 <- bootdist(f3, bootmethod="param", niter=nbboot)
+   summary(b3)
+   plot(b3)
+ }
> 
> # (9) fit of a Weibull distribution to serving size data by maximum likelihood
> # estimation or by quantile matching estimation (in this example matching
> # first and third quartiles), followed by parametric bootstrap
> #
> fWmle <- fitdist(serving, "weibull")
> bWmle <- bootdist(fWmle, niter=nbboot)
> summary(bWmle)
Parametric bootstrap medians and 95% percentile CI
         Median      2.5%     97.5%
shape  2.161366  2.062089  2.380348
scale 84.435931 77.331984 85.643448
> quantile(bWmle, probs=c(0.25, 0.75))
(original) estimated quantiles for each specified probability (non-censored data)
           p=0.25  p=0.75
estimate 47.13642 96.7809
Median of bootstrap estimates
           p=0.25   p=0.75
estimate 46.97044 97.32588
two-sided 95 % CI of each quantile
         p=0.25   p=0.75
2.5 %  42.98426 90.03661
97.5 % 50.73265 99.75572
> 
> fWqme <- fitdist(serving, "weibull", method="qme", probs=c(0.25, 0.75))
> bWqme <- bootdist(fWqme, niter=nbboot)
> summary(bWqme)
Parametric bootstrap medians and 95% percentile CI
         Median      2.5%     97.5%
shape  2.249725  2.143962  2.418769
scale 86.476953 84.002435 89.635212
> quantile(bWqme, probs=c(0.25, 0.75))
(original) estimated quantiles for each specified probability (non-censored data)
          p=0.25   p=0.75
estimate 50.0001 99.99983
Median of bootstrap estimates
           p=0.25   p=0.75
estimate 49.72662 99.90189
two-sided 95 % CI of each quantile
         p=0.25    p=0.75
2.5 %  47.93772  97.18378
97.5 % 53.34119 102.88616
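# A graphical companion to the quantile CIs above (sketch only, not part of the
# recorded run): the fitdistrplus function CIcdfplot() draws the fitted CDF together
# with the bootstrap confidence band, computed either on quantiles or on probabilities;
# the CI.fill colours used here are just illustrative choices.
if (visualize)
{
  CIcdfplot(bWmle, CI.output = "quantile", CI.fill = "lightblue")
  CIcdfplot(bWqme, CI.output = "quantile", CI.fill = "pink")
}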
> 
> 
> # (10) Fit of a Pareto distribution by numerical moment matching estimation
> # followed by parametric bootstrap
> # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! LONG TO RUN !!!!!!!!!!!!!!!!!!!!!!!!
> #
> if (visualize)
+ {
+   if(any(installed.packages()[, "Package"] == "actuar"))
+   {
+     require("actuar")
+     # simulate a sample
+     x4 <- rpareto(nsample, 6, 2)
+     memp <- function(x, order)
+       ifelse(order == 1, mean(x), sum(x^order)/length(x))
+ 
+     f4 <- fitdist(x4, "pareto", "mme", order=1:2,
+                   start=list(shape=10, scale=10),
+                   lower=1, memp="memp", upper=50)
+ 
+     b4 <- bootdist(f4, niter=nbboot)
+     summary(b4)
+ 
+     b4npar <- bootdist(f4, niter=nbboot, bootmethod="nonparam")
+     summary(b4npar)
+   }
+ }
> 
> # (11) Fit of a Burr distribution (3 parameters) using MLE
> # followed by parametric bootstrap
> # !!!!!!!!!!!!!!!! LONG TO RUN !!!!!!!!!!!!!!!!!!
> #
> if (visualize)
+ {
+   if(any(installed.packages()[, "Package"] == "actuar"))
+   {
+     require("actuar")
+     data(danishuni)
+ 
+     fdan <- fitdist(danishuni$Loss, "burr", method="mle",
+                     start=list(shape1=5, shape2=5, rate=10), lower=0+1e-1, control=list(trace=0))
+     bdan <- bootdist(fdan, bootmethod="param", niter=nbboot)
+     summary(bdan)
+     plot(bdan)
+     cdfcomp(fdan, xlogscale=TRUE)
+   }
+ }
> 
> # (12) Fit of a Triangular distribution (3 parameters) using MLE
> # followed by parametric bootstrap, with crashes of optim
> #
> if(any(installed.packages()[, "Package"] == "mc2d"))
+ {
+   require("mc2d")
+   set.seed(1234)
+   x4 <- rtriang(100, min=0, mode=4, max=20) # nsample not used: does not converge if the sample is too small
+   fit4t <- fitdist(x4, dtriang, start=list(min=0, mode=4, max=20))
+   summary(fit4t)
+   b4t <- bootdist(fit4t, niter=nbboot)
+   b4t
+   plot(b4t)
+   summary(b4t)
+   quantile(b4t)
+ 
+ }
Loading required package: mc2d
Loading required package: mvtnorm

Attaching package: 'mc2d'

The following objects are masked from 'package:base':

    pmax, pmin

(original) estimated quantiles for each specified probability (non-censored data)
           p=0.1    p=0.2   p=0.3    p=0.4    p=0.5   p=0.6    p=0.7    p=0.8
estimate 6.99066 7.110847 7.20307 7.280817 7.349314 7.41124 7.468186 7.522676
            p=0.9
estimate 7.590967
Median of bootstrap estimates
            p=0.1    p=0.2    p=0.3    p=0.4    p=0.5    p=0.6   p=0.7    p=0.8
estimate 7.381901 7.411768 7.439453 7.462793 7.482165 7.500423 7.51904 7.534952
            p=0.9
estimate 7.552589
two-sided 95 % CI of each quantile
          p=0.1    p=0.2    p=0.3    p=0.4    p=0.5    p=0.6    p=0.7    p=0.8
2.5 %  7.312251 7.340770 7.360830 7.378643 7.395703 7.414393 7.435609 7.460138
97.5 % 7.516127 7.529222 7.543164 7.558143 7.574432 7.592449 7.612900 7.637158
          p=0.9
2.5 %  7.489453
97.5 % 7.670650
The estimation method converged only for 9 among 11 bootstrap iterations.
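# When some bootstrap iterations do not converge, as reported just above, the
# convergence codes stored in $converg (see example (1)) can be used to inspect or
# filter the retained estimates. Sketch only, not part of the recorded run; it assumes
# mc2d was available (so b4t exists) and that the rows of b4t$estim are aligned with
# b4t$converg.
if (visualize)
{
  table(b4t$converg)                       # 0 indicates successful convergence
  okboot <- b4t$estim[b4t$converg == 0, ]  # keep converged iterations only
  summary(okboot)
}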
> 
> 
> # (13) Fit of a Pareto and a Burr distribution, with bootstrap on the Burr distribution
> #
> #
> if(visualize)
+ {
+   data(endosulfan)
+   ATV <- endosulfan$ATV
+   plotdist(ATV)
+   descdist(ATV, boot=nbboot)
+ 
+   fln <- fitdist(ATV, "lnorm")
+   summary(fln)
+   gofstat(fln)
+ 
+   # use of plotdist to find reasonable initial values for the parameters
+   plotdist(ATV, "pareto", para=list(shape=1, scale=500))
+   fP <- fitdist(ATV, "pareto", start=list(shape=1, scale=500))
+   summary(fP)
+   gofstat(fP)
+ 
+   # definition of the initial values from the fit of the Pareto,
+   # as the Burr distribution is the Pareto when shape2 == 1
+   fB <- fitdist(ATV, "burr", start=list(shape1=0.3, shape2=1, rate=1))
+   summary(fB)
+   gofstat(fB)
+ 
+   cdfcomp(list(fln, fP, fB), xlogscale=TRUE)
+   qqcomp(list(fln, fP, fB), xlogscale=TRUE, ylogscale=TRUE)
+   ppcomp(list(fln, fP, fB), xlogscale=TRUE, ylogscale=TRUE)
+   denscomp(list(fln, fP, fB)) # of limited interest as hist does not accept the argument log="x"
+ 
+   # comparison of HC5 values (5 percent quantiles)
+   quantile(fln, probs=0.05)
+   quantile(fP, probs=0.05)
+   quantile(fB, probs=0.05)
+ 
+   # bootstrap for the Burr distribution
+   bfB <- bootdist(fB, niter=nbboot)
+   plot(bfB)
+ }
> 
> # (14) relevant example for the zero-modified geometric distribution
> #
> dzmgeom <- function(x, p1, p2)
+ {
+   p1 * (x == 0) + (1-p1)*dgeom(x-1, p2)
+ }
> pzmgeom <- function(q, p1, p2)
+ {
+   p1 * (q >= 0) + (1-p1)*pgeom(q-1, p2)
+ }
> rzmgeom <- function(n, p1, p2)
+ {
+   u <- rbinom(n, 1, 1-p1) # the probability of getting zero is p1
+   u[u != 0] <- rgeom(sum(u != 0), p2)+1
+   u
+ }
> 
> x2 <- rzmgeom(nsample, 1/2, 1/10)
> 
> f2 <- fitdist(x2, "zmgeom", method="mle", fix.arg=function(x) list(p1=mean(x == 0)), start=list(p2=1/2))
> b2 <- bootdist(f2, niter=nbboot)
> plot(b2)
> 
> f3 <- fitdist(x2, "zmgeom", method="mle", start=list(p1=1/2, p2=1/2))
> b3 <- bootdist(f3, niter=nbboot)
> plot(b3, enhance=TRUE)
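# The quantile methods illustrated in example (9) (e.g. quantile(b2)) would need a
# quantile function "qzmgeom", which is not defined above. A possible definition,
# consistent with dzmgeom/pzmgeom (sketch only, not part of the recorded run):
qzmgeom <- function(p, p1, p2)
{
  # below p1 the quantile is 0; above it, invert the shifted geometric part
  ifelse(p <= p1, 0, qgeom(pmax((p - p1)/(1 - p1), 0), p2) + 1)
}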
> 
> # (15) does fixing p1 reduce bias of estimating p2?
> summary(b2$estim[, "p2"] - 1/10)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.03889 0.08847 0.12500 0.13867 0.16236 0.31176 
> summary(b3$estim[, "p2"] - 1/10)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.04813 0.11748 0.12577 0.15418 0.19396 0.28762 
> 
> par(mfrow=c(1, 2))
> hist(b2$estim[, "p2"] - 1/10, breaks=100, xlim=c(-.015, .015))
> hist(b3$estim[, "p2"] - 1/10, breaks=100, xlim=c(-.015, .015))
> par(mfrow=c(1, 1))
> 
> # (16) efficiency of parallel operation
> if (visualize)
+ {
+   niter <- 1001
+   data(groundbeef)
+   serving <- groundbeef$serving
+   f1 <- fitdist(serving, "gamma")
+ 
+   alltime <- matrix(NA, 9, 5)
+   colnames(alltime) <- c("user.self", "sys.self", "elapsed", "user.child", "sys.child")
+   rownames(alltime) <- c("base R", paste("snow", 1:4), paste("multicore", 1:4))
+ 
+   alltime[1,] <- system.time(res <- bootdist(f1, niter = niter))
+ 
+   for (cli in 1:4)
+   {
+     cat("\nnb cluster", cli, "\n")
+     #ptm <- proc.time()
+     alltime[cli+1,] <- system.time(res <- bootdist(f1, niter = niter, parallel = "snow", ncpus = cli))
+     print(summary(res))
+     #print(proc.time() - ptm)
+ 
+   }
+   # not available on Windows
+   if(.Platform$OS.type == "unix")
+     for (cli in 1:4)
+     {
+       cat("\nnb cluster", cli, "\n")
+       #ptm <- proc.time()
+       alltime[cli+5,] <- system.time(res <- bootdist(f1, niter = niter, parallel = "multicore", ncpus = cli))
+       print(summary(res))
+       #print(proc.time() - ptm)
+ 
+     }
+ 
+   alltime
+ }
> 
> # (17) bootdist with weights (not yet available, test of error message)
> #
> x <- rpois(nsample, 10)
> xtab <- table(x)
> xval <- sort(unique(x))
> (f1 <- fitdist(x, "pois"))
Fitting of the distribution ' pois ' by maximum likelihood 
Parameters:
       estimate Std. Error
lambda     10.5   1.024695
> (f2 <- fitdist(xval, "pois", weights = xtab))
Fitting of the distribution ' pois ' by maximum likelihood 
Parameters:
       estimate Std. Error
lambda 10.50006   1.024701
Warning message:
In mledist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg,  :
  weights are not taken into account in the default initial values
> summary(bootdist(f1, niter = nbboot))
Parametric bootstrap medians and 95% percentile CI
  Median     2.5%    97.5% 
10.99923  9.55000 11.90000 
> try(summary(bootdist(f2, niter = nbboot))) # not yet developed
Error in bootdist(f2, niter = nbboot) : 
  Bootstrap is not yet available when using weights
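# Until bootstrap is implemented for weighted fits, a possible workaround when only
# tabulated data are available (sketch only, not part of the recorded run) is to expand
# the table back into individual observations and bootstrap the unweighted fit, which
# for maximum likelihood is equivalent to the weighted fit f2:
if (visualize)
{
  xfull <- rep(xval, times = as.vector(xtab))   # reconstruct individual observations
  ffull <- fitdist(xfull, "pois")
  summary(bootdist(ffull, niter = nbboot))
}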
> 
> # (18) density of bootdist()
> #
> x <- rlnorm(1e3)
> b0 <- bootdist(fitdist(x, "lnorm"), niter = 50)
> b1 <- bootdist(fitdist(x, "lnorm"), niter = 100)
> b2 <- bootdist(fitdist(x, "lnorm"), niter = 200)
> 
> #d1 <- fitdistrplus:::density.bootdist(b0, b1, b2)
> d1 <- density(b0, b1, b2)
> str(d1)
List of 3
 $ :List of 2
  ..$ meanlog:List of 11
  .. ..$ x         : num [1:512] -0.078 -0.0776 -0.0772 -0.0768 -0.0764 ...
  .. ..$ y         : num [1:512] 0.00758 0.00835 0.0092 0.01014 0.01114 ...
  .. ..$ bw        : num 0.0126
  .. ..$ n         : int 50
  .. ..$ old.coords: logi FALSE
  .. ..$ call      : language density.default(x = x[[j]]$estim[, i], bw = bw, adjust = adjust, kernel = kernel)
  .. ..$ data.name : chr "x[[j]]$estim[, i]"
  .. ..$ has.na    : logi FALSE
  .. ..$ mean      : num 0.0321
  .. ..$ sd        : num 0.0312
  .. ..$ distname  : chr "lnorm"
  .. ..- attr(*, "class")= chr "density"
  ..$ sdlog  :List of 11
  .. ..$ x         : num [1:512] 0.941 0.941 0.941 0.942 0.942 ...
  .. ..$ y         : num [1:512] 0.0124 0.0138 0.0154 0.0171 0.019 ...
  .. ..$ bw        : num 0.0075
  .. ..$ n         : int 50
  .. ..$ old.coords: logi FALSE
  .. ..$ call      : language density.default(x = x[[j]]$estim[, i], bw = bw, adjust = adjust, kernel = kernel)
  .. ..$ data.name : chr "x[[j]]$estim[, i]"
  .. ..$ has.na    : logi FALSE
  .. ..$ mean      : num 1
  .. ..$ sd        : num 0.0208
  .. ..$ distname  : chr "lnorm"
  .. ..- attr(*, "class")= chr "density"
 $ :List of 2
  ..$ meanlog:List of 11
  .. ..$ x         : num [1:512] -0.0598 -0.0594 -0.0591 -0.0587 -0.0583 ...
  .. ..$ y         : num [1:512] 0.00996 0.01131 0.01278 0.0144 0.01628 ...
  .. ..$ bw        : num 0.0101
  .. ..$ n         : int 100
  .. ..$ old.coords: logi FALSE
  .. ..$ call      : language density.default(x = x[[j]]$estim[, i], bw = bw, adjust = adjust, kernel = kernel)
  .. ..$ data.name : chr "x[[j]]$estim[, i]"
  .. ..$ has.na    : logi FALSE
  .. ..$ mean      : num 0.0314
  .. ..$ sd        : num 0.0308
  .. ..$ distname  : chr "lnorm"
  .. ..- attr(*, "class")= chr "density"
  ..$ sdlog  :List of 11
  .. ..$ x         : num [1:512] 0.93 0.93 0.931 0.931 0.931 ...
  .. ..$ y         : num [1:512] 0.0144 0.0161 0.0179 0.0199 0.0221 ...
  .. ..$ bw        : num 0.00724
  .. ..$ n         : int 100
  .. ..$ old.coords: logi FALSE
  .. ..$ call      : language density.default(x = x[[j]]$estim[, i], bw = bw, adjust = adjust, kernel = kernel)
  .. ..$ data.name : chr "x[[j]]$estim[, i]"
  .. ..$ has.na    : logi FALSE
  .. ..$ mean      : num 0.995
  .. ..$ sd        : num 0.0202
  .. ..$ distname  : chr "lnorm"
  .. ..- attr(*, "class")= chr "density"
 $ :List of 2
  ..$ meanlog:List of 11
  .. ..$ x         : num [1:512] -0.104 -0.103 -0.103 -0.102 -0.102 ...
  .. ..$ y         : num [1:512] 0.00242 0.00282 0.00327 0.00379 0.00439 ...
  .. ..$ bw        : num 0.00923
  .. ..$ n         : int 200
  .. ..$ old.coords: logi FALSE
  .. ..$ call      : language density.default(x = x[[j]]$estim[, i], bw = bw, adjust = adjust, kernel = kernel)
  .. ..$ data.name : chr "x[[j]]$estim[, i]"
  .. ..$ has.na    : logi FALSE
  .. ..$ mean      : num 0.0297
  .. ..$ sd        : num 0.0311
  .. ..$ distname  : chr "lnorm"
  .. ..- attr(*, "class")= chr "density"
  ..$ sdlog  :List of 11
  .. ..$ x         : num [1:512] 0.914 0.915 0.915 0.915 0.915 ...
  .. ..$ y         : num [1:512] 0.00352 0.00407 0.0047 0.0054 0.00618 ...
  .. ..$ bw        : num 0.00656
  .. ..$ n         : int 200
  .. ..$ old.coords: logi FALSE
  .. ..$ call      : language density.default(x = x[[j]]$estim[, i], bw = bw, adjust = adjust, kernel = kernel)
  .. ..$ data.name : chr "x[[j]]$estim[, i]"
  .. ..$ has.na    : logi FALSE
  .. ..$ mean      : num 1
  .. ..$ sd        : num 0.021
  .. ..$ distname  : chr "lnorm"
  .. ..- attr(*, "class")= chr "density"
 - attr(*, "distname")= chr "lnorm"
 - attr(*, "nbobject")= int 3
 - attr(*, "nbboot")= num [1:3] 50 100 200
 - attr(*, "n")= int 1000
 - attr(*, "class")= chr "density.bootdist"
> plot(d1)
> print(d1)
Bootstrap values for: lnorm for 3 object(s) with 50, 100, 200 bootstrap values (original sample size 1000).
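# The components shown by str() above can be used directly; for instance, the bootstrap
# standard deviation stored for each parameter gives a quick check of how the spread
# evolves with the number of bootstrap values (sketch only, not part of the recorded run).
if (visualize)
{
  # bootstrap sd of each parameter for the three objects (niter = 50, 100, 200)
  sapply(d1, function(obj) c(meanlog = obj$meanlog$sd, sdlog = obj$sdlog$sd))
}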
..- attr(*, "class")= chr "density" - attr(*, "distname")= chr "lnorm" - attr(*, "nbobject")= int 1 - attr(*, "nbboot")= num 100 - attr(*, "n")= int 1000 - attr(*, "class")= chr "density.bootdist" > plot(d1) > print(d1) Bootstrap values for: lnorm for 1 object(s) with 100 bootstrap values (original sample size 1000).> > proc.time() user system elapsed 6.42 0.48 6.89