R Under development (unstable) (2024-07-10 r86888 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(fitdistrplus) Loading required package: MASS Loading required package: survival > nbboot <- 100 > nbboot <- 10 > > nsample <- 100 > nsample <- 10 > > visualize <- FALSE # TRUE for manual tests with visualization of results > > # (1) basic fit of a gamma distribution by maximum likelihood estimation > # > data(groundbeef) > serving <- groundbeef$serving > fitg <- fitdist(serving, "gamma") > summary(fitg) Fitting of the distribution ' gamma ' by maximum likelihood Parameters : estimate Std. Error shape 3.99526328 0.340187063 rate 0.05423688 0.004919331 Loglikelihood: -1253.626 AIC: 2511.252 BIC: 2518.326 Correlation matrix: shape rate shape 1.0000000 0.9382418 rate 0.9382418 1.0000000 > plot(fitg) > cdfcomp(fitg, addlegend=FALSE) > > #check names > names(fitdist(serving, "gamma", optim.method="Brent", lower=0, upper=10, fix.arg=list(shape=2))$estimate) [1] "rate" > names(fitdist(serving, "gamma", optim.method="Nelder-Mead")$estimate) [1] "shape" "rate" > names(fitdist(serving, "gamma", optim.method="BFGS")$estimate) [1] "shape" "rate" > # names(fitdist(serving, "gamma", optim.method="CG", start=list(shape=4, rate=1/20))$estimate) > if(visualize) { # check ERROR on aarch64-apple-darwin20.4.0 (64-bit) (2021/05/12) + set.seed(1234) + names(fitdist(serving, "gamma", optim.method="L-BFGS-B", lower=0)$estimate) + } > > > # (7) custom optimization function > # > > #create the sample > set.seed(1234) > mysample <- rexp(nsample, 5) > mystart <- list(rate=8) > > res1 <- fitdist(mysample, dexp, start= mystart, optim.method="Nelder-Mead") > > #show the result > summary(res1) Fitting of the distribution ' exp ' by maximum likelihood Parameters : estimate Std. Error rate 6.578125 2.080186 Loglikelihood: 8.838553 AIC: -15.67711 BIC: -15.37452 > > #the warning tell us to use optimise, because the Nelder-Mead is not adequate. > > #to meet the standard 'fn' argument and specific name arguments, we wrap optimize, > myoptimize <- function(fn, par, ...) + { + res <- optimize(f=fn, ..., maximum=FALSE) + #assume the optimization function minimize + + standardres <- c(res, convergence=0, value=res$objective, + par=res$minimum, hessian=NA) + + return(standardres) + } > > #call fitdist with a 'custom' optimization function > res2 <- fitdist(mysample, dexp, start=mystart, custom.optim=myoptimize, + interval=c(0, 100)) > > #show the result > summary(res2) Fitting of the distribution ' exp ' by maximum likelihood Parameters : estimate rate 6.578815 Loglikelihood: 8.838553 AIC: -15.67711 BIC: -15.37452 > > > # (8) custom optimization function - another example with the genetic algorithm > # > if(any(installed.packages()[,"Package"] == "rgenoud") && visualize) + { + + #set a sample + fit1 <- fitdist(serving, "gamma") + summary(fit1) + + #wrap genoud function rgenoud package + mygenoud <- function(fn, par, ...) + { + require(rgenoud) + res <- genoud(fn, starting.values=par, ...) + standardres <- c(res, convergence=0, counts=NULL) + + return(standardres) + } + + #call fitdist with a 'custom' optimization function + fit2 <- fitdist(serving, "gamma", custom.optim=mygenoud, nvars=2, start=as.list(fit1$estimate), + Domains=cbind(c(0, 0), c(10, 10)), boundary.enforcement=1, + print.level=0, hessian=TRUE) + + summary(fit2) + } > > # (11) Fit of a Pareto distribution by numerical moment matching estimation > # > if(any(installed.packages()[,"Package"] == "actuar") && visualize) + { + require(actuar) + #simulate a sample + set.seed(1234) + x4 <- rpareto(nsample, 6, 2) + + #empirical raw moment + memp <- function(x, order) + ifelse(order == 1, mean(x), sum(x^order)/length(x)) + + #fit + fP <- fitdist(x4, "pareto", method="mme", order=c(1, 2), memp="memp", + start=list(shape=10, scale=10), lower=1, upper=Inf) + summary(fP) + plot(fP) + + } > > > # (14) scaling problem - too small values > # > if (visualize) # LONG TO RUN ON CRAN + { + x2 <- c(-0.00707717, -0.000947418, -0.00189753, + -0.000474947, -0.00190205, -0.000476077, 0.00237812, 0.000949668, + 0.000474496, 0.00284226, -0.000473149, -0.000473373, 0, 0, 0.00283688, + -0.0037843, -0.0047506, -0.00238379, -0.00286807, 0.000478583, + 0.000478354, -0.00143575, 0.00143575, 0.00238835, 0.0042847, + 0.00237248, -0.00142281, -0.00142484, 0, 0.00142484, 0.000948767, + 0.00378609, -0.000472478, 0.000472478, -0.0014181, 0, -0.000946522, + -0.00284495, 0, 0.00331832, 0.00283554, 0.00141476, -0.00141476, + -0.00188947, 0.00141743, -0.00236351, 0.00236351, 0.00235794, + 0.00235239, -0.000940292, -0.0014121, -0.00283019, 0.000472255, + 0.000472032, 0.000471809, -0.0014161, 0.0014161, -0.000943842, + 0.000472032, -0.000944287, -0.00094518, -0.00189304, -0.000473821, + -0.000474046, 0.00331361, -0.000472701, -0.000946074, 0.00141878, + -0.000945627, -0.00189394, -0.00189753, -0.0057143, -0.00143369, + -0.00383326, 0.00143919, 0.000479272, -0.00191847, -0.000480192, + 0.000960154, 0.000479731, 0, 0.000479501, 0.000958313, -0.00383878, + -0.00240674, 0.000963391, 0.000962464, -0.00192586, 0.000481812, + -0.00241138, -0.00144963) + + for(i in 6:0) + { + cat("\nscaling", 10^i, "\n") + res <- try(mledist(x2*10^i, "cauchy"), silent=TRUE) + if(inherits(res, "try-error")) + print(res) + else + { + cat("estimate\n") + print(res$estimate) + cat("Hessian\n") + print(res$hessian) + } + } + } > > # (15) scaling problem - too big values > # > if (visualize) # LONG TO RUN ON CRAN + { + x1 <- c( + 1401928684, 1413455609, 1432458425, 1436910475, 1494883250, 1565770323, 1577486458, + 1568908053, 1606424896, 1632264979, 1780495643, 1865525923, 2035689865, 2141429306, + 2335443964, 2465661689, 2563368221, 2845012431, 2949890881, 3180645942, 3309009836, + 3618581152, 4109197451, 4064662257, 4028375795, 4176781983, 4303024833, 4493470109 + ) + + + for(i in 0:5) + { + cat("\nscaling", 10^(-2*i), "\n") + res <- mledist(x1*10^(-2*i), "norm") + Hm1 <- try(solve(res$hessian), silent=TRUE) + if(inherits(Hm1, "try-error")) + print(Hm1) + else + { + cat("estimate\n") + print(res$estimate) + cat("Hessian\n") + print(res$hessian) + cat("inverse Hessian\n") + print(Hm1) + } + } + + fitdist(x1, "norm") + fitdist(x1*1e-6, "norm") + } > > # (16) Fit of a lognormal distribution on acute toxicity values of endosulfan for > # nonarthropod invertebrates, using maximum likelihood estimation > # to estimate what is called a species sensitivity distribution > # (SSD) in ecotoxicology, followed by estimation of the 5 percent quantile value of > # the fitted distribution, what is called the 5 percent hazardous concentration (HC5) > # in ecotoxicology, with its two-sided 95 percent confidence interval calculated by > # parametric bootstrap > # > data(endosulfan) > ATV <- subset(endosulfan, group == "NonArthroInvert")$ATV > log10ATV <- log10(subset(endosulfan, group == "NonArthroInvert")$ATV) > fln <- fitdist(log10ATV, "norm") > > quantile(fln, probs = 0.05) Estimated quantiles for each specified probability (non-censored data) p=0.05 estimate 1.744227 > > > # (17) Fit of a triangular distribution using Cramer-von Mises or > # Kolmogorov-Smirnov distance > # > if(any(installed.packages()[,"Package"] == "mc2d") && visualize) + { + set.seed(1234) + require(mc2d) + t <- rtriang(100,min=5,mode=6,max=10) # nsample not used : does not converge with a too small sample + fCvM <- fitdist(t,"triang",method="mge",start = list(min=4, mode=6,max=9),gof="CvM") + fKS <- fitdist(t,"triang",method="mge",start = list(min=4, mode=6,max=9),gof="KS") + cdfcomp(list(fCvM,fKS)) + } > > # (18) gumbel distribution > # > dgumbel <- function(x, a, b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) > pgumbel <- function(q, a, b) exp(-exp((a-q)/b)) > qgumbel <- function(p, a, b) a-b*log(-log(p)) > > data(danishuni) > fitdist(danishuni$Loss, "gumbel", start=list(a=5, b=10)) Fitting of the distribution ' gumbel ' by maximum likelihood Parameters: estimate Std. Error a 1.977520 0.03792251 b 1.738562 0.03447841 > > > # (19) check the 'start' argument > # > if (FALSE) # NO INTEREST WITHOUT VISUALIZATION OF THE RESULT + { + #create the sample + mysample <- rexp(nsample, 5) + mystart2 <- list(rate2=8) + mystart3 <- list(8) + + try( fitdist(mysample, dexp, start= mystart2, method="mle") ) + try( fitdist(mysample, dexp, start= mystart3, method="mle") ) + + try( fitdist(mysample, dexp, start= mystart2, method="mme") ) + try( fitdist(mysample, dexp, start= mystart3, method="mme") ) + + try( fitdist(mysample, dexp, start= mystart2, method="qme", probs=1/2) ) + try( fitdist(mysample, dexp, start= mystart3, method="qme", probs=1/2) ) + + try( fitdist(mysample, dexp, start= mystart2, method="mge", gof="AD") ) + try( fitdist(mysample, dexp, start= mystart3, method="mge", gof="AD") ) + + } > > # (20) example with dexgauss > # would require to suggest the package gamlss.dist in the Description file > # > #if(any(installed.packages()[,"Package"] == "gamlss.dist")) > #{ > # require(gamlss.dist) > # set.seed(1234) > # a=rexGAUS(100,mu=500,sigma=50,nu=75) > # fitdist(a,dexGAUS,start=list(mu=median(a),sigma=sqrt(var(a)/2),nu=sqrt(var(a)/2))) > #} > > > # (21) check the 'keepdata' argument > # > if (visualize) # REQUIRES VISUALIZATION OF THE RESULTS + { + #create the sample + x <- rexp(1e6, 5) + summary(x) + f1 <- fitdist(x, "exp", keepdata=FALSE) + f2 <- fitdist(x, "exp", keepdata=TRUE) + + par(mfrow=c(1,2)) + cdfcomp(f1) + cdfcomp(f2) + } > > # (22) relevant example for zero modified geometric distribution > # > dzmgeom <- function(x, p1, p2) + { + p1 * (x == 0) + (1-p1)*dgeom(x-1, p2) + } > pzmgeom <- function(q, p1, p2) + { + p1 * (q >= 0) + (1-p1)*pgeom(q-1, p2) + } > rzmgeom <- function(n, p1, p2) + { + u <- rbinom(n, 1, 1-p1) #prob to get zero is p1 + u[u != 0] <- rgeom(sum(u != 0), p2)+1 + u + } > > x2 <- rzmgeom(nsample, 1/2, 1/10) > table(x2) x2 0 3 9 20 7 1 1 1 > #this is the MLE which converges almost surely and in distribution to the true value. > initp1 <- function(x) list(p1=mean(x == 0)) > > fitdist(x2, "zmgeom", start=list(p1=1/2, p2=1/2)) Fitting of the distribution ' zmgeom ' by maximum likelihood Parameters: estimate Std. Error p1 0.70003945 0.14490166 p2 0.09373461 0.05151407 > > f2 <- fitdist(x2, "zmgeom", fix.arg=initp1, start=list(p2=1/2)) > print(f2) Fitting of the distribution ' zmgeom ' by maximum likelihood Parameters: estimate Std. Error p2 0.09375271 0.05152299 Fixed parameters (computed by a user-supplied function): value p1 0.7 > summary(f2) Fitting of the distribution ' zmgeom ' by maximum likelihood Parameters : estimate Std. Error p2 0.09375271 0.05152299 Fixed parameters (computed by a user-supplied function): value p1 0.7 Loglikelihood: -16.06478 AIC: 34.12955 BIC: 34.43214 > > f2 <- fitdist(x2, "zmgeom", fix.arg=list(p1=1/2), start=list(p2=1/2)) > print(f2) Fitting of the distribution ' zmgeom ' by maximum likelihood Parameters: estimate Std. Error p2 0.09375271 0.05152299 Fixed parameters: value p1 0.5 > summary(f2) Fitting of the distribution ' zmgeom ' by maximum likelihood Parameters : estimate Std. Error p2 0.09375271 0.05152299 Fixed parameters: value p1 0.5 Loglikelihood: -16.8876 AIC: 35.77521 BIC: 36.07779 > > # (23) check the use of weights with MLE > # > set.seed(1234) > x <- rpois(nsample, 10) > xtab <- table(x) > xval <- sort(unique(x)) > f1 <- fitdist(x, "pois") > f2 <- fitdist(xval, "pois", weights = xtab) Warning message: In mledist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg, : weights are not taken into account in the default initial values > > f1$estimate lambda 8.4 > f2$estimate #should be identical lambda 8.4 > > > # (24) check the use of weights with other methods > # > > set.seed(1234) > x <- rpois(nsample, 10) > xtab <- table(x) > xval <- sort(unique(x)) > (f1 <- fitdist(x, "norm", method = "mle")) Fitting of the distribution ' norm ' by maximum likelihood Parameters: estimate Std. Error mean 8.400000 0.5138093 sd 1.624808 0.3633174 > (f2 <- fitdist(xval, "norm", weights = xtab, method = "mle")) Fitting of the distribution ' norm ' by maximum likelihood Parameters: estimate Std. Error mean 8.400002 0.5138242 sd 1.624855 0.3633437 Warning message: In mledist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg, : weights are not taken into account in the default initial values > > (f1 <- fitdist(x, "norm", method = "mme")) Fitting of the distribution ' norm ' by matching moments Parameters: estimate Std. Error mean 8.400000 1.624808 sd 1.624808 1.148912 > (f2 <- fitdist(xval, "norm", weights = xtab, method = "mme")) Fitting of the distribution ' norm ' by matching moments Parameters: estimate Std. Error mean 8.400000 1.712698 sd 1.712698 NaN Warning messages: 1: In mmedist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg, : weights are not taken into account in the default initial values 2: In cov2cor(varcovar) : diag(V) had non-positive or NA entries; the non-finite result may be dubious 3: In sqrt(diag(varcovar)) : NaNs produced > > (f1 <- fitdist(x, "norm", method = "qme", probs=c(1/4, 3/4))) Fitting of the distribution ' norm ' by matching quantiles Parameters: estimate mean 8.374998 sd 1.667926 > (f2 <- fitdist(xval, "norm", method = "qme", weights = xtab, probs=c(1/4, 3/4) )) Fitting of the distribution ' norm ' by matching quantiles Parameters: estimate mean 8.374997 sd 1.667910 Warning message: In qmedist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg, : weights are not taken into account in the default initial values > > fitdist(x, "norm", method="mge", gof = "CvM") Fitting of the distribution ' norm ' by maximum goodness-of-fit Parameters: estimate mean 8.230838 sd 1.881098 > try(fitdist(xval, "norm", method="mge", gof = "CvM", weights = xtab)) # not yet developped Error in mgedist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg, : Weights is not allowed for maximum GOF estimation > > > # (24b) check the use of weights with qme with a discrete distribution > # > set.seed(1234) > x <- rpois(nsample, 10) > xtab <- table(x) > xval <- sort(unique(x)) > (f1 <- fitdist(x, "pois", method = "qme", probs=c(1/2))) Fitting of the distribution ' pois ' by matching quantiles Parameters: estimate lambda 8.4 > (f2 <- fitdist(xval, "pois", method = "qme", weights = xtab, probs=c(1/2) )) # similar to f1 Fitting of the distribution ' pois ' by matching quantiles Parameters: estimate lambda 8.4 Warning message: In qmedist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg, : weights are not taken into account in the default initial values > > fitdist(xval, "pois", method = "qme", weights = xtab, probs=c(1/2), optim.method="SANN", control=list(maxit=1000)) # Fitting of the distribution ' pois ' by matching quantiles Parameters: estimate lambda 7.875113 Warning message: In qmedist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg, : weights are not taken into account in the default initial values > fitdist(x, "pois", method = "qme", probs=c(1/2), optim.method="SANN", control=list(maxit=1000)) # should be similar Fitting of the distribution ' pois ' by matching quantiles Parameters: estimate lambda 8.251548 > > # should give similar results for big samples > > proc.time() user system elapsed 2.51 0.26 2.81