R Under development (unstable) (2024-07-10 r86888 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(fitdistrplus)
Loading required package: MASS
Loading required package: survival
> nbboot <- 100
> nbboot <- 10
> 
> nsample <- 100
> nsample <- 10
> 
> visualize <- FALSE # TRUE for manual tests with visualization of results
> 
> # (1) basic fit of a gamma distribution by maximum likelihood estimation
> #
> data(groundbeef)
> serving <- groundbeef$serving
> fitg <- fitdist(serving, "gamma")
> summary(fitg)
Fitting of the distribution ' gamma ' by maximum likelihood 
Parameters : 
        estimate  Std. Error
shape 3.99526328 0.340187063
rate  0.05423688 0.004919331
Loglikelihood:  -1253.626   AIC:  2511.252   BIC:  2518.326 
Correlation matrix:
          shape      rate
shape 1.0000000 0.9382418
rate  0.9382418 1.0000000

> plot(fitg)
> cdfcomp(fitg, addlegend=FALSE)
> 
> #check names
> names(fitdist(serving, "gamma", optim.method="Brent", lower=0, upper=10, fix.arg=list(shape=2))$estimate)
[1] "rate"
> names(fitdist(serving, "gamma", optim.method="Nelder-Mead")$estimate)
[1] "shape" "rate" 
> names(fitdist(serving, "gamma", optim.method="BFGS")$estimate)
[1] "shape" "rate" 
> # names(fitdist(serving, "gamma", optim.method="CG", start=list(shape=4, rate=1/20))$estimate)
> if(visualize) {  # check ERROR on aarch64-apple-darwin20.4.0 (64-bit) (2021/05/12)
+   set.seed(1234)
+   names(fitdist(serving, "gamma", optim.method="L-BFGS-B", lower=0)$estimate)
+ }
> 
> 
> # (7) custom optimization function
> #
> 
> #create the sample
> set.seed(1234)
> mysample <- rexp(nsample, 5)
> mystart <- list(rate=8)
> 
> res1 <- fitdist(mysample, dexp, start= mystart, optim.method="Nelder-Mead")
> 
> #show the result
> summary(res1)
Fitting of the distribution ' exp ' by maximum likelihood 
Parameters : 
     estimate Std. Error
rate 6.578125   2.080186
Loglikelihood:  8.838553   AIC:  -15.67711   BIC:  -15.37452 
> 
> #the warning tell us to use optimise, because the Nelder-Mead is not adequate.
> 
> #to meet the standard 'fn' argument and specific name arguments, we wrap optimize, 
> myoptimize <- function(fn, par, ...) 
+ {
+   res <- optimize(f=fn, ..., maximum=FALSE)  
+   #assume the optimization function minimize
+   
+   standardres <- c(res, convergence=0, value=res$objective, 
+                    par=res$minimum, hessian=NA)
+   
+   return(standardres)
+ }
> 
> #call fitdist with a 'custom' optimization function
> res2 <- fitdist(mysample, dexp, start=mystart, custom.optim=myoptimize, 
+                 interval=c(0, 100))
> 
> #show the result
> summary(res2)
Fitting of the distribution ' exp ' by maximum likelihood 
Parameters : 
     estimate
rate 6.578815
Loglikelihood:  8.838553   AIC:  -15.67711   BIC:  -15.37452 
> 
> 
> # (8) custom optimization function - another example with the genetic algorithm
> #
> if(any(installed.packages()[,"Package"] == "rgenoud") && visualize)
+ {
+   
+   #set a sample
+   fit1 <- fitdist(serving, "gamma")
+   summary(fit1)
+   
+   #wrap genoud function rgenoud package
+   mygenoud <- function(fn, par, ...) 
+   {
+     require(rgenoud)
+     res <- genoud(fn, starting.values=par, ...)        
+     standardres <- c(res, convergence=0, counts=NULL)
+     
+     return(standardres)
+   }
+   
+   #call fitdist with a 'custom' optimization function
+   fit2 <- fitdist(serving, "gamma", custom.optim=mygenoud, nvars=2, start=as.list(fit1$estimate),
+                   Domains=cbind(c(0, 0), c(10, 10)), boundary.enforcement=1, 
+                   print.level=0, hessian=TRUE)
+   
+   summary(fit2)
+ }
> 
> # (11) Fit of a Pareto distribution by numerical moment matching estimation
> #
> if(any(installed.packages()[,"Package"] == "actuar") && visualize)
+ {
+   require(actuar)
+   #simulate a sample
+   set.seed(1234)
+   x4 <- rpareto(nsample, 6, 2)
+   
+   #empirical raw moment
+   memp <- function(x, order)
+     ifelse(order == 1, mean(x), sum(x^order)/length(x))
+   
+   #fit
+   fP <- fitdist(x4, "pareto", method="mme", order=c(1, 2), memp="memp", 
+                 start=list(shape=10, scale=10), lower=1, upper=Inf)
+   summary(fP)
+   plot(fP)
+   
+ }
> 
> 
> # (14) scaling problem - too small values
> #
> if (visualize) # LONG TO RUN ON CRAN
+ {
+   x2 <- c(-0.00707717, -0.000947418, -0.00189753,
+           -0.000474947, -0.00190205, -0.000476077, 0.00237812, 0.000949668,
+           0.000474496, 0.00284226, -0.000473149, -0.000473373, 0, 0, 0.00283688,
+           -0.0037843, -0.0047506, -0.00238379, -0.00286807, 0.000478583,
+           0.000478354, -0.00143575, 0.00143575, 0.00238835, 0.0042847,
+           0.00237248, -0.00142281, -0.00142484, 0, 0.00142484, 0.000948767,
+           0.00378609, -0.000472478, 0.000472478, -0.0014181, 0, -0.000946522,
+           -0.00284495, 0, 0.00331832, 0.00283554, 0.00141476, -0.00141476,
+           -0.00188947, 0.00141743, -0.00236351, 0.00236351, 0.00235794,
+           0.00235239, -0.000940292, -0.0014121, -0.00283019, 0.000472255,
+           0.000472032, 0.000471809, -0.0014161, 0.0014161, -0.000943842,
+           0.000472032, -0.000944287, -0.00094518, -0.00189304, -0.000473821,
+           -0.000474046, 0.00331361, -0.000472701, -0.000946074, 0.00141878,
+           -0.000945627, -0.00189394, -0.00189753, -0.0057143, -0.00143369,
+           -0.00383326, 0.00143919, 0.000479272, -0.00191847, -0.000480192,
+           0.000960154, 0.000479731, 0, 0.000479501, 0.000958313, -0.00383878,
+           -0.00240674, 0.000963391, 0.000962464, -0.00192586, 0.000481812,
+           -0.00241138, -0.00144963)
+   
+   for(i in 6:0)
+   {
+     cat("\nscaling", 10^i, "\n")
+     res <- try(mledist(x2*10^i, "cauchy"), silent=TRUE)
+     if(inherits(res, "try-error"))
+       print(res)
+     else
+     {
+       cat("estimate\n")
+       print(res$estimate)
+       cat("Hessian\n")
+       print(res$hessian)
+     }
+   }
+ }
> 
> # (15) scaling problem - too big values
> #
> if (visualize) # LONG TO RUN ON CRAN
+ {
+   x1 <- c(
+     1401928684, 1413455609, 1432458425, 1436910475, 1494883250, 1565770323, 1577486458,
+     1568908053, 1606424896, 1632264979, 1780495643, 1865525923, 2035689865, 2141429306,
+     2335443964, 2465661689, 2563368221, 2845012431, 2949890881, 3180645942, 3309009836,
+     3618581152, 4109197451, 4064662257, 4028375795, 4176781983, 4303024833, 4493470109
+   )
+   
+   
+   for(i in 0:5)
+   {
+     cat("\nscaling", 10^(-2*i), "\n")
+     res <- mledist(x1*10^(-2*i), "norm")
+     Hm1 <- try(solve(res$hessian), silent=TRUE)
+     if(inherits(Hm1, "try-error"))
+       print(Hm1)
+     else
+     {
+       cat("estimate\n")
+       print(res$estimate)
+       cat("Hessian\n")
+       print(res$hessian)
+       cat("inverse Hessian\n")
+       print(Hm1)
+     }
+   }
+   
+   fitdist(x1, "norm")
+   fitdist(x1*1e-6, "norm")
+ }
> 
> # (16) Fit of a lognormal distribution on acute toxicity values of endosulfan for
> # nonarthropod invertebrates, using maximum likelihood estimation
> # to estimate what is called a species sensitivity distribution 
> # (SSD) in ecotoxicology, followed by estimation of the 5 percent quantile value of 
> # the fitted distribution, what is called the 5 percent hazardous concentration (HC5)
> # in ecotoxicology, with its two-sided 95 percent confidence interval calculated by 
> # parametric bootstrap
> #
> data(endosulfan)
> ATV <- subset(endosulfan, group == "NonArthroInvert")$ATV
> log10ATV <- log10(subset(endosulfan, group == "NonArthroInvert")$ATV)
> fln <- fitdist(log10ATV, "norm")
> 
> quantile(fln, probs = 0.05)
Estimated quantiles for each specified probability (non-censored data)
           p=0.05
estimate 1.744227
> 
> 
> # (17) Fit of a triangular distribution using Cramer-von Mises or
> # Kolmogorov-Smirnov distance
> # 
> if(any(installed.packages()[,"Package"] == "mc2d") && visualize)
+ {
+   set.seed(1234)
+   require(mc2d)
+   t <- rtriang(100,min=5,mode=6,max=10) # nsample not used : does not converge with a too small sample
+   fCvM <- fitdist(t,"triang",method="mge",start = list(min=4, mode=6,max=9),gof="CvM")
+   fKS <- fitdist(t,"triang",method="mge",start = list(min=4, mode=6,max=9),gof="KS")
+   cdfcomp(list(fCvM,fKS))
+ }
> 
> # (18) gumbel distribution
> #
> dgumbel <- function(x, a, b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b))
> pgumbel <- function(q, a, b) exp(-exp((a-q)/b))
> qgumbel <- function(p, a, b) a-b*log(-log(p))
> 
> data(danishuni)
> fitdist(danishuni$Loss, "gumbel", start=list(a=5, b=10))
Fitting of the distribution ' gumbel ' by maximum likelihood 
Parameters:
  estimate Std. Error
a 1.977520 0.03792251
b 1.738562 0.03447841
> 
> 
> # (19) check the 'start' argument
> #
> if (FALSE) # NO INTEREST WITHOUT VISUALIZATION OF THE RESULT
+ {
+   #create the sample
+   mysample <- rexp(nsample, 5)
+   mystart2 <- list(rate2=8)
+   mystart3 <- list(8)
+   
+   try( fitdist(mysample, dexp, start= mystart2, method="mle") ) 
+   try( fitdist(mysample, dexp, start= mystart3, method="mle") ) 
+   
+   try( fitdist(mysample, dexp, start= mystart2, method="mme") ) 
+   try( fitdist(mysample, dexp, start= mystart3, method="mme") ) 
+   
+   try( fitdist(mysample, dexp, start= mystart2, method="qme", probs=1/2) ) 
+   try( fitdist(mysample, dexp, start= mystart3, method="qme", probs=1/2) ) 
+   
+   try( fitdist(mysample, dexp, start= mystart2, method="mge", gof="AD") ) 
+   try( fitdist(mysample, dexp, start= mystart3, method="mge", gof="AD") ) 
+   
+ }
> 
> # (20) example with dexgauss
> # would require to suggest the package gamlss.dist in the Description file
> #
> #if(any(installed.packages()[,"Package"] == "gamlss.dist"))
> #{
> #    require(gamlss.dist)
> #    set.seed(1234)
> #    a=rexGAUS(100,mu=500,sigma=50,nu=75)
> #    fitdist(a,dexGAUS,start=list(mu=median(a),sigma=sqrt(var(a)/2),nu=sqrt(var(a)/2)))
> #}
> 
> 
> # (21) check the 'keepdata' argument
> #
> if (visualize) # REQUIRES VISUALIZATION OF THE RESULTS
+ {
+   #create the sample
+   x <- rexp(1e6, 5)
+   summary(x)
+   f1 <- fitdist(x, "exp", keepdata=FALSE)
+   f2 <- fitdist(x, "exp", keepdata=TRUE)
+   
+   par(mfrow=c(1,2))
+   cdfcomp(f1)
+   cdfcomp(f2)
+ }
> 
> # (22) relevant example for zero modified geometric distribution
> #
> dzmgeom <- function(x, p1, p2)
+ {
+   p1 * (x == 0) + (1-p1)*dgeom(x-1, p2)
+ }
> pzmgeom <- function(q, p1, p2)
+ {
+   p1 * (q >= 0) + (1-p1)*pgeom(q-1, p2)
+ }
> rzmgeom <- function(n, p1, p2)
+ {
+   u <- rbinom(n, 1, 1-p1) #prob to get zero is p1
+   u[u != 0] <- rgeom(sum(u != 0), p2)+1
+   u
+ }
> 
> x2 <- rzmgeom(nsample, 1/2, 1/10)
> table(x2)
x2
 0  3  9 20 
 7  1  1  1 
> #this is the MLE which converges almost surely and in distribution to the true value.
> initp1 <- function(x) list(p1=mean(x == 0))
> 
> fitdist(x2, "zmgeom", start=list(p1=1/2, p2=1/2))
Fitting of the distribution ' zmgeom ' by maximum likelihood 
Parameters:
     estimate Std. Error
p1 0.70003945 0.14490166
p2 0.09373461 0.05151407
> 
> f2 <- fitdist(x2, "zmgeom", fix.arg=initp1, start=list(p2=1/2))
> print(f2)
Fitting of the distribution ' zmgeom ' by maximum likelihood 
Parameters:
     estimate Std. Error
p2 0.09375271 0.05152299
Fixed parameters (computed by a user-supplied function):
   value
p1   0.7
> summary(f2)
Fitting of the distribution ' zmgeom ' by maximum likelihood 
Parameters : 
     estimate Std. Error
p2 0.09375271 0.05152299
Fixed parameters (computed by a user-supplied function):
   value
p1   0.7
Loglikelihood:  -16.06478   AIC:  34.12955   BIC:  34.43214 
> 
> f2 <- fitdist(x2, "zmgeom", fix.arg=list(p1=1/2), start=list(p2=1/2))
> print(f2)
Fitting of the distribution ' zmgeom ' by maximum likelihood 
Parameters:
     estimate Std. Error
p2 0.09375271 0.05152299
Fixed parameters:
   value
p1   0.5
> summary(f2)
Fitting of the distribution ' zmgeom ' by maximum likelihood 
Parameters : 
     estimate Std. Error
p2 0.09375271 0.05152299
Fixed parameters:
   value
p1   0.5
Loglikelihood:  -16.8876   AIC:  35.77521   BIC:  36.07779 
> 
> # (23) check the use of weights with MLE
> #
> set.seed(1234)
> x <- rpois(nsample, 10)
> xtab <- table(x)
> xval <- sort(unique(x))
> f1 <- fitdist(x, "pois")
> f2 <- fitdist(xval, "pois", weights = xtab)
Warning message:
In mledist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg,  :
  weights are not taken into account in the default initial values
> 
> f1$estimate
lambda 
   8.4 
> f2$estimate #should be identical
lambda 
   8.4 
> 
> 
> # (24) check the use of weights with other methods
> #
> 
> set.seed(1234)
> x <- rpois(nsample, 10)
> xtab <- table(x)
> xval <- sort(unique(x))
> (f1 <- fitdist(x, "norm", method = "mle"))
Fitting of the distribution ' norm ' by maximum likelihood 
Parameters:
     estimate Std. Error
mean 8.400000  0.5138093
sd   1.624808  0.3633174
> (f2 <- fitdist(xval, "norm", weights = xtab, method = "mle"))
Fitting of the distribution ' norm ' by maximum likelihood 
Parameters:
     estimate Std. Error
mean 8.400002  0.5138242
sd   1.624855  0.3633437
Warning message:
In mledist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg,  :
  weights are not taken into account in the default initial values
> 
> (f1 <- fitdist(x, "norm", method = "mme"))
Fitting of the distribution ' norm ' by matching moments 
Parameters:
     estimate Std. Error
mean 8.400000   1.624808
sd   1.624808   1.148912
> (f2 <- fitdist(xval, "norm", weights = xtab, method = "mme"))
Fitting of the distribution ' norm ' by matching moments 
Parameters:
     estimate Std. Error
mean 8.400000   1.712698
sd   1.712698        NaN
Warning messages:
1: In mmedist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg,  :
  weights are not taken into account in the default initial values
2: In cov2cor(varcovar) :
  diag(V) had non-positive or NA entries; the non-finite result may be dubious
3: In sqrt(diag(varcovar)) : NaNs produced
> 
> (f1 <- fitdist(x, "norm", method = "qme", probs=c(1/4, 3/4)))
Fitting of the distribution ' norm ' by matching quantiles 
Parameters:
     estimate
mean 8.374998
sd   1.667926
> (f2 <- fitdist(xval, "norm", method = "qme", weights = xtab, probs=c(1/4, 3/4) ))
Fitting of the distribution ' norm ' by matching quantiles 
Parameters:
     estimate
mean 8.374997
sd   1.667910
Warning message:
In qmedist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg,  :
  weights are not taken into account in the default initial values
> 
> fitdist(x, "norm", method="mge", gof = "CvM")
Fitting of the distribution ' norm ' by maximum goodness-of-fit 
Parameters:
     estimate
mean 8.230838
sd   1.881098
> try(fitdist(xval, "norm", method="mge", gof = "CvM", weights = xtab)) # not yet developped
Error in mgedist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg,  : 
  Weights is not allowed for maximum GOF estimation
> 
> 
> # (24b) check the use of weights with qme with a discrete distribution
> #
> set.seed(1234)
> x <- rpois(nsample, 10)
> xtab <- table(x)
> xval <- sort(unique(x))
> (f1 <- fitdist(x, "pois", method = "qme", probs=c(1/2)))
Fitting of the distribution ' pois ' by matching quantiles 
Parameters:
       estimate
lambda      8.4
> (f2 <- fitdist(xval, "pois", method = "qme", weights = xtab, probs=c(1/2) )) # similar to f1
Fitting of the distribution ' pois ' by matching quantiles 
Parameters:
       estimate
lambda      8.4
Warning message:
In qmedist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg,  :
  weights are not taken into account in the default initial values
> 
> fitdist(xval, "pois", method = "qme", weights = xtab, probs=c(1/2), optim.method="SANN", control=list(maxit=1000)) #
Fitting of the distribution ' pois ' by matching quantiles 
Parameters:
       estimate
lambda 7.875113
Warning message:
In qmedist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg,  :
  weights are not taken into account in the default initial values
> fitdist(x, "pois", method = "qme", probs=c(1/2), optim.method="SANN", control=list(maxit=1000)) # should be similar
Fitting of the distribution ' pois ' by matching quantiles 
Parameters:
       estimate
lambda 8.251548
> 
> # should give similar results for big samples
> 
> proc.time()
   user  system elapsed 
   2.51    0.26    2.81