R Under development (unstable) (2024-07-10 r86888 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(fitdistrplus)
Loading required package: MASS
Loading required package: survival
> nboot <- 1000
> nboot <- 10
> nsample <- 10
> 
> set.seed(123)
> 
> #manual implementation
> geomeanspacing <- function(pdist, obs, ...)
+ {
+   sx <- c(-Inf, sort(obs), Inf)
+   n <- length(sx)
+   Di <- pdist(sx[-1], ...) - pdist(sx[-n], ...)
+   mean(log(Di))
+ }
> 
> #--------------------------------------------------------
> # exponential sample
> 
> x1 <- rexp(nsample)
> lam <- seq(0.1, 2, length=51)
> Sn <- sapply(lam, function(x) geomeanspacing(pexp, obs=x1, rate=x))
> 
> Dn <- function(theta)
+   -geomeanspacing(pexp, obs=x1, rate=theta[1])
> #theoretical optimum
> Dn(1/mean(x1))
[1] 3.090208
> 
> #check curve behavior
> par(mar=c(4,4,2,1))
> plot(lam, Sn, type="l", xlab="theta", main="Average spacing logarithm")
> abline(v=1, col="green")
> abline(v=msedist(x1, "exp")$estimate, lty=2, col="blue")
> legend("bottomright", lty=1:2, col=c("green", "blue"), leg=c("theoretical value", "fitted value"))
> 
> msedist(x1, "exp", control=list(trace=0, REPORT=1))
$estimate
    rate 
1.336924 

$convergence
[1] 0

$value
[1] 3.078204

$hessian
          rate
rate 0.4930124

$optim.function
[1] "optim"

$optim.method
[1] "BFGS"

$fix.arg
NULL

$fix.arg.fun
NULL

$weights
NULL

$counts
function gradient 
      11        6 

$optim.message
NULL

$loglik
[1] -5.610213

$phidiv
[1] "KL"

$power.phidiv
NULL

> 
> mse_exp <- fitdist(x1, "exp", method="mse")
> plot(mse_exp)
> summary(mse_exp)
Parameters : 
     estimate
rate 1.336924
Loglikelihood:  -5.610213   AIC:  13.22043   BIC:  13.52301 
> gofstat(mse_exp)
Goodness-of-fit statistics
                             1-mse-exp
Kolmogorov-Smirnov statistic 0.2549881
Cramer-von Mises statistic   0.1544480
Anderson-Darling statistic   1.0571407

Goodness-of-fit criteria
                               1-mse-exp
Akaike's Information Criterion  13.22043
Bayesian Information Criterion  13.52301
> 
> mse_exp_boot <- bootdist(mse_exp, niter = nboot)
> plot(mse_exp_boot)
> abline(v=1, col="green")
> abline(v=msedist(x1, "exp")$estimate, lty=2, col="blue")
> legend("bottomright", lty=1:2, col=c("green", "blue"), leg=c("theoretical value", "fitted value"))
> 
> 
> # library(BMT)
> # x <- rBMT(1e3, 1/4, 3/4)
> # 
> # BMTfit.mpse(x)
> # fitdist(x, "BMT", method="mse", start=list(p3=1/2, p4=1/2, p1=-1/2, p2=1/2), lower=c(0, 0, -Inf, 0), 
> #         upper=c(1,1,0, Inf))
> # pBMT(x, p3=1/2, p4=1/2, p1=-1/2, p2=1/2)
> 
> 
> 
> 
> 
> try(msedist(c(x1, "a"), "gamma"))
Error in msedist(c(x1, "a"), "gamma") : 
  data must be a numeric vector of length greater than 1 for non censored data
            or a dataframe with two columns named left and right and more than one line for censored data
> try(msedist(c(x1, NA), "gamma"))
Error in checkUncensoredNAInfNan(data) : data contain NA values.
> try(msedist(c(x1, Inf), "gamma"))
Error in checkUncensoredNAInfNan(data) : 
  data contain Inf (infinite) values.
> try(msedist(c(x1, -Inf), "gamma"))
Error in checkUncensoredNAInfNan(data) : 
  data contain Inf (infinite) values.
> try(msedist(c(x1, NaN), "gamma"))
Error in checkUncensoredNAInfNan(data) : 
  data contain NaN (not a numeric) values.
> 
> 
> 
> #--------------------------------------------------------
> # lognormal sample
> 
> 
> x1 <- rlnorm(nsample, 0, 1)
> mu <- seq(-1, 1, length=51)
> Sn <- sapply(mu, 
+              function(x) geomeanspacing(plnorm, obs=x1, mean=x, sd=1))
> 
> 
> Dn <- function(theta)
+   -geomeanspacing(plnorm, obs=x1, mean=theta[1], sd=theta[2])
> 
> plot(mu, Sn, type="l")
> abline(v=0)
> 
> 
> optim(c(2,2), Dn)
$par
[1] -0.5939607  0.6723916

$value
[1] 2.614255

$counts
function gradient 
      81       NA 

$convergence
[1] 0

$message
NULL

> msedist(x1, "lnorm", control=list(trace=0, REPORT=1))
$estimate
   meanlog      sdlog 
-0.5939281  0.6723368 

$convergence
[1] 0

$value
[1] 2.614255

$hessian
           meanlog      sdlog
meanlog 2.12465975 0.01259491
sdlog   0.01259491 3.14939812

$optim.function
[1] "optim"

$optim.method
[1] "Nelder-Mead"

$fix.arg
NULL

$fix.arg.fun
NULL

$weights
NULL

$counts
function gradient 
      47       NA 

$optim.message
NULL

$loglik
[1] -2.380166

$phidiv
[1] "KL"

$power.phidiv
NULL

> 
> 
> mse_lnorm <- fitdist(x1, "lnorm", method="mse")
> mle_lnorm <- fitdist(x1, "lnorm", method="mle")
> plot(mse_lnorm)
> summary(mse_lnorm)
Parameters : 
          estimate
meanlog -0.5939281
sdlog    0.6723368
Loglikelihood:  -2.380166   AIC:  8.760331   BIC:  9.365502 
> cdfcomp(list(mse_lnorm, mle_lnorm))
> gofstat(list(mse_lnorm, mle_lnorm))
Goodness-of-fit statistics
                             1-mse-lnorm 2-mle-lnorm
Kolmogorov-Smirnov statistic  0.16002509  0.15515574
Cramer-von Mises statistic    0.03898949  0.02565167
Anderson-Darling statistic    0.26173571  0.18525761

Goodness-of-fit criteria
                               1-mse-lnorm 2-mle-lnorm
Akaike's Information Criterion    8.760331    7.735518
Bayesian Information Criterion    9.365502    8.340688
> mse_lnorm_boot <- bootdist(mse_lnorm, niter = nboot)
> par(mar=c(4,4,2,1))
> plot(mse_lnorm_boot, enhance = TRUE, trueval=c(0,1))
> 
> #--------------------------------------------------------
> # Pareto sample
> 
> library(actuar)

Attaching package: 'actuar'

The following objects are masked from 'package:stats':

    sd, var

The following object is masked from 'package:grDevices':

    cm

> 
> x1 <- rburr(nsample, 2,2,2)
> 
> Dn <- function(theta)
+   -geomeanspacing(pburr, obs=x1, shape1=theta[1], shape2=theta[2], rate=theta[3])
> Dn(c(1,1,10))
[1] 3.408594
> 
> optim(c(1,1,10), Dn)
$par
[1] 0.609447 3.343173 3.811287

$value
[1] 2.738615

$counts
function gradient 
     126       NA 

$convergence
[1] 0

$message
NULL

Warning messages:
1: In pdist(sx[-1], ...) : NaNs produced
2: In pdist(sx[-n], ...) : NaNs produced
3: In pdist(sx[-1], ...) : NaNs produced
4: In pdist(sx[-n], ...) : NaNs produced
> 
> msedist(x1, "burr", start=list(shape1=1, shape2=1, rate=10), control=list(trace=0, REPORT=1))
$estimate
  shape1   shape2     rate 
0.609447 3.343173 3.811287 

$convergence
[1] 0

$value
[1] 2.738615

$hessian
          shape1     shape2       rate
shape1 2.3885660 0.29619164 0.53774428
shape2 0.2961916 0.09251466 0.02910551
rate   0.5377443 0.02910551 0.19023261

$optim.function
[1] "optim"

$optim.method
[1] "Nelder-Mead"

$fix.arg
NULL

$fix.arg.fun
NULL

$weights
NULL

$counts
function gradient 
     126       NA 

$optim.message
NULL

$loglik
[1] 3.023078

$phidiv
[1] "KL"

$power.phidiv
NULL

> 
> 
> mse_burr <- fitdist(x1, "burr", method="mse", start=list(shape1=1, shape2=1, rate=10))
> mle_burr <- fitdist(x1, "burr", method="mle", start=list(shape1=1, shape2=1, rate=10))
> plot(mse_burr)
> summary(mse_burr)
Parameters : 
       estimate
shape1 0.609447
shape2 3.343173
rate   3.811287
Loglikelihood:  3.023078   AIC:  -0.046155   BIC:  0.8616003 
> cdfcomp(list(mse_burr, mle_burr))
> gofstat(list(mse_burr, mle_burr))
Goodness-of-fit statistics
                             1-mse-burr 2-mle-burr
Kolmogorov-Smirnov statistic 0.20751258 0.15833478
Cramer-von Mises statistic   0.07113128 0.03458676
Anderson-Darling statistic   0.40398658 0.28801421

Goodness-of-fit criteria
                               1-mse-burr  2-mle-burr
Akaike's Information Criterion -0.0461550 -0.99734543
Bayesian Information Criterion  0.8616003 -0.08959015
> mse_burr_boot <- bootdist(mse_burr, niter = pmin(nboot,100))
> plot(mse_burr_boot, enhance = TRUE, trueval=c(2,2,2))
> 
> 
> 
> #--------------------------------------------------------
> # Poisson sample
> 
> x1 <- rpois(nsample, 15)
> 
> geomeanSpacingUnique <- function(pdist, obs, ...)
+ {
+   sx <- c(-Inf, unique(sort(obs)), Inf)
+   n <- length(sx)
+   Di <- pdist(sx[-1], ...) - pdist(sx[-n], ...)
+   mean(log(Di))
+ }
> 
> geomeanSpacingWeight <- function(pdist, obs, weights, ...)
+ {
+   sx <- c(-Inf, unique(sort(obs)), Inf)
+   weights <- c(1, weights)
+   n <- length(sx)
+   Di <- pdist(sx[-1], ...) - pdist(sx[-n], ...)
+   mean(weights*log(Di))
+ }
> 
> DnUnique <- function(theta)
+   -geomeanSpacingUnique(ppois, obs=x1, lambda=theta[1])
> 
> DnWeight <- function(theta, weights)
+   -geomeanSpacingWeight(ppois, obs=x1, lambda=theta[1], weights=weights)
> 
> 
> optimize(DnWeight, c(1, 30), weights=as.numeric(table(x1)))
$minimum
[1] 15.86236

$objective
[1] 2.796532

> optimize(DnUnique, c(1, 30))
$minimum
[1] 15.49055

$objective
[1] 2.581978

> optimize(Dn, c(1, 30)) #does not converge
$minimum
[1] 29.99993

$objective
[1] NA

There were 27 warnings (use warnings() to see them)
> 
> mle_pois1 <- fitdist(x1, "pois", method="mle")
> #no weight
> mse_pois1 <- fitdist(x1, "pois", method="mse")
> #with weight
> mse_pois2 <- fitdist(unique(sort(x1)), "pois", method="mse", weights=as.numeric(table(x1)))
Warning message:
In msedist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg,  :
  weights are not taken into account in the default initial values
> plot(mse_pois1)
> plot(mse_pois2)
> summary(mse_pois1)
Parameters : 
       estimate
lambda 15.49046
Loglikelihood:  -29.76579   AIC:  61.53157   BIC:  61.83416 
> gofstat(mse_pois1)
Chi-squared statistic:  6.798192 
Degree of freedom of the Chi-squared distribution:  3 
Chi-squared p-value:  0.07861594 
   the p-value may be wrong with some theoretical counts < 5  
Chi-squared table:
      obscounts theocounts
<= 9  2.0000000  0.5544272
<= 13 2.0000000  2.6248528
<= 17 2.0000000  3.8811167
<= 18 2.0000000  0.7716062
> 18  2.0000000  2.1679971

Goodness-of-fit criteria
                               1-mse-pois
Akaike's Information Criterion   61.53157
Bayesian Information Criterion   61.83416
> gofstat(mse_pois2)
Chi-squared statistic:  6.650898 
Degree of freedom of the Chi-squared distribution:  2 
Chi-squared p-value:  0.03595636 
   the p-value may be wrong with some theoretical counts < 5  
Chi-squared table:
      obscounts theocounts
<= 9  2.0000000  0.4168917
<= 13 2.0000000  2.1558228
<= 17 2.0000000  3.4765332
> 17  3.0000000  2.9507523

Goodness-of-fit criteria
                               1-mse-pois
Akaike's Information Criterion   56.74383
Bayesian Information Criterion   56.94105
> 
> par(mfrow=c(1,1))
> cdfcomp(list(mle_pois1, mse_pois1), addlegend = FALSE, fitlty = 1)
> curve(ppois(x, lambda=mse_pois2$estimate), type="s", col="blue", add=TRUE)
> legend("bottomright", lty=1, col=c("red", "green", "blue"), leg=c("MLE", "MSE no weight", "MSE with weight"))
> 
> 
> 
> #--------------------------------------------------------
> # real dataset
> # library(CASdatasets)
> # data("ushustormloss")
> # x <- ushustormloss$Normalized.CL05
> # 
> # plot(Normalized.CL05 ~ Year, data=ushustormloss, type="h", main="Normalized Hurricane Damages in United States")
> # 
> # mse_burr <- fitdist(x, "burr", method="mse", start=list(shape1=1, shape2=1, rate=10), lower=0)
> # mle_burr0 <- fitdist(x, "burr", method="mle", start=list(shape1=1, shape2=1, rate=10), lower=0)
> # 
> # cbind(MSE=coef(mse_burr), MLE=coef(mle_burr0))
> # 
> # 
> # setwd("~/Desktop/")
> # par(mar=c(4,4,2,1))
> # pdf("Ushustorm-cdfcomp.pdf", 6, 6)
> # cdfcomp(list(mse_burr, mle_burr0), xlogscale = TRUE, do.points = FALSE)
> # dev.off()
> # pdf("Ushustorm-qqcomp.pdf", 6, 6)
> # qqcomp(list(mse_burr, mle_burr0), xlogscale=TRUE, ylogscale=TRUE)
> # dev.off()
> # pdf("Ushustorm-ppcomp.pdf", 6, 6)
> # ppcomp(list(mse_burr, mle_burr0))
> # dev.off()
> # 
> # gofstat(list(mse_burr, mle_burr0))
> # 
> # mse_iburr <- fitdist(x, "invburr", method="mse", start=list(shape1=1, shape2=1, rate=10), lower=0)
> # mle_iburr0 <- fitdist(x, "invburr", method="mle", start=list(shape1=1, shape2=1, rate=10), lower=0)
> # 
> # gofstat(list(mse_iburr, mle_iburr0))
> # cdfcomp(list(mse_iburr, mle_iburr0))
> 
> 
> proc.time()
   user  system elapsed 
   3.07    0.35    3.43