R Under development (unstable) (2024-07-10 r86888 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(fitdistrplus) Loading required package: MASS Loading required package: survival > nboot <- 1000 > nboot <- 10 > nsample <- 10 > > set.seed(123) > > #manual implementation > geomeanspacing <- function(pdist, obs, ...) + { + sx <- c(-Inf, sort(obs), Inf) + n <- length(sx) + Di <- pdist(sx[-1], ...) - pdist(sx[-n], ...) + mean(log(Di)) + } > > #-------------------------------------------------------- > # exponential sample > > x1 <- rexp(nsample) > lam <- seq(0.1, 2, length=51) > Sn <- sapply(lam, function(x) geomeanspacing(pexp, obs=x1, rate=x)) > > Dn <- function(theta) + -geomeanspacing(pexp, obs=x1, rate=theta[1]) > #theoretical optimum > Dn(1/mean(x1)) [1] 3.090208 > > #check curve behavior > par(mar=c(4,4,2,1)) > plot(lam, Sn, type="l", xlab="theta", main="Average spacing logarithm") > abline(v=1, col="green") > abline(v=msedist(x1, "exp")$estimate, lty=2, col="blue") > legend("bottomright", lty=1:2, col=c("green", "blue"), leg=c("theoretical value", "fitted value")) > > msedist(x1, "exp", control=list(trace=0, REPORT=1)) $estimate rate 1.336924 $convergence [1] 0 $value [1] 3.078204 $hessian rate rate 0.4930124 $optim.function [1] "optim" $optim.method [1] "BFGS" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 11 6 $optim.message NULL $loglik [1] -5.610213 $phidiv [1] "KL" $power.phidiv NULL > > mse_exp <- fitdist(x1, "exp", method="mse") > plot(mse_exp) > summary(mse_exp) Parameters : estimate rate 1.336924 Loglikelihood: -5.610213 AIC: 13.22043 BIC: 13.52301 > gofstat(mse_exp) Goodness-of-fit statistics 1-mse-exp Kolmogorov-Smirnov statistic 0.2549881 Cramer-von Mises statistic 0.1544480 Anderson-Darling statistic 1.0571407 Goodness-of-fit criteria 1-mse-exp Akaike's Information Criterion 13.22043 Bayesian Information Criterion 13.52301 > > mse_exp_boot <- bootdist(mse_exp, niter = nboot) > plot(mse_exp_boot) > abline(v=1, col="green") > abline(v=msedist(x1, "exp")$estimate, lty=2, col="blue") > legend("bottomright", lty=1:2, col=c("green", "blue"), leg=c("theoretical value", "fitted value")) > > > # library(BMT) > # x <- rBMT(1e3, 1/4, 3/4) > # > # BMTfit.mpse(x) > # fitdist(x, "BMT", method="mse", start=list(p3=1/2, p4=1/2, p1=-1/2, p2=1/2), lower=c(0, 0, -Inf, 0), > # upper=c(1,1,0, Inf)) > # pBMT(x, p3=1/2, p4=1/2, p1=-1/2, p2=1/2) > > > > > > try(msedist(c(x1, "a"), "gamma")) Error in msedist(c(x1, "a"), "gamma") : data must be a numeric vector of length greater than 1 for non censored data or a dataframe with two columns named left and right and more than one line for censored data > try(msedist(c(x1, NA), "gamma")) Error in checkUncensoredNAInfNan(data) : data contain NA values. > try(msedist(c(x1, Inf), "gamma")) Error in checkUncensoredNAInfNan(data) : data contain Inf (infinite) values. > try(msedist(c(x1, -Inf), "gamma")) Error in checkUncensoredNAInfNan(data) : data contain Inf (infinite) values. > try(msedist(c(x1, NaN), "gamma")) Error in checkUncensoredNAInfNan(data) : data contain NaN (not a numeric) values. > > > > #-------------------------------------------------------- > # lognormal sample > > > x1 <- rlnorm(nsample, 0, 1) > mu <- seq(-1, 1, length=51) > Sn <- sapply(mu, + function(x) geomeanspacing(plnorm, obs=x1, mean=x, sd=1)) > > > Dn <- function(theta) + -geomeanspacing(plnorm, obs=x1, mean=theta[1], sd=theta[2]) > > plot(mu, Sn, type="l") > abline(v=0) > > > optim(c(2,2), Dn) $par [1] -0.5939607 0.6723916 $value [1] 2.614255 $counts function gradient 81 NA $convergence [1] 0 $message NULL > msedist(x1, "lnorm", control=list(trace=0, REPORT=1)) $estimate meanlog sdlog -0.5939281 0.6723368 $convergence [1] 0 $value [1] 2.614255 $hessian meanlog sdlog meanlog 2.12465975 0.01259491 sdlog 0.01259491 3.14939812 $optim.function [1] "optim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 47 NA $optim.message NULL $loglik [1] -2.380166 $phidiv [1] "KL" $power.phidiv NULL > > > mse_lnorm <- fitdist(x1, "lnorm", method="mse") > mle_lnorm <- fitdist(x1, "lnorm", method="mle") > plot(mse_lnorm) > summary(mse_lnorm) Parameters : estimate meanlog -0.5939281 sdlog 0.6723368 Loglikelihood: -2.380166 AIC: 8.760331 BIC: 9.365502 > cdfcomp(list(mse_lnorm, mle_lnorm)) > gofstat(list(mse_lnorm, mle_lnorm)) Goodness-of-fit statistics 1-mse-lnorm 2-mle-lnorm Kolmogorov-Smirnov statistic 0.16002509 0.15515574 Cramer-von Mises statistic 0.03898949 0.02565167 Anderson-Darling statistic 0.26173571 0.18525761 Goodness-of-fit criteria 1-mse-lnorm 2-mle-lnorm Akaike's Information Criterion 8.760331 7.735518 Bayesian Information Criterion 9.365502 8.340688 > mse_lnorm_boot <- bootdist(mse_lnorm, niter = nboot) > par(mar=c(4,4,2,1)) > plot(mse_lnorm_boot, enhance = TRUE, trueval=c(0,1)) > > #-------------------------------------------------------- > # Pareto sample > > library(actuar) Attaching package: 'actuar' The following objects are masked from 'package:stats': sd, var The following object is masked from 'package:grDevices': cm > > x1 <- rburr(nsample, 2,2,2) > > Dn <- function(theta) + -geomeanspacing(pburr, obs=x1, shape1=theta[1], shape2=theta[2], rate=theta[3]) > Dn(c(1,1,10)) [1] 3.408594 > > optim(c(1,1,10), Dn) $par [1] 0.609447 3.343173 3.811287 $value [1] 2.738615 $counts function gradient 126 NA $convergence [1] 0 $message NULL Warning messages: 1: In pdist(sx[-1], ...) : NaNs produced 2: In pdist(sx[-n], ...) : NaNs produced 3: In pdist(sx[-1], ...) : NaNs produced 4: In pdist(sx[-n], ...) : NaNs produced > > msedist(x1, "burr", start=list(shape1=1, shape2=1, rate=10), control=list(trace=0, REPORT=1)) $estimate shape1 shape2 rate 0.609447 3.343173 3.811287 $convergence [1] 0 $value [1] 2.738615 $hessian shape1 shape2 rate shape1 2.3885660 0.29619164 0.53774428 shape2 0.2961916 0.09251466 0.02910551 rate 0.5377443 0.02910551 0.19023261 $optim.function [1] "optim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 126 NA $optim.message NULL $loglik [1] 3.023078 $phidiv [1] "KL" $power.phidiv NULL > > > mse_burr <- fitdist(x1, "burr", method="mse", start=list(shape1=1, shape2=1, rate=10)) > mle_burr <- fitdist(x1, "burr", method="mle", start=list(shape1=1, shape2=1, rate=10)) > plot(mse_burr) > summary(mse_burr) Parameters : estimate shape1 0.609447 shape2 3.343173 rate 3.811287 Loglikelihood: 3.023078 AIC: -0.046155 BIC: 0.8616003 > cdfcomp(list(mse_burr, mle_burr)) > gofstat(list(mse_burr, mle_burr)) Goodness-of-fit statistics 1-mse-burr 2-mle-burr Kolmogorov-Smirnov statistic 0.20751258 0.15833478 Cramer-von Mises statistic 0.07113128 0.03458676 Anderson-Darling statistic 0.40398658 0.28801421 Goodness-of-fit criteria 1-mse-burr 2-mle-burr Akaike's Information Criterion -0.0461550 -0.99734543 Bayesian Information Criterion 0.8616003 -0.08959015 > mse_burr_boot <- bootdist(mse_burr, niter = pmin(nboot,100)) > plot(mse_burr_boot, enhance = TRUE, trueval=c(2,2,2)) > > > > #-------------------------------------------------------- > # Poisson sample > > x1 <- rpois(nsample, 15) > > geomeanSpacingUnique <- function(pdist, obs, ...) + { + sx <- c(-Inf, unique(sort(obs)), Inf) + n <- length(sx) + Di <- pdist(sx[-1], ...) - pdist(sx[-n], ...) + mean(log(Di)) + } > > geomeanSpacingWeight <- function(pdist, obs, weights, ...) + { + sx <- c(-Inf, unique(sort(obs)), Inf) + weights <- c(1, weights) + n <- length(sx) + Di <- pdist(sx[-1], ...) - pdist(sx[-n], ...) + mean(weights*log(Di)) + } > > DnUnique <- function(theta) + -geomeanSpacingUnique(ppois, obs=x1, lambda=theta[1]) > > DnWeight <- function(theta, weights) + -geomeanSpacingWeight(ppois, obs=x1, lambda=theta[1], weights=weights) > > > optimize(DnWeight, c(1, 30), weights=as.numeric(table(x1))) $minimum [1] 15.86236 $objective [1] 2.796532 > optimize(DnUnique, c(1, 30)) $minimum [1] 15.49055 $objective [1] 2.581978 > optimize(Dn, c(1, 30)) #does not converge $minimum [1] 29.99993 $objective [1] NA There were 27 warnings (use warnings() to see them) > > mle_pois1 <- fitdist(x1, "pois", method="mle") > #no weight > mse_pois1 <- fitdist(x1, "pois", method="mse") > #with weight > mse_pois2 <- fitdist(unique(sort(x1)), "pois", method="mse", weights=as.numeric(table(x1))) Warning message: In msedist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg, : weights are not taken into account in the default initial values > plot(mse_pois1) > plot(mse_pois2) > summary(mse_pois1) Parameters : estimate lambda 15.49046 Loglikelihood: -29.76579 AIC: 61.53157 BIC: 61.83416 > gofstat(mse_pois1) Chi-squared statistic: 6.798192 Degree of freedom of the Chi-squared distribution: 3 Chi-squared p-value: 0.07861594 the p-value may be wrong with some theoretical counts < 5 Chi-squared table: obscounts theocounts <= 9 2.0000000 0.5544272 <= 13 2.0000000 2.6248528 <= 17 2.0000000 3.8811167 <= 18 2.0000000 0.7716062 > 18 2.0000000 2.1679971 Goodness-of-fit criteria 1-mse-pois Akaike's Information Criterion 61.53157 Bayesian Information Criterion 61.83416 > gofstat(mse_pois2) Chi-squared statistic: 6.650898 Degree of freedom of the Chi-squared distribution: 2 Chi-squared p-value: 0.03595636 the p-value may be wrong with some theoretical counts < 5 Chi-squared table: obscounts theocounts <= 9 2.0000000 0.4168917 <= 13 2.0000000 2.1558228 <= 17 2.0000000 3.4765332 > 17 3.0000000 2.9507523 Goodness-of-fit criteria 1-mse-pois Akaike's Information Criterion 56.74383 Bayesian Information Criterion 56.94105 > > par(mfrow=c(1,1)) > cdfcomp(list(mle_pois1, mse_pois1), addlegend = FALSE, fitlty = 1) > curve(ppois(x, lambda=mse_pois2$estimate), type="s", col="blue", add=TRUE) > legend("bottomright", lty=1, col=c("red", "green", "blue"), leg=c("MLE", "MSE no weight", "MSE with weight")) > > > > #-------------------------------------------------------- > # real dataset > # library(CASdatasets) > # data("ushustormloss") > # x <- ushustormloss$Normalized.CL05 > # > # plot(Normalized.CL05 ~ Year, data=ushustormloss, type="h", main="Normalized Hurricane Damages in United States") > # > # mse_burr <- fitdist(x, "burr", method="mse", start=list(shape1=1, shape2=1, rate=10), lower=0) > # mle_burr0 <- fitdist(x, "burr", method="mle", start=list(shape1=1, shape2=1, rate=10), lower=0) > # > # cbind(MSE=coef(mse_burr), MLE=coef(mle_burr0)) > # > # > # setwd("~/Desktop/") > # par(mar=c(4,4,2,1)) > # pdf("Ushustorm-cdfcomp.pdf", 6, 6) > # cdfcomp(list(mse_burr, mle_burr0), xlogscale = TRUE, do.points = FALSE) > # dev.off() > # pdf("Ushustorm-qqcomp.pdf", 6, 6) > # qqcomp(list(mse_burr, mle_burr0), xlogscale=TRUE, ylogscale=TRUE) > # dev.off() > # pdf("Ushustorm-ppcomp.pdf", 6, 6) > # ppcomp(list(mse_burr, mle_burr0)) > # dev.off() > # > # gofstat(list(mse_burr, mle_burr0)) > # > # mse_iburr <- fitdist(x, "invburr", method="mse", start=list(shape1=1, shape2=1, rate=10), lower=0) > # mle_iburr0 <- fitdist(x, "invburr", method="mle", start=list(shape1=1, shape2=1, rate=10), lower=0) > # > # gofstat(list(mse_iburr, mle_iburr0)) > # cdfcomp(list(mse_iburr, mle_iburr0)) > > > proc.time() user system elapsed 3.07 0.35 3.43