R Under development (unstable) (2025-01-06 r87534 ucrt) -- "Unsuffered Consequences" Copyright (C) 2025 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > require("fitdistrplus") Loading required package: fitdistrplus Loading required package: MASS Loading required package: survival > > nsample <- 10 > > # (1) Fit of a Weibull distribution to serving size data by maximum > # goodness-of-fit estimation using all the distances available > # > > data(groundbeef) > serving <- groundbeef$serving > mgedist(serving, "weibull", gof="CvM")$estimate shape scale 2.093204 82.660014 > mgedist(serving, "weibull", gof="CvM", silent = FALSE)$estimate shape scale 2.093204 82.660014 Warning messages: 1: In pweibull(c(10, 11.5, 17, 20, 20, 20, 20, 20, 20, 20, 20, 24, : NaNs produced 2: In pweibull(c(10, 11.5, 17, 20, 20, 20, 20, 20, 20, 20, 20, 24, : NaNs produced > mgedist(serving, "weibull", gof="KS")$estimate shape scale 2.065634 81.450487 > mgedist(serving, "weibull", gof="AD")$estimate shape scale 2.125425 82.890502 > mgedist(serving, "weibull", gof="ADR")$estimate shape scale 2.072035 82.762593 > mgedist(serving, "weibull", gof="ADL")$estimate shape scale 2.197498 82.016005 > mgedist(serving, "weibull", gof="AD2R")$estimate shape scale 1.90328 81.33464 > mgedist(serving, "weibull", gof="AD2L")$estimate shape scale 2.483836 78.252113 > mgedist(serving, "weibull", gof="AD2")$estimate shape scale 2.081168 85.281194 > > #fitted values are around (2.123932 82.145177) > > # (2) Fit of a uniform distribution using Cramer-von Mises or > # Kolmogorov-Smirnov distance > # > > set.seed(1234) > u <- runif(nsample, min=5, max=10) > mgedist(u, "unif", gof="CvM")$estimate min max 5.151281 9.808278 > mgedist(u, "unif", gof="KS")$estimate min max 5.840091 9.452868 > > #fitted values are around (5.495686 9.630573) > > # (3) Fit of a triangular distribution using Cramer-von Mises or > # Kolmogorov-Smirnov distance > # > require("mc2d") Loading required package: mc2d Loading required package: mvtnorm Attaching package: 'mc2d' The following objects are masked from 'package:base': pmax, pmin > set.seed(1234) > v <- rtriang(nsample, min=5, mode=6, max=10) > mgedist(v, "triang", start = list(min=4, mode=6,max=9), gof="CvM")$estimate min mode max 4.602048 7.367066 8.682229 Warning message: In checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, : Some parameter names have no starting/fixed value but have a default value: mean. > mgedist(v, "triang", start = list(min=4, mode=6,max=9), gof="KS")$estimate min mode max 5.191300 7.414283 8.306100 Warning message: In checkparamlist(arg_startfix$start.arg, arg_startfix$fix.arg, : Some parameter names have no starting/fixed value but have a default value: mean. > > #fitted values are around (4.896674 7.390675 8.494165 ) > > # (4) scaling problem > # the simulated dataset (below) has particularly small values, hence without scaling (10^0), > # the optimization raises an error. The for loop shows how scaling by 10^i > # for i=1,...,6 makes the fitting procedure work correctly. > if(FALSE) + { + set.seed(1234) + x2 <- rnorm(nsample, 1e-4, 2e-4) + for(i in 6:0) + cat(i, try(mgedist(x2*10^i,"cauchy")$estimate, silent=TRUE), "\n") + + # 6 19.86906 122.0622 + # 5 1.986906 12.20622 + # 4 0.1986906 1.220622 + # 3 0.01986906 0.1220622 + # 2 0.001986906 0.01220622 + # 1 NA NA + # 0 NA NA + } > > > # (5) scaling problem > # > if(FALSE) + { + x <- c(-0.00707717, -0.000947418, -0.00189753, + -0.000474947, -0.00190205, -0.000476077, 0.00237812, 0.000949668, + 0.000474496, 0.00284226, -0.000473149, -0.000473373, 0, 0, 0.00283688, + -0.0037843, -0.0047506, -0.00238379, -0.00286807, 0.000478583, + 0.000478354, -0.00143575, 0.00143575, 0.00238835, 0.0042847, + 0.00237248, -0.00142281, -0.00142484, 0, 0.00142484, 0.000948767, + 0.00378609, -0.000472478, 0.000472478, -0.0014181, 0, -0.000946522, + -0.00284495, 0, 0.00331832, 0.00283554, 0.00141476, -0.00141476, + -0.00188947, 0.00141743, -0.00236351, 0.00236351, 0.00235794, + 0.00235239, -0.000940292, -0.0014121, -0.00283019, 0.000472255, + 0.000472032, 0.000471809, -0.0014161, 0.0014161, -0.000943842, + 0.000472032, -0.000944287, -0.00094518, -0.00189304, -0.000473821, + -0.000474046, 0.00331361, -0.000472701, -0.000946074, 0.00141878, + -0.000945627, -0.00189394, -0.00189753, -0.0057143, -0.00143369, + -0.00383326, 0.00143919, 0.000479272, -0.00191847, -0.000480192, + 0.000960154, 0.000479731, 0, 0.000479501, 0.000958313, -0.00383878, + -0.00240674, 0.000963391, 0.000962464, -0.00192586, 0.000481812, + -0.00241138, -0.00144963) + + + #only i == 0, no scaling, should not converge. + for(i in 6:0) + cat(i, try(mgedist(x*10^i,"cauchy")$estimate, silent=TRUE), "\n") + + # 6 -283.1442 1185.903 + # 5 -28.31442 118.5903 + # 4 -2.831442 11.85903 + # 3 -0.2831442 1.185903 + # 2 -0.02831442 0.1185903 + # 1 -0.002831442 0.01185903 + # 0 NA NA + } > > > # (6) test error messages > # > > dnorm2 <- pnorm2 <- function(x, a) + "NA" > x <- rexp(10) > > #should get a one-line error > res <- mgedist(x, "norm2", start=list(a=1)) <simpleError in theop - (2 * 1:n - 1)/(2 * n): non-numeric argument to binary operator> > #as in > attr(try(log("a"), silent=TRUE), "condition") <simpleError in log("a"): non-numeric argument to mathematical function> > > > try(mgedist(c(serving, "a"), "gamma")) Error in mgedist(c(serving, "a"), "gamma") : data must be a numeric vector of length greater than 1 for non censored data or a dataframe with two columns named left and right and more than one line for censored data > try(mgedist(c(serving, NA), "gamma")) Error in checkUncensoredNAInfNan(data) : data contain NA values. > try(mgedist(c(serving, Inf), "gamma")) Error in checkUncensoredNAInfNan(data) : data contain Inf (infinite) values. > try(mgedist(c(serving, -Inf), "gamma")) Error in checkUncensoredNAInfNan(data) : data contain Inf (infinite) values. > try(mgedist(c(serving, NaN), "gamma")) Error in checkUncensoredNAInfNan(data) : data contain NaN (not a numeric) values. > > > # (7) test the component optim.message > > x <- rnorm(1000) > #change parameter to obtain unsuccessful convergence > mgedist(x, "norm", control=list(maxit=2), start=list(mean=1e5, sd=1), optim.method="L-BFGS-B", lower=0) $estimate mean sd 1e+05 1e+00 $convergence [1] 0 $value [1] 0.3333333 $hessian mean sd mean 0 0 sd 0 0 $optim.function [1] "optim" $optim.method [1] "L-BFGS-B" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 1 1 $optim.message [1] "CONVERGENCE: NORM OF PROJECTED GRADIENT <= PGTOL" $loglik [1] -Inf $gof [1] "CvM" > > # (8) test bounds > > x <- rnorm(1000) > mgedist(x, "norm", optim.method="L-BFGS-B", lower=c(-Inf, 0)) #optim and L-BFGS-B $estimate mean sd 0.01134413 0.97945748 $convergence [1] 0 $value [1] 2.003669e-05 $hessian mean sd mean 1.915741e-01 -5.237362e-05 sd -5.237362e-05 6.219275e-02 $optim.function [1] "optim" $optim.method [1] "L-BFGS-B" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 7 7 $optim.message [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH" $loglik [1] -1399.894 $gof [1] "CvM" > mgedist(x, "norm", optim.method="Nelder", lower=c(-Inf, 0)) $estimate mean sd 0.01134389 0.97945192 $convergence [1] 0 $value [1] 2.003669e-05 $hessian NULL $optim.function [1] "constrOptim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 61 NA $optim.message NULL $loglik [1] -1399.894 $gof [1] "CvM" > > # (9) numerical issue with log(1-p) > > obs <- c(rep(1, 14), 10) > try(mgedist(obs, "weibull", gof="AD", control=list(trace=1, REPORT=1))) Nelder-Mead direct search function minimizer function value for initial parameters = 0.512741 Scaled convergence tolerance is 7.64043e-09 Stepsize computed as 0.201842 BUILD 3 0.592510 0.512741 EXTENSION 5 0.542194 0.456266 LO-REDUCTION 7 0.512741 0.444880 HI-REDUCTION 9 0.462295 0.444880 HI-REDUCTION 11 0.456266 0.444880 HI-REDUCTION 13 0.447347 0.444880 HI-REDUCTION 15 0.446096 0.444880 HI-REDUCTION 17 0.445189 0.444880 HI-REDUCTION 19 0.445018 0.444880 HI-REDUCTION 21 0.444918 0.444880 HI-REDUCTION 23 0.444892 0.444880 HI-REDUCTION 25 0.444884 0.444878 LO-REDUCTION 27 0.444880 0.444878 HI-REDUCTION 29 0.444878 0.444878 REFLECTION 31 0.444878 0.444878 HI-REDUCTION 33 0.444878 0.444878 HI-REDUCTION 35 0.444878 0.444878 HI-REDUCTION 37 0.444878 0.444878 LO-REDUCTION 39 0.444878 0.444878 Exiting from Nelder Mead minimizer 41 function evaluations used $estimate shape scale 2.142788 1.244091 $convergence [1] 0 $value [1] 0.4448776 $hessian shape scale shape 0.04287606 0.3380888 scale 0.33808876 2.6665859 $optim.function [1] "optim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 41 NA $optim.message NULL $loglik [1] -88.72866 $gof [1] "AD" > > # (10) large sample size issue > > if(FALSE) + { + set.seed(123) + obs <- rlnorm(1e6, 3, 2) + for(i in 2:6) + cat(i, try(mgedist(obs[1:10^i], "lnorm", control=list(trace=0, REPORT=1))$estimate, silent=TRUE), "\n") + + # 2 3.143734 1.819549 + # 3 3.023688 1.955416 + # 4 2.995646 1.993392 + # 5 3.002682 2.000394 + # 6 2.999036 1.999887 + # 7 3.000222 1.999981 + } > > > proc.time() user system elapsed 1.90 0.26 2.15