R Under development (unstable) (2024-07-10 r86888 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(fitdistrplus) Loading required package: MASS Loading required package: survival > > > # (1) fit of two distributions by maximum likelihood estimation > # to the serving size data > # and comparison of goodness-of-fit statistics > # > > data(groundbeef) > serving <- groundbeef$serving > (fitg <- fitdist(serving, "gamma")) Fitting of the distribution ' gamma ' by maximum likelihood Parameters: estimate Std. Error shape 3.99526328 0.340187063 rate 0.05423688 0.004919331 > gg <- gofstat(fitg) > (fitln <- fitdist(serving, "lnorm")) Fitting of the distribution ' lnorm ' by maximum likelihood Parameters: estimate Std. Error meanlog 4.1693701 0.03366988 sdlog 0.5366095 0.02380783 > gn <- gofstat(fitln) > > gofstat(list(fitg, fitln)) Goodness-of-fit statistics 1-mle-gamma 2-mle-lnorm Kolmogorov-Smirnov statistic 0.1277015 0.1493090 Cramer-von Mises statistic 0.6918724 0.8277358 Anderson-Darling statistic 3.5561864 4.5436542 Goodness-of-fit criteria 1-mle-gamma 2-mle-lnorm Akaike's Information Criterion 2511.252 2526.639 Bayesian Information Criterion 2518.326 2533.713 > > # (2) fit of two discrete distributions to toxocara data > # and comparison of goodness-of-fit statistics > # > > data(toxocara) > number <- toxocara$number > > fitp <- fitdist(number, "pois") > summary(fitp) Fitting of the distribution ' pois ' by maximum likelihood Parameters : estimate Std. Error lambda 8.679245 0.4046719 Loglikelihood: -507.5334 AIC: 1017.067 BIC: 1019.037 > plot(fitp) > gp <- gofstat(fitp) > gp Chi-squared statistic: 31256.96 Degree of freedom of the Chi-squared distribution: 5 Chi-squared p-value: 0 the p-value may be wrong with some theoretical counts < 5 Chi-squared table: obscounts theocounts <= 0 14.000000000 0.009014207 <= 1 8.000000000 0.078236512 <= 3 6.000000000 1.321767215 <= 4 6.000000000 2.131297776 <= 9 6.000000000 29.827829221 <= 21 6.000000000 19.626223732 > 21 7.000000000 0.005631339 Goodness-of-fit criteria 1-mle-pois Akaike's Information Criterion 1017.067 Bayesian Information Criterion 1019.037 > > fitnb <- fitdist(number, "nbinom") > summary(fitnb) Fitting of the distribution ' nbinom ' by maximum likelihood Parameters : estimate Std. Error size 0.3971457 0.08289027 mu 8.6802520 1.93501005 Loglikelihood: -159.3441 AIC: 322.6882 BIC: 326.6288 Correlation matrix: size mu size 1.0000000000 -0.0001038553 mu -0.0001038553 1.0000000000 > plot(fitnb) > gnb <- gofstat(fitnb) > gnb Chi-squared statistic: 7.48606 Degree of freedom of the Chi-squared distribution: 4 Chi-squared p-value: 0.1123255 the p-value may be wrong with some theoretical counts < 5 Chi-squared table: obscounts theocounts <= 0 14.000000 15.295027 <= 1 8.000000 5.808596 <= 3 6.000000 6.845015 <= 4 6.000000 2.407815 <= 9 6.000000 7.835196 <= 21 6.000000 8.271110 > 21 7.000000 6.537242 Goodness-of-fit criteria 1-mle-nbinom Akaike's Information Criterion 322.6882 Bayesian Information Criterion 326.6288 > > gofstat(list(fitp, fitnb)) Chi-squared statistic: 31256.96 7.48606 Degree of freedom of the Chi-squared distribution: 5 4 Chi-squared p-value: 0 0.1123255 the p-value may be wrong with some theoretical counts < 5 Chi-squared table: obscounts theo 1-mle-pois theo 2-mle-nbinom <= 0 14 0.009014207 15.295027 <= 1 8 0.078236512 5.808596 <= 3 6 1.321767215 6.845015 <= 4 6 2.131297776 2.407815 <= 9 6 29.827829221 7.835196 <= 21 6 19.626223732 8.271110 > 21 7 0.005631339 6.537242 Goodness-of-fit criteria 1-mle-pois 2-mle-nbinom Akaike's Information Criterion 1017.067 322.6882 Bayesian Information Criterion 1019.037 326.6288 > > attributes(gofstat(list(fitp, fitnb))) $names [1] "chisq" "chisqbreaks" "chisqpvalue" "chisqdf" "chisqtable" [6] "aic" "bic" "discrete" "nbfit" $class [1] "gofstat.fitdist" "fitdist" > > > # (3) Use of Chi-squared results in addition to > # recommended statistics for continuous distributions > # > > set.seed(1234) > x4 <- rweibull(n=10,shape=2,scale=1) > # fit of the good distribution > f4 <- fitdist(x4, "weibull") > g4 <- gofstat(f4, meancount=10) > print(g4) Goodness-of-fit statistics 1-mle-weibull Kolmogorov-Smirnov statistic 0.2630196 Cramer-von Mises statistic 0.1406688 Anderson-Darling statistic 0.7343746 Goodness-of-fit criteria 1-mle-weibull Akaike's Information Criterion 16.27194 Bayesian Information Criterion 16.87711 > > # fit of a bad distribution > f4b <- fitdist(x4, "cauchy") > g4b <- gofstat(f4b, meancount=10) > print(g4b) Goodness-of-fit statistics 1-mle-cauchy Kolmogorov-Smirnov statistic 0.2513339 Cramer-von Mises statistic 0.1764315 Anderson-Darling statistic 1.3801337 Goodness-of-fit criteria 1-mle-cauchy Akaike's Information Criterion 13.56483 Bayesian Information Criterion 14.17000 > > > # (4) estimation of the standard deviation of a normal distribution > # by maximum likelihood with the mean fixed at 10 using the argument fix.arg > # > f1b <- fitdist(serving, "norm", start=list(sd=5), fix.arg=list(mean=10), lower=0) Warning message: In mledist(data, distname, start = arg_startfix$start.arg, fix.arg = arg_startfix$fix.arg, : The BFGS method cannot be used with bounds without provided the gradient. The method is changed to L-BFGS-B. > gofstat(f1b) Goodness-of-fit statistics 1-mle-norm Kolmogorov-Smirnov statistic 0.5426457 Cramer-von Mises statistic 27.8433240 Anderson-Darling statistic 129.3509861 Goodness-of-fit criteria 1-mle-norm Akaike's Information Criterion 2902.585 Bayesian Information Criterion 2906.122 > > # (5) Use on a small data set (less than 10 observations) > # no pb identified > # > > set.seed(1234) > x5a <- rweibull(n=4,shape=2,scale=1) > f5a <- fitdist(x5a, "weibull") > (g5a <- gofstat(f5a)) Goodness-of-fit statistics 1-mle-weibull Kolmogorov-Smirnov statistic 0.4360008 Cramer-von Mises statistic 0.1581251 Anderson-Darling statistic 0.8276217 Goodness-of-fit criteria 1-mle-weibull Akaike's Information Criterion 6.404324 Bayesian Information Criterion 5.176913 > > x5b <- rpois(n = 4, lambda = 1) > f5b <- fitdist(x5b, "pois") > (g5b <- gofstat(f5b)) Chi-squared statistic: 0.01225515 Degree of freedom of the Chi-squared distribution: 0 The degree of freedom of the chi-squared distribution is less than 1 The number of cells is insufficient to calculate the p-value. Chi-squared table: obscounts theocounts <= 0 2.000000 1.889466 > 0 2.000000 2.110534 Goodness-of-fit criteria 1-mle-pois Akaike's Information Criterion 11.11239 Bayesian Information Criterion 10.49868 > > nsample <- 500 > nsample <- 10 > visualize <- FALSE # TRUE for manual tests with visualization of results > set.seed(1234) > > # (6) censored dataset > # > data(fluazinam) > log10EC50 <-log10(fluazinam) > > # call fitdistcens with a 'custom' optimization function > fit.1 <- fitdistcens(log10EC50, "logis") > fit.2 <- fitdistcens(log10EC50, "norm") > > gofstat(fit.1, fitnames = "logistic") Goodness-of-fit criteria logistic Akaike's Information Criterion 45.10781 Bayesian Information Criterion 46.38593 > gofstat(list(fit.1, fit.2), fitnames = c("logistic", "normal")) Goodness-of-fit criteria logistic normal Akaike's Information Criterion 45.10781 44.82424 Bayesian Information Criterion 46.38593 46.10235 > > proc.time() user system elapsed 1.65 0.17 1.79