R Under development (unstable) (2025-01-06 r87534 ucrt) -- "Unsuffered Consequences" Copyright (C) 2025 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > require("fitdistrplus") Loading required package: fitdistrplus Loading required package: MASS Loading required package: survival > nsample <- 10 > > # (1) basic fit of a normal distribution > # > > set.seed(1234) > x1 <- rnorm(n=nsample) > qmedist(x1, "norm", probs=c(1/3, 2/3)) $estimate mean sd -0.1486435 0.9892013 $convergence [1] 0 $value [1] 2.1564e-10 $hessian mean sd mean 2.000000e+00 2.774033e-14 sd 2.774033e-14 3.710520e-01 $optim.function [1] "optim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 55 NA $optim.message NULL $loglik [1] -13.92195 $probs [1] 0.3333333 0.6666667 > > #fitted coef is -0.1486435 0.9892013, fitted loglik is -13.92195 > > # (2) defining your own distribution functions, here for the Gumbel > # distribution for other distributions, see the CRAN task view dedicated > # to probability distributions > > dgumbel <- function(x, a, b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) > qgumbel <- function(p, a, b) a - b*log(-log(p)) > qmedist(x1, "gumbel", probs=c(1/3, 2/3), start=list(a=10,b=5)) $estimate a b -0.4935571 0.8557281 $convergence [1] 0 $value [1] 1.367933e-06 $hessian a b a 2.0000000 0.8086726 b 0.8086726 0.8237492 $optim.function [1] "optim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 61 NA $optim.message NULL $loglik [1] -16.79828 $probs [1] 0.3333333 0.6666667 > > #fitted coef is -0.4935571 0.8557281, fitted loglik is -16.79828 > > # (3) fit a discrete distribution (Poisson) > # > > set.seed(1234) > x2 <- rpois(n=nsample,lambda = 2) > qmedist(x2, "pois", probs=1/2) $estimate lambda 1.7 $convergence [1] 0 $value [1] 0 $hessian lambda lambda 0 $optim.function [1] "optim" $optim.method [1] "BFGS" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 1 1 $optim.message NULL $loglik [1] -15.31626 $probs [1] 0.5 > > #fitted coef is 1.7, fitted loglik is -15.31626 > > # (4) fit a finite-support distribution (beta) > # > > set.seed(1234) > x3 <- rbeta(n=nsample, shape1=5, shape2=10) > qmedist(x3, "beta", probs=c(1/3, 2/3)) $estimate shape1 shape2 4.010999 8.397853 $convergence [1] 0 $value [1] 2.49844e-11 $hessian shape1 shape2 shape1 0.006627461 -0.003061615 shape2 -0.003061615 0.001438511 $optim.function [1] "optim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 61 NA $optim.message NULL $loglik [1] 6.642124 $probs [1] 0.3333333 0.6666667 > > #fitted coef is 4.010999 8.397853, fitted loglik is 6.642124 > > # (5) fit frequency distributions on USArrests dataset. > # > > x4 <- USArrests$Assault > qmedist(x4, "pois", probs=1/2) $estimate lambda 170.76 $convergence [1] 0 $value [1] 144 $hessian lambda lambda 0 $optim.function [1] "optim" $optim.method [1] "BFGS" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 1 1 $optim.message NULL $loglik [1] -1211.705 $probs [1] 0.5 > qmedist(x4, "nbinom", probs=c(1/3, 2/3)) $estimate size mu 2.518966 182.313344 $convergence [1] 0 $value [1] 0.1111111 $hessian size mu size 0 0 mu 0 0 $optim.function [1] "optim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 37 NA $optim.message NULL $loglik [1] -292.5969 $probs [1] 0.3333333 0.6666667 > > #fitted coef is 2.518966 182.313344, fitted loglik is 292.5969 > > # (6) normal mixture > # > > #mixture of two normal distributions > #density > dnorm2 <- function(x, poid, m1, s1, m2, s2) + poid*dnorm(x, m1, s1) + (1-poid)*dnorm(x, m2, s2) > #numerically approximate quantile function > qnorm2 <- function(p, poid, m1, s1, m2, s2) + { + L2 <- function(x, prob) + (prob - pnorm2(x, poid, m1, s1, m2, s2))^2 + sapply(p, function(pr) optimize(L2, c(-20, 30), prob=pr)$minimum) + } > #distribution function > pnorm2 <- function(q, poid, m1, s1, m2, s2) + poid*pnorm(q, m1, s1) + (1-poid)*pnorm(q, m2, s2) > > > #basic normal distribution > x <- c(rnorm(nsample, 5), rnorm(nsample, 10)) > #QME > fit2 <- qmedist(x, "norm2", probs=c(1/6, 1/4, 1/3, 1/2, 2/3), + start=list(poid=1/3, m1=4, s1=2, m2=8, s2=2), + lower=c(0, 0, 0, 0, 0), upper=c(1/2, Inf, Inf, Inf, Inf)) > > fit2 $estimate poid m1 s1 m2 s2 0.3433539 4.2872486 0.3891164 9.2598667 1.7948558 $convergence [1] 0 $value [1] 4.943389e-12 $hessian NULL $optim.function [1] "constrOptim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 990 NA $optim.message NULL $loglik [1] -38.14105 $probs [1] 0.1666667 0.2500000 0.3333333 0.5000000 0.6666667 > #fitted coef is 0.3433528 4.2872449 0.3891135 9.2598612 1.7948554, fitted loglik is -38.14106 > > # (7) test error messages > # > > dnorm3 <- qnorm3 <- function(x, a) + "NA" > x <- rexp(nsample) > > #should get a one-line error > res <- qmedist(x, "norm3", start=list(a=1), probs=1/2) > #as in > attr(try(log("a"), silent=TRUE), "condition") > > > > > > try(qmedist(c(x1, "a"), "gamma", probs=1/2)) Error in qmedist(c(x1, "a"), "gamma", probs = 1/2) : data must be a numeric vector of length greater than 1 for non censored data or a dataframe with two columns named left and right and more than one line for censored data > try(qmedist(c(x1, NA), "gamma", probs=1/2)) Error in checkUncensoredNAInfNan(data) : data contain NA values. > try(qmedist(c(x1, Inf), "gamma", probs=1/2)) Error in checkUncensoredNAInfNan(data) : data contain Inf (infinite) values. > try(qmedist(c(x1, -Inf), "gamma", probs=1/2)) Error in checkUncensoredNAInfNan(data) : data contain Inf (infinite) values. > try(qmedist(c(x1, NaN), "gamma", probs=1/2)) Error in checkUncensoredNAInfNan(data) : data contain NaN (not a numeric) values. > > > # (8) weighted QME > # > n <- 1e6 > x <- rpois(nsample, 10) > xtab <- table(x) > xval <- sort(unique(x)) > f1 <- qmedist(x, "pois", start=list(lambda=mean(x)), lower=0, upper=100, probs=1/2) #, control=list(trace=1, REPORT=1) Warning message: In qmedist(x, "pois", start = list(lambda = mean(x)), lower = 0, : The BFGS method cannot be used with bounds without provided the gradient. The method is changed to L-BFGS-B. > f2 <- qmedist(xval, "pois", weights=xtab, start=list(lambda=mean(x)), probs=1/2) Warning message: In qmedist(xval, "pois", weights = xtab, start = list(lambda = mean(x)), : weights are not taken into account in the default initial values > > f1$estimate lambda 9.6 > f2$estimate #should be identical lambda 9.6 > > x <- rexp(nsample) > f3 <- qmedist(x, "exp", probs=1/2) > w4 <- c(rep(1, nsample/2), round(sqrt(1:(nsample/2)))) > f4 <- qmedist(x, "exp", weights=w4, probs=1/2) Warning message: In qmedist(x, "exp", weights = w4, probs = 1/2) : weights are not taken into account in the default initial values > c(f3$estimate, f4$estimate) rate rate 0.4809858 0.4860816 > > c(f3$loglik, f4$loglik) [1] -14.43196 -18.91757 > > #fitted coef is 0.4816191, fitted loglik is -16.95355 > > > #try non integer weights > try(qmedist(x, "exp", weights=c(rep(1, n/2), sqrt(1:(n/2))), probs=1/2)) Error in qmedist(x, "exp", weights = c(rep(1, n/2), sqrt(1:(n/2))), probs = 1/2) : weights should be a vector of (strictly) positive integers > > # (9) test the component optim.message > x <- rnorm(nsample) > #change parameter to obtain unsuccessful convergence > qmedist(x, "norm", probs=1:2/3, control=list(maxit=2), start=list(mean=1e5, sd=1), optim.method="L-BFGS-B", lower=0) $estimate mean sd 0.7855743 1.5281452 $convergence [1] 1 $value [1] 0.02628327 $hessian mean sd mean 2.000000e+00 -1.431494e-11 sd -1.431494e-11 3.710520e-01 $optim.function [1] "optim" $optim.method [1] "L-BFGS-B" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 11 11 $optim.message [1] "NEW_X" $loglik [1] -16.67426 $probs [1] 0.3333333 0.6666667 > > # (10) test bounds > x <- rnorm(nsample) > qmedist(x, "norm", probs=1:2/3, optim.method="L-BFGS-B", lower=c(-Inf, 0)) #via optim $estimate mean sd 0.2474441 0.9306178 $convergence [1] 0 $value [1] 1.016007e-21 $hessian mean sd mean 2.000000e+00 -2.774033e-14 sd -2.774033e-14 3.710520e-01 $optim.function [1] "optim" $optim.method [1] "L-BFGS-B" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 6 6 $optim.message [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH" $loglik [1] -13.62491 $probs [1] 0.3333333 0.6666667 > qmedist(x, "norm", probs=1:2/3, optim.method="Nelder", lower=c(-Inf, 0)) #via constrOptim $estimate mean sd 0.2474440 0.9306166 $convergence [1] 0 $value [1] 2.550675e-13 $hessian NULL $optim.function [1] "constrOptim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 122 NA $optim.message NULL $loglik [1] -13.62491 $probs [1] 0.3333333 0.6666667 > > > # (11) large sample size issue > > if(FALSE) + { + set.seed(123) + obs <- rlnorm(1e7, 3, 2) + for(i in 2:7) + cat(i, try(qmedist(obs[1:10^i], "lnorm", probs=1:2/3, control=list(trace=0, REPORT=1))$estimate, silent=TRUE), "\n") + + # 2 3.109257 1.767013 + # 3 3.022615 1.857885 + # 4 2.999376 1.978701 + # 5 3.003344 2.00941 + # 6 2.99881 2.001733 + # 7 2.999859 2.000227 + } > > > proc.time() user system elapsed 2.53 0.18 2.70