R Under development (unstable) (2024-07-10 r86888 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(fitdistrplus) Loading required package: MASS Loading required package: survival > nsample <- 10 > > # (1) basic fit of a normal distribution > # > > set.seed(1234) > x1 <- rnorm(n=nsample) > qmedist(x1, "norm", probs=c(1/3, 2/3)) $estimate mean sd -0.1486435 0.9892013 $convergence [1] 0 $value [1] 4.312801e-10 $hessian mean sd mean 4.000000e+00 5.548066e-14 sd 5.548066e-14 7.421040e-01 $optim.function [1] "optim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 55 NA $optim.message NULL $loglik [1] -13.92195 $probs [1] 0.3333333 0.6666667 > > > # (2) defining your own distribution functions, here for the Gumbel > # distribution for other distributions, see the CRAN task view dedicated > # to probability distributions > > dgumbel <- function(x, a, b) 1/b*exp((a-x)/b)*exp(-exp((a-x)/b)) > qgumbel <- function(p, a, b) a - b*log(-log(p)) > qmedist(x1, "gumbel", probs=c(1/3, 2/3), start=list(a=10,b=5)) $estimate a b -0.4935571 0.8557281 $convergence [1] 0 $value [1] 2.735865e-06 $hessian a b a 4.000000 1.617345 b 1.617345 1.647498 $optim.function [1] "optim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 61 NA $optim.message NULL $loglik [1] -16.79828 $probs [1] 0.3333333 0.6666667 > > # (3) fit a discrete distribution (Poisson) > # > > set.seed(1234) > x2 <- rpois(n=nsample,lambda = 2) > qmedist(x2, "pois", probs=1/2) $estimate lambda 1.7 $convergence [1] 0 $value [1] 0 $hessian lambda lambda 0 $optim.function [1] "optim" $optim.method [1] "BFGS" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 1 1 $optim.message NULL $loglik [1] -15.31626 $probs [1] 0.5 > > # (4) fit a finite-support distribution (beta) > # > > set.seed(1234) > x3 <- rbeta(n=nsample, shape1=5, shape2=10) > qmedist(x3, "beta", probs=c(1/3, 2/3)) $estimate shape1 shape2 4.010999 8.397853 $convergence [1] 0 $value [1] 4.99688e-11 $hessian shape1 shape2 shape1 0.01325492 -0.006123230 shape2 -0.00612323 0.002877022 $optim.function [1] "optim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 61 NA $optim.message NULL $loglik [1] 6.642124 $probs [1] 0.3333333 0.6666667 > > > # (5) fit frequency distributions on USArrests dataset. > # > > x4 <- USArrests$Assault > qmedist(x4, "pois", probs=1/2) $estimate lambda 170.76 $convergence [1] 0 $value [1] 144 $hessian lambda lambda 0 $optim.function [1] "optim" $optim.method [1] "BFGS" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 1 1 $optim.message NULL $loglik [1] -1211.705 $probs [1] 0.5 > qmedist(x4, "nbinom", probs=c(1/3, 2/3)) $estimate size mu 2.518966 182.313344 $convergence [1] 0 $value [1] 0.2222222 $hessian size mu size 0 0 mu 0 0 $optim.function [1] "optim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 37 NA $optim.message NULL $loglik [1] -292.5969 $probs [1] 0.3333333 0.6666667 > > # (6) normal mixture > # > > #mixture of two normal distributions > #density > dnorm2 <- function(x, poid, m1, s1, m2, s2) + poid*dnorm(x, m1, s1) + (1-poid)*dnorm(x, m2, s2) > #numerically approximate quantile function > qnorm2 <- function(p, poid, m1, s1, m2, s2) + { + L2 <- function(x, prob) + (prob - pnorm2(x, poid, m1, s1, m2, s2))^2 + sapply(p, function(pr) optimize(L2, c(-20, 30), prob=pr)$minimum) + } > #distribution function > pnorm2 <- function(q, poid, m1, s1, m2, s2) + poid*pnorm(q, m1, s1) + (1-poid)*pnorm(q, m2, s2) > > > #basic normal distribution > x <- c(rnorm(nsample, 5), rnorm(nsample, 10)) > #QME > fit2 <- qmedist(x, "norm2", probs=c(1/6, 1/4, 1/3, 1/2, 2/3), + start=list(poid=1/3, m1=4, s1=2, m2=8, s2=2), + lower=c(0, 0, 0, 0, 0), upper=c(1/2, Inf, Inf, Inf, Inf)) > > > > # (7) test error messages > # > > dnorm3 <- qnorm3 <- function(x, a) + "NA" > x <- rexp(nsample) > > #should get a one-line error > res <- qmedist(x, "norm3", start=list(a=1), probs=1/2) > #as in > attr(try(log("a"), silent=TRUE), "condition") > > > > > > try(qmedist(c(x1, "a"), "gamma", probs=1/2)) Error in qmedist(c(x1, "a"), "gamma", probs = 1/2) : data must be a numeric vector of length greater than 1 for non censored data or a dataframe with two columns named left and right and more than one line for censored data > try(qmedist(c(x1, NA), "gamma", probs=1/2)) Error in checkUncensoredNAInfNan(data) : data contain NA values. > try(qmedist(c(x1, Inf), "gamma", probs=1/2)) Error in checkUncensoredNAInfNan(data) : data contain Inf (infinite) values. > try(qmedist(c(x1, -Inf), "gamma", probs=1/2)) Error in checkUncensoredNAInfNan(data) : data contain Inf (infinite) values. > try(qmedist(c(x1, NaN), "gamma", probs=1/2)) Error in checkUncensoredNAInfNan(data) : data contain NaN (not a numeric) values. > > > # (8) weighted QME > # > n <- 1e6 > x <- rpois(nsample, 10) > xtab <- table(x) > xval <- sort(unique(x)) > f1 <- qmedist(x, "pois", start=list(lambda=mean(x)), lower=0, upper=100, probs=1/2) #, control=list(trace=1, REPORT=1) Warning message: In qmedist(x, "pois", start = list(lambda = mean(x)), lower = 0, : The BFGS method cannot be used with bounds without provided the gradient. The method is changed to L-BFGS-B. > f2 <- qmedist(xval, "pois", weights=xtab, start=list(lambda=mean(x)), probs=1/2) Warning message: In qmedist(xval, "pois", weights = xtab, start = list(lambda = mean(x)), : weights are not taken into account in the default initial values > > f1$estimate lambda 9.6 > f2$estimate #should be identical lambda 9.6 > > x <- rexp(nsample) > f3 <- qmedist(x, "exp", probs=1/2) > w4 <- c(rep(1, nsample/2), round(sqrt(1:(nsample/2)))) > f4 <- qmedist(x, "exp", weights=w4, probs=1/2) Warning message: In qmedist(x, "exp", weights = w4, probs = 1/2) : weights are not taken into account in the default initial values > f3$estimate rate 0.4809858 > f4$estimate rate 0.4860816 > > f3$loglik [1] -14.43196 > f4$loglik [1] -18.91757 > > median(x) [1] 1.441099 > median(tail(x, 50)) [1] 1.441099 > > > #try non integer weights > try(qmedist(x, "exp", weights=c(rep(1, n/2), sqrt(1:(n/2))), probs=1/2)) Error in qmedist(x, "exp", weights = c(rep(1, n/2), sqrt(1:(n/2))), probs = 1/2) : weights should be a vector of (strictly) positive integers > > # (9) test the component optim.message > x <- rnorm(nsample) > #change parameter to obtain unsuccessful convergence > qmedist(x, "norm", probs=1:2/3, control=list(maxit=2), start=list(mean=1e5, sd=1), optim.method="L-BFGS-B", lower=0) $estimate mean sd 0.7855744 1.7499993 $convergence [1] 1 $value [1] 0.008861144 $hessian mean sd mean 4.000000e+00 -3.903128e-12 sd -3.903128e-12 7.421040e-01 $optim.function [1] "optim" $optim.method [1] "L-BFGS-B" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 11 11 $optim.message [1] "NEW_X" $loglik [1] -17.25942 $probs [1] 0.3333333 0.6666667 > > # (10) test bounds > x <- rnorm(nsample) > qmedist(x, "norm", probs=1:2/3, optim.method="L-BFGS-B", lower=c(-Inf, 0)) #via optim $estimate mean sd 0.2474441 0.9306178 $convergence [1] 0 $value [1] 3.081488e-33 $hessian mean sd mean 4.000000e+00 -1.110672e-13 sd -1.110672e-13 7.421040e-01 $optim.function [1] "optim" $optim.method [1] "L-BFGS-B" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 6 6 $optim.message [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH" $loglik [1] -13.62491 $probs [1] 0.3333333 0.6666667 > qmedist(x, "norm", probs=1:2/3, optim.method="Nelder", lower=c(-Inf, 0)) #via constrOptim $estimate mean sd 0.2474440 0.9306188 $convergence [1] 0 $value [1] 4.042279e-13 $hessian NULL $optim.function [1] "constrOptim" $optim.method [1] "Nelder-Mead" $fix.arg NULL $fix.arg.fun NULL $weights NULL $counts function gradient 124 NA $optim.message NULL $loglik [1] -13.62491 $probs [1] 0.3333333 0.6666667 > > proc.time() user system elapsed 4.73 0.29 5.04