R Under development (unstable) (2024-03-20 r86162 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ### Various tests for constrained optimization > ### > options(digits=4) > > ### -------------------- Normal mixture likelihood, no additional parameters -------------------- > ### param = c(rho, mean1, mean2) > ### > ### X = N(mean1) w/Pr rho > ### X = N(mean2) w/Pr 1-rho > ### > logLikMix <- function(param) { + ## a single likelihood value + rho <- param[1] + if(rho < 0 || rho > 1) + return(NA) + mu1 <- param[2] + mu2 <- param[3] + ll <- log(rho*dnorm(x - mu1) + (1 - rho)*dnorm(x - mu2)) + ll <- sum(ll) + ll + } > > gradLikMix <- function(param) { + rho <- param[1] + if(rho < 0 || rho > 1) + return(NA) + mu1 <- param[2] + mu2 <- param[3] + f1 <- dnorm(x - mu1) + f2 <- dnorm(x - mu2) + L <- rho*f1 + (1 - rho)*f2 + g <- matrix(0, length(x), 3) + g[,1] <- (f1 - f2)/L + g[,2] <- rho*(x - mu1)*f1/L + g[,3] <- (1 - rho)*(x - mu2)*f2/L + colSums(g) + g + } > > hessLikMix <- function(param) { + rho <- param[1] + if(rho < 0 || rho > 1) + return(NA) + mu1 <- param[2] + mu2 <- param[3] + f1 <- dnorm(x - mu1) + f2 <- dnorm(x - mu2) + L <- rho*f1 + (1 - rho)*f2 + dldrho <- (f1 - f2)/L + dldmu1 <- rho*(x - mu1)*f1/L + dldmu2 <- (1 - rho)*(x - mu2)*f2/L + h <- matrix(0, 3, 3) + h[1,1] <- -sum(dldrho*(f1 - f2)/L) + h[2,1] <- h[1,2] <- sum((x - mu1)*f1/L - dldmu1*dldrho) + h[3,1] <- h[1,3] <- sum(-(x - mu2)*f2/L - dldmu2*dldrho) + h[2,2] <- sum(rho*(-f1 + (x - mu1)^2*f1)/L - dldmu1^2) + h[2,3] <- h[3,2] <- -sum(dldmu1*dldmu2) + h[3,3] <- sum((1 - rho)*(-f2 + (x - mu2)^2*f2)/L - dldmu2^2) + h + } > > logLikMixInd <- function(param) { + ## individual obs-wise likelihood values + rho <- param[1] + if(rho < 0 || rho > 1) + return(NA) + mu1 <- param[2] + mu2 <- param[3] + ll <- log(rho*dnorm(x - mu1) + (1 - rho)*dnorm(x - mu2)) + ll <- sum(ll) + ll + } > > gradLikMixInd <- function(param) { + rho <- param[1] + if(rho < 0 || rho > 1) + return(NA) + mu1 <- param[2] + mu2 <- param[3] + f1 <- dnorm(x - mu1) + f2 <- dnorm(x - mu2) + L <- rho*f1 + (1 - rho)*f2 + g <- matrix(0, length(x), 3) + g[,1] <- (f1 - f2)/L + g[,2] <- rho*(x - mu1)*f1/L + g[,3] <- (1 - rho)*(x - mu2)*f2/L + colSums(g) + g + } > > ### -------------------------- > library(maxLik) Loading required package: miscTools Please cite the 'maxLik' package as: Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1. If you have questions, suggestions, or comments regarding the 'maxLik' package, please use its GitHub page and the issues there: https://github.com/arne-henningsen/maxLik > ## mixed normal > set.seed(1) > N <- 100 > x <- c(rnorm(N, mean=-1), rnorm(N, mean=1)) > > ## ---------- INEQUALITY CONSTRAINTS ----------- > ## First test inequality constraints, numeric/analytical gradients > ## Inequality constraints: rho < 0.5, mu1 < -0.1, mu2 > 0.1 > A <- matrix(c(-1, 0, 0, + 0, -1, 0, + 0, 0, 1), 3, 3, byrow=TRUE) > B <- c(0.5, 0.1, 0.1) > start <- c(0.4, -1, 1) # help to decide which one is positive/negative > ineqCon <- list(ineqA=A, ineqB=B) > estimates <- c(0.5000, -0.7750, 0.8767) > ## analytic gradient > cat("Inequality constraints, analytic gradient & Hessian\n") Inequality constraints, analytic gradient & Hessian > a <- maxLik(logLikMix, grad=gradLikMix, hess=hessLikMix, + start=start, + constraints=ineqCon) > all.equal(coef(a), estimates, tolerance=0.01) [1] TRUE > # TRUE: relative tolerance 0.045 > ## No analytic gradient > cat("Inequality constraints, numeric gradient & Hessian\n") Inequality constraints, numeric gradient & Hessian > a <- maxLik(logLikMix, + start=start, + constraints=ineqCon) > all.equal(coef(a), estimates, tolerance=0.01) [1] TRUE > # should be close to the true values, but N is too small > ## NR method with inequality constraints > try( maxLik(logLikMix, start = start, constraints = ineqCon, method = "NR" ) ) Error in maxRoutine(fn = logLik, grad = grad, hess = hess, start = start, : Inequality constraints not implemented for maxNR > # Error in maxRoutine(fn = logLik, grad = grad, hess = hess, start = start, : > # Inequality constraints not implemented for maxNR > > ## BHHH method with inequality constraints > try( maxLik(logLikMix, start = start, constraints = ineqCon, method = "BHHH" ) ) Error in maxNR(fn = fn, grad = grad, hess = hess, start = start, finalHessian = finalHessian, : Inequality constraints not implemented for maxNR > # Error in maxNR(fn = fn, grad = grad, hess = hess, start = start, finalHessian = finalHessian, : > # Inequality constraints not implemented for maxNR > > ## ---------- EQUALITY CONSTRAINTS ----------------- > cat("Test for equality constraints mu1 + 2*mu2 = 0\n") Test for equality constraints mu1 + 2*mu2 = 0 > A <- matrix(c(0, 1, 2), 1, 3) > B <- 0 > eqCon <- list( eqA = A, eqB = B ) > estimates <- c(0.3425, -1.0996, 0.5498) > # estimated values, given x > ## default, numeric gradient > mlEq <- maxLik(logLikMix, start = start, constraints = eqCon, tol=0) > # only rely on gradient stopping condition > all.equal(coef(mlEq), estimates, tolerance=0.01, scale=1) [1] TRUE > ## default, individual likelihood > mlEqInd <- maxLik(logLikMixInd, start = start, constraints = eqCon, tol=0) > # only rely on gradient stopping condition > all.equal(coef(mlEq), coef(mlEqInd), tol=1e-4) [1] TRUE > ## default, analytic gradient > mlEqG <- maxLik(logLikMix, grad=gradLikMix, + start = start, constraints = eqCon ) > all.equal(coef(mlEq), coef(mlEqG), tolerance=1e-4) [1] TRUE > ## default, analytic gradient, individual likelihood > mlEqGInd <- maxLik(logLikMixInd, grad=gradLikMixInd, + start = start, constraints = eqCon ) > all.equal(coef(mlEqG), coef(mlEqGInd), tolerance=1e-4) [1] TRUE > ## default, analytic Hessian > mlEqH <- maxLik(logLikMix, grad=gradLikMix, hess=hessLikMix, + start=start, + constraints=eqCon) > all.equal(coef(mlEqG), coef(mlEqH), tolerance=1e-2) [1] TRUE > > > ## BFGS, numeric gradient > eqBFGS <- maxLik(logLikMix, + start=start, method="bfgs", + constraints=eqCon, + SUMTRho0=1) > all.equal(coef(eqBFGS), estimates, tolerance=0.01, scale=1) [1] TRUE > > ## BHHH, analytic gradient (numeric does not converge?) > eqBHHH <- maxLik(logLikMix, gradLikMix, + start=start, method="bhhh", + constraints=eqCon, + SUMTRho0=1) > all.equal(coef(eqBFGS), coef(eqBHHH), tol=0.01) [1] TRUE > > > ### ------------------ Now test additional parameters for the function ---- > ### similar mixture as above but rho is give as an extra parameter > ### > logLikMix2 <- function(param, rho) { + mu1 <- param[1] + mu2 <- param[2] + ll <- log(rho*dnorm(x - mu1) + (1 - rho)*dnorm(x - mu2)) + # ll <- sum(ll) + ll + } > > gradLikMix2 <- function(param, rho) { + mu1 <- param[1] + mu2 <- param[2] + f1 <- dnorm(x - mu1) + f2 <- dnorm(x - mu2) + L <- rho*f1 + (1 - rho)*f2 + g <- matrix(0, length(x), 2) + g[,1] <- rho*(x - mu1)*f1/L + g[,2] <- (1 - rho)*(x - mu2)*f2/L + # colSums(g) + g + } > > hessLikMix2 <- function(param, rho) { + mu1 <- param[1] + mu2 <- param[2] + f1 <- dnorm(x - mu1) + f2 <- dnorm(x - mu2) + L <- rho*f1 + (1 - rho)*f2 + dldrho <- (f1 - f2)/L + dldmu1 <- rho*(x - mu1)*f1/L + dldmu2 <- (1 - rho)*(x - mu2)*f2/L + h <- matrix(0, 2, 2) + h[1,1] <- sum(rho*(-f1 + (x - mu1)^2*f1)/L - dldmu1^2) + h[1,2] <- h[2,1] <- -sum(dldmu1*dldmu2) + h[2,2] <- sum((1 - rho)*(-f2 + (x - mu2)^2*f2)/L - dldmu2^2) + h + } > > ## ---------- Equality constraints & extra parameters ------------ > A <- matrix(c(1, 2), 1, 2) > B <- 0 > start <- c(0, 1) > ## We run only a few iterations as we want to test correct handling > ## of parameters, not the final value. We also avoid any > ## debug information > iterlim <- 3 > estimates <- c(-0.9986, 0.4993) > cat("Test for extra parameters for the function\n") Test for extra parameters for the function > ## NR, numeric gradient > cat("Newton-Raphson, numeric gradient\n") Newton-Raphson, numeric gradient > a <- maxLik(logLikMix2, + start=start, method="nr", + constraints=list(eqA=A, eqB=B), + iterlim=iterlim, SUMTRho0=1, rho=0.5) > all.equal(coef(a), estimates, tol=0.01) [1] TRUE > ## NR, numeric hessian > a <- maxLik(logLikMix2, gradLikMix2, + start=start, method="nr", + constraints=list(eqA=A, eqB=B), + iterlim=iterlim, SUMTRho0=1, rho=0.5) > all.equal(coef(a), estimates, tol=0.01) [1] TRUE > ## nr, analytic hessian > a <- maxLik(logLikMix2, gradLikMix2, hessLikMix2, + start=start, method="nr", + constraints=list(eqA=A, eqB=B), + iterlim=iterlim, SUMTRho0=1, rho=0.5) > all.equal(coef(a), estimates, tol=0.01) [1] TRUE > ## BHHH > cat("BHHH, analytic gradient, numeric Hessian\n") BHHH, analytic gradient, numeric Hessian > a <- maxLik(logLikMix2, gradLikMix2, + start=start, method="bhhh", + constraints=list(eqA=A, eqB=B), + iterlim=iterlim, SUMTRho0=1, rho=0.5) > all.equal(coef(a), estimates, tol=0.01) [1] TRUE > ## BHHH, analytic > a <- maxLik(logLikMix2, gradLikMix2, + start=start, method="bhhh", + constraints=list(eqA=A, eqB=B), + iterlim=iterlim, SUMTRho0=1, rho=0.5) > all.equal(coef(a), estimates, tol=0.01) [1] TRUE > ## bfgs, no analytic gradient > a <- maxLik(logLikMix2, + start=start, method="bfgs", + constraints=list(eqA=A, eqB=B), + iterlim=10, SUMTRho0=1, rho=0.5) > all.equal(coef(a), estimates, tol=0.01) [1] TRUE > ## bfgs, analytic gradient > a <- maxLik(logLikMix2, gradLikMix2, + start=start, method="bfgs", + constraints=list(eqA=A, eqB=B), + iterlim=10, SUMTRho0=1, rho=0.5) > all.equal(coef(a), estimates, tol=0.01) [1] TRUE > ## SANN, analytic gradient > a <- maxLik(logLikMix2, gradLikMix2, + start=start, method="SANN", + constraints=list(eqA=A, eqB=B), + iterlim=1000, SUMTRho0=1, rho=0.5) > all.equal(coef(a), estimates, tol=0.1) [1] TRUE > ## NM, numeric > a <- maxLik(logLikMix2, + start=start, method="nm", + constraints=list(eqA=A, eqB=B), + iterlim=100, + # use more iters for NM + SUMTRho0=1, rho=0.5) > all.equal(coef(a), estimates, tol=0.01) [1] TRUE > > ## -------------------- NR, multiple constraints -------------------- > f <- function(theta) exp(-theta %*% theta) > # test quadratic function > ## constraints: > ## theta1 + theta3 = 1 > ## theta1 + theta2 = 1 > A <- matrix(c(1, 0, 1, + 1, 1, 0), 2, 3, byrow=TRUE) > B <- c(-1, -1) > cat("NR, multiple constraints\n") NR, multiple constraints > a <- maxNR(f, start=c(1,1.1,2), constraints=list(eqA=A, eqB=B)) > theta <- coef(a) > all.equal(c(theta[1] + theta[3], theta[1] + theta[2]), c(1,1), tolerance=1e-4) [1] TRUE > ## Error handling for equality constraints > A <- matrix(c(1, 1), 1, 2) > B <- -1 > cat("Error handling: ncol(A) != lengths(start)\n") Error handling: ncol(A) != lengths(start) > try(a <- maxNR(f, start=c(1, 2, 3), constraints=list(eqA=A, eqB=B))) Error in sumt(fn = function (theta) : Equality constraint matrix A must have the same number of columns as the parameter length (currently 2 and 3) > # ncol(A) != length(start) > A <- matrix(c(1, 1), 1, 2) > B <- c(-1, 2) > try(a <- maxNR(f, start=c(1, 2), constraints=list(eqA=A, eqB=B))) Error in sumt(fn = function (theta) : Equality constraint matrix A must have the same number of rows as the matrix B (currently 1 and 2) > # nrow(A) != nrow(B) > ## > ## -------------- inequality constraints & extra paramters ---------------- > ## > ## mu1 < 1 > ## mu2 > -1 > A <- matrix(c(-1, 0, + 0, 1), 2,2, byrow=TRUE) > B <- c(1,1) > start <- c(0.8, 0.9) > estimates <- c(-0.7748, 0.8767) > ## > inEGrad <- maxLik(logLikMix2, gradLikMix2, + start=start, method="bfgs", + constraints=list(ineqA=A, ineqB=B), + rho=0.5) > all.equal(coef(inEGrad), estimates, tol=0.01) [1] TRUE > ## > inE <- maxLik(logLikMix2, + start=start, method="bfgs", + constraints=list(ineqA=A, ineqB=B), + rho=0.5) > all.equal(coef(inEGrad), coef(inE), tol=1e-4) [1] TRUE > ## > inENM <- maxLik(logLikMix2, gradLikMix2, + start=start, method="nm", + constraints=list(ineqA=A, ineqB=B), + rho=0.5) > all.equal(coef(inEGrad), coef(inENM), tol=1e-3) [1] TRUE > # this is further off than gradient-based methods > ## ---------- test vector B for inequality -------------- > ## mu1 < 1 > ## mu2 > 2 > A <- matrix(c(-1, 0, + 0, 1), 2,2, byrow=TRUE) > B1 <- c(1,-2) > a <- maxLik(logLikMix2, gradLikMix2, + start=c(0.5, 2.5), method="bfgs", + constraints=list(ineqA=A, ineqB=B1), + rho=0.5) > theta <- coef(a) > all.equal(c(theta[1] < 1, theta[2] > 2), c(TRUE, TRUE)) [1] TRUE > # components should be larger than > # (-1, -2) > > ## > ## ---- ERROR HANDLING: insert wrong A and B forms ---- > ## > A2 <- c(-1, 0, 0, 1) > try(maxLik(logLikMix2, gradLikMix2, + start=start, method="bfgs", + constraints=list(ineqA=A2, ineqB=B), + print.level=1, rho=0.5) + ) Error in maxOptim(fn = fn, grad = grad, hess = hess, start = start, method = "BFGS", : Inequality constraint A must be a matrix Current dimension > # should explain that matrix needed > A2 <- matrix(c(-1, 0, 0, 1), 1, 4) > try(maxLik(logLikMix2, gradLikMix2, + start=start, method="bfgs", + constraints=list(ineqA=A2, ineqB=B), + print.level=1, rho=0.5) + ) Error in maxOptim(fn = fn, grad = grad, hess = hess, start = start, method = "BFGS", : Inequality constraint A must have the same number of columns as length of the parameter. Currently 4 and 2. > # should explain that wrong matrix > # dimension > B2 <- 1:3 > try(maxLik(logLikMix2, gradLikMix2, + start=start, method="bfgs", + constraints=list(ineqA=A, ineqB=B2), + print.level=1, rho=0.5) + ) Error in maxOptim(fn = fn, grad = grad, hess = hess, start = start, method = "BFGS", : Inequality constraints A and B suggest different number of constraints: 2 and 3 > # A & B do not match > cat("A & B do not match\n") A & B do not match > B2 <- matrix(1,2,2) > try(maxLik(logLikMix2, gradLikMix2, + start=start, method="bfgs", + constraints=list(ineqA=A, ineqB=B2), + print.level=1, rho=0.5) + ) Error in maxOptim(fn = fn, grad = grad, hess = hess, start = start, method = "BFGS", : Inequality constraint B must be a vector (or Nx1 matrix). Currently 2 columns > # B must be a vector > > ## ---- fixed parameters with constrained optimization ----- > ## Thanks to Bob Loos for finding this error. > ## Optimize 3D hat with one parameter fixed (== 2D hat). > ## Add an equality constraint on that > cat("Constraints + fixed parameters\n") Constraints + fixed parameters > hat3 <- function(param) { + ## Hat function. Hessian negative definite if sqrt(x^2 + y^2) < 0.5 + x <- param[1] + y <- param[2] + z <- param[3] + exp(-x^2-y^2-z^2) + } > sv <- c(1,1,1) > ## constraints: x + y + z >= 2.5 > A <- matrix(c(x=1,y=1,z=1), 1, 3) > B <- -2.5 > constraints <- list(ineqA=A, ineqB=B) > res <- maxBFGS(hat3, start=sv, constraints=constraints, fixed=3, + iterlim=3) > all.equal(coef(res), c(0.770, 0.770, 1), tol=0.01) [1] TRUE > > proc.time() user system elapsed 2.96 0.10 3.06