## Copyright (C) 2016 Marius Hofert, Ivan Kojadinovic, Martin Maechler, and Jun Yan ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software ## Foundation; either version 3 of the License, or (at your option) any later ## version. ## ## This program is distributed in the hope that it will be useful, but WITHOUT ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ## FOR A PARTICULAR PURPOSE. See the GNU General Public License for more ## details. ## ## You should have received a copy of the GNU General Public License along with ## this program; if not, see . ### Finite Mixtures of Copulas: mixCopula() ### ======================================== library(copula) isExplicit <- copula:::isExplicit (doExtras <- copula:::doExtras() && getRversion() >= "3.4") # so have withAutoprint(.) is32 <- .Machine$sizeof.pointer == 4 ## <- should work for Linux/MacOS/Windows isMac <- Sys.info()[["sysname"]] == "Darwin" isSun <- Sys.info()[["sysname"]] == "SunOS" mC <- mixCopula(list(gumbelCopula(2.5, dim=3), claytonCopula(pi, dim=3), tCopula(0.7, dim=3)), c(2,2,4)/8) mC stopifnot(dim(mC) == 3, inherits(mC, "mixCopula")) ## mix a Gumbel with a rotated Gumbel (with equal weights 1/2): mGG <- mixCopula(list(gumbelCopula(2), rotCopula(gumbelCopula(1.5)))) mGG # 4 parameters stopifnot(exprs = { dim(mGG) == 2 inherits(mGG, "mixCopula") is(mGG,"mixExplicitCopula") # ==> Jscore() works ==> var.mpl() all.equal( rho(mGG), 0.57886340158, tol=1e-10) all.equal(lambda(mGG), c(lower = 0.206299474016, upper = 0.292893218813), tol = 1e-10) }) RNGversion("3.5.0") # for sample() -- "biased" --> warning ---> *remove* in future! set.seed(17) uM <- rCopula( 600, mC) uGG <- rCopula(1000, mGG) ## Check dCopula(*, log= TRUE) ## --- this was __wrong__ for many days (not on CRAN) stopifnot( length(dCopula(uM[1,,drop=FALSE], mC, log=TRUE)) == 1,# was wrong all.equal(log(dCopula(uM, mC)), dCopula(uM, mC, log=TRUE), tol = 1e-12), all.equal(log(dCopula(uGG, mGG)), dCopula(uGG, mGG, log=TRUE), tol = 1e-12) ) (llGG1 <- loglikCopula(c(2.5, 1., w = c(2,4)/6), u=uGG, copula = mGG)) (llMC <- loglikCopula(c(2.5, pi, rho.1=0.7, df = 4, w = c(2,2,4)/8), u = uM, copula = mC)) ## discrepancy 32 bit <-> 64 bit --- still (after dMixCopula() bug fix): stopifnot( all.equal(llGG1, 177.452426, ## 32 bit (Windows, Linux(FC 24)): 188.0358 tol = if(isSun || isMac || is32) 0.08 else 7e-7), all.equal(llMC, 532.8757887, ## 32 bit: 551.8439 tol = if(isSun || isMac || is32) 0.05 else 7e-7) ) ## "free" weights ------------- optCtrl <- list(maxit = 1000, trace = TRUE) ## The real proof: using "arbitrary" initial parameters (not very close to truth): mC. <- setTheta(mC, c(1, 0.5, rho=0, df=7, w = c(1,1,1)/3)) if(doExtras) withAutoprint({ st0 <- system.time( f. <- fitCopula(mC, uM, optim.method = "BFGS", optim.control=list(maxit=999), traceOpt=TRUE)) ## converges, .. no var-cov ## (which would fail with Error in solve.default(Sigma.n, t(dlogcdtheta(copula, u) - S)) : ## system is computationally singular: reciprocal condition number = 3.88557e-17 st0 # was 4.4, now 9.3 sec (nb-mm4) coef(f.) ; logLik(f.) coef(f., orig=FALSE) # -> l-scale summary(f.) stopifnot(exprs = { all.equal(logLik(f.), structure(536.93419, nobs = 600L, df = 7L, class = "logLik"), tol = 8e-10) # 7.206e-11) }) ## using 'mC.' which "does not know" the true parameters (for 'start'); ## now works thanks to new getInitParam() "mixCopula" method : system.time( f.w <- fitCopula(mC., uM, optim.method = "BFGS", optim.control=list(maxit=999), traceOpt=TRUE)) ## param= 1.0, 0.5, 0.0, 7.0, 0.0, 0.0 => logL= 152.18517 ## param= 1.001, 0.500, 0.000, 7.000, 0.000, 0.000 => logL= 153.21439 ## param= 0.999, 0.500, 0.000, 7.000, 0.000, 0.000 => logL= -Inf ## Error in optim(start, logL, lower = lower, upper = upper, method = optim.method, : ## non-finite finite-difference value [1] ## Calls: system.time ... fitCopula -> .local -> -> fitCopula.ml -> optim ## MM: Note that 0.999 is not legal for a gumbelCopula (-> gives Error --> -Inf ) coef(f.w); logLik(f.w) summary(f.w) stopifnot(exprs = { all.equal( coef(f.), coef(f.w), tol=0.0006) # seen 0.000187 all.equal(logLik(f.), logLik(f.w), tol= 4e-8) # seen 1.89e-9 }) }) if(doExtras) withAutoprint({ stG <- system.time( fGG <- fitCopula(mGG, uGG, method = "ml", estimate.variance=TRUE, traceOpt=1)) stG # 3.7 (lynne, 2020) coef(fGG) ; logLik(fGG) stopifnot(exprs = { all.equal( ## dput(signif(*, 8)): c(m1.alpha = 2.0551924, m2.alpha = 1.5838548, w1 = 0.50994422, w2 = 0.49005578), coef(fGG), tol = 1e-7) all.equal(structure(256.799629, nobs = 1000L, df = 4L, class = "logLik"), logLik(fGG), tol = 8e-8) # 9.67 e-10) }) ## these two now work with "cheating" warning : summary(fGG) vcov(fGG) ## these now use 'l' aka 'lambda'-space : summary(fGG, orig=FALSE) (vcov(fGG, orig=FALSE) -> vGG) # "l-space" cov2cor(vGG) coef(fGG, orig=FALSE) # "l-space" }) if(doExtras) withAutoprint({ # slowish st1 <- system.time( ff <- fitCopula(mC, uM, optim.method = "Nelder-Mead", optim.control=optCtrl)) ## converges (vcov : Error in solve(..)... (rec.cond.number 4.783e-17) st1 # 11 sec (then lynne, 2020: 4.8'') logLik(ff) summary(ff) ## now using 'mC.' : ----- system.time( ff.w <- fitCopula(mC., uM, optim.method = "Nelder-Mead", optim.control=optCtrl)) coef(ff.w) ; logLik(ff.w) summary(ff.w) stopifnot(exprs = { all.equal( coef(ff), coef(ff.w), tol=0.008) # seen 0.00186 all.equal(logLik(ff), logLik(ff.w), tol= 4e-8) # seen 7.9e-9 }) }) if(doExtras) withAutoprint({ # slowish system.time( # 28 sec f2 <- fitCopula(mC, uM, optim.method = "L-BFGS-B", optim.control=optCtrl)) ## converges (vcov: Error in solve(..)... (rec.cond.number 3.691e-17) coef(f2) ; coef(f2, orig=FALSE); logLik(f2) summary(f2) ## now using 'mC.' : ----- system.time( f2.w <- fitCopula(mC., uM, optim.method = "Nelder-Mead", optim.control=optCtrl)) coef(f2.w) ; coef(f2.w, orig=FALSE) ; logLik(f2.w) summary(f2.w) stopifnot(exprs = { ## different, f2 was "poor" : all.equal( coef(f2), coef(f2.w), tol=0.004) # seen 0.00159 all.equal(coef(f2 , orig=FALSE), coef(f2.w, orig=FALSE), tol=0.004) # 0.001639 all.equal(logLik(f2), logLik(f2.w), tol=0.002) # seen 2.26e-7 }) }) ## === Partially fixed parameters -- the hard cases =================================== RNGversion("4.0.0") # back to normal (tX4 <- tCopula( 0.2, df = 5, df.fixed=TRUE)) (tn3 <- tCopula(-0.5, df = 3)) getTheta(tX4, attr = TRUE) # freeOnly = TRUE is default ## --> *not* showing df=5 as it is fixed (m3 <- mixCopula(list(normalCopula(0.4), tX4, tn3), w = (1:3)/6)) # -> shows 'm2.df := 5' as fixed ! (th. <- getTheta(m3, attr = TRUE))# ditto (th <- getTheta(m3, named= TRUE)) ##' an inverse function of which(.) : trueAt <- function(i, n) { r <- logical(n); r[i] <- TRUE; r } ## Functionality check of trueAt() : set.seed(17); summary(Ns <- rpois(1000,3)) table(Ns) ; M <- max(Ns) for(n in Ns) { i <- sort(sample.int(M, n)) stopifnot(identical(i, which(trueAt(i, M)))) }# takes ~ 0.05 sec stopifnot(exprs = { identical(th, structure(th., param.lowbnd=NULL, param.upbnd=NULL)) identical(names(th), c("m1.rho.1", "m2.rho.1", "m3.rho.1", "m3.df", paste0("w", 1:3))) identical(isFree(m3), !trueAt(3, n=8))# free everywhere but at [3] }) set.seed(47) Ut <- rCopula(2^12, m3) ## fixedParam(m3) <- trueAt(c(3, 8), n=8) stopifnot(exprs = { nParam(m3, freeOnly =FALSE) == 8 nParam(m3, freeOnly = TRUE) == 6 length(print( getTheta(m3, freeOnly=FALSE, named=TRUE))) == 8 length(print( getTheta(m3, freeOnly=TRUE, named=TRUE))) == 6 }) (iniP <- getIniParam(m3, Ut, default=NA)) # matching the freeOnly case: stopifnot(exprs = { identical(iniP, getIniParam(m3, Ut)) length(iniP) == 6 iniP == c(rep(iniP[1], 3), 3, 1/4, 1/4) }) ## does the inital value possibly "work" ? (llIni <- loglikCopula(u=Ut, copula=setTheta(m3, iniP, na.ok=FALSE))) # 205.0475; yes, good, ... if(doExtras) withAutoprint({ ## try with the true parameters of 'm3' system.time( # 28 sec f38 <- fitCopula(m3, Ut, traceOpt=TRUE) ) coef(f38) ; logLik(f38) summary(f38) ## More realistically, starting from our simple iniP[] (instead from the true \theta !!) system.time( ffI <- fitCopula(m3, Ut, start = iniP, optim.method = "BFGS", optim.control=list(maxit=999), traceOpt=TRUE) ) coef(ffI) ; logLik(ffI) summary(ffI) }) fixedParam(m3) <- trueAt(c(3, 7), n=8) # (fixed wgt in the middle) ## works with fixed at c(3,6) ... (iniP37 <- getIniParam(m3, Ut)) if(doExtras) withAutoprint({ # *much* slower (less parameters, why ??) system.time( # 28 sec f37 <- fitCopula(m3, Ut, start=iniP37, traceOpt=TRUE) ) coef(f37) ; logLik(f37) summary(f37) }) ## setting "arbitrary" (pretty wrong) initial values : mm <- setTheta(m3, c(0, 0, 0, df=13, w1 = .6, w2 = .4)) ## (not yet ... getIniParam() is more relevant !)