R Under development (unstable) (2024-08-15 r87022 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64
R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.
R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.
Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.
> ## Copyright (C) 2016 Marius Hofert, Ivan Kojadinovic, Martin Maechler, and Jun Yan
> ##
> ## This program is free software; you can redistribute it and/or modify it under
> ## the terms of the GNU General Public License as published by the Free Software
> ## Foundation; either version 3 of the License, or (at your option) any later
> ## version.
> ##
> ## This program is distributed in the hope that it will be useful, but WITHOUT
> ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
> ## FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
> ## details.
> ##
> ## You should have received a copy of the GNU General Public License along with
> ## this program; if not, see .
>
> ### Finite Mixtures of Copulas: mixCopula()
> ### ========================================
>
> library(copula)
> isExplicit <- copula:::isExplicit
> (doExtras <- copula:::doExtras() && getRversion() >= "3.4") # so have withAutoprint(.)
[1] FALSE
>
> is32 <- .Machine$sizeof.pointer == 4 ## <- should work for Linux/MacOS/Windows
> isMac <- Sys.info()[["sysname"]] == "Darwin"
> isSun <- Sys.info()[["sysname"]] == "SunOS"
>
> mC <- mixCopula(list(gumbelCopula(2.5, dim=3),
+ claytonCopula(pi, dim=3),
+ tCopula(0.7, dim=3)),
+ c(2,2,4)/8)
> mC
mixCopula from 3 components
(Gmbc, Clyc, t-cp) with weights:
(0.25, 0.25, 0.50)
Dimension: 3
Parameters:
m1.alpha = 2.500000
m2.alpha = 3.141593
m3.rho.1 = 0.700000
m3.df = 4.000000
w1 = 0.250000
w2 = 0.250000
w3 = 0.500000
> stopifnot(dim(mC) == 3, inherits(mC, "mixCopula"))
>
> ## mix a Gumbel with a rotated Gumbel (with equal weights 1/2):
> mGG <- mixCopula(list(gumbelCopula(2), rotCopula(gumbelCopula(1.5))))
> mGG # 4 parameters
mixCopula from 2 components
(Gmbc , RccfGc) with weights:
(0.5, 0.5)
Dimension: 2
Parameters:
m1.alpha = 2.0
m2.alpha = 1.5
w1 = 0.5
w2 = 0.5
> stopifnot(exprs = {
+ dim(mGG) == 2
+ inherits(mGG, "mixCopula")
+ is(mGG,"mixExplicitCopula") # ==> Jscore() works ==> var.mpl()
+ all.equal( rho(mGG), 0.57886340158, tol=1e-10)
+ all.equal(lambda(mGG), c(lower = 0.206299474016,
+ upper = 0.292893218813), tol = 1e-10)
+ })
>
>
> RNGversion("3.5.0") # for sample() -- "biased" --> warning ---> *remove* in future!
Warning message:
In RNGkind("Mersenne-Twister", "Inversion", "Rounding") :
non-uniform 'Rounding' sampler used
> set.seed(17)
> uM <- rCopula( 600, mC)
> uGG <- rCopula(1000, mGG)
> ## Check dCopula(*, log= TRUE) ## --- this was __wrong__ for many days (not on CRAN)
> stopifnot(
+ length(dCopula(uM[1,,drop=FALSE], mC, log=TRUE)) == 1,# was wrong
+ all.equal(log(dCopula(uM, mC)),
+ dCopula(uM, mC, log=TRUE), tol = 1e-12),
+ all.equal(log(dCopula(uGG, mGG)),
+ dCopula(uGG, mGG, log=TRUE), tol = 1e-12)
+ )
> (llGG1 <- loglikCopula(c(2.5, 1., w = c(2,4)/6), u=uGG, copula = mGG))
[1] 177.4524
> (llMC <- loglikCopula(c(2.5, pi, rho.1=0.7, df = 4, w = c(2,2,4)/8), u = uM, copula = mC))
[1] 532.8758
> ## discrepancy 32 bit <-> 64 bit --- still (after dMixCopula() bug fix):
> stopifnot(
+ all.equal(llGG1, 177.452426, ## 32 bit (Windows, Linux(FC 24)): 188.0358
+ tol = if(isSun || isMac || is32) 0.08 else 7e-7),
+ all.equal(llMC, 532.8757887, ## 32 bit: 551.8439
+ tol = if(isSun || isMac || is32) 0.05 else 7e-7)
+ )
>
> ## "free" weights -------------
>
> optCtrl <- list(maxit = 1000, trace = TRUE)
> ## The real proof: using "arbitrary" initial parameters (not very close to truth):
> mC. <- setTheta(mC, c(1, 0.5, rho=0, df=7, w = c(1,1,1)/3))
>
> if(doExtras) withAutoprint({
+ st0 <- system.time(
+ f. <- fitCopula(mC, uM, optim.method = "BFGS", optim.control=list(maxit=999), traceOpt=TRUE))
+ ## converges, .. no var-cov
+ ## (which would fail with Error in solve.default(Sigma.n, t(dlogcdtheta(copula, u) - S)) :
+ ## system is computationally singular: reciprocal condition number = 3.88557e-17
+ st0 # was 4.4, now 9.3 sec (nb-mm4)
+ coef(f.) ; logLik(f.)
+ coef(f., orig=FALSE) # -> l-scale
+ summary(f.)
+ stopifnot(exprs = {
+ all.equal(logLik(f.), structure(536.93419, nobs = 600L, df = 7L, class = "logLik"),
+ tol = 8e-10) # 7.206e-11)
+ })
+ ## using 'mC.' which "does not know" the true parameters (for 'start');
+ ## now works thanks to new getInitParam() "mixCopula" method :
+ system.time(
+ f.w <- fitCopula(mC., uM, optim.method = "BFGS", optim.control=list(maxit=999), traceOpt=TRUE))
+ ## param= 1.0, 0.5, 0.0, 7.0, 0.0, 0.0 => logL= 152.18517
+ ## param= 1.001, 0.500, 0.000, 7.000, 0.000, 0.000 => logL= 153.21439
+ ## param= 0.999, 0.500, 0.000, 7.000, 0.000, 0.000 => logL= -Inf
+ ## Error in optim(start, logL, lower = lower, upper = upper, method = optim.method, :
+ ## non-finite finite-difference value [1]
+ ## Calls: system.time ... fitCopula -> .local -> -> fitCopula.ml -> optim
+
+ ## MM: Note that 0.999 is not legal for a gumbelCopula (-> gives Error --> -Inf )
+ coef(f.w); logLik(f.w)
+ summary(f.w)
+ stopifnot(exprs = {
+ all.equal( coef(f.), coef(f.w), tol=0.0006) # seen 0.000187
+ all.equal(logLik(f.), logLik(f.w), tol= 4e-8) # seen 1.89e-9
+ })
+ })
>
> if(doExtras) withAutoprint({
+ stG <- system.time(
+ fGG <- fitCopula(mGG, uGG, method = "ml", estimate.variance=TRUE, traceOpt=1))
+ stG # 3.7 (lynne, 2020)
+ coef(fGG) ; logLik(fGG)
+ stopifnot(exprs = {
+ all.equal( ## dput(signif(*, 8)):
+ c(m1.alpha = 2.0551924, m2.alpha = 1.5838548, w1 = 0.50994422, w2 = 0.49005578),
+ coef(fGG), tol = 1e-7)
+ all.equal(structure(256.799629, nobs = 1000L, df = 4L, class = "logLik"),
+ logLik(fGG), tol = 8e-8) # 9.67 e-10)
+ })
+ ## these two now work with "cheating" warning :
+ summary(fGG)
+ vcov(fGG)
+ ## these now use 'l' aka 'lambda'-space :
+ summary(fGG, orig=FALSE)
+ (vcov(fGG, orig=FALSE) -> vGG) # "l-space"
+ cov2cor(vGG)
+ coef(fGG, orig=FALSE) # "l-space"
+ })
>
>
> if(doExtras) withAutoprint({ # slowish
+ st1 <- system.time(
+ ff <- fitCopula(mC, uM, optim.method = "Nelder-Mead", optim.control=optCtrl))
+ ## converges (vcov : Error in solve(..)... (rec.cond.number 4.783e-17)
+ st1 # 11 sec (then lynne, 2020: 4.8'')
+ logLik(ff)
+ summary(ff)
+ ## now using 'mC.' : -----
+ system.time(
+ ff.w <- fitCopula(mC., uM, optim.method = "Nelder-Mead", optim.control=optCtrl))
+ coef(ff.w) ; logLik(ff.w)
+ summary(ff.w)
+ stopifnot(exprs = {
+ all.equal( coef(ff), coef(ff.w), tol=0.008) # seen 0.00186
+ all.equal(logLik(ff), logLik(ff.w), tol= 4e-8) # seen 7.9e-9
+ })
+ })
>
>
> if(doExtras) withAutoprint({ # slowish
+ system.time( # 28 sec
+ f2 <- fitCopula(mC, uM, optim.method = "L-BFGS-B", optim.control=optCtrl))
+ ## converges (vcov: Error in solve(..)... (rec.cond.number 3.691e-17)
+ coef(f2) ; coef(f2, orig=FALSE); logLik(f2)
+ summary(f2)
+
+ ## now using 'mC.' : -----
+ system.time(
+ f2.w <- fitCopula(mC., uM, optim.method = "Nelder-Mead", optim.control=optCtrl))
+ coef(f2.w) ; coef(f2.w, orig=FALSE) ; logLik(f2.w)
+ summary(f2.w)
+ stopifnot(exprs = { ## different, f2 was "poor" :
+ all.equal( coef(f2), coef(f2.w), tol=0.004) # seen 0.00159
+ all.equal(coef(f2 , orig=FALSE),
+ coef(f2.w, orig=FALSE), tol=0.004) # 0.001639
+ all.equal(logLik(f2), logLik(f2.w), tol=0.002) # seen 2.26e-7
+ })
+ })
>
>
> ## === Partially fixed parameters -- the hard cases ===================================
>
> RNGversion("4.0.0") # back to normal
>
> (tX4 <- tCopula( 0.2, df = 5, df.fixed=TRUE))
t-copula, dim. d = 2
Dimension: 2
Parameters (partly fixed, see ':='):
rho.1 = 0.2
df := 5.0
> (tn3 <- tCopula(-0.5, df = 3))
t-copula, dim. d = 2
Dimension: 2
Parameters:
rho.1 = -0.5
df = 3.0
> getTheta(tX4, attr = TRUE) # freeOnly = TRUE is default
rho.1
0.2
attr(,"param.lowbnd")
[1] -1
attr(,"param.upbnd")
[1] 1
> ## --> *not* showing df=5 as it is fixed
> (m3 <- mixCopula(list(normalCopula(0.4), tX4, tn3), w = (1:3)/6))
mixCopula from 3 components
(Nrmc, t-cp, t-cp) with weights:
(0.1666667, 0.3333333, 0.5000000)
Dimension: 2
Parameters (partly fixed, see ':='):
m1.rho.1 = 0.4000000
m2.rho.1 = 0.2000000
m2.df := 5.0000000
m3.rho.1 = -0.5000000
m3.df = 3.0000000
w1 = 0.1666667
w2 = 0.3333333
w3 = 0.5000000
> # -> shows 'm2.df := 5' as fixed !
> (th. <- getTheta(m3, attr = TRUE))# ditto
m1.rho.1 m2.rho.1 m3.rho.1 m3.df w1 w2 w3
0.4000000 0.2000000 -0.5000000 3.0000000 0.1666667 0.3333333 0.5000000
attr(,"param.lowbnd")
[1] -1.00 -1.00 -1.00 0.01 0.00 0.00 0.00
attr(,"param.upbnd")
[1] 1 1 1 Inf 1 1 1
> (th <- getTheta(m3, named= TRUE))
m1.rho.1 m2.rho.1 m3.rho.1 m3.df w1 w2 w3
0.4000000 0.2000000 -0.5000000 3.0000000 0.1666667 0.3333333 0.5000000
> ##' an inverse function of which(.) :
> trueAt <- function(i, n) { r <- logical(n); r[i] <- TRUE; r }
> ## Functionality check of trueAt() :
> set.seed(17); summary(Ns <- rpois(1000,3))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.000 2.000 3.000 2.949 4.000 10.000
> table(Ns) ; M <- max(Ns)
Ns
0 1 2 3 4 5 6 7 8 9 10
57 166 201 230 166 103 44 21 9 1 2
> for(n in Ns) {
+ i <- sort(sample.int(M, n))
+ stopifnot(identical(i, which(trueAt(i, M))))
+ }# takes ~ 0.05 sec
>
> stopifnot(exprs = {
+ identical(th, structure(th., param.lowbnd=NULL, param.upbnd=NULL))
+ identical(names(th), c("m1.rho.1", "m2.rho.1", "m3.rho.1", "m3.df",
+ paste0("w", 1:3)))
+ identical(isFree(m3), !trueAt(3, n=8))# free everywhere but at [3]
+ })
>
> set.seed(47)
> Ut <- rCopula(2^12, m3)
> ##
> fixedParam(m3) <- trueAt(c(3, 8), n=8)
> stopifnot(exprs = {
+ nParam(m3, freeOnly =FALSE) == 8
+ nParam(m3, freeOnly = TRUE) == 6
+ length(print( getTheta(m3, freeOnly=FALSE, named=TRUE))) == 8
+ length(print( getTheta(m3, freeOnly=TRUE, named=TRUE))) == 6
+ })
m1.rho.1 m2.rho.1 m2.df m3.rho.1 m3.df w1 w2
0.4000000 0.2000000 5.0000000 -0.5000000 3.0000000 0.1666667 0.3333333
w3
0.5000000
m1.rho.1 m2.rho.1 m3.rho.1 m3.df w1 w2
0.4000000 0.2000000 -0.5000000 3.0000000 0.1666667 0.3333333
> (iniP <- getIniParam(m3, Ut, default=NA)) # matching the freeOnly case:
[1] -0.1120962 -0.1120962 -0.1120962 3.0000000 0.2500000 0.2500000
> stopifnot(exprs = {
+ identical(iniP, getIniParam(m3, Ut))
+ length(iniP) == 6
+ iniP == c(rep(iniP[1], 3), 3, 1/4, 1/4)
+ })
> ## does the inital value possibly "work" ?
> (llIni <- loglikCopula(u=Ut, copula=setTheta(m3, iniP, na.ok=FALSE))) # 205.0475; yes, good, ...
[1] 205.0475
> if(doExtras) withAutoprint({ ## try with the true parameters of 'm3'
+ system.time( # 28 sec
+ f38 <- fitCopula(m3, Ut, traceOpt=TRUE) )
+ coef(f38) ; logLik(f38)
+ summary(f38)
+
+ ## More realistically, starting from our simple iniP[] (instead from the true \theta !!)
+ system.time(
+ ffI <- fitCopula(m3, Ut, start = iniP, optim.method = "BFGS",
+ optim.control=list(maxit=999), traceOpt=TRUE) )
+ coef(ffI) ; logLik(ffI)
+ summary(ffI)
+ })
>
>
> fixedParam(m3) <- trueAt(c(3, 7), n=8) # (fixed wgt in the middle)
> ## works with fixed at c(3,6) ...
> (iniP37 <- getIniParam(m3, Ut))
[1] -0.1120962 -0.1120962 -0.1120962 3.0000000 0.3333333 0.3333333
> if(doExtras) withAutoprint({ # *much* slower (less parameters, why ??)
+ system.time( # 28 sec
+ f37 <- fitCopula(m3, Ut, start=iniP37, traceOpt=TRUE) )
+ coef(f37) ; logLik(f37)
+ summary(f37)
+ })
>
> ## setting "arbitrary" (pretty wrong) initial values :
> mm <- setTheta(m3, c(0, 0, 0, df=13, w1 = .6, w2 = .4))
> ## (not yet ... getIniParam() is more relevant !)
>
> proc.time()
user system elapsed
1.70 0.17 1.87