## Copyright (C) 2012 Marius Hofert, Ivan Kojadinovic, Martin Maechler, and Jun Yan
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
## FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://gnu.org/licenses/>.
require(copula)
## Global setup shared by all checks below.
## Fix the seed so that the simulated samples are reproducible.
set.seed(1)
n <- 2000 # sample size passed to the sampling functions below
N <- 100 # number of variates used for the Kolmogorov-Smirnov test
## maximal deviation for deciding if sample versions of Kendall's tau are
## "close enough" to population versions; NB: depends on 'n'
eps.tau <- 0.06
## 'doExtras' comes from the copula-internal helper of the same name; the
## outer parentheses print its value
(doExtras <- copula:::doExtras())
doPlots <- doExtras # plots are only produced when 'doExtras' is TRUE
## if plots are wanted but the device is not interactive, open a PDF device
if(doPlots && !dev.interactive(orNone=TRUE)) pdf("nac-experi.pdf")
options(warn = 2)# warning not allowed!
### 3d check functions #########################################################
##' Correlation check function and run time measuring
##'
##' Builds an n x 3 sample directly from the generator slots of 'cop':
##' column 1 only involves V0, columns 2 and 3 additionally involve V01 and
##' 'th1'. The two frailty-sampling calls (V0, V01) are timed.
##'
##' @title Check correlation matrix and measure run times of 3d fully nested
##'        Archimedean copulas
##' @param n number of variates to be drawn
##' @param th0 theta0
##' @param th1 theta1
##' @param cop acopula; its slots V0, V01, psi, iPsi and name are used
##' @return a list containing the system.time() results for V0 and V01
##'         (unnamed, positions 1 and 2), the family name ('name') and the
##'         matrix of pairwise sample Kendall's taus ('cor')
##' @author Marius Hofert, Martin Maechler
corCheck <- function(n, th0, th1, cop) {
    V0time  <- system.time(V0  <- cop@V0(n, th0))
    V01time <- system.time(V01 <- cop@V01(V0, th0, th1))
    ## NB: keep this argument order; the random number stream is consumed as
    ## runif(n), rexp(n), rexp(n)
    mat <- cbind(runif(n),
                 exp(-V0*cop@iPsi(cop@psi(rexp(n)/V01, th1), th0)),
                 exp(-V0*cop@iPsi(cop@psi(rexp(n)/V01, th1), th0)))
    ## 'mat[,] <-' keeps the matrix shape of 'mat' whatever cop@psi() returns;
    ## V0 (length n) is recycled along the columns, i.e., applied row-wise
    mat[,] <- cop@psi(-log(mat)/V0, th0)
    list(V0time, V01time, name = cop@name, cor = cor(mat, method = "kendall"))
}
##' Print the maximal and the mean absolute difference (both multiplied by
##' 1000) between two objects of Kendall's taus
##'
##' @param c1 matrix of Kendall's taus (must be a matrix)
##' @param c2 values to compare with; combined with c1 via 'c1 - c2'
##' @return invisible NULL; the function is called for its printed output
prt.tau.diff <- function(c1, c2) {
    stopifnot(is.matrix(c1))
    dist1000 <- abs(c1 - c2) * 1000
    line <- sprintf("Max & Mean distance * 1000 to true pairwise Kendall's taus: %7.1f %7.1f\n",
                    max(dist1000), mean(dist1000))
    cat(line)
    invisible(NULL)
}
##' Print the run times stored in a corCheck() result together with the
##' deviation of its Kendall's tau matrix from the true one
##'
##' @param x list as returned by corCheck(): timings in positions 1 and 2,
##'        Kendall's tau matrix in component "cor"
##' @param trCorr true Kendall's taus to compare x[["cor"]] with
##' @param famName family name used in the output; defaults to x$name
corCheckout <- function(x, trCorr, famName = x$name) {
    ## first entry of a timing, converted to milliseconds
    ms <- function(tm) 1000*tm[1]
    cat(sprintf("Time [ms] V0 for '%s': %7.1f\n", famName, ms(x[[1]])))
    cat(sprintf("Time [ms] V01 for '%s': %7.1f\n", famName, ms(x[[2]])))
    prt.tau.diff(x[["cor"]], trCorr)
    cat("\n")
}
##' Function implementing the chi^2 test
##'
##' The unit hypercube is divided into nInt^d cubes of side length 1/nInt.
##' For each of N replications, a sample of size n is drawn from 'cop' via
##' rnacopula(), the observations per cube are counted and the chi^2
##' statistic against the expected counts (n times the cube probabilities
##' obtained via prob()) is computed. The N statistics are then compared
##' with the chi^2 distribution with nInt^d - 1 degrees of freedom by a
##' Kolmogorov-Smirnov test.
##'
##' @title The Chi-square test
##' @param n [integer] sample size
##' @param N [integer] number of replications
##' @param cop outer_nacopula to generate from
##' @param nInt positive integer: the number of intervals used for each grid
##'        dimension
##' @param verbose logical; if TRUE, a progress counter is printed
##' @return a "chiSqChk_cop" object; just a list(...) and a print method
##' @author Marius Hofert, Martin Maechler
chiSq_check_cop <- function(n, N, cop, nInt, verbose = interactive()) {
    copName <- deparse(substitute(cop)) # copula name
    d <- dim(cop) # copula dimension
    stopifnot(is.numeric(d), d >= 1, is.numeric(nInt), nInt >= 1)
    mesh <- 1/nInt # side length of each cube
    pts <- (1:nInt)/nInt # (upper) division points [lower = upper - mesh
                         # = {0, ..., 1-mesh}]; nInt >= 1 is checked above
    mygrid <- do.call("expand.grid", rep.int(list(pts), d)) # build grid
    m <- nInt^d # == grid length == nrow(mygrid)
    v.cube <- nInt^(0:(d-1)) # weights turning interval numbers into a cube number

    ## returns the number of the cube in which each row of U falls
    cube <- function(U, pts) {
        di <- dim(U)
        intervals <- array(cut(U, breaks = pts, include.lowest = TRUE,
                               labels = FALSE), # "interval number" for each
                                                # component of U; these are
                                                # in {NA,1,2,3,...,nInt-1}
                           dim = di)
        intervals[is.na(intervals)] <- 0 # NAs correspond to smallest interval
        as.vector(intervals %*% v.cube) + 1
    }

    ## probability mass in the cube with upper corner 'u'
    prob_up <- function(u) prob(cop, u - mesh, u)

    ## determine the expected number of observations in each cube
    masscube <- apply(mygrid, 1, prob_up)
    E_nobs <- n * masscube

    ## chi^2 test statistic of the k-th replication; 'k' is only used for the
    ## progress output (a line break after every 20th replication)
    chi2_stat <- function(k) {
        if(verbose) cat(sprintf("%2d%1s", k, if(k %% 20) "" else "\n"))
        U <- rnacopula(n, cop) # generate data
        cubenumbers <- cube(U, pts) # cube number of each row vector of U
        nobs <- tabulate(cubenumbers, nbins = m) # observations in each cube
        sum((nobs - E_nobs)^2 / E_nobs)
    }

    ## simulate, count and compute the N test statistics; measure user time.
    ## vapply() guarantees a numeric vector of length N.
    CPU <- system.time({
        chi2 <- vapply(seq_len(N), chi2_stat, numeric(1))
        if(verbose) cat("done\n")
    })[1]

    structure(class = "chiSqChk_cop",
              list( ## compute the result of the Kolmogorov-Smirnov test based
                    ## on the N realizations of the chi^2 test statistics:
                  ks = ks.test(chi2, "pchisq", df = m-1),
                  T = chi2, CPU = CPU,
                  n=n, N=N, copName = copName, m = m,
                  ## percentage of cubes that fulfill the rule of thumb:
                  percentrot = (sum(E_nobs >= 5)/m)*100
              ))
}## end{chiSq_check_cop}
##' Print method for "chiSqChk_cop" objects
##'
##' Prints the p-value of the Kolmogorov-Smirnov test, the percentage of
##' cubes fulfilling the chi^2 rule of thumb and the user time. P-values
##' below 0.05 are flagged, and a p-value not larger than 0.001 is an error.
##'
##' @param x list with (at least) the components ks, T, CPU, n, N; copName
##'        and percentrot are printed as well
##' @param ... not used
##' @return x, invisibly
print.chiSqChk_cop <- function(x, ...) {
    ## 'pv' is the second component of x$ks (the p-value of an "htest")
    stopifnot(is.list(x), all(c("ks","T","CPU","n","N") %in% names(x)),
              is.numeric(pv <- x$ks[[2]]))
    head.txt <- sprintf("%s (3d)NAcopula (n=%d):\n %s (N=%d): %s\n ",
                        x$copName, x$n,
                        "P-value of the chi-square test", x$N, format.pval(pv))
    rot.txt <- sprintf("Percentage fulfilling chi^2 rule of thumb: %4.1f\n",
                       x$percentrot)
    time.txt <- sprintf("Time (user) needed = c(N,n; cop) = %8.1f [ms]\n",
                        1000 * x$CPU)
    cat(head.txt, rot.txt, time.txt, sep="")
    if(pv < 0.05) {
        if(pv >= 0.01) {
            cat("\n*** > > > P-value < 0.05 <<<<<<<<<<<<<<<<<<<<<<<<\n\n")
        } else {
            cat("\n*************** P-value < 0.01 <<<<<<<<<<<<<<<<<<<<<<<<\n",
                "\n*************** ============== <<<<<<<<<<<<<<<<<<<<<<<<\n\n")
        }
        ## a tiny p-value makes the calling script fail
        stopifnot(pv > 0.001)
    }
    invisible(x)
}
##' Compute the probability to fall in a cube with lower point l and upper
##' point u for d=3 by inclusion-exclusion over the eight corners
##'
##' @param cop copula object passed on to pCopula()
##' @param l lower corner of the cube (3 components are used)
##' @param u upper corner of the cube (3 components are used)
##' @return value of the alternating sum of pCopula() over the eight corners
probin3dcube <- function(cop, l, u) {
    Cfun <- function(pt) pCopula(pt, cop)
    ## corners with exactly one / exactly two coordinates taken from 'l'
    one.low <- list(c(l[1],u[2],u[3]),
                    c(u[1],l[2],u[3]),
                    c(u[1],u[2],l[3]))
    two.low <- list(c(l[1],l[2],u[3]),
                    c(l[1],u[2],l[3]),
                    c(u[1],l[2],l[3]))
    ## accumulate the terms strictly in this order
    acc <- Cfun(u)
    for(pt in one.low) acc <- acc - Cfun(pt)
    for(pt in two.low) acc <- acc + Cfun(pt)
    acc - Cfun(l)
}
### 3d examples ################################################################
### AMH ########################################################################
## parameters of the outer (theta0) and the nested (theta1) level
theta0 <- 0.7135 # tau_{12}=tau_{13}=0.2, tau_{23}=0.3
theta1 <- 0.9430
## check 1: the sample Kendall's taus computed by corCheck() have to be
## within eps.tau of the true values in 'trCorr'
corCheckAMH <- corCheck(n,theta0,theta1,copAMH)
trCorr <- rbind(c(1,0.2,0.2),
c(0.2,1,0.3),
c(0.2,0.3,1))
corCheckout(corCheckAMH,trCorr)
stopifnot(max(abs(corCheckAMH[["cor"]]-trCorr)) < eps.tau)
## check 2: build the nested copula object explicitly via new(): component 1
## on the outer level, components 2:3 in one child copula
AMH3d <-
new("outer_nacopula", copula = setTheta(copAMH, theta0),
comp = 1L,
childCops = list(new("nacopula",
copula = setTheta(copAMH, theta1),
comp = 2:3)) # no childCops
)
## constructor forms of the above (all have to be identical to AMH3d, as
## checked by the stopifnot() below):
rr <- onacopula("A", C(0.7135, 1, list(C(0.943, 2:3, NULL))))
r0 <- onacopula("A", C(0.7135, 1, C(0.943, 2:3, NULL)))
r1 <- onacopula("A", C(0.7135, 1, C(0.943, 2:3, )))
r2 <- onacopula("AMH", C(0.7135, 1, C(0.943, 2:3 )))
stopifnot(identical(AMH3d, rr), identical(rr, r0),
identical(r0, r1), identical(r1, r2))
## check: chi^2 test via chiSq_check_cop() with 5 intervals per dimension;
## the outer parentheses print the result
(chkAMH <- chiSq_check_cop(n,N,AMH3d,5))
## check probability: prob() has to agree with the inclusion-exclusion
## formula of probin3dcube(); 'l' and 'u' are reused by the sections below
l <- c(.1, .05, .3)
u <- c(.4, .7, .6)
stopifnot(all.equal(print( prob(AMH3d,l,u)),
probin3dcube(AMH3d,l,u), tolerance=1e-14))
### Clayton ####################################################################
theta0 <- 0.5 # tau_{12}=tau_{13}=0.2, tau_{23}=0.5
theta1 <- 2
## check 1: sample Kendall's taus from corCheck() versus the true values
corCheckClayton <- corCheck(n,theta0,theta1,copClayton)
trCorr <- rbind(c(1,0.2,0.2),
c(0.2,1,0.5),
c(0.2,0.5,1))
corCheckout(corCheckClayton,trCorr)
stopifnot(max(abs(corCheckClayton[["cor"]]-trCorr)) < eps.tau)
## check 2: chi^2 test; NB: called with sample size 512 and 100 replications
## instead of the global 'n' and 'N'
Clayton3d <- onacopula("Clayton", C(theta0, 1, C(theta1, 2:3)))
(chkClayton <- chiSq_check_cop(512,100,Clayton3d,5))
## check probability ('l' and 'u' as defined in the AMH section above)
stopifnot(all.equal(print( prob(Clayton3d,l,u)),
probin3dcube(Clayton3d,l,u), tolerance=1e-14))
### Frank ######################################################################
theta0 <- 1.8609 # tau_{12}=tau_{13}=0.2, tau_{23}=0.5
theta1 <- 5.7363
## check 1: 'trCorr' is not redefined here, i.e., the matrix from the Clayton
## section above (taus 0.2, 0.2, 0.5) is reused
corCheckFrank <- corCheck(n,theta0,theta1,copFrank)
corCheckout(corCheckFrank,trCorr)
stopifnot(max(abs(corCheckFrank[["cor"]]-trCorr)) < eps.tau)
## check 2: chi^2 test via chiSq_check_cop() with 5 intervals per dimension
Frank3d <- onacopula("F", C(theta0, 1, C(theta1, 2:3)))
(chkFrank <- chiSq_check_cop(n,N,Frank3d,5))
## check probability ('l' and 'u' as defined in the AMH section above)
stopifnot(all.equal(print( prob(Frank3d,l,u)),
probin3dcube(Frank3d,l,u), tolerance=1e-14))
### Gumbel #####################################################################
theta0 <- 1.25
theta1 <- 2 #--> tau_{12}=tau_{13}=0.2, tau_{23}=0.5
## true Kendall's taus (also used by the Joe section below)
trCorr <- rbind(c(1,0.2,0.2),
c(0.2,1,0.5),
c(0.2,0.5,1))
## check 1: sample Kendall's taus from corCheck() versus the true values
corCheckGumbel <- corCheck(n,theta0,theta1,copGumbel)
corCheckout(corCheckGumbel,trCorr)
stopifnot(max(abs(corCheckGumbel[["cor"]]-trCorr)) < eps.tau)
## check 2: chi^2 test via chiSq_check_cop() with 5 intervals per dimension
Gumbel3d <- onacopula("Gumbel", C(theta0, 1, C(theta1, 2:3)))
(chkGumbel <- chiSq_check_cop(n,N,Gumbel3d,5))
## check probability ('l' and 'u' as defined in the AMH section above)
stopifnot(all.equal(print( prob(Gumbel3d,l,u)),
probin3dcube(Gumbel3d,l,u), tolerance=1e-14))
### Joe ########################################################################
theta0 <- 1.4438#tau_{12}=tau_{13}=0.2, tau_{23}=0.5
theta1 <- 2.8562
## check 1: 'trCorr' is not redefined here, i.e., the matrix from the Gumbel
## section above is reused
corCheckJoe <- corCheck(n,theta0,theta1,copJoe)
corCheckout(corCheckJoe,trCorr)
stopifnot(max(abs(corCheckJoe[["cor"]]-trCorr)) < eps.tau)
## check 2: chi^2 test via chiSq_check_cop() with 5 intervals per dimension
Joe3d <- onacopula("J", C(theta0, 1, C(theta1, 2:3)))
(chkJoe <- chiSq_check_cop(n,N,Joe3d,5))
## check probability ('l' and 'u' as defined in the AMH section above)
stopifnot(all.equal(print( prob(Joe3d,l,u)),
probin3dcube(Joe3d,l,u), tolerance=1e-14))
### Examples that check pnacopula() and rnacopula() ############################
##' Generate output for the examples: the time needed for sampling and the
##' deviation of the sample Kendall's taus from the true ones
##'
##' @param c1 matrix of sample Kendall's taus
##' @param c2 true Kendall's taus
##' @param rt timing (as returned by system.time()); its first entry is used
##' NB: the sample size 'n' is taken from the calling (global) environment
prt.stats <- function(c1, c2, rt) {
    ms <- round(1000*rt[1], 1)
    cat("Time [ms] for generating", n, "vectors of variates: ", ms, "\n")
    prt.tau.diff(c1, c2)
    cat("\n")
}
### 3d Ali-Mikhail-Haq copula example ##########################################
## outer parameter 0.7135 with component 1; one child copula with parameter
## 0.943 and components 2:3
c3 <- onacopula("A", C(0.7135, 1, list(C(0.943, 2:3))))
## basic check: dimension and components of the whole copula and of the child
d <- dim(c3)
stopifnot(d == 3,
allComp(c3) == 1:3,
allComp(c3@childCops[[1]]) == 2:3)
## test pCopula(., ) {was pnacopula()}
u <- c(.3, .4, .5)
## with function:
v <- pCopula(u, c3)
## by hand: generator 'psi' and its inverse 'iPsi' as used for this check;
## first evaluate the nested level (components 2, 3), then the outer level
psi <- function(t,theta) { (1-theta)/(exp(t)-theta) }
iPsi <- function(t,theta) { log((1-theta*(1-t))/t) }
th0 <- 0.7135
th1 <- 0.9430
level1 <- psi(iPsi(u[2],th1) + iPsi(u[3],th1), th1)
level0 <- psi(iPsi(u[1],th0) + iPsi(level1, th0), th0)
stopifnot(all.equal(v, level0, tolerance = 1e-14))
## test rnacopula(): shape of the sample and closeness of its pairwise
## Kendall's taus to the true values
rt <- system.time(rC3 <- rnacopula(n,c3))
C3 <- cor(rC3,method = "kendall")
trCorr <- rbind(c(1,0.2,0.2),
c(0.2,1,0.3),
c(0.2,0.3,1)) # tau_{12}=tau_{13}=0.2, tau_{23}=0.3
stopifnot(is.numeric(rC3), is.matrix(rC3),
dim(rC3) == c(n, 3),max(abs(C3-trCorr)) < eps.tau)
prt.stats(C3,trCorr,rt)
## scatter-plot matrix with smoothed densities in the panels
if(doPlots) {
stopifnot(require("KernSmooth"))## for smoothScatter()
pairs2(rC3, panel = function(...) { par(new = TRUE); smoothScatter(...) })
}
### 2d Clayton copula example ##################################################
c2 <- onacopula("Clayton", C(0.5, c(1,2))) # no childCops
## or simply c2 <- onacopula("Clayton", C(0.5, 1:2))
## basic check: dimension and components
d <- dim(c2)
stopifnot(d == 2,
allComp(c2) == 1:2)
## test pCopula(): compare with the closed form evaluated in local()
v <- pCopula(c(.3, .4), c2)
stopifnot(all.equal(v,
local( { u1 <- .3; u2 <- .4
(u1^(-1/2)+u2^(-1/2)-1)^(-2) }),
tolerance = 1e-14))
## test rnacopula(): with the same seed, rnacopula() and the internal
## racopula() have to return identical samples
racopula <- copula:::racopula
set.seed(17) ; rt <- system.time(rC2 <- rnacopula(n,c2))
set.seed(17) ; rt. <- system.time(rc2 <- racopula (n, c2@copula, d=2))
stopifnot(identical(rC2, rc2))
C2 <- cor(rC2,method = "kendall")
trCorr <- rbind(c(1,0.2),
c(0.2,1)) # tau_{12}=0.2
stopifnot(is.numeric(rC2), is.matrix(rC2),
dim(rC2) == c(n, 2), max(abs(C2-trCorr)) < eps.tau)
prt.stats(C2,trCorr,rt)
if(doPlots)
smoothScatter(rC2)
### 3d Clayton copula example ##################################################
## outer parameter 0.5 with component 1; child with parameter 2 and
## components 2, 3
c3 <- onacopula("C", C(0.5, 1, C(2., c(2,3))))
## basic check: dimension and components of the whole copula and of the child
d <- dim(c3)
stopifnot(d == 3,
allComp(c3) == 1:3,
allComp(c3@childCops[[1]]) == 2:3)
## test pCopula(): compare with the closed form evaluated in local()
v <- pCopula(c(.3, .4, .5), c3)
stopifnot(all.equal(v,
local( { u1 <- .3; u2 <- .4; u3 <- .5
1/((1/u2^2 +1/u3^2 -1)^(1/4) -1 +1/sqrt(u1))^2 }),
tolerance = 1e-14))
## test rnacopula(): shape of the sample and closeness of its pairwise
## Kendall's taus to the true values
rt <- system.time(rC3 <- rnacopula(n,c3))
C3 <- cor(rC3,method = "kendall")
trCorr <- matrix(c(1,0.2,0.2,0.2,1,0.5,0.2,0.5,1),nrow = 3,byrow = TRUE)
# tau_{12}=tau_{13}=0.2, tau_{23}=0.5
stopifnot(is.numeric(rC3), is.matrix(rC3),
dim(rC3) == c(n, 3),max(abs(C3-trCorr)) < eps.tau)
prt.stats(C3,trCorr,rt)
if(doPlots)
pairs2(rC3, panel = function(...) { par(new = TRUE); smoothScatter(...) })
### 9d Clayton copula example ##################################################
## three nesting levels: parameter 0.5 with components (3,6,1), therein
## parameter 2 with components (9,2,7,5), therein parameter 3 with
## components (8,4)
c9 <- onacopula("Clayton", C(0.5, c(3,6,1),
C(2., c(9,2,7,5),
C(3., c(8,4)))))
## the same structure as a nested list, for onacopulaL()
c9Lis <- list(0.5, c(3,6,1),
list(list(2., c(9,2,7,5),
list(list(3., c(8,4))))))
## consistency onacopula() <-> onacopulaL() :
stopifnot(identical(c9, onacopulaL("Clayton", c9Lis)))
## basic check: dimension and components on every nesting level
d <- dim(c9)
stopifnot(d == 9,
allComp(c9) == c(3,6,1,9,2,7,5,8,4),
allComp(c9@childCops[[1]]) == c(9,2,7,5,8,4),
allComp(c9@childCops[[1]]@childCops[[1]]) == c(8,4))
## test pCopula()
u <- seq(0.1,0.9,by = 0.1)
v <- pCopula(u, c9)
## by hand: evaluate from the innermost level outwards; 'u' is indexed by the
## component numbers of the respective level
psi <- function(t,theta) { (1+t)^(-1/theta) }
iPsi <- function(t,theta) { t^(-theta) - 1 }
th0 <- 0.5
th1 <- 2
th2 <- 3
level2 <- psi(iPsi(u[8],th2) + iPsi(u[4],th2), th2)
level1 <- psi(iPsi(u[9],th1)+
iPsi(u[2],th1)+
iPsi(u[7],th1)+
iPsi(u[5],th1) +
iPsi(level2, th1), th1)
level0 <- psi(iPsi(u[3],th0)+
iPsi(u[6],th0)+
iPsi(u[1],th0)+
iPsi(level1, th0), th0)
stopifnot(all.equal(v, level0, tolerance = 1e-14))
## test rnacopula()
rt <- system.time(rC9 <- rnacopula(n,c9))
C9 <- cor(rC9,method = "kendall")
## Theoretical values (upper triangle, row by row; "ij" denotes tau_{ij}):
## (11,12,13,14,15,16,17,18,19)=(1,0.2,0.2,0.2,0.2,0.2,0.2,0.2,0.2)
## (22,23,24,25,26,27,28,29)=(1,0.2,0.5,0.5,0.2,0.5,0.5,0.5)
## (33,34,35,36,37,38,39)=(1,0.2,0.2,0.2,0.2,0.2,0.2)
## (44,45,46,47,48,49)=(1,0.5,0.2,0.5,0.6,0.5)
## (55,56,57,58,59)=(1,0.2,0.5,0.5,0.5)
## (66,67,68,69)=(1,0.2,0.2,0.2)
## (77,78,79)=(1,0.5,0.5)
## (88,89)=(1,0.5)
C9.true <- rbind(c(1. ,rep(0.2,8)),
c(0.2,1. ,0.2,0.5,0.5,0.2, rep(0.5,3)),
c(0.2,0.2,1. , rep(0.2,6)),
c(0.2,0.5,0.2,1. ,0.5,0.2,0.5,0.6,0.5),
c(0.2,0.5,0.2,0.5,1. ,0.2, rep(0.5,3)),
c(rep(0.2,5), 1. , rep(0.2,3)),
c(0.2,0.5,0.2,0.5,0.5,0.2,1. ,0.5,0.5),
c(0.2,0.5,0.2,0.6,0.5,0.2,0.5,1. ,0.5),
c(0.2,0.5,0.2,0.5,0.5,0.2,0.5,0.5,1. ))
## shape of the sample and closeness of its Kendall's taus to C9.true
stopifnot(dim(rC9) == c(n, 9),
max(abs(C9-C9.true)) < eps.tau)
prt.stats(C9,C9.true,rt)
## the 9 x 9 scatter-plot matrix is only drawn when dev.interactive() is TRUE
if(doPlots && dev.interactive(orNone=TRUE)) # "large"
pairs2(rC9, gap = .1, pch = 20, cex = 0.2, col = rgb(.2,.1,.7, alpha = .5),
main = paste0(n," vectors of a ", d,"-dimensional nested Clayton copula"))
### 25d Clayton ==============================================
## outer parameter 0.5 with the single direct component 17 and five child
## copulas, two of which have a child copula themselves
c25 <- onacopula("Clayton", C(0.5, 17,
                              list(C(2, 20:18),
                                   C(2.5, c(25,23, 8:12)),
                                   C(2.25,c(24,21), C(4, 3:5)),
                                   C(1.5, c(22,15:16), C(1.7, 1:2)),
                                   C(3, c(6:7, 14:13)))))
## every component 1,...,25 has to appear exactly once
stopifnot(identical(sort(allComp(c25)), 1:25))
c25
## pCopula() near the lower and the upper corner versus reference values
## ('tolerance' spelled out instead of relying on partial argument matching;
## the trailing numbers are those noted in the original comments)
stopifnot(
    all.equal(pCopula(rep(.01, 25), c25), 0.0001734511294041, tolerance = 9e-9)# 1.84e-13
   ,
    all.equal(pCopula(rep(.99, 25), c25), 0.79506048556858, tolerance = 9e-9)# 3.77e-15
)
### 125d Clayton copula example ################################################
## outer parameter 0.5 without direct components and six child copulas
c125 <- onacopula("Clayton", C(0.5, , # no direct components
list(C(2, 1:10),
C(3, 11:40),
C(2, 41:60),
C(2, 61:85),
C(3, 86:105),
C(2,106:125))))
## the same structure as a nested list, for onacopulaL()
c125Lis <- list(0.5, integer(0), # <- could use NULL
list(list(2, 1:10),
list(3, 11:40),
list(2, 41:60),
list(2, 61:85),
list(3, 86:105),
list(2,106:125)))
## consistency onacopula() <-> onacopulaL() :
stopifnot(identical(c125, onacopulaL("C", c125Lis)))
## basic check: dimension and components of the copula and of each child
d <- dim(c125)
stopifnot(d == 125,
allComp(c125) == 1:125,
allComp(c125@childCops[[1]]) == 1:10,
allComp(c125@childCops[[2]]) == 11:40,
allComp(c125@childCops[[3]]) == 41:60,
allComp(c125@childCops[[4]]) == 61:85,
allComp(c125@childCops[[5]]) == 86:105,
allComp(c125@childCops[[6]]) == 106:125
)
## test rnacopula()
rt <- system.time(rC125 <- rnacopula(n,c125))
stopifnot(is.numeric(rC125), is.matrix(rC125), dim(rC125) == c(n, 125))
cat("Time elapsed for generating ",n," vectors of variates:\n",sep = "")
rt
## evaluate the copula at the sample; NB: the reference values below belong
## to this particular sample 'rC125' and hence depend on the state of the
## random number generator reached at this point of the script
summary(p125 <- pCopula(rC125, c125))
stopifnot(is.finite(p125),
all.equal(range(p125), c(4.13323986e-07, 0.03966520023))# tol=4.9e-11
,
all.equal(quantile(p125, (1:3)/4, names=FALSE),
c(0.002422534035, 0.005433890506, 0.009965210908)) # tol=1.5e-11
)
## 4d nested Gumbel copula without direct components: two child copulas with
## the same parameter theta[2], on components (1,2) and (3,4)
theta <- c(2,8)
copG4 <- onacopulaL("Gumbel",
                    list(theta[1], NULL,
                         list(list(theta[2], c(1,2)),
                              list(theta[2], c(3,4)))))
set.seed(11)
## NB: 'uG4' is not used again in this section; the draw is kept since it
## advances the random number stream used by the sampling further below
uG4 <- rCopula(1000, copG4)
## evaluation points, one per row
u4. <- rbind(c(1, 0.5, 1, 0.5),
             c(0.8, 0.4, 0.8, 0.5),
             c(0.9, 0.5, 0.9, 0.5))
(pu4 <- pCopula(u4., copula = copG4))
## ('tolerance' spelled out instead of relying on partial argument matching)
stopifnot(
    all.equal(
        pu4, c(0.375214227246, 0.316973265328, 0.375214214144), tolerance = 9e-9)# 8e-13
   ,
    all.equal(print(
        prob(copG4, l = rep(0.9, 4), u = rep(1, 4))
    ), 0.056602230621, tolerance = 1e-9)
   ,
    ## and the probability of an empty corner is very small
    all.equal(print(
        prob(copG4,
             l = c(0, 0.7, 0, 0.7),
             u = c(0.4, 1 , 0.4,1 ))
      , digits = 15), 8.832425e-10, tolerance = 1e-5)
)
## Less "even" example: 8d nested Gumbel copula with direct components 5:6,
## one child on components (1,2,7) and one child on component 3 which itself
## has a child on components (8,4)
theta <- c(2, 4, 8, 25)
copG8 <- onacopulaL("Gumbel",
                    list(theta[1], 5:6,
                         list(list(theta[2], c(1,2,7)),
                              list(theta[3], 3, list(list(theta[4], c(8,4)))))))
## NB: no seed is set here, so this sample (and the reference values checked
## below) depend on the state of the random number generator at this point
uu <- rCopula(500, copG8)
if(doPlots)
    splom2(uu, col = adjustcolor("darkseagreen4", 0.5), cex = 0.25)
summary(puu <- pCopula(uu, copG8))
## ('tolerance' spelled out instead of relying on partial argument matching)
stopifnot(is.finite(puu),
          all.equal(min(puu), 5.353253447e-06, tolerance = 9e-9) # 2.33e-12
         ,
          all.equal(max(puu), 0.99840742713, tolerance = 9e-9) # 2.34e-12
          )
cat('Time elapsed: ', proc.time(),'\n') # for ``statistical reasons''