#### Used to be part of /u/maechler/R/MM/NUMERICS/hyper-dist.R -- till Jan.24 2020 library(DPQ) #### First version committed is the code "as in Apr 1999": #### file | 11782 | Apr 22 1999 | hyper-dist.R options(rErr.eps = 1e-30) rErr <- function(approx, true, eps = getOption("rErr.eps", 1e-30)) { ifelse(Mod(true) >= eps, 1 - approx / true, # relative error true - approx) # absolute error (e.g. when true=0) } if(!dev.interactive(orNone=TRUE)) pdf("hyper-dist-ex.pdf") ### ----------- ----------- rErr.phypDiv <- function(q, m,n,k) { ph <- phyper(q, m=m, n=n, k=k) cbind( rERR.Ibeta= rErr(phyperIbeta (q, m=m, n=n, k=k) , ph), rERR.as151= rErr(phyperApprAS152 (q, m=m, n=n, k=k) , ph), rERR.1mol = rErr(phyper1molenaar (q, m=m, n=n, k=k) , ph), rERR.2mol = rErr(phyper2molenaar (q, m=m, n=n, k=k) , ph), rERR.Peizer=rErr(phyperPeizer (q, m=m, n=n, k=k) , ph)) } ## Plotting of the rel.error: p.rErr.phypDiv <- function(m,n,k, q = .suppHyper(m,n,k), abslog = FALSE, logx= "", type = "b") { rE <- rErr.phypDiv(m=m, n=n, k=k, q=q) nc <- ncol(rE) cc <- adjustcolor(1:nc, 0.5) ppch <- as.character(1:nc) cl <- sys.call() cl[[1]] <- quote(phyper) matplot(q, if(abslog) abs(rE) else rE, type = type, lwd=2, col=cc, pch=ppch, ylab = if(abslog) quote(abs(rE)) else quote(rE), log = paste0(logx, if(abslog) "y" else ""), main = sprintf("%s", deparse1(cl)))# mtext(paste(if(abslog) "|rel.Err|" else "rel.Err.", "of diverse phyper() pbinom* approximations")) if(!abslog) abline(h=0, col=adjustcolor("gray", 0.5), lty=2) legend("topright", legend = paste(1:ncol(rE), colnames(rE), sep=": "), lwd=2, col=cc, lty=1:4, pch=ppch, bty="n") invisible(rE) } k <- c(10*(1:9),100*(1:9),1000*(1:9)) k1 <- k options(digits=4) rErr12 <- p.rErr.phypDiv(q=k, 2*k,2*k,2*k) p.rErr.phypDiv(q=k, 2*k,2*k,2*k, abslog=TRUE) k <- c(10*(7:9),100*(1:9),1000*(1:9), 1e4*(1:9)) k2 <- k p.rErr.phypDiv(q=k, 2*k,2*k,2*k, abslog=TRUE, logx="x") k <- c(10*(1:9),100*(1:3)) # small k only (afterwards --> all phyper*() == 1 !)(revert) ## Here, the Ibeta fails (NaN) ; *also* Molenaar's (???? contradiction to below) (rErr1215 <- p.rErr.phypDiv(q=k, 1.2*k, 2*k, 1.5*k)) p.rErr.phypDiv(q=k, 1.2*k, 2*k, 1.5*k, abslog=TRUE, logx="x") (rErr1215 <- p.rErr.phypDiv(q=k, 1.2*k, 2*k, 1.5*k)) p.rErr.phypDiv(q=k, 1.2*k, 2*k, 1.5*k, abslog=TRUE, logx="x") k <- c(10*(7:9), t(outer(10^(2:5), 1:9))) x <- round(.8*k); ph.k2k <- phyper(x,1.6*k, 2*k,1.8*k) (rErr1618 <- p.rErr.phypDiv(q=x, 1.6*k, 2*k, 1.8*k, logx="x")) ## AS151 is really unusable here ! p.rErr.phypDiv(q=x, 1.6*k, 2*k, 1.8*k, abslog=TRUE, logx="x") ## interestingly, Peizer suffers from some erratic behavior for large q ## --> the L has 4 log(.) terms, each of which has argument ~= 1 <--> could use log1p(.) or ??? k <- k1 # the old set par(mfrow=c(2,1)) for(Reps in c(0,1)) { options(rErr.eps = Reps) ## 0: always RELATIVE error; 1: always ABSOLUTE x <- round(.6*k); ph.k2k <- phyper(x,1.6*k, 2*k,1.8*k) ## ^^^ print(ph.mat <- cbind(k=k, phyper= ph.k2k, rERR.Ibeta= rErr(phyperIbeta (x,1.6*k,2*k,1.8*k) , ph.k2k), rERR.as151= rErr(phyperApprAS152 (x,1.6*k,2*k,1.8*k) , ph.k2k), rERR.1mol = rErr(phyper1molenaar (x,1.6*k,2*k,1.8*k) , ph.k2k), rERR.2mol = rErr(phyper2molenaar (x,1.6*k,2*k,1.8*k) , ph.k2k), rERR.Peiz = rErr(phyperPeizer (x,1.6*k,2*k,1.8*k) , ph.k2k)) ) ## The two molenaar's ``break down''; Peizer remains decent! Err.phyp <- abs(ph.mat[,-(1:2)]) matplot(k, Err.phyp, ylim= if((ee <- .Options$rErr.eps) > 0) c(1e-200,1), main="|relE.{phyper(x = 0.6k, 1.6k, 2k, 1.8k)}|", type='o', log='xy') } hp1 <- list(n = 10, n1 = 12, n2 = 2) for(h.nr in 1:1) { attach(hplist <- get(paste("hp",h.nr, sep='')) ) # defining n, n1, n2 cat("\n"); print(unlist(hplist)) N <- n1 + n2 x <- 0:max(0,min(n1, n2 - n)) d <- dhyper(x, n1, n2, n) names(d) <- as.character(x) print(d) } ### Binomial Approximation(s) ================================================= ph.m <- cbind(ph = phyper (0:5, 5,15, 7), pM1 = phyperBinMolenaar.1(0:5, 5,15, 7), pM2 = phyperBinMolenaar.2(0:5, 5,15, 7), pM3 = phyperBinMolenaar.3(0:5, 5,15, 7), pM4 = phyperBinMolenaar.4(0:5, 5,15, 7)) cc <- adjustcolor(1:(1+ncol(ph.m)), 0.5); ppch <- as.character(0:4) matplot(0:5, ph.m, type = "b", lwd=2, col=cc, pch=ppch, main = "all 4 phyper() binomial approximations via Molenaar's") legend("right", legend = c("phyper", paste("ph.appr.Mol", 1:4)), lwd=2, col=cc, lty=1:5, pch=ppch, bty="n") ## relative errors : ph.err <- 1 - ph.m[,-1] / ph.m[,"ph"] matplot(0:5, ph.err, type = "b", lwd=2, col=cc, pch=ppch, main = "rel.Err. of the 4 phyper() binom.Molenaar's approximations") legend("topright", legend = paste("ph.appr.Mol", 1:4), lwd=2, col=cc[-1], lty=2:5, pch=ppch[-1], bty="n") ## quite interesting: if we take the *mean* of approx. 1 and 2 -- should become good? ## have rErr() above {but does not work with *matrix* ?} rErr.Mol.bin <- function(m,n,k, q = .suppHyper(m,n,k), lower.tail=TRUE, log.p=FALSE) { ph <- phyper (q, m=m, n=n, k=k, lower.tail=lower.tail, log.p=log.p) phM <- phyperAllBinM(m=m, n=n, k=k, q=q, lower.tail=lower.tail, log.p=log.p) apply(phM, 2, rErr, true = ph) } ## Plotting of the rel.error: p.rErr.Mol.bin <- function(m,n,k, q = .suppHyper(m,n,k), abslog = FALSE, lower.tail=TRUE, log.p=FALSE) { rE <- rErr.Mol.bin(m=m, n=n, k=k, q=q, lower.tail=lower.tail, log.p=log.p) cc <- adjustcolor(1:4, 0.5) ppch <- as.character(1:4) matplot(q, if(abslog) abs(rE) else rE, type = "b", lwd=2, col=cc, pch=ppch, log = if(abslog) "y" else "", main = sprintf("phyper(*, m = %d, n = %d, k = %d)", m,n,k)) mtext(paste(if(abslog) "|rel.Err|" else "rel.Err.", "of the 4 phyper() binom.Molenaar's approximations")) if(!abslog) abline(h=0, col=adjustcolor("gray", 0.5), lty=2) legend("topright", legend = paste("phyp.Mol", 1:4), lwd=2, col=cc, lty=1:4, pch=ppch, bty="n") invisible(rE) } p.rErr.Mol.bin (5,15, 7) # same plot, as the "manual" one above p.rErr.Mol.bin (70,100, 7)# here, 1 and 4 are good p.rErr.Mol.bin (70,100, 20)# 4 (and 1) ((maybe their *mean* !) p.rErr.Mol.bin (70,100, 20, abslog=TRUE)# 4 (and 1) p.rErr.Mol.bin (70,100, 50)# 4 p.rErr.Mol.bin (70,100, 50, abslog=TRUE) -> rE.71c50 (p.rErr.Mol.bin (70,100, 70,abslog=TRUE) -> rE.71c70)# 1 & 2; but *none* is good for small q par(new=TRUE) plot(0:70, phyper(0:70, 70,100, 70), type = "l", col="blue", ann=FALSE,axes=FALSE) ## rel error on log-scale ... really nonsense (p.rErr.Mol.bin (70,100, 70, log.p=TRUE))# 1 & 2; but *none* is good for small q ## however, *again* the *mean* of 1|2 with 3|4 seems fine ## Total Variation error -- what Kuensch(1998) used for binom() -- hyper() approx. TVerr <- function(approx, true) { d <- true - approx max(sum( d[d > 0]), sum(- d[d < 0])) } supErr <- function(approx, true) max(abs(true - approx)) TVerr.bin <- function(m,n,k, q = .suppHyper(m,n,k), lower.tail=TRUE, log.p=FALSE) { ph <- phyper (q, m=m, n=n, k=k, lower.tail=lower.tail, log.p=log.p) phM <- phyperAllBin(m=m, n=n, k=k, q=q, lower.tail=lower.tail, log.p=log.p) apply(phM, 2, TVerr, true = ph) } supErr.binM <- function(m,n,k, q = .suppHyper(m,n,k), lower.tail=TRUE, log.p=FALSE) { ph <- phyper (q, m=m, n=n, k=k, lower.tail=lower.tail, log.p=log.p) phM <- phyperAllBinM(m=m, n=n, k=k, q=q, lower.tail=lower.tail, log.p=log.p) apply(phM, 2, supErr, true = ph) } (ph.TV <- apply(ph.m[,-1], 2, TVerr, true = ph.m[,1])) ## in all three cases, Molenaar is clearly better than "simple" TVerr.bin(5,15,7) TVerr.bin(1000,2000,7) # 1 & 4 are very accurate TVerr.bin(1000,2000, 1500)# not very good TVerr.bin(100000, 200000, 150000) ## but TV error really *grows* with the support of the distribution supErr.binM(100000, 200000, 150000) supErr.binM(1000000, 2000000, 150000) supErr.binM(1e6, 1e9, 150000) round(-log10(supErr.binM(1e6, 1e9, 1e7)), 2) #=> correct #{digits} ## pM1 pM2 pM3 pM4 ## 5.98 7.98 7.99 5.99 -- m = 1e6 < k = 1e7 ==> approx. with size = m are better system.time(sE <- supErr.binM(1e7, 1e9, 1e7)) ## 20 sec. elapsed! ==> very slow !! round(-log10(sE), 2) ## pM1 pM2 pM3 pM4 ## 6.00 6.00 6.01 6.01 --- m = k ==> all are the same system.time(sE <- supErr.binM(1e7, 1e9, 100))# fast round(-log10(sE), 2) ## pM1 pM2 pM3 pM4 ## 15.35 5.22 5.21 14.65 k << m ==> 1 & 4 are much better {why '1' better than '4' ?} ### Here, we currently only explore normal approx: k <- c(10*(1:9),100*(1:9),1000*(1:9)) options(digits=4) ph.k2k <- phyper(k,2*k,2*k,2*k) # rel.err ~ 10^-7 cbind(k=k, phyper= ph.k2k, rERR.Ibeta= rErr(phyperIbeta (k,2*k,2*k,2*k) , ph.k2k), rERR.as151= rErr(phyperApprAS152 (k,2*k,2*k,2*k) , ph.k2k), rERR.1mol = rErr(phyper1molenaar (k,2*k,2*k,2*k) , ph.k2k), rERR.2mol = rErr(phyper2molenaar (k,2*k,2*k,2*k) , ph.k2k), rERR.Peizer=rErr(phyperPeizer (k,2*k,2*k,2*k) , ph.k2k)) ## Here, the Ibeta fails (NaN) ; moleaar's are both very good : ph.k2k <- phyper(k, 1.2*k, 2*k,1.5*k) cbind(k=k, phyper= ph.k2k, rERR.Ibeta= rErr(phyperIbeta (k,1.2*k,2*k,1.5*k) , ph.k2k), rERR.as151= rErr(phyperApprAS152 (k,1.2*k,2*k,1.5*k) , ph.k2k), rERR.1mol = rErr(phyper1molenaar (k,1.2*k,2*k,1.5*k) , ph.k2k), rERR.2mol = rErr(phyper2molenaar (k,1.2*k,2*k,1.5*k) , ph.k2k), rERR.Peizer=rErr(phyperPeizer (k,1.2*k,2*k,1.5*k) , ph.k2k)) x <- round(.8*k); ph.k2k <- phyper(x,1.6*k, 2*k,1.8*k) (ph.mat <- cbind(k=k, phyper= ph.k2k, rERR.Ibeta= rErr(phyperIbeta (x,1.6*k,2*k,1.8*k) , ph.k2k), rERR.as151= rErr(phyperApprAS152 (x,1.6*k,2*k,1.8*k) , ph.k2k), rERR.1mol = rErr(phyper1molenaar (x,1.6*k,2*k,1.8*k) , ph.k2k), rERR.2mol = rErr(phyper2molenaar (x,1.6*k,2*k,1.8*k) , ph.k2k), rERR.Peiz = rErr(phyperPeizer (x,1.6*k,2*k,1.8*k) , ph.k2k)) ) matplot(k, abs(ph.mat[,-(1:2)]), type='o', log='xy') op <- if(require("sfsmisc")) mult.fig(2)$old.par else par(mfrow=c(2,1)) for(Reps in c(0,1)) { options(rErr.eps = Reps) ## 0: always RELATIVE error; 1: always ABSOLUTE tit <- paste("phyper() approximations:", if(Reps == 0) "relative" else "absolute", "error") cat(tit,":\n") x <- round(.6*k); ph.k2k <- phyper(x,1.6*k, 2*k,1.8*k) print(ph.mat <- cbind(k=k, phyper= ph.k2k, rERR.Ibeta= rErr(phyperIbeta (x,1.6*k,2*k,1.8*k) , ph.k2k), rERR.as151= rErr(phyperApprAS152 (x,1.6*k,2*k,1.8*k) , ph.k2k), rERR.1mol = rErr(phyper1molenaar (x,1.6*k,2*k,1.8*k) , ph.k2k), rERR.2mol = rErr(phyper2molenaar (x,1.6*k,2*k,1.8*k) , ph.k2k), rERR.Peiz = rErr(phyperPeizer (x,1.6*k,2*k,1.8*k) , ph.k2k), rErr.binMol=rErr(phyperBinMolenaar.1(x,1.6*k,2*k,1.8*k) , ph.k2k)) ) ## The two molenaar's ``break down''; Peizer remains decent! Err.phyp <- abs(ph.mat[,-(1:2)]) matplot(k, Err.phyp, ylim= if((ee <- .Options$rErr.eps) > 0) c(1e-200,1), main = tit, type='o', log='xy', lty=1:6, col=1:6, pch=as.character(1:6)) if(Reps == 1) legend("bottomleft", c("Ibeta", "AS 152", "1_Molenaar", "2_Molenaar", "Peizer", "binom_Mol."), bty = "n", lty=1:6, col=1:6, pch=as.character(1:6)) } par(op) hp1 <- list(n = 10, n1 = 12, n2 = 2) for(h.nr in 1:1) { attach(hplist <- get(paste("hp",h.nr, sep='')) ) # defining n, n1, n2 cat("\n"); print(unlist(hplist)) N <- n1 + n2 x <- 0:max(0,min(n1, n2 - n)) d <- dhyper(x, n1, n2, n) names(d) <- as.character(x) print(d) } ### ----------- ----------- lfastchoose() etc ----------------------------- sapply(0:10, function(n) lfastchoose(n, c(0,n))) ## System lchoose gives non-sense: sapply(0:10, function(n) lchoose(n, c(0,n))) ## This is ok: sapply(0:10, function(n) f05lchoose(n, c(0,n))) ###---------- some gamma testing: ------------- pi - gamma(1/2)^2 for(n in 1:20) cat(n,":",formatC(log(prod(1:n)) - lgamma(n+1)),"\n") for(n in 1:20) cat(n,":",formatC(prod(1:n) - gamma(n+1)),"\n") ## in math/gamma.c : p1 <- c(0.83333333333333101837e-1, -.277777777735865004e-2, 0.793650576493454e-3, -.5951896861197e-3, 0.83645878922e-3, -.1633436431e-2) ### Bernoulli numbers Bern() : n <- 0:12 Bn <- n; for(i in n) Bn[i+1] <- Bern(i); cbind(n, Bn) system.time(Bern(30)) system.time(Bern(30)) if(FALSE){ ## rm(.Bernoulli) system.time(Bern(30)) system.time(Bern(80)) } ### lgammaAsymp() --- Asymptotic log gamma function : lgammaAsymp(3,0) for(n in 0:10) print(log(2) - lgammaAsymp(3,n)) for(n in 0:12) print((log(2*3*4*5) - lgammaAsymp(5+1,n)) / .Machine$double.eps) for(n in 0:12) print((log(720) - lgammaAsymp(6+1,n)) / .Machine$double.eps) for(n in 0:12) print((log(720) - lgamma (6+1) ) / .Machine$double.eps) for(n in 0:12) print((log(5040) - lgammaAsymp(7+1,n)) / .Machine$double.eps) ## look ok: curve(lgamma(x),-70,10, n= 1001) # ok curve(lgamma(x),-70,10, n=20001) # slowness of x11 !! curve(abs(gamma(x)),-70,10, n= 201, log = 'y') curve(abs(gamma(x)),-70,70, n=10001, log = 'y') par(new=T) curve(lgamma(x), -70,70, n= 1001, col = 'red') ## .., we should NOT use log - scale with negative values... [graph ok, now] curve(gamma(x),- 40,10, n= 2001, log = 'y') curve(abs(gamma(x)),-40,10, n= 2001, log = 'y') ##- Warning: NAs produced in function "gamma" x <- seq(-40,10, length = 201) plot(x, gamma(x), log = 'y', type='h') ###------------------- early dhyper() problem --------------------------------- ##- Date: 30 Apr 97 11:05:00 EST ##- From: "Ennapadam Venkatraman" ##- Subject: R-beta: dhyper bug?? ##- To: "r-testers" ##- Sender: owner-r-help@stat.math.ethz.ch ## I get the following incorrect answer ---- FIXED dhyper(34,410,312,49) # [1] 2.244973e-118 ##- when the coorect value is ##- dhyper(34,410,312,49)# (from S-Plus) ##- [1] 0.0218911 ##================== R is now correct ! stopifnot(all.equal(dhyper(34,410,312,49), 0.021891095726, tol=1e-11)) ##E.S. Venkatraman (venkat@biosta.mskcc.org) ### phyperR() === R version of pre-2004 C version in /src/nmath/phyper.c : ## This takes long, currently (1999??) {18 sec, on sophie [Ultra 1]} -- now all fast! k <- (1:100)*1000 system.time(phyper.k <- phyper(k, 2*k, 2*k, 2*k)) k <- 1000; phyperR(k, 2*k, 2*k, 2*k) - phyper.k[1] ## Debug: ------------ if(FALSE) # for now, when I source this if(interactive()) debug(phyperR) phyperR(k, 2*k, 2*k, 2*k) ## before while(): xb <- 2000 xr <- 0 ltrm <- -2768.21584356709 NR <- 2000 NB <- 0# new value