#### NON-central [dpq]chisq() #### =========== ======= (Since *central [dpq]Chisq <===> [dpq]gamma) library(DPQ) ### originally was ~/R/MM/NUMERICS/dpq-functions/chisq-nonc-ex.R ### - - - - - - - - - - - - - - - - - - - - - - - ### with _long_ history since R 0.63.x in 1999 ! ###__FIXME__ Already have 3 different ./wienergerm*.R files ### ===== Do remove things we have twice !!! stopifnot(exprs = { require(graphics) require(sfsmisc) # eaxis(), lseq(), p.m(), mult.fig(), sessionInfoX() }) source(system.file(package="DPQ", "test-tools.R", mustWork=TRUE))# ../inst/test-tools.R ## => showProc.time(), ... list_() , loadList() , readRDS_() , save2RDS() unlist(.Platform) ## For package-testing "diagnostics": sessionInfoX(c("DPQ","Rmpfr")) (noLdbl <- (.Machine$sizeof.longdouble <= 8)) ## TRUE when --disable-long-double ## very large ncp gave "infinite" loop in R <= 3.6.1 : ## ==> need new enough "3.6.1 patched" or R{-devel} > 3.6.x (okR_Lrg <- (getRversion() > "3.6.1" || getRversion() == "3.6.1" && R.version$`svn rev` >= 77145)) (doExtras <- okR_Lrg && DPQ:::doExtras() && !grepl("valgrind", R.home())) ## save directory (to read from): (sdir <- system.file("safe", package="DPQ")) ## on "my" platform, and if doExtras, I'm very strict: (myPlatf <- all(Sys.info()[c("sysname", "machine", "login")] == c("Linux", "x86_64", "maechler"))) (beStrict <- doExtras && !noLdbl && myPlatf) (is32 <- .Machine$sizeof.pointer == 4) ## <- should work uniformly on Linux/MacOS/Windows if(!dev.interactive(orNone=TRUE)) pdf("chisq-nonc-1.pdf") .O.P. <- par(no.readonly=TRUE) showProc.time() ### Part 1 : Densities dchisq(*, ncp) ### ---------------------------------- ### densities alone : ## ===> shows Normal limit (for lambda -> Inf; true also for nu -> Inf) nu <- 12 nS <- length(ncSet <- if(doExtras) 10^(0:9) else 10^(0:6)) np <- if(doExtras) 201 else 64 cpUse <- numeric(nS); names(cpUse) <- formatC(ncSet) mult.fig(nS, main = paste("non-central chisq(*, df=",nu, ") and normal approx"))$old.par -> op for(NC in ncSet) { m <- NC + nu s <- sqrt(2*(nu + 2*NC)) x <- seq(from= m - 3*s, to= m + 3*s, length = np) cpUse[formatC(NC)] <- system.time(y <- dchisq(x, df=nu, ncp=NC))[1] plot(x, y, ylim=c(0,max(y)),type = "l", ylab='f(x)', main=paste("ncp =",NC)) lines(x, dnorm(x,m=m,s=s), col = 'blue') } par(op)# resetting mult.fig() showProc.time() cbind(ncSet, cpUse, "c/ncp"= cpUse / ncSet) ## fails on Win 32b: "need finite 'ylim' values" : try(plot(cpUse ~ ncSet, log = "xy", type = 'b', col = 2)) if(doExtras) try(# fails occasionally (too many zeros) print(summary(lmll <- lm(log(cpUse) ~ log(ncSet), subset = ncSet >= 1e4))) ) ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -9.099690 0.100188 -90.83 1.20e-10 *** ## log(ncSet) 0.494316 0.005548 89.09 1.35e-10 *** ## ## Residual standard error: 0.08279 on 6 degrees of freedom ## Multiple R-Squared: 0.9992, Adjusted R-squared: 0.9991 <<- ! ## F-statistic: 7938 on 1 and 6 DF, p-value: 1.347e-10 ## => log(cpUse) ~= -9.1 + 0.494*log(ncSet) ## <==> cpUse proportional to sqrt(ncp) ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ## further experimenting shows, that only values in the center of the density ## take longer times! ==> exactly where the normal approx (or others!) is good ###--- Now, limit for nu = df --> Inf : ncp <- 16 nS <- length(dfSet <- 10^(if(doExtras) 0:11 else 0:8)) cpUse <- numeric(nS); names(cpUse) <- formatC(dfSet) oPar <- mult.fig(nS, main = "non-central chisq(ncp = 16) and normal approx")$old.par for(DF in dfSet) { m <- DF + ncp s <- sqrt(2*(DF + 2*ncp)) x <- seq(from= m - 3*s, to= m + 3*s, length = np) cpUse[formatC(DF)] <- system.time(y <- dchisq(x, df=DF, ncp=ncp))[1] plot(x, y, ylim=c(0,max(y)),type = "l", ylab='f(x)', main=paste("df =",DF)) lines(x, dnorm(x,m=m,s=s), col = 'blue', lty=2) } par(oPar) cbind(dfSet, cpUse, "c/df"= cpUse / dfSet) ## remains fast! showProc.time() ## source("~/R/MM/NUMERICS/dpq-functions/dnchisq-fn.R")# dnoncentchisq() etc ## R curve(dnoncentchisq(x, df=3, ncp=0), 0, 10) curve(dchisq (x, df=3), 0, 10, add=TRUE, col='purple') ## ok curve(dnoncentchisq(x, df=3, ncp=1), 0, 10) curve(dchisq (x, df=3, ncp=1), 0, 10, add=TRUE, col='purple') #ditto x <- seq(0, 10, length=101) del <- c(0:4,10,40) res <- matrix(NA, nr=length(x), nc=length(del), dimnames=list(NULL, paste0("ncp=",del))) for(id in seq(along=del)) res[,id] <- dnoncentchisq(x=x, df=3, ncp=del[id]) matplot(x, res) title("dnoncentchisq(*, df=3, ncp = ..) & dchisq(..)") l.pch <- as.character(seq_along(del)) legend("topright", paste("ncp =", del), col=1:6, pch=l.pch) res2 <- outer(x, del, function(x,del)dchisq(x=x, 3, ncp=del)) matplot(x, res2, add=TRUE) # practically no difference visible! signif( cbind(x, abs(1 - res/res2)) , digits=4) matplot(x, abs(1 - res/res2)[,-1], type="b", lty=1, log="y", yaxt="n"); eaxis(2) title("Rel.Err |1 - dnoncentchisq(*, df=3, ncp = ..) / dchisq(..)|") legend("bottomright", paste("ncp =", del[-1]), col=1:6, lty=1, pch=l.pch, bty="n") showProc.time() ###---- March 2008 ----- "large ncp" : n <- if(doExtras) 1e4 else 512 ## From: Martin Maechler ## To: Peter Dalgaard ## Subject: Re: non-central chisq density ## Date: Thu, 27 Mar 2008 22:22:15 +0100 ## [...............] ## Hi Peter, ## I've recently looked at problems in computing the non-central ## beta density for largish non-centrality parameter, ## which made me eventually considering the non-central chisq, ## and I have recalled your R News (Vol.1, nr.1; 2001) ## article on its big improvement by finding the maximal term in ## the sum and then sum outwards; ## and of course, the same idea will be applicable to the dnbeta() ## as well. ## However, for bigger ncp things, become eventually unfeasible as ## you've also noted in your article. ## Hoever, there, you've mentioned that the new method would work ## even up to ncp = 100'000^2 which I found astonishing (but not wrong) ## because I saw much potential for underflow already in the ## central term. ## Also, I saw that the current pnchisq() does not compute the ## log-density more accurately in places where the density ## underflows to zero... curve(dchisq(x, df=3, ncp=30000, log=TRUE), 0, 50000) ## also not a big deal, but for me one more reason to look ## at normal / central approximation formula for "large" ncp .. ## for all (or many of) the non-central distributions. ## Anyway, I started to look a bit more closely and then saw this d <- 1e6; curve(dchisq(x, df=3, ncp=d, log=TRUE), .98*d, 1.02*d, n=n) ## when going to smaller ncp and looking more closely something like curve(dchisq(x, df=3, ncp=30000, log=TRUE), 27300, 27500, n=n) ## which you may find amusing.... ## all this no need for immediate action, but something to ## consider, and as said, ## I'm looking into this first for dnbeta(), but then more ## generally. ## All in all, your R News article has been again a very nice piece ## of inspiration. ## ## Martin ## dchisqAsym (x, df, ncp, log = FALSE) --> ../R/dnchisq-fn.R ## ---------- ~~~~~~~~~~~~ curve(dchisq(x, df=3, ncp=30000, log=TRUE), 26000, 34000, n=n) curve(dchisqAsym(x, df=3, ncp=30000, log=TRUE), add=TRUE, col="purple", n=n) curve(dnorm(x, m=3+30000, sd=sqrt(2*(3 + 2*30000)), log=TRUE), add = TRUE, col="blue", n=n) ##==> It seems the chisqAsym() approximation is slightly better; ## also from this : x <- rchisq(if(doExtras) 1e6 else 1e4, df=3, ncp=30000) (sN <- sum(dnorm(x, m=3+30000, sd=sqrt(2*(3 + 2*30000)), log=TRUE))) (sCh<- sum(dchisqAsym(x, df=3, ncp=30000, log=TRUE))) ## larger (less negative) <-> better all.equal(sN, sCh) # ... 2.6887e-6" [Win 32b: 2.873e-5] ## dnchisqBessel(x, df, ncp, log = FALSE) --> ../R/dnchisq-fn.R ## ------------- ~~~~~~~~~~~~ ## From ?pl2curves() [ == ../man/pl2curves.Rd ] : p.dnchiB <- function(df, ncp, log=FALSE, from=0, to = 2*ncp, p.log="", n = if(doExtras) 2001 else 512, ...) { pl2curves(dnchisqBessel, dchisq, df=df, ncp=ncp, log=log, from=from, to=to, p.log=p.log, n=n, ...) } ## simple check stopifnot(all.equal(dchisq(1:30, df=3, ncp=1:30), dnchisqBessel(1:30, df = 3, ncp = 1:30), tol = 1e-13)) ## tol=0 --> "Mean rel.diff.: 2.3378e-14" p.dnchiB(df=1.2, ncp=500,, 200, 800) p.dnchiB(df=1.2, ncp=500, log=TRUE)# differ in tail p.dnchiB(df=20, ncp=500,, 200, 800) p.dnchiB(df=20, ncp=500, log=TRUE) # ok (differ for large x) p.dnchiB(df=20, ncp=100) # looks good p.dnchiB(df=20, ncp=100, , 0, 500, p.log="y") # looks good too (differ large x) p.dnchiB(df=20, ncp=100, log=TRUE, 0,500) # the same p.dnchiB(df=20, ncp=200, log=TRUE, 0,600) # the same p.dnchiB(df=35, ncp=400, log=TRUE, 0,1500) # the same p.dnchiB(df= 3, ncp=600, log=TRUE, 0,2500) # the same p.dnchiB(df= 3, ncp=800, log=TRUE, 0,3500) # for large x --> NaN in besselI ## However, large ncp -- gives overflow in besselI(): dnchisqBessel(8000, df=20, ncp=5000) ## NaN -- no longer: now 1.3197e-78 ## Hmm, I'm slightly confused that the cutoff seems at 1500 [ < 1e4 !] x <- if(doExtras) 1000:1600 else seq(1000, 1600, by = 5) plot (x, besselI(x, 9, TRUE), type="l") ## Warning message: ## In besselI(x, nu, 1 + as.logical(expon.scaled)) : NaNs produced lines(x, besselI(x, 1.2, TRUE), col=2) lines(x, besselI(x, 1.0, TRUE), col=2) lines(x, besselI(x, 0.1, TRUE), col=2) lines(x, besselI(x, 1.8, TRUE), col=2) ### OTOH: Bessel asymptotic I_a(y) ~ exp(y) / sqrt(2*pi*y) for y >> a lines(x, 1/sqrt(2*pi*x), col=3, lty=3, lwd=3) ## hmm, looks like the nu=1.2 case, but *not* the nu=9 one ?? lines(x, besselI(x, 2.2, TRUE), col="blue") lines(x, besselI(x, 3.2, TRUE), col="blue") lines(x, besselI(x, 4.2, TRUE), col="blue") lines(x, besselI(x, 5.2, TRUE), col="blue") lines(x, besselI(x, 6.2, TRUE), col="blue") lines(x, besselI(x, 7.2, TRUE), col="blue") lines(x, besselI(x, 8.2, TRUE), col="blue") ##--> Need asymptotic for besselI(x, nu) with a term that depends on nu ##--> ...bessel-large-x.R and better ~/R/Pkgs/Bessel/ ## ~~~~~~~~~~~~~~~~~~~[April 2008] ================ showProc.time() ### Part 2 : pchisq (non-central!) ### ------------------------------- if(!dev.interactive(orNone=TRUE)) { dev.off(); pdf("chisq-nonc-2.pdf") } ## source("/u/maechler/R/MM/NUMERICS/dpq-functions/pnchisq.R")#-> pnchisq(), pnchisqV() ## In examples ../man/pnchisqAppr.Rd --------- ## ((again there at beginning)) ### Note Brian's change (which completely broke df=0 case !) for R 2.3.0: ## r37287 | ripley | 2006-02-07 23:12:38 +0100 (Tue, 07 Feb 2006) | 6 lines ## improvements to [pq]nchisq ## - use direct formula which allows for lower_tail = FALSE if ncp < 80 ## (this is often a lot more accurate). ## - use starting point and lower_tail in qnchisq ## - can be slower, so make interruptible ## --- df = 0 ------------ stopifnot(pchisq(0:10, 0,1) >= exp(-1/2)) ## gave NaN from 2.3.0 to 2.6.1 ## For a series of ncp = lambda : lam <- seq(0,100, by=.25) p00 <- pchisq(0, df=0, ncp=lam) p.0 <- pchisq(1e-300, df=0, ncp=lam) stopifnot(all.equal(p00, exp(-lam/2), tol=2e-16),# '0' (when compiled alike) all.equal(p.0, exp(-lam/2), tol=4e-16))# was "1e-100" aka tol=0 .. ###------ ### Accuracy buglet(s) : ## df -> 0 : pnchisq() allows this, but it's pretty wierd : ## ------- ## (and S-plus does it better !!) ## Theory: (df=0, ncp=0 ) is point mass (1) at 0 --> 1-P == 0 everywhere ## ------ (df=0, ncp= L) has point mass exp(-L/2) at 0 plot(function(x)pchisq(x, df=0, ncp=0, lower=FALSE),1e-1, 5000,log="x")## fine (all 0) plot(function(x)pchisq(x, df=0, ncp=0, lower=FALSE),2000, 5000) ## all = 0 plot(function(x)pchisq(x, df=1e-4, ncp=0, lower=FALSE),2000, 5000) ## all = 0 still ## this is ok (2014-04) plot(function(x)pchisq(x, df=1e-4, ncp=0, lower=FALSE),1e-1, 5000, log="xy")## !? ## The R version of this: curve( pnchisqV (x, df=1e-4, ncp=0, lower=FALSE), add=TRUE, col=adjustcolor(2,1/2), lwd=4) ## central chisq is ok here: curve( pchisq(x, df=1e-4, lower=FALSE),add = TRUE, col = "red") curve( pchisq(x, df=1e-4, lower=FALSE),1e-1, 5000)#, add = TRUE) curve( pchisq(x, df=1e-4, lower=FALSE),1e-1, 5000, log = 'xy') ##--- but the problem persists for df > 0 for small non-zero ncp: curve( pchisq(x, df=0.01, ncp = 0.1), 1e-1, 5000, log="x") # fine par(new=TRUE) curve( pchisq(x, df=0.01, ncp = 0.1, lower=FALSE,log=TRUE), 1e-1, 5000, log="x", ylab="", yaxt="n", col=2) axis(4, col.axis=2); mtext("log(1 - p)", 4, col=2) ## --> underflows to -Inf [because it computes log(.) ###--- this was "noncentral-ex.R" : x <- x10 <- 10^(-300:300)#-> this is x-range is NOT plottable! x <- x10 <- 10^(-150:150)#-> *is* plottable system.time(pch.x10 <- pchisq(x,x ))# 0.01 in R; 4.77 in Splus 3.4 system.time(pch.x10n <- pchisq(x,x,ncp=1e-10))#-- hangs for ever [R <= 0.63.3] ## R 1.2.x : 0.57 ## in S-plus 3.4: ##- Error in .C("S_ncchisq_prob",: subroutine S_ncchisq_prob: ##- 284 Inf value(s) in argument 2 ##- Dumped ##- Timing stopped at: 4.77 0.00999999 5 0 0 stopifnot(is.na(pch.x10) == is.na(pch.x10n))#> TRUE R & Splus 3.4 stopifnot(!any(is.na(pch.x10))) summary(pch.x10 - pch.x10n) ## Splus: ##- Min. 1st Qu. Median Mean 3rd Qu. Max. NA's ##- 0 0 0 0 0 0 284 ## R 1.2.x: ##- Min. 1st Qu. Median Mean 3rd Qu. Max. ##- -9.054e-09 5.000e-11 5.000e-11 2.426e-01 5.000e-01 5.000e-01 ## Much better now: Max = 5e-11 ## However, closer inspection reveals summary(pch.x10[pch.x10 != 1]) ##-R Min. 1st Qu. Median Mean 3rd Qu. Max. ##-R 0.5000 0.5000 0.5000 0.5271 0.5000 1.0000 ##-S+3.4 Min. 1st Qu. Median Mean 3rd Qu. Max. ##-S+3.4 -Inf -Inf -Inf -Inf -Inf 1 summary(pch.x10[pch.x10 != 1 & is.finite(pch.x10)]) ##- Min. 1st Qu. Median Mean 3rd Qu. Max. ##- 0.4912 0.5009 0.9294 0.7696 1 1 ###----- less extreme x values: rele.pch <- function(x) 1 - pchisq(x,x,ncp=1e-100) / pchisq(x,x) (rl <- rele.pch(2^(-12:10))) stopifnot(rl == 0)## << S-plus 3.4, later R ## The uniroot no longer works, but look at curve(1-pchisq(x,1,1), 69, 1500, n = 2001, main="1-pchisq(x,1,1), x in [69, 1500]", sub = R.version.string) axis(1, at= c(69, 1500)) abline(h=0, col="gray70") ##--> lots of noise -- but around correct value = 1 ## now, all fine : pc1 <- curve(pchisq(x,1,1, lower.tail=FALSE), 69, 1500, n = 2001, log = "y", main="1-pchisq(x,1,1), x in [69, 1500]", sub = R.version.string) ## 1-P of course jumps to 0 *much* earlier: plot(pc1, type="l", log="y", yaxt="n", ylab="", main="1-pchisq(x,1,1), x in [69, 1500]", sub = R.version.string) eaxis(2) ## whereas this underflows much much much earlier (at x ~ 100) curve(1-pchisq(x,1,1), add=TRUE, col=adjustcolor("red", 0.5), lwd=3, n = 2001) x <- 100:1511 p <- pchisq(x,1,1, lower=FALSE) stopifnot(0 <= p, p <= 2e-19) lp <- log(p) stopifnot(is.finite(lp), lp <= -43, abs(diff(lp) + 0.475) < 0.02) showProc.time() ### try other (df,ncp) -- compare with Wienergerm approx. ## setwd("/u/maechler/R/MM/NUMERICS/dpq-functions/") ## source("wienergerm_nchisq-fn.R") ## dyn.load("wienergerm_nchisq.so") p.pchUp <- function(df, ncp, from, to, log.p=FALSE, n=2001) { c1 <- curve(pchisq(x, df=df, ncp=ncp, lower.tail=FALSE, log.p=log.p), from=from, to=to, n=n, main = paste0("pchisq(x, ",formatC(df),", ",formatC(ncp), ", lower=F", if(log.p)", log=T", "),", " x in [",formatC(from),", ", formatC(to), "]"), sub = R.version.string) axis(1, at= c(from, to)) abline(h=0, col="gray70", lty=3) c2 <- curve(pchisqW(x, df=df, ncp=ncp, lower.tail=FALSE, log.p=log.p), n=n, add = TRUE, col = 2) legend("topright", xjust = 1/2, yjust = 1.1, c("normal", "Wienerg.approx"), col=1:2, lty=1) invisible(data.frame(x = c1$x, pchisq = c1$y, pchisqW = c2$y)) } ## in all these, Wienerg.approx. looks very good: x >> df+ncp p.pchUp( 0.1, 0.1, 51, 1500) p.pchUp( 1, 1, 69, 1500) p.pchUp( 1, 1, 69, 1500, log=TRUE)# small discrepancy for largish x p.pchUp( 2.5, 0.02, 61, 1500, log=TRUE)# (nothing visible) p.pchUp( 25, 10, 150, 1600, log=TRUE)# discrepancy largish x summary(warnings()) ; showProc.time() p.pchUp( 100, 500, 980, 1900)# normal:noise(and cutoff at 1); Wienerg: smooth p.pchUp( 100, 500, 980, 1900, log=TRUE)# pchisq() breaks down p.pchUp( 500, 100, 897, 3000) p.pchUp( 500, 100, 897, 3000, log=TRUE)# pchisq break down p.pchUp( 5e3, 100, 5795, 1.e4) # zoom in.. p.pchUp( 5e3, 100, 5777, 6000) # --> Wiener has less noise long before! ## (but it also is systematically a bit larger - correct?) summary(warnings()) ; showProc.time() if(doExtras) withAutoprint({ # ----------------------------------- ## Now have m + 5*s cutoff, ... cc <- p.pchUp( 5e3, 5e3, 10400, 11e3) # still pchisq() jumps to 0 at 10866.2, too early p.m(cc, type="l", log="y", lwd=c(1,3), col=c("black", adjustcolor("red",0.5))) ## now (larger cutoff) "fine" but also too early to jump to zero: c2 <- p.pchUp( 1e5, 2e4, 11.6e4, 12.6e4, n=1001) # see Wienergerm-singularity !! p.m(c2, type="l", log="y", lwd=c(1,3), col=c("black", adjustcolor("red",0.5))) ## Still shows that the m + 5*s cutoff is too early! p.pchUp( 5e3, 5e3, 10800, 11e3) cc <- p.pchUp( 5e3, 5e3, 8000, 20e3) p.m(cc, type="l", log="y", lwd=c(1,3), col=c("black", adjustcolor("red",0.5))) p.pchUp( 1e5, 2e4, 12.25e4, 12.35e4)# m + 5*s __much__ too early here.. showProc.time() # ~ 0.5 sec }) ## only if(doExtras) ----------------------------------- ### NOTA BENE: We have the *big* problem when s ~= 1, x <= ncp+df ### --------- --------------------------------------------------- ### this is unsolved and the reason we have not yet ... ### ==> conclusion: Use Wienergerm as soon as P > 1 - 1e-12, ## ---------- but probably much earlier ## To *find* that P > 1 - 1e-12 we could try a cheap qchisq() approx. ## unfortunately, these are quite INaccurate in the tails... ## when pnchisq.c is RE-compiled with -DDEBUG_pnch ## these give interesting output ## Simulate this, using pnchisq() ## Ok, "now" for ncp <= 80, we use direct formula ## "now" := r37287 | ripley | 2006-02-07 23:12:38 ## ## ---> these no longer use old algo: ## Case lt n(#it) pnchisq(1000, 1,1, verbose=2)# 2 -496.8 666 pnchisq(1300, 1,1)# 2 -646.6 838 pnchisq(1400, 1,1)# 2 -696.6 895 pnchisq(1420, 1,1)# 2 -706.6 906 pnchisq(1422, 1,1)# 2 -707.6 907 pnchisq(1425, 1,1)# 2 -709.6 L + x large --> 1 pnchisq(1430, 1,1)# 2 -711.6 L + x large --> 1 pnchisq(1490, 1,1)# 2 -741.6 L + x large --> 1 ## With the newly (2003-01-16) visible warning [no longer; 2004-02] pchisq(1e-5, df=100, ncp=1) ## [1] 0 ##- Warning message: ##- too large x (= 1e-05) or noncentrality parameter 1 for current algorithm; ##- result is probably invalid! pnchisq(1e-5, df=100, ncp=1, verbose = TRUE) ## lt= -758.8, t=exp(lt)= 0 ##- _OL_: n= 1 fx.2n = 102 > 0 ==> flag ## [1] 0 ## where as pnchisq(10e-5, df=100, ncp=1, verbose = TRUE) ## lt= -643.7, t=exp(lt)= 2.92e-280 ## _OL_: n= 1 fx.2n = 102 > 0 ==> flag ## [1] 1.771154e-280 ##---------------- another bug: large x with ncp > 0 ### NOTA BENE: Fix this with "Wiener Germ Approx." formula !!! ### now (R 1.8.x at least) ok mult.fig(3, main = "pchisq(x >= 1497,*, ncp=) BUG (no longer!)")$old.par -> op curve(pchisq(x, df=1, ncp= 1), from=1,to=1e4, log='x', main="ncp = 1") curve(pchisq(x, df=1, ncp=300), from=1,to=1e4, log='x', main="ncp = 300") curve(pchisq(x, df=1, ncp=0), from=1,to=1e4, log='x', main="ncp = 0") par(op) ## still (2004-01 ... 2014-01 !!) true: ## now looking closer, at the upper tail {algorithm is not good on log scale!} curve(pchisq(x, df=1, ncp=0, lower=FALSE,log=TRUE), from=1,to=1e4, log='x', main="ncp = 0")# -> goes down to -700 or so ## ncp > 80 is different .. xp <- curve(pchisq(x, df=1, ncp=300, lower=FALSE,log=TRUE), xaxt="n", from=1, to=1e4, log='x', main="ncp = 300, log=TRUE")# only down to ~ -25 eaxis(1, sub10=2) ## .. hmm, really bad... ## .. the reason is that we compute on (lower=TRUE, log=FALSE) scale and only then transform: ## --> gives warnings! (and 'verbose' output): curve(pchisq(x, df=1, ncp=300, lower=FALSE), from=100,to=2000, log='xy', main="ncp = 300, upper tail", axes=FALSE) -> pxy summary(warnings()) eaxis(1, sub10=3); eaxis(2) curve(pnchisqV(x, df=1, ncp=300, errmax = 4e-16, lower=FALSE, verbose=1),# ,log=TRUE), add = TRUE, col=2); mtext("ncp = 300 -- pnchisqV() pure R", col=2) showProc.time() ## also seems to hang (or take much too long?) on Winbuilder [32 bit *and* 64 bit ] if(.Platform$OS.type == "unix" && !noLdbl) withAutoprint({ pncRC <- pnchisqRC(pxy$x, df=1, ncp=300, lower=FALSE, verbose=1) all.equal(pxy$y, pncRC, tol = 0)# "often" TRUE, depends on exact R version, etc stopifnot( all.equal(pxy$y, pncRC, tol = if(noLdbl) 5e-14 else 0)# noLdbl: seen 1.129e-14 ) summary(warnings()) showProc.time() })# ---------------------only if(.. "unix" ....)---------------------------- ## Really large 'df' and 'x' -- "case I": ## no problem anymore: f <- c(.9,.999,.99999, 1, 1.00001,1.111, 1.1) x <- 1e18*f stopifnot(exprs = { all.equal(pchisq(x, df=1e18, ncp=1) -> p, c(0,0,0, 1/2, 1,1,1)) all.equal(p, pnchisqRC(x, df=1e18, ncp=1), tol = 4e-16) # see 0 }) ## case I -- underflow protection large x --> 1 tt <- 10^-(6:12) stopifnot(!is.unsorted(xm <- 1e18*(1 + c(-tt, 0, rev(tt))))) (pn <- pnchisqV (xm, df=1e18, ncp=1)) #-> 0...1 is correct pp <- pchisq (xm, df=1e18, ncp=1) ## if(.Platform$OS.type == "unix") withAutoprint({ #------------------- pp. <- pnchisqRC(xm, df=1e18, ncp=1, verbose=1) ## Pnchisq_R(x, f, th, ... lower.tail=1, log.p=0, cut_ncp=80, it_simple=110, ## errmax=1e-12, reltol=1.77636e-15, epsS=8.88178e-16, itrmax=1000000, verbose=1) ## --> n:= max(length(.),..) = 15 ## but then does *NOT* terminate in time on Winbuilder all(pp == pp.)# >>> TRUE : *RC is also C code, perfect all.equal(pp, pn, tol = 0) # see 1.6e-16 (even on Win 32b) if(doExtras && !noLdbl) # who knows .. stopifnot(pp == pp.) stopifnot(exprs = { all.equal(pp, pp., tol = 1e-15) # see 0 all.equal(pp, pn, tol = 1e-15) # see 1.6e-16 }) })## only if( .. unix .. ) ## (also "problematic" with Wienergerm: s=0) showProc.time()#----------------- ### largish f and ncp {no problem visible here, but see below} curve(pchisq(x, df= 10000, ncp=300), from=1e-3, to=20000, log='x', main="ncp = 300") curve(pchisq(x, df= 10000, ncp=300), from=2000, to=20000, log='x', main="ncp = 300") x <- seq(3000,11000, length=201) if(FALSE)## to see the break: x <- seq(5500,11000, length=201) px <- pchisq(x, df=10000, ncp=300, log=TRUE) plot(x, px, type='l', col=2, lwd=2, main="pchisq(*, df=10000,ncp=300, log=TRUE))") head(px, 20) ## for the (5500,11000): -Inf -Inf ..... -Inf -650.2379 -640.3743 -630.6.. showProc.time() pnchisq(5500, df= 10000, ncp=300, verbose=2) ## lt= -744.4 => exp(lt) underflow protection ln(x)= 8.612503 ## _OL_: n= 1 n= 1, fx.2n = 4502 > 0 ## BREAK n= 1 ; bound= 0 <= errmax rel.err= NaN <= reltol ## New: allow large ncp {this now (that we use reltol !) *TAKES TIME*} curve(pnchisqV(x, df= 10000, ncp= 4000), from=12000, to= 16000, main="df = 10000, ncp = 4000") curve(400*dchisq(x, df= 10000, ncp= 4000), add = TRUE, col = "green") ## R's pchisq() looks fine now: curve(pchisq(x, df= 10000, ncp= 4000), add=TRUE, col=adjustcolor("blue",1/4), lwd=4) curve(pnchisqV(x, df= 16e3, ncp= 16e3), from=30e3, to= 35e3, main="df = 16e3, ncp = 16e3") curve(400*dchisq(x, df= 16e3, ncp= 16e3), add = TRUE, col = adjustcolor("green4",.5), lwd=3) showProc.time() if(doExtras) withAutoprint({ ## current R version: -- (also relatively slow, but much faster!) *and* non-convergence warning rr <- curve(pchisq(x, df= 10000, ncp=3e5), type = "o", cex = 1/2, n = 49) summary(warnings()) ## all non-convergences (but *looks* ok) ## non-convergence in 100000 iterations : -- S.L.O.W. (~ 1 min. on 2014 lynne) rV <- curve(pnchisqV(x, df= 10000, ncp=3e5), n = 49, from=3.13e5, to= 3.14e5, main="ncp = 3e5 - pnchisqV()") summary(warnings()) identical(rr$x, rV$x) showProc.time()#----------------- }) ### NOTA BENE: dnchisq() has a similar sum and the following i.Max imaxD <- function(x,df,lambda) pmax(0, ceiling(0.25* (-(2+df) +sqrt((df-2)^2 + 4*lambda* x)))) ## A few test comparisons with ssR4[,,] : ## ==> imaxD() is too small (has correct order of magnitude, unless for ## p(x) > 1-1e-4 ### Investigate pnchisq() algorithm sum(v[i] * t[i]) : ### ### rr(i) : is it increasing / decreasing / maximal / .. ? plRpois(lambda = 1000) mult.fig(8, main = r_pois_expr, tit.wid = 6)$old.par -> op for(la in c(5,20,50,100,200,500,1500,5000)) plRpois(lambda = la, do.main=FALSE) par(op) ## -> Wow! ## 1) Always decreasing; ## 2) r(i,lambda) < lambda / i and ~=~ for i < lambda) ## How well approximation from *ratio* point of view: ## Interesting i < ~ lambda (ok, clearly improvable): ## ----------- i >= lambda : need better approx plotrq <- function(lambda, i = 1:(3*round(lambda))) { lab <- as.expression(substitute(lambda==la, list(la=lambda))) plot(i, r_pois(i,lam=lambda) / (lambda/i), type='b', cex=.4, col=2) abline(v=lambda, col='gray', lty=2) axis(3, at = lambda, label = lab) } plotrq(10) mult.fig(4, main = " r_pois(i) / (lambda/i)")$old.par -> op plotrq(20) plotrq(50) plotrq(100) plotrq(500) par(op) showProc.time() ## How well approximation from difference point of view: ## Interesting i < ~ lambda (ok, clearly improvable): ## ----------- i >= lambda : need better approx plotDr <- function(lambda, i = 1:(4*round(lambda))) { lab <- as.expression(substitute(lambda==la, list(la=lambda))) plot(i, (lambda/i) - r_pois(i,lam=lambda), type='b', cex=.4, col=2) abline(v=lambda, col='gray', lty=2) axis(3, at = lambda, label = lab) } mult.fig(9, main = quote(plotDr(lambda)))$old.par -> op plotDr( 4) plotDr(10) plotDr(20) plotDr(50) plotDr(100) plotDr(200) plotDr(500) plotDr(1000) plotDr(2000)## oops: problem (no longer !) par(op) ### Now back to the original problem: ### Using ss() terms and see where they are maximal, etc. (pR <- pnchisq (1.2,df=1,ncp=3, verbose=FALSE))# iter = 12, now 13 all.equal(c(pR), pnchisq_ss(1.2,df=1,ncp=3), tol=0)# 2.19e-12, now 9.61e-14, ## 6.4e-16 on Win 32b ! (pR <- pnchisq (1.2,df=1,ncp=30, verbose=FALSE))# iter = 12, now 16 all.equal(pR, pnchisq_ss(1.2,df=1,ncp=30), tol= 2e-13) ## was 2.616 e-8 (thanks to 'reltol'!) (pR <- pnchisq (1.2,df=1,ncp=30, verbose=FALSE,reltol=3e-16))# 19 it. all.equal(pR, pnchisq_ss(1.2,df=1,ncp=30), tol= 2e-16) str(sss <- ss(1.2,df=1,ncp=30))# s[1:161], max = 3 plot(sss$s, type="h", col=2) ## i: for log-ax bug (warning) {still not nice looking range(which(i <- 1e8*sss$s > .Machine$double.xmin * sss$s[sss$max]))# 1:160 plot(sss$s[i], type="b", col=2, log = 'xy') ## Which indices are relevant for the sum? range(which(ii <- sss$s > .Machine$double.eps * sss$s[sss$max])) ## 1:19 -- as we had 19 iterations above! stopifnot(sum(sss$s[ii]) == sum(sss$s)) ## Left tail probabilities are now much better: (pR <- pnchisq (1.2, df=100, ncp=30, verbose=FALSE,reltol=3e-16)) ## 5.384254 e-83 , 12 iter. pchisq (1.2, df=100, ncp=30) ## 4.461632 e-83, which is identical to pnchisq (1.2, df=100, ncp=30, reltol=1)# =^= "old" C code (1 iter!) ### What about large df and x -- #{terms} ? str(sss <- ss(100,100, 1e-3))# 1 469 pnchisq_ss(100,100,1e-3) pchisq (100,100,1e-3) ((Ss <- sum(sss$s)) - sum(rev(sss$s)))/Ss # -1.9286 e-16 ss2(100,100, 1e-3) ##- i1 i2 iN1 iN2 max ##- 1 469 1 71 1 Ns <- 2^c(-200, -15, -5, -1:15, 30, 100) names(Ns) <- paste("2",formatC(log(Ns,2)),sep="^") tab.ss1c <- t(sapply(Ns, function(u) ss2(100,100,ncp=u, i.max=10000))) tab.ss1c ##-> i2 is "constant": 469 (or 468);problems from ncp >= 2^12 = 4096 tab.ss10 <- t(sapply(Ns, function(u) ss2(10,10, ncp=u, i.max=10000))) cbind(tab.ss10, tab.ss1c) ## only from ncp ~= 2^6, things change (t1k.1c <- t(sapply(Ns, function(u) ss2(1000,100, ncp=u)))) ## even with i.max = 100000, thing "go wrong" from ncp = 2^11 str(s.. <- ss(1000,10, 2048)) s..$s[1:400] #-- sequence diverges to +Inf -- can we better re-scale? ## (yes, we can: pnchisq() does so -- leave this for now) ## Now vary x from small to large: (t.x.1k <- t(sapply(Ns, function(x) ss2(x,df=100, ncp=100))))# probl. from 2^11 str(s <- ss(1000,100, ncp=3000)) str(s <- ss(100,100, ncp=1000)) ## $ s : num [1:469] 1.00e+00 4.91e+02 1.18e+05 1.86e+07 2.16e+09 ... ## $ i1 : int 1 ## $ max: int 136 s$s[s$max] # 1.4 e-130 : down scaled ss2(100,100, ncp=1000) ##- i1 i2 iN1 iN2 max ##- 1 468 68 216 136 ss2(100,100, ncp=2000) ##- i1 i2 iN1 iN2 max ##- 95 326 118 296 201 ## But: all( ss(100,100,5000)$s == 0) # TRUE -- no longer ## because lu needs much better scaling "-lambda" is too much ## "Fixed" : table( ss(100,100, ncp=5000)$s ) ## only values in {0, Inf}, mostly Inf ! ##==> give up for these high ncp for the moment! showProc.time() ## Instead use C - code which parallels pnchisq()'s in C: ## dyn.load("/u/maechler/R/MM/NUMERICS/dpq-functions/pnchisq-it.so") str(pit <- pnchisqIT(3,2,4))# 1:21 stopifnot(with(pit, all.equal(sum(terms), prob))) ## this is a bit funny: all 0 terms stopifnot(with(pit2 <- pnchisqIT(100,100,5000), all.equal(sum(terms), prob))) all(pit2$terms == 0)# TRUE stopifnot(with(pit3 <- pnchisqIT(100,100,5), all.equal(sum(terms), prob, tol=1e-15))) str(pit3)# 1:69 str(pit <- pnchisqIT(10000,10000,5))# 567 str(pit <- pnchisqIT(10004,10000,5))# 569 terms str(pit <- pnchisqIT(10010,10000,5))# 572 terms (i0=0) str(pit <- pnchisqIT(12000,10000,5))# 1612 terms (i0=0) ## hmm, quite interesting: plot(pit$terms,type='l') par(new=TRUE) plot(pit$terms,type='l', log = 'y',yaxt='n',col=2)# looks like -x^2 ! axis(4, col.axis=2) summary(pit$terms) # max = 0.005150251 -- the first few 100 are unneeded str(pit <- pnchisqIT(12000,10000,5000))# 2442 terms, i0=877; max.term= 2.5e-60 ! ## many unneeded terms! str(pit <- pnchisqIT(15000,10000,5000))# 3189 terms, i0=877; max=.003287 str(pit <- pnchisqIT(20000,10000,5))# -> 1 immediately {0 terms} ## Now use ss2.() for the "term statistics": ss2.(15000,10000, 5000)# 3189 terms, i0=877 ss2.(1, 10000, 5000)# immediate 0 -> 1 term only ss2.(1e5, 10000, 5000)# immediate 1 -> "0 terms" ## Takes (already quite a bit) time: rs <- sapply(14990:15010, function(x) ss2.(x,10000,5000)) t(rs) ## as expected : n.terms gives proper 'right border': stopifnot(rs["i2",] == rs["nT", ], rs["i2",] == rs["iN2", ]) ## swap df & ncp ===> the *double* number of terms! x <- c(1000,10000,14000,14500, 14990,15000,15010,15500, 15600, 15670, 15675) rs <- sapply(x, function(x) ss2.(x,5000,10000)) cbind(t(rs), prob = sapply(x, function(x) pnchisqIT(x, 5000,10000)$prob)) ## i0 nT i1 i2 iN1 iN2 iMax prob ## 1 1 NA NA 1 1 1 0.000000e+00 ## 2596 4298 2596 4298 3499 4298 3907 1.697298e-137 ## 2596 5290 2880 5290 4358 5290 4810 2.625129e-06 ## 2596 5469 2953 5469 4459 5469 4921 1.212588e-02 ## 2596 5684 3031 5684 4560 5684 5044 4.838253e-01 ## 2596 5689 3032 5689 4562 5689 5047 5.016652e-01 ## 2596 5694 3034 5694 4564 5694 5049 5.194951e-01 ## 2596 5942 3116 5942 4675 5942 5251 9.867808e-01 ## 2596 5995 3134 5995 4701 5995 5301 9.960697e-01 ## 2596 6031 3146 6031 4719 6031 5336 9.984810e-01 ## 2596 6034 3147 6034 4721 6034 5338 9.985853e-01 << was "1.00" ! ## but we cannot go too far : str(pp <- pnchisqIT(20000, 5000,10000)) ## $ prob : num 1 ## $ i0 : int 3995 ## $ n.terms: int 8283 ## $ terms : num [1:8283] 0 0 0 0 0 0 0 0 0 0 ... 1-pp$pr; 1-sum(sort(pp$terms)) ## -8.999912e-12 ## -8.999024e-12 ## i.e. P > 1 is certainly not okay anymore! ## E = df + ncp = 15000 ## sigma = sqrt( 2*(df + 2*ncp)) = 223.607 5000 / sqrt( 2*(5000 + 2*10000)) ## = 22.36 i.e. 20'000 is 22.3 sigma out of E[] showProc.time() set.seed(635) ## 1st simulation --------------------------------------------------------------- ## Collect data: --- this took about 2 hours on "nb-mm" (P III, 700 MHz) ## takes 4.5 sec on ada-17 (or alo nL <- 20 nF <- 16 nX <- length(pX <- c(0.01, (1:9)/10, 0.99, 0.9999)) sfil1 <- file.path(sdir, "tests_chisq-nonc-ssR.rds") if(!doExtras && file.exists(sfil1)) { ssR_l <- readRDS_(sfil1) str(ssR_l) ## dfs : num [1:16] 15.9 20.7 21 29.5 47.8 ... ## lam : num [1:20] 5.74 8.26 8.34 8.64 10.12 ... ## ssR : num [1:4, 1:20, 1:16, 1:12] 8.08 1 25 2 9.24 ... loadList(ssR_l) } else { ## do run the simulation always if(doExtras) : lam <- sort(rlnorm(nL, 3, 1)) dfs <- sort(rlnorm(nF, 4, 1)) ssR <- array(NA, dim=c(1+3, nL,nF,nX), dimnames = list(c("x","iN1","iN2", "iMax"), lam = formatC(lam,digits=5), df = formatC(dfs,digits=5), x = formatC(pX, width=1))) for(iL in 1:nL) { lm <- lam[iL] cat("lam=", formatC(lm),":") for(iF in 1:nF) { f <- dfs[iF] x <- qchisq(pX, df=f, ncp=lm) for(iX in 1:nX) ssR[, iL,iF,iX] <- c(x[iX], ss2.(x[iX], df=f, ncp=lm)[5:7]) cat(".") }; cat("\n") } save2RDS(list_(lam, dfs, ssR), file = sfil1) } # {run simulation} x. <- ssR["x" ,,,] iM <- ssR["iMax",,,] iN1 <- ssR["iN1" ,,,] iN2 <- ssR["iN2" ,,,] iS <- iN2 - iN1 # the "index Spread": how many terms need to be summed ## Visualize iM(x) for some (df,lambda): Sel <- function(i) round(quantile(i, names=FALSE)) mult.fig(mfrow=c(5,5), ## << since length(quantile()) == 5 marP = c(-1,-1,0,0))$old.par -> op for(iL in Sel(1:nL)) { lm <- lam[iL] for(iF in Sel(1:nF)) { f <- dfs[iF] plot(x.[iL,iF,], iM[iL,iF,], type = 'o', xlab = "x", ylab = "iMax", main=paste("df=",formatC(f),", lam=",formatC(lm))) } } par(op) ##--> 1st order, qualitatively "same" function (x) ## Same plot, but using "Wienergerm's" sW(x,df,lam) instead of x ## source("/u/maechler/R/MM/NUMERICS/dpq-functions/wienergerm_nchisq-fn.R") mult.fig(mfrow=c(5,5), ## << since length(quantile()) == 5 marP = c(-1,-1,0,0), main = "iMax vs. sW()")$old.par -> op for(iL in Sel(1:nL)) { lm <- lam[iL] for(iF in Sel(1:nF)) { f <- dfs[iF] plot(sW(x.[iL,iF,], df=f, ncp=lm)$s, iM[iL,iF,], type = 'o', xlab = "sW(x,...)", ylab = "iMax", main=paste("df=",formatC(f),", lam=",formatC(lm))) } } par(op) ## very similar showProc.time() ###--- visualize 'iN1' = the first index *needed* : ## Idea: use "current" algorithm (simply summing from i=1...) when ok : fCont <- function(ix = 6, kind = c("iN1","iN2","iMax", "d.i"), pch=1, cex=.5, sdat = ssR, lam = as.numeric(dimnames(sdat)[["lam"]]), dfs = as.numeric(dimnames(sdat)[["df"]]), ## what a horrible hack .. pX = as.numeric(dimnames(sdat)[["x"]]) ) { ## Purpose: ## ---------------------------------------------------------------------- ## Arguments: ## ---------------------------------------------------------------------- ## Author: Martin Maechler, Date: 18 Feb 2004, 18:31 kind <- match.arg(kind) datname <- deparse(substitute(sdat)) nx <- dim(sdat)[4] if(ix < 1 || ix > nx) stop("'ix' must be in 1:",nx) if(kind == "d.i") { m <- sdat["iN2" ,,, ix] - sdat["iN1" ,,, ix] mtxt <- paste("Spread ", datname,"[ 'iN2 - iN1' ,,, ", ix,"]", sep='') } else { m <- sdat[kind ,,, ix] mtxt <- paste(datname,"[",kind," ,,, ", ix,"]", sep='') } mtxt <- paste(mtxt, " (i.e., x=", formatC(100*pX[ix],digits=10,wid=1),"%-perc.)",sep='') if(kind == "iN1") { filled.contour(lam, dfs, m, levels=c(1,2,3,5,10,15,20,30)-.01, col = c("light gray", terrain.colors(6)), plot.axes={ points(expand.grid(lam,dfs),cex=cex,pch=pch) axis(1); axis(2)}, plot.title={ title(mtxt, xlab="ncp (lambda)",ylab="df (nu)")} ) } else { # automatic levels and color filled.contour(lam, dfs, m, color.palette = terrain.colors, plot.axes={ points(expand.grid(lam,dfs),cex=cex,pch=pch) axis(1); axis(2)}, plot.title={ title(mtxt, xlab="ncp (lambda)",ylab="df (nu)")} ) } } par(.O.P.)# just in case for filled.contour() to work fCont()# iN1, 6 fCont(1)# iN1, 11 fCont(kind="d", pch='.')## "spread" fCont(kind="iN2", cex=.25)## practically == "spread" (i.e. iN1 ~= 0 !) fCont(12, kind="iN2") showProc.time() ## ## 4th simulation: ---------------------------------------------------- ## ## pX: at these quantiles(), compute pnchisq() nx <- length(pX <- c(0.01, (1:9)/10, 0.99, 0.9999, 1-1e-6, 1-1e-9)) sfil4 <- file.path(sdir, "tests_chisq-nonc-ssR4.rds") if(!doExtras && file.exists(sfil4)) { ssR_l <- readRDS_(sfil4) str(ssR_l) loadList(ssR_l) } else { set.seed(41) ## smallish (lam,df) -- use several x --- ideally all these give "iN1"=1 ss <- c(.1, .2, .5, 1:5, 7,10,15,c(2:6,8,10,12,15,20,30,50)*10) nl <- length(lam4 <- ss) nd <- length(dfs4 <- ss) ## I use "round" numbers, and can pack all the info into ssR4's dimnames: ssR4 <- array(NA, dim=c(1+ 3, nl,nd,nx), dimnames = list(c("x", "iN1","iN2", "iMax"), lam = formatC(lam4), df = formatC(dfs4), pr.x= ifelse(pX < 0.999, formatC(pX), paste("1-", formatC(1-pX),sep='')))) for(il in 1:nl) { lm <- lam4[il] for(id in 1:nd) { f <- dfs4[id] ## use more than one x per (df,lam) pair: x <- qchisq(pX, df=f, ncp=lm) for(ix in 1:nx) ssR4[, il,id,ix] <- c(x[ix], ss2.(x[ix], df=f, ncp=lm)[5:7]) cat(".") }; cat(il,"") }; cat("\n") save2RDS(list_(lam4, dfs4, ssR4), file=sfil4) } ## Compute the 'iMax' value that corresponds to x = E[X] = df + ncp ## from that, with the above 'general f(x)', we can hopefully ## get a good estimated iMax(x, df, ncp) ... str(iM.E <- iM[,,1]) for(iL in 1:nL) { lm <- lam[iL] for(iF in 1:nF) { f <- dfs[iF] E <- f + lm # = E[X] iM.E[iL,iF] <- approx(x.[iL,iF,], iM[iL,iF,], xout = E)$y } } str(iM.E) persp(lam, dfs, iM.E)# pretty linear.. ##-> filled.contour(lam, dfs, iM.E, xlab="ncp lambda", ylab="df nu") ## indeed: very linear in "lambda / ncp", a bit less linear in "df: dat1 <- cbind(expand.grid(lam,dfs), c(iM.E)) names(dat1) <- c("ncp", "df", "iM") summary(lm1 <- lm(iM ~ ncp + df, data = dat1)) ## R^2 = 99.79% with(dat1, p.res.2x(x=ncp,y=df, residuals(lm1))) # pretty structured.. image (x=lam,y= dfs, array(residuals(lm1),dim=dim(iM.E))) summary(lm2 <- lm(iM ~ ncp * df, data = dat1)) ## R^2 = 99.84% summary(lm3 <- lm(iM ~ poly(ncp, df, degree=2), data = dat1)) ## R^2 = 99.96% ## Using sqrt() -- not really better (and predict/fitted is failing !? FIXME !! summary(lm3s <- lm(sqrt(iM) ~ poly(ncp, df, degree=2), data = dat1)) with(dat1, p.res.2x(x=ncp,y=df, residuals(lm3))) # pretty structured.. image (x=lam,y= dfs, array(residuals(lm3),dim=dim(iM.E))) library(mgcv) summary(gam1. <- gam(iM ~ s(ncp) + s(df), data = dat1)) ## df = 1 + 4 + 4; R^2 = 0.999; s^ = 0.226 summary(gam1.2 <- gam(iM ~ ncp + s(df), data = dat1)) ## df = 2 + 4 ; R^2 = 0.999; s^ = 0.280 plot(gam1.2) #pretty square summary(gam2. <- gam(iM ~ s(ncp, df), data = dat1)) ## 100% explained, ## but equiv.deg.freedom = 1+26.3 if(FALSE)## FAILS summary(gam2.10 <- gam(iM ~ s(ncp, df, 10), data = dat1)) ## df = 1 + 9 ; R^2 = 1.000; s^ = 0.107 with(dat1, p.res.2x(x=ncp,y=df, residuals(gam2.))) ## ok, but plot(fitted(gam2.), fitted(lm3)) ; abline(0,1,col=3) ## suggesting the quadratic fit being quite ok. ## OTOH, I do need an explicit formula; ## a simple 2d- regression spline instead of quadratic? showProc.time() ###---- 2nd "simulation": -- only go for one x = E[] nSim <- if(doExtras) 5000 else 500 sfil2 <- file.path(sdir, "tests_chisq-nonc-ssR2.rds") if(!doExtras && file.exists(sfil2)) { ssR2 <- readRDS_(sfil2) } else { set.seed(2) lam <- rlnorm(nSim, 5, 2) dfs <- rlnorm(nSim, 6, 1.5) ssR2 <- rbind(rbind(lam,dfs), matrix(NA, 3, nSim)) for(i in 1:nSim) { lm <- lam[i] f <- dfs[i] x <- f + lm ssR2[3:5, i] <- ss2.(x, df=f, ncp=lm)[5:7] cat("."); if(i %% 100 == 0) cat("\n",i) } dimnames(ssR2) <- list(c("lam","df","iN1","iN2", "iMax"),NULL) ssR2 <- t(ssR2) ssR2 <- ssR2[sort.list(ssR2[,"lam"]),] save2RDS(ssR2, file=sfil2) } ## 3rd simulation: --- this takes a little while (1 min ?) sfil3 <- file.path(sdir, "tests_chisq-nonc-ssR3.rds") if(!doExtras && file.exists(sfil3)) { ssR3_l <- readRDS_(sfil3) str(ssR3_l) loadList(ssR3_l) } else { set.seed(31) ss <- c(20,50, c(1:8,10,13,18,25)*100, 3000) nl <- length(lam3 <- c(ss, seq(5000, 100000, length=1+ 4*19))) nd <- length(dfs3 <- c(ss, seq(5000, 100000, length=1+ 2*19))) ssR3 <- array(NA, dim=c(3, nl,nd), dimnames = list(c("iN1","iN2", "iMax"), formatC(lam3), formatC(dfs3))) for(il in 1:nl) { lm <- lam3[il] for(id in 1:nd) { f <- dfs3[id] x <- f + lm ssR3[, il,id] <- ss2.(x, df=f, ncp=lm)[5:7] }; cat(il,"") }; cat("\n") save2RDS(list_(lam3, dfs3, ssR3), file=sfil3) } showProc.time() ### now change these "3" values into a data.frame as this one: dsR2 <- as.data.frame(ssR2) ## d3 <- dim(ssR3) ss3 <- matrix(ssR3, d3[1], prod(d3[2:3])) dsR3 <- cbind(expand.grid(lam = lam3, df = dfs3), t(ss3)) colnames(dsR3)[3:5] <- dimnames(ssR3)[[1]] dsR <- rbind(dsR2, dsR3) rownames(dsR) <- paste(1:nrow(dsR)) ## visualize "design space": iOutl <- c(648, 1841, 5000) plot(df ~ lam, data =dsR, log = "xy") points(dsR[iOutl,], col=2:4, cex=2) dsR[iOutl,] plot(df ~ lam, data =dsR, log = "") points(dsR[iOutl,], col=2:4, cex=2) points(dsR[4997:5000,], col="gray", cex=2, pch=3) with(dsR, which(lam > 100000))# 4998, 4999, 5000 ## -- leave these away for the regression (high leverage!) str(dsR. <- subset(dsR, lam <= 100000)) summary(l.1 <- lm(iMax ~ lam, data=dsR.))## R^2(adj) = 1; s^ = 23.73 summary(l.2 <- lm(iMax ~ lam + df, data=dsR.))## s^ = 12.49 summary(l.3 <- lm(iMax ~ lam * df, data=dsR.))## s^ = 12.22 summary(l.4 <- lm(iMax ~ lam * df+ I(lam^2), data=dsR.))## s^ = 8.251 summary(l.5 <- lm(iMax ~ lam * df+ I(lam^2)+I(df^2), data=dsR.))#s^= 7.812 ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 9.437e+00 1.105e-01 85.42 < 2e-16 ## lam 5.022e-01 1.034e-05 48573.09 < 2e-16 ## df 9.861e-04 1.095e-05 90.03 < 2e-16 ## I(lam^2) -1.333e-08 1.219e-10 -109.39 < 2e-16 ## I(df^2) -4.202e-09 1.257e-10 -33.44 < 2e-16 ## lam:df 6.433e-10 9.405e-11 6.84 8.42e-12 summary(l.6 <- update(l.5, . ~ . + log(lam)))## R^2(adj) = 1; s^ = 6.135 (7 p) ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -7.477e+00 2.348e-01 -31.85 <2e-16 ## lam 5.016e-01 1.179e-05 42528.90 <2e-16 ## df 8.951e-04 8.681e-06 103.11 <2e-16 ## I(lam^2) -8.579e-09 1.137e-10 -75.48 <2e-16 ## I(df^2) -3.804e-09 9.881e-11 -38.50 <2e-16 ## log(lam) 3.391e+00 4.373e-02 77.54 <2e-16 ## lam:df 1.546e-09 7.477e-11 20.68 <2e-16 summary(l.7 <- update(l.5, . ~ . + log(lam)*log(df)))## s^ = 5.389 (9 p) ##- Estimate Std. Error t value Pr(>|t|) ##- (Intercept) 6.315e+00 5.773e-01 10.938 < 2e-16 *** ##- lam 5.014e-01 1.066e-05 47049.885 < 2e-16 *** ##- df 4.992e-04 1.204e-05 41.449 < 2e-16 *** ##- I(lam^2) -7.278e-09 1.034e-10 -70.356 < 2e-16 *** ##- I(df^2) -4.404e-10 1.131e-10 -3.893 9.98e-05 *** ##- log(lam) 4.249e-02 8.164e-02 0.520 0.6028 ##- log(df) -2.062e+00 8.728e-02 -23.628 < 2e-16 *** ##- lam:df -1.775e-10 7.703e-11 -2.305 0.0212 * ##- log(lam):log(df) 5.348e-01 1.151e-02 46.476 < 2e-16 *** drop1(l.7)# cannot drop non-sign. log(lam) summary(l.8 <- update(l.7, . ~ . - log(lam)))## s^ = 5.389 (8 p) summary(l.9 <- update(l.8, . ~ . - lam:df)) ## s^ = 5.391 (7 p) summary(l.10<- update(l.9, . ~ . - I(df^2))) ## s^ = 5.396 (6 p) summary(l.11<- update(l.10, . ~ . - I(lam^2)))## s^ = 5.396 (6 p) summary(l.12<- update(l.11, . ~ . + log(lam)))## s^ = 5.396 (6 p) ## dsR3 instead of dsR. : summary(l.13<- lm(iMax ~ lam*df+ log(lam)*log(df), data=dsR3)) summary(l.14<- update(l.13, . ~ . - lam:df)) summary(l.15<- update(l.14, . ~ . - 1)) with(dsR3, p.res.2x(lam, df, residuals(l.15))) ## ok; it's really the low 'lam' (and the low 'df') if(doExtras) { ##-> try more ----------------------------------- iMaxR3 <- matrix(dsR3$iMax, length(lam3)) persp (log10(lam3), log10(dfs3), iMaxR3/ (lam3/2)) persp (log10(lam3), log10(dfs3), log10(iMaxR3/ (lam3/2))) filled.contour(log10(lam3), log10(dfs3), iMaxR3/ (lam3/2), plot.title = { contour(log10(lam3), log10(dfs3), iMaxR3/ (lam3/2),add=TRUE) title(main = "iMax / (lam/2) ['dsR3' data]", xlab = "log10(lam)", ylab = "log10(df)") ##points(expand.grid(log10(lam3), log10(dfs3)), pch='.') with(dsR3, points(log10(lam), log10(df), pch='.')) }) ## almost same with log(iMax / (lam/2)): filled.contour(log10(lam3), log10(dfs3), log10(iMaxR3/ (lam3/2)), plot.title = { contour(log10(lam3), log10(dfs3), log10(iMaxR3/ (lam3/2)),add=TRUE) title(main = "log10(iMax / (lam/2)) ['dsR3' data]", xlab = "log10(lam)", ylab = "log10(df)") ##points(expand.grid(log10(lam3), log10(dfs3)), pch='.') with(dsR3, points(log10(lam), log10(df), pch='.')) }) } #-- only if(doExtras) ------------------------------------- showProc.time() if(doExtras && require("interp")) { ## same with both data --> need interp ! : s/dsR3/dsR./ : ds1 <- subset(dsR., lam >= 1) sr.I <- with(ds1, interp(log(lam), log(df), iMax)) sr.Iq <- with(ds1, interp(log(lam), log(df), iMax / (lam/2))) filled.contour(sr.I, xlab="ln(lam)", ylab="ln(df)", main="iMax") filled.contour(sr.Iq, plot.title = { contour(sr.Iq, add=TRUE) title(main = "iMax / (lam/2) ['ds1' data]", xlab = "ln(lam)", ylab = "ln(df)") with(ds1, points(log(lam), log(df), pch='.')) }) print(summary(l.q1 <- lm(iMax / (lam/2) ~ log(lam) * log(df), data= ds1))) TA.plot(l.q1) plot(resid(l.q1) ~ lam, data=ds1, pch ='.', log = 'x') print(summary(l.q2 <- update(l.q1, .~. + I(1/lam)))) print(summary(l.q3 <- update(l.q2, .~. + I(log(lam)^2) + I(1/log(lam))))) plot(resid(l.q3) ~ lam, data=ds1, pch ='.', log = 'x') ### --> Aha! 1/lam seems the best term !! with(dsR., p.res.2x(lam, df, residuals(l.q3), scol=2:3)) ## -- maybe try lam^(-a) ? showProc.time() # 0.9 } # only if(.X.) ## This is impressive (but shows "non-fit") with(dsR., p.res.2x(log10(lam), log10(df), residuals(l.5), scol=2:3)) with(dsR., p.res.2x(lam, df, residuals(l.5))) with(dsR., p.res.2x(lam, df, residuals(l.10), scol=2:3)) with(dsR., p.res.2x(log(lam), log(df), residuals(l.6), scol=2:3)) plot(l.5, ask=FALSE) ## 2-3 outliers: ## 5000 : maximal lambda ## 1841 : maximal df if(doExtras) withAutoprint({ # ----------------------------------- ### Yet another idea: summary(lq2 <- lm(I(iMax/lam) ~ (lam+ log(lam) + df + log(df))^2, data=dsR.)) lq2s <- step(lq2) summary(lq2s, corr=TRUE, symb=TRUE) ## shows the complete non-sense (large lambda values fit very badly with(dsR., n.plot(fitted(lq2s)*lam, iMax)) if(doExtras)## GAM -- needs tons of cpu + memory: summary(g.5 <- gam(iMax ~ s(lam) + s(df) + s(lam,df), data=dsR.))#s^=4.489 ## -> (too) many deg.freedom s }) #-------------------------------------------- showProc.time() if(doExtras && require("interp")) { ## visualize more: ---------- sr2I <- with(dsR., interp(log(lam), log(df), iMax)) filled.contour(sr2I, xlab="ln(lam)", ylab="ln(df)", main="iMax") sr2I <- with(dsR., interp(log(lam), log(df), log(iMax))) sr2I <- with(dsR., interp(log(lam), log(df), log(iN2))) filled.contour(sr2I, xlab="ln(lam)", ylab="ln(df)", main="ln(iN2)") ## linear for large lambda persp(sr2I,xlab="log(lam)", ylab="log(df)",zlab="log(iMax)",ticktype="detailed") ## restrict on those where iN1 > 1 str(dsR2r <- subset(dsR2, iN1 > 1))# only 3383 instead of 5000 sr2I <- with(dsR., interp((lam), (df), (iN2))) filled.contour(sr2I, xlab="(lam)", ylab="(df)", main="(iN2)") ## Looks *very* nicely linear persp(sr2I,xlab="(lam)", ylab="(df)",zlab="(iMax)",ticktype="detailed") ## sr2I <- with(dsR., interp((lam), (df), iMax/lam)) filled.contour(sr2I, xlab="(lam)", ylab="(df)", main="iMax/lam") persp(sr2I,xlab="(lam)", ylab="(df)",zlab="iMax/lam",ticktype="detailed") ## restrict on those where iN1 > 1 str(dsR.r <- subset(dsR., iN1 > 1)) sr2rI <- with(dsR.r, interp((lam), (df), (iN2))) persp(sr2rI,xlab="(lam)",ylab="(df)",zlab="(iN2)",ticktype="detailed") sr2rI <- with(dsR.r, interp(log(lam), log(df), log(iN2))) persp(sr2rI,xlab="log(lam)",ylab="log(df)",zlab="log(iN2)",ticktype="detailed") sr2rI <- with(dsR.r, interp(log(lam), log(df), log(iMax))) persp(sr2rI,xlab="log(lam)",ylab="log(df)",zlab="log(iMax)",ticktype="detailed") } else { cat("Define dsR2r : \n") ; str(dsR2r <- subset(dsR., iN1 > 1)) cat("and also dsR.r : \n") ; str(dsR.r <- subset(dsR., iN1 > 1)) } showProc.time() summary(ll.2 <- lm(log(iMax) ~ log(lam) + log(df), data=dsR.r)) summary(ll.3 <- lm(log(iMax) ~ log(lam) * log(df), data=dsR.r)) summary(ll.4 <- lm(log(iMax) ~ log(lam) * log(df) + I(log(lam)^2), data=dsR.r)) plot(residuals(ll.2) ~ dsR.r$lam, log='x') plot(residuals(ll.3) ~ dsR.r$lam, log='x') plot(residuals(ll.4) ~ dsR.r$lam, log='x') plot(dsR.r$iMax - exp(fitted(ll.4)) ~ dsR.r$lam, log='x') if(doExtras) { summary(gl.4 <- gam(log(iMax) ~ s(lam) + log(df), data=dsR.r))## very bad ## but this is very good: summary(gl.4 <- gam(log(iMax) ~ s(log(lam)) + log(df), data = dsR.r)) plot(gl.4, ask=FALSE) if(FALSE) { # fails now summary(gl.5 <- gam(log(iMax) ~ s(log(lam),4) + log(df)*log(lam), data=dsR.r)) plot(gl.5, ask=FALSE) } } # only if(.X.) ##-> try summary(ll.5 <- lm(log(iMax) ~ (log(lam) + poly(pmax(0,log(lam)-5),2))*log(df), data=dsR.r)) summary(dsR.r$iMax - exp(fitted(ll.5))) # one very negative plot(ll.5, ask=FALSE) showProc.time() ## First try to find formula for maximal number of terms needed summary(l.N2.1 <- lm(iN2 ~ lam*df , data=dsR2r)) summary(l.N2.2 <- lm(iN2 ~ lam*df + I(lam^2)+I(df^2), data=dsR2r),corr=TRUE) summary(l.N2.3 <- lm(iN2 ~ lam*df + I(lam^2)+I(lam^3), data=dsR2r),corr=TRUE) summary(l.N2.4 <- lm(iN2 ~ lam*df + I(lam^2)+I(lam^3)+I(df^2), data=dsR2r),corr=TRUE) plot(residuals(l.N2.2) ~ dsR2r$lam) dsR2r$lamP20k <- pmax(0, dsR2r$lam - 20000) dsR2r$lamM20k <- pmin(0, dsR2r$lam - 20000) summary(l.N2.1P <- lm(iN2 ~ lam+df + I(lamP20k ^2)+I(lamM20k ^2) , data=dsR2r)) ## This is to save typing: all variables 'log'ged: dLr <- as.data.frame(lapply(dsR2r, log)) summary(l.N2. <- lm(iN2 ~ lam*df + I(lam^2)+I(df^2), data=dLr),corr=TRUE) summary(l.N2 <- lm(iN2 ~ lam*df + I(lam^2)*I(df^2), data=dLr)) ## back transformed residuals: r <- dsR2r$iN2 - exp(fitted(l.N2)) n.plot(dsR2r$lam, r, log='x'); abline(h=0,lty=3) ## extreme (negative): 3383, also 3381, 3382 n.plot(dsR2r$lam, r, log='x', ylim=500*c(-1,1)); abline(h=0,lty=3) showProc.time() ###---- older tests --------------------------------------- if(dev.interactive(TRUE)) { cat("sys.function(): "); str(sys.function()) } if(.do.ask <- dev.interactive() && is.null(sys.function())) par(ask=TRUE); cat(".do.ask : ", .do.ask, "\n") mult.fig(2)$old.par -> op ## large NC -- still (2018-08) very expensive!! ## 10^(3:10) is (still!) much too expensive, 10^8 alone costs 31.8 sec ! NC2 <- if(doExtras) 10^(2:7) else 10^(2:6) for(NC in NC2) { cat("ncp=",NC,":\n") curve(dchisq(x, df=1, ncp=NC), from=NC/10,to=NC*100, log='x', main=paste("Density ncp =",NC)) curve(pchisq(x, df=1, ncp=NC), from=NC/10,to=NC*100, log='x', main=paste("CDF ncp =",NC)) showProc.time() } par(op) if(.do.ask) par(ask=FALSE) ## NOTE: I found that the median = qchisq(1/2, *) is "mostly" ## ---- in (m-1, m) where m = mean = nu + lambda = df + ncp ## One exception (I've carefully searched for) where ## median < m-1 <==> m - median > 1 : (maximal ~ 1.6): df <- .0005; curve((df+x) - qchisq(1/2, df, ncp=x), 1, 2, col=2) df <- .005 ; curve((df+x) - qchisq(1/2, df, ncp=x), add=TRUE, col=3) df <- .05 ; curve((df+x) - qchisq(1/2, df, ncp=x), add=TRUE, col=4) ## These are all quite close (and quite CPU-costly !!) : df <- 1e-4; curve((df+x) - qchisq(1/2, df, ncp=x), 0.5, 40, col=2) abline(h=1, col='gray') df <- 1e-4; curve((df+x) - qchisq(1/2, df, ncp=x), 1.2, 2, col=2) if(doExtras) { df <- 2e-4; curve((df+x) - qchisq(1/2, df, ncp=x), add=TRUE,col=3) df <- 4e-4; curve((df+x) - qchisq(1/2, df, ncp=x), add=TRUE,col=4) df <- 8e-4; curve((df+x) - qchisq(1/2, df, ncp=x), add=TRUE,col=5) } df <-16e-4; curve((df+x) - qchisq(1/2, df, ncp=x), add=TRUE,col=6) showProc.time() # 0.41 {2019-09} df <- 1e-100; curve((df+x) - qchisq(1/2, df, ncp=x), 1.38, 1.40, col=2) dfs <- 2^(if(doExtras) seq(-300,-2, length=21) else -2) # as they are costly for(df in dfs) { curve((df+x) - qchisq(1/2, df, ncp=x), add=TRUE,col=3) cat(formatC(df)," ") }; cat("\n") showProc.time() if(doExtras) { ## -- show irregularity more closely --------------- df <- 1e-300; curve((df+x) - qchisq(1/2, df, ncp=x), 1.38628, 1.38630, col=2) for(df in dfs) { curve((df+x) - qchisq(1/2, df, ncp=x), add=TRUE,col=3) cat(formatC(df)," ") }; cat("\n") curve((0+x) - qchisq(1/2, df=0, ncp=x), 1.386294, 1.386295, col=2) showProc.time() # doExtras: ~ 0.6 {2019-09} } # only if(.X.) ------------------------------------------------- ff <- function(ncp) (0+ncp)-qchisq(1/2, df=0, ncp=ncp) str(oo <- optimize(ff, c(1.3,1.4), maximum=TRUE, tol=1e-15),digits=16) ## $ maximum : num 1.386294373354218 ## $ objective: num 1.386294355703814 qchisq(1/2, df=0, ncp = 1.386294373354218)## = 1.765e-8 ## This is the case df -> 0 where the distribution has a point mass at 0 ! x <- c(0,1e-8,1e-5,1e-3,seq(0.01, 3, length = if(doExtras) 1001 else 125)) plot (x, pchisq(x, df=1e-4, ncp = 1.4), type ='l', col=2, ylim = 0:1) lines(x, pchisq(x, df=1e-4, ncp = 1.6), col=1) lines(x, pchisq(x, df=1e-4, ncp = 1.2), col=3) lines(x, pchisq(x, df=1e-4, ncp = 1.1), col=4) lines(x, pchisq(x, df=1e-4, ncp = 0.1), col=5) plot (x, pchisq(x, df=1e-2, ncp = 1.4), type ='l', col=2, ylim = 0:1) lines(x, pchisq(x, df=1e-2, ncp = 1.6), col=1) lines(x, pchisq(x, df=1e-2, ncp = 1.2), col=3) lines(x, pchisq(x, df=1e-2, ncp = 1.1), col=4) lines(x, pchisq(x, df=1e-2, ncp = 0.1), col=5) plot (x, pchisq(x, df=0.1, ncp = 1.4), type ='l', col=2, ylim = 0:1) lines(x, pchisq(x, df=0.1, ncp = 1.6), col=1) lines(x, pchisq(x, df=0.1, ncp = 1.2), col=3) lines(x, pchisq(x, df=0.1, ncp = 1.1), col=4) lines(x, pchisq(x, df=0.1, ncp = 0.1), col=5) showProc.time() ## MM: from something *not* put into ~/R/D/r-devel/R/tests/d-p-q-r-tests.R ## 1) PR#14216 (r51179, 2010-02-25) x <- 80:200; lp <- pchisq(x, 4, ncp=1, log.p=TRUE) stopifnot(is.finite(lp), all.equal(lp[1],-2.5519291e-14), lp < 0, ## underflowed to 0, in R <= 2.10.x -.4635 < (dll <- diff(log(-lp))), dll < -.4415, max(abs(diff(dll))) < 3.75e-4) ## showProc.time() ###---- again {may repeating from above --- "sorry I can't check that now"} : x <- 250; pchisq(x, 1.01, ncp = 80, log=TRUE) ## R-2.10.0 and earlier --> quite a noisy picture ! -- ## note that log P > 0 <==> P > 1 --- of course nonsense! xy <- curve(pchisq(x, 1.01, ncp = 80, log=TRUE), 250, 600, n=1001, ylim = c(-1,1)*8e-14); abline(h=0, lty=3) ## still noisy, and still slightly (5e-14) above 0 ! ## bigger picture: for theta = ncp < 80, it works by using the other tail plot(p1 <- -pchisq(1:400, 1.01, ncp = 80*(1-.Machine$double.eps), log=TRUE), log="y", type="o", pch=".") ## However, for ncp >= 80 -- the P = sum_i term_i computation lines(p2 <- -pchisq(1:400, 1.01, ncp = 80, log=TRUE), col=adjustcolor(2,0.5),lwd=2)## underflow to 0 .. not good ___ FIXME ___ ## here, "the other tail", log1p(- pnchisq(....., !lower_tail) ) does not work !! summary(1 - p1/p2) ## From: Prof Brian Ripley ## To: Martin Maechler ## cc: R-core@r-project.org ## Subject: Re: p[n]chisq() warnings [was "d-p-q-r tests failures"] ## Date: Tue, 24 Nov 2009 15:14:16 +0000 (GMT) ## Martin, ## I mistyped 'mendacious": the error message lies. I think it is ## generally wrongly worded, and should be something like 'full precision ## may not have been achieved'. ## Here is why I added the warning I added: ## MM: added 'lower.tail' 'log.p' and 'x' arguments ##__ FIXME 2019-09: Compare with my new pnchis1sq() function !! t1 <- function(p, ncp, lower.tail = FALSE, log.p = FALSE, x = qchisq(p, df = 1, ncp, lower.tail=lower.tail, log.p=log.p)) { ## X ~ chi^2(df=1, ncp = L^2) <==> X = Z^2 where Z ~ N(L, 1) ## -------------------------- ------- ----------- ## P[ X > x ] = P[ |Z| > sqrt(x) ] = P[Z > sx] + P[Z < -sx] , sx := sqrt(x) p1 <- pchisq(x, df = 1, ncp = ncp, lower.tail=lower.tail, log.p=log.p) sx <- sqrt(x) sL <- sqrt(ncp) p2 <- if(!log.p) { if(lower.tail) pnorm(sx, sL) - pnorm(sx, -sL, lower.tail=FALSE) else ## lower.tail = FALSE pnorm(sx, sL, lower.tail=FALSE) + pnorm(sx, -sL, lower.tail=FALSE) } else { ## log scale -- MM: use logspace.add() and *.sub() for the above if(lower.tail) logspace.sub(pnorm(sx, sL, log.p=TRUE), pnorm(sx, -sL, log.p=TRUE, lower.tail=FALSE)) else ## lower.tail = FALSE logspace.add(pnorm(sx, sL, log.p=TRUE, lower.tail=FALSE), pnorm(sx, -sL, log.p=TRUE, lower.tail=FALSE)) } c(if(!missing(p)) c(p=p), x=x, pnchisq=p1, p.true=p2, relErr=abs(p1-p2)/p2) } t1(1e-12, 85) ## [1] 1.000000e-12 2.642192e+02 1.003355e-12 9.943394e-13 9.066654e-03 ## Warning messages: ## 1: In qchisq(p, df = 1, ncp, lower.tail = FALSE) : ## full precision was not achieved in 'qnchisq' ## 2: In pchisq(x, df = 1, ncp = ncp, lower.tail = FALSE) : ## full precision was not achieved in 'pnchisq' ## so the answer is out by about 1%. And t1(1e-14, 100) ## [1] 1.000000e-14 5.208816e+02 0.000000e+00 6.107835e-38 1.000000e+00 ## has lost all precision. [MM: still true, Aug.2019] ## This sort of thing (because we compute 1 - answer) does not happen in ## the other tail. So unless someone can show examples of precision ## loss, I believe that the warning in that tail should not be there (and ## would need conditional wording). ## MM: As soon as you go to log scale, completely inaccurate values around 1 ## are completely unuseful, too: t1(x = 500, ncp=80, lower.tail=TRUE, log.p=TRUE) ## x pnchisq p.true relErr ## 5.000000e+02 3.552714e-15 -2.423206e-41 -1.466121e+26 ## Brian showProc.time() ## On Tue, 24 Nov 2009, Martin Maechler wrote: ## >>>>>> Prof Brian Ripley ## >>>>>> on Tue, 24 Nov 2009 12:22:48 +0000 (GMT) writes: ## > ## > > On Tue, 24 Nov 2009, Peter Dalgaard wrote: ## > >> Prof Brian Ripley wrote: ## > >>> I only picked up that change this morning, and am seeing the failures ## > >>> too. I don't see why the warning is being given (isn't the test that ## > >>> full accuracy was achieved?), so updating the .save file does not look ## > >>> to me to be the solution. ## > >> ## > >> Hmm, I get the warnings, but it doesn't seem to stop the build for me ## > >> and make check is failing at a different spoot: ## > ## > > At an *earlier* spot in the check. ## > ## > [.............] ## > ## > >> The relevant diff is ## > >> --- src/nmath/pnchisq.c (revision 50552) ## > >> +++ src/nmath/pnchisq.c (revision 50553) ## > >> @@ -40,9 +40,15 @@ ## > >> if (df < 0. || ncp < 0.) ML_ERR_return_NAN; ## > >> ## > >> ans = pnchisq_raw(x, df, ncp, 1e-12, 8*DBL_EPSILON, 1000000, ## > >> lower_tail); ## > >> - if(!lower_tail && ncp >= 80) { ## > >> - if(ans < 1e-10) ML_ERROR(ME_PRECISION, "pnchisq"); ## > >> - ans = fmax2(ans, 0.0); /* Precaution PR#7099 */ ## > >> + if(ncp >= 80) { ## > >> + if(lower_tail) { ## > >> + if(ans >= 1-1e-10) ML_ERROR(ME_PRECISION, "pnchisq"); ## > >> + ans = fmin2(ans, 1.0); /* e.g., pchisq(555, 1.01, ncp = 80) */ ## > >> + } ## > >> + else { /* !lower_tail */ ## > >> + if(ans < 1e-10) ML_ERROR(ME_PRECISION, "pnchisq"); ## > >> + ans = fmax2(ans, 0.0); /* Precaution PR#7099 */ ## > >> + } ## > >> } ## > >> return log_p ? log(ans) : ans; ## > >> } ## > >> ## > >> which warns if you get too close to 1.0 and truncates to 1.0 if you ## > >> overshoot. All the cases tested should give the result 1.0 and thus ## > >> trigger the warning. Are you implying that this is unintentional? ## > ## > > I don't know nor can I guess Martin's intention, but I am confident ## > > the warning is medacious here. ## > ## > Hmm, I don't understand "medacious". ## > ## > But anyway: The new code of `` pmin(ans, 1) '' is indeed necessary; ## > previously, pchisq(x, df, ncp) *would* return values larger ## > than one, ... somewhat embarrassingly. ## > ## > If you study a bit further, you'll find that currently, ## > pnchisq() for ncp > 80 use identical code for TRUE or FALSE ## > lower_case; and the old code ## > had a check for ncp >= 80 and accuracy warnings for "upper tail" ## > and P < 1e-10. ## > The logical extension is to give the same accuracy warning for ## > "lower tail" and P > 1 - 1e-10. ## > Of course, this is all just a workaround for the fact that our ## > current algorithm(s) are not good enough currently in those ## > extreme tail cases, and indeed, ## > I've start investigating better algorithms quite a while in the ## > past. ## > The creating of package 'Rmpfr' (for multi-precision arithmetic) ## > has BTW been influenced by my desire to get tools for exploring ## > such extreme tail misbehavior of current R algorithms. ## > ## > Here an example from one of my R scripts on this : ## > ## > ## R-2.10.0 and earlier --> quite a noisy picture ! -- ## > ## note that log P > 0 <==> P > 1 --- of course nonsense! ## > curve(pchisq(x, 1.01, ncp = 80, log=TRUE), 250, 600, ## > n=1001, ylim = c(-1,1)*5e-14) ## > ## > So, again: these warning are a *substitute* and "cheap ## > workaround" for now, but not ## > only for the new case that I've added, but also already for the ## > case Brian had added earlier: ## > if(!lower_tail && ncp >= 80) { ## > if(ans < 1e-10) ML_ERROR(ME_PRECISION, "pnchisq"); ## > ans = fmax2(ans, 0.0); /* Precaution PR#7099 */ ## > } ## > ## > Martin ## > ## > ## > > The save file in R-devel (which also gives the warnings) was updated ## > > in r50552. ## > ## > >> ## > >> -p ## > >> ## > >>> Brian ## > >>> ## > >>> On Tue, 24 Nov 2009, Kurt Hornik wrote: ## > >>> ## > >>>>>>>>> Kurt Hornik writes: ## > >>>> ## > >>>>> I can no longer build r-patched. Most likely from ## > >>>>> r50553 | maechler | 2009-11-23 23:50:13 +0100 (Mon, 23 Nov 2009) | 1 ## > >>>>> line ## > >>>> ## > >>>>> ported r50552 [pchisq(*, ncp > 80) from trunk ## > >>>> ## > >>>>> I now get ## > >>>> ## >>>>>> ##-- non central Chi^2 : ## >>>>>> xB <- c(2000,1e6,1e50,Inf) ## >>>>>> for(df in c(0.1, 1, 10)) ## > >>>>> + for(ncp in c(0, 1, 10, 100)) stopifnot(pchisq(xB, df=df, ## > >>>>> ncp=ncp) == 1) ## > >>>>> There were 12 warnings (use warnings() to see them) ## > >>>> ## > >>>>> and as the last line does not show in the .save file, make fails. ## > >>>> ## > >>>>> Is anyone seeing this too? ## > >>>> ## > >>>> This persists for both GCC 4.3 and 4.4 for me, the warnings coming from ## > >>>> ## > R> xB <- c(2000,1e6,1e50,Inf) ## > R> for(df in c(0.1, 1, 10)) ## > >>>> + for(ncp in c(0, 1, 10, 100)) stopifnot(pchisq(xB, df=df, ncp=ncp) == 1) ## > >>>> ## > >>>> -k ## > >>>> ## > >>>>> Best ## > >>>>> -k ## > >>>> ## Reproducing an "inverse" of Table 29.2 (p.464) of Johnson, Kotz, Balakr.(1995) Vol.2 nu. <- c(2,4,7) lam <- c(1,4,16,25) (pnl <- expand.grid(ncp=lam, df=nu., KEEP.OUT.ATTRS=FALSE)[,2:1]) nl <- with(pnl, df+ncp) pars <- rbind(cbind(pnl, q = nl/2), cbind(pnl, q = nl ), cbind(pnl, q = nl*2)) pch <- with(pars, pchisq(q=q, df=df, ncp=ncp)) pchAA <- with(pars, pnchisqAbdelAty (q=q, df=df, ncp=ncp)) pchSa <- with(pars, pnchisqSankaran_d(q=q, df=df, ncp=ncp)) cbind(pars, R = pch, AA = pchAA, San = pchSa) showProc.time() ### Reproducing part of 'Table 29.2' (p.464) of Johnson, Kotz, Balakr.(1995) Vol.2 ### ### as in ../man/pnchisqAppr.Rd -- do run over *all* current pnchisq*() approximations! pkg <- "package:DPQ" ## NB: use versions of the functions that return numeric *vector* (of correct length) : pnchNms <- c(paste0("pchisq", c("", "V", "W", "W.R")), # + R's own, but *not* "W." ! ls(pkg, pattern = "^pnchisq")) ## drop some : pnchNms <- pnchNms[!grepl("Terms$", pnchNms)] pnchNms <- pnchNms[is.na(match(pnchNms, c("pnchisqIT", paste0("pnchisqT93.", c("a", "b")))))] pnchF <- sapply(pnchNms, get, envir = as.environment(pkg)) ## shorten the longer names for nicer tables : n.n <- nchar(pnNms <- setNames(,pnchNms)) L8 <- n.n > 8 pnNms[n.n > 10] <- sub("pnchisq", "pn", pnNms[n.n > 10]) pnNms[n.n > 8] <- sub("pnchisq","pnch", pnNms[n.n > 8]) names(pnchF) <- pnNms <- unname(abbreviate(pnNms, 8)) str(pnchF) op <- options(warn = 1, digits = 5, width = 110)# warn: immediate .. ## TODO --- want also "x ~ ncp" and or "df ~ ncp" ## TODO: write a *function* that computes all this *and* stores in nicely dimnamed array qq <- c(.001, .005, .01, .05, (1:9)/10, 2^seq(0, 10, by= 0.5)) nncp <- c(0, 1/8, 1/2, 1, 2, 5, 20, 100, 200, 1000) ddf <- c(2:4, 7, 20, 50, 100, 1000, 1e4, 1e10) # 1e300: fails for pchisqW.R() << FIXME AR <- array(NA_real_, # [ncp,df, q] dim=c(length(nncp), length(ddf), length(qq), length(pnchF)), dimnames= list(ncp = formatC(nncp, width=1), df = formatC( ddf, width=1), q = formatC( qq, width=1), Fn = pnNms)) CT <- AR[,,1,1] # (w/ desired dim and dimnames) sfil5 <- file.path(sdir, "tests_chisq-nonc-ssAp.rds") if(!doExtras && file.exists(sfil5)) { ssAp_l <- readRDS_(sfil5) str(ssAp_l) AR <- ssAp_l$AR ## loadList(ssAp_l)# attach it } else { ## do run the simulation always if(doExtras) : for(incp in seq_along(nncp)) { cat("\n~~~~~~~~~~~~~\nncp: ", ncp <- nncp[incp], "\n=======\n") pnF <- if(ncp == 0) pnchF[!grepl("T93", pnNms)] else pnchF # Temme('93) : ncp > 0 for(idf in seq_along(ddf)) { df <- ddf[idf] ct <- system.time( r <- vapply(pnF, function(F) Vectorize(F, names(formals(F))[[1]])(qq, df=df, ncp=ncp), qq) )[["user.self"]] AR[incp, idf, , names(pnF)] <- r CT[incp, idf] <- ct } } showProc.time() cat("User times in milli-sec.:\n") print(CT * 1000) save2RDS(list_(pnchNms, pnchF, qq, nncp, ddf, AR, CT), file=sfil5) } ## else *do* run .. ## Rather, show absolute and also relative "errors" .. stopifnot(dimnames(AR)[[4]][1] == "pchisq") ## Absolute "error" , i.e., delta to R's pchisq() which is AR[,,,1] : dAR <- AR[,,,-1] - c(AR[,,,1]) ## if we were perfect, using same compilers as R etc, then these deltas are all = 0 : summary(dAR[,,,"pnchRC"]) if(beStrict) stopifnot(dAR[,,,"pnchRC"] == 0) apply(dAR, 4, summary) # quite some NA's for some Fn's aperm(apply(dAR, 3:4, function(x) {u <- x[!is.na(x)]; c(min=min(u), max=max(u))}), c(2,3,1L)) ftable(apply(dAR[c("1", "2", "5"), c(1,3,4),,], c(1,2,4), median)) ftable(apply(dAR[c("1", "2", "5"), c(1,3,4),,], c(1,2,4), function(x) max(abs(x[!is.na(x)])))) ## (Note: for large df=1e10, p*() == 0 everywhere for the small 'q' we have ## ---- FIXME: qq: should be "realistic" in mu +/- 5 sd options(warn = 0, digits = 7)# partial revert ###----------- Much testing pnchisqRC() notably during my experiments (ptol <- if(noLdbl) 8e-13 else if(doExtras) 3e-16 else if(is32) 1e-14 else 1e-15) set.seed(123) for(df in c(.1, .2, 1, 2, 5, 10, 20, 50, 1000, if(doExtras) c(1e10, 1e200))) { ## BUG! (df=1e200, ncp=1000) takes forever cat("\n============\ndf = ",df,"\n~~~~~~~~~\n") for(ncp in c(0, .1, .2, 1, 2, 5, 10, 20, 50, if(df < 1e10) c(1000, 1e4) else c(100,200))) { # BUG: large ncp take forever cat("\nncp = ",ncp,": qq = ") qch <- if(ncp+df < 1000) qchisq((1:15)/16, df=df, ncp=ncp) else { qq <- qnchisqPatnaik((1:15)/16, df=df, ncp=ncp) if(qq[1] < qq[length(qq)]) qq else { # they all coincide ==> take mu +/- (1:3) SD mu <- df+ncp sigma <- sqrt(2*(df + 2*ncp)) mu + seq(-4,4, length.out=15)*sigma } } str(qq <- c(0, qch, Inf), digits=4) for(lower.tail in c(TRUE, FALSE)) { cat(sprintf("lower.tail = %-5s : ", lower.tail)) for(log.p in c(FALSE, TRUE)) { cat("log.p=", log.p, "") AE <- all.equal( pchisq (qq, df=df, ncp=ncp, lower.tail=lower.tail, log.p=log.p) , pnchisqRC(qq, df=df, ncp=ncp, lower.tail=lower.tail, log.p=log.p) , tol = ptol) if(is.character(AE)) { dd <- sub(".*:", "", AE) cat("pchisq() differ by", dd,"(dd/ptol = ",as.numeric(dd)/ptol," < 100 ?)\n") ## fails for first df=0.1, ncp=10000 on Windows 64-bit (winbuilder 2019-10) ## ... also on 'florence' (32-bit Fedora 28, 2019-10) if(myPlatf || ncp <= 1000) stopifnot(as.numeric(dd) < 100 * ptol) else if ( !(as.numeric(dd) < 100 * ptol)) cat("not stop()ing even though dd < 100 * ptol\n") } }; cat("\n") } }# for(ncp .) showProc.time() }# for(df .) summary(warnings()) ### L. Emphasis on very large df + ncp =============================================== ##=== L 1. very large df, ncp/df << 1 ==================== mkPnch <- function(k, df, ncp, lower.tail=TRUE, log.p=FALSE, twoExp = -53) { stopifnot(is.numeric(k), length(k) > 1, k == (k <- as.integer(k)), is.numeric(df), length(df) == 1L, length(ncp) == 1L, ncp >= 0, if((k. <- min(k)) >= 0) TRUE else twoExp < -log2(-k.)) ones <- 1 + k * 2^twoExp qs <- ones*(df+ncp) # df+ncp = E[chi'^2] xtra <- if(df == 1) { ## use exact formula {incl Taylor for small x = q} cbind(pnchi1sq = pnchi1sq(qs, ncp, lower.tail=lower.tail, log.p=log.p)) } else if(df == 3) { cbind(pnchi3sq = pnchi3sq(qs, ncp, lower.tail=lower.tail, log.p=log.p)) } else { array(NA_real_, c(length(qs), 0L)) } cbind(pchisq = pchisq (qs,df,ncp, lower.tail=lower.tail, log.p=log.p), xtra, pcAbdelA = pnchisqAbdelAty (qs,df,ncp, lower.tail=lower.tail, log.p=log.p), pcBolKuz = pnchisqBolKuz (qs,df,ncp, lower.tail=lower.tail, log.p=log.p), pcPatnaik= pnchisqPatnaik (qs,df,ncp, lower.tail=lower.tail, log.p=log.p), pcPearson= pnchisqPearson (qs,df,ncp, lower.tail=lower.tail, log.p=log.p), pcSanka_d=pnchisqSankaran_d(qs,df,ncp, lower.tail=lower.tail, log.p=log.p)) } if(doExtras) { ## really slow because pchisq() is slow! df <- 1e30; ncp <- 99 ks <- c(-40, -20, -15, -10, -6:6, 10, 15, 20, 40) twoExp <- -25 ## === system.time(suppressWarnings( Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) )) ## 6.7 sec print(Pn., digits=3) # ">>" annotated: shows bug ! ## pchisq pcAbdelA pcBolKuz pcPatnaik pcPearson pcSanka_d ## 0.00e+00 0.0 0.0 0.0 0.0 0.0 ## 0.00e+00 0.0 0.0 0.0 0.0 0.0 ## 0.00e+00 0.0 0.0 0.0 0.0 0.0 ## 0.00e+00 0.0 0.0 0.0 0.0 0.0 ## 0.00e+00 0.0 0.0 0.0 0.0 0.0 ## >> 1.00e+00 0.0 0.0 0.0 0.0 0.0 ## >> 1.00e+00 0.0 0.0 0.0 0.0 0.0 ## >> 1.00e+00 0.0 0.0 0.0 0.0 0.0 ## >> 1.00e+00 0.0 0.0 0.0 0.0 0.0 ## >> 1.00e+00 0.0 0.0 0.0 0.0 0.0 ## 4.17e-09 0.5 0.5 0.5 0.5 0.5 ## 1.00e+00 1.0 1.0 1.0 1.0 1.0 ## 1.00e+00 1.0 1.0 1.0 1.0 1.0 ## 1.00e+00 1.0 1.0 1.0 1.0 1.0 ## 1.00e+00 1.0 1.0 1.0 1.0 1.0 ## 1.00e+00 1.0 1.0 1.0 1.0 1.0 ## ..... .... tExp <- substitute(`pchisq*`(list(q == mu(1 + k * 2^TWOEXP), df==DF, ncp==NCP)), list(TWOEXP = twoExp, DF=df, NCP=ncp)) matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main = tExp) kk <- seq(min(ks), max(ks), length.out=401) qs <- (1 + kk * 2^twoExp)*df fq <- dchisq(qs, df, ncp) par(new=TRUE) plot(kk, fq, type="l", col=adjustcolor(2, 1/3), lwd=3, lty=3, axes=FALSE, ann=FALSE) showProc.time() }## only if(doExtras) ## BUG (FIXME) e.g. here: pchisq (0.99999989*(df+ncp), df, ncp) ## --> Warning ... : not converged in 1000'000 iter pnchisqRC(0.99999989*(df+ncp), df, ncp, verbose = 1) # The same with more output! ERROR on Winbuilder 64bit ## both give '1', but really should give 0 showProc.time() ### Much less extreme df ==> pchisq() *is* fast too df <- 1e9 ncp <- 99 ks <- c(-40, -20, -15, -10, -6:6, 10, 15, 20, 40) ks <- if(doExtras) -200:200 else seq(-200, 200, by=5)# more here for plot twoExp <- -18 # well chosen for this "range" and behavior of pchisq() ## === Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) showProc.time() tit <- substitute(`pchisq*`(list(q == mu(1 + k * 2^TWOEXP), df==DF, ncp==NCP)), list(TWOEXP = twoExp, DF=df, NCP=ncp)) matplot(ks, Pn., type = "l", xlab = quote(k), ylab = "pchisq*(q, ..)", main = tit) cat("'Error' (difference to pchisq(*)):\n") dP <- Pn.[,-1] - Pn.[,1] print(cbind(ks, q=(1+ks*2^twoExp)*df, pchisq=Pn.[,1], dP), digits = 4) matplot(ks, dP, type = "l", xlab = quote(k), main = paste("Difference", tit," - pchisq(..)")) abline(h=0, lty=3) ## the difference to all 5 approx. is almost *IDENTICAL* ## ==> are the approximations all more accurate than pchisq() here ? options(op) # revert summary(warnings()) ## Look at "smoothness" via first differences: matplot(ks[-1], diff(Pn.), type = "l", xlab = quote(k)) abline(h=0, lty=3) nk <- length(ks) matplot(ks[-c(1,nk)], diff(Pn., differences= 2), type = "l", xlab = quote(k)) abline(h=0, lty=3) matplot(ks[-c(1:2,nk)], diff(Pn., differences= 3), type = "l", xlab = quote(k)) abline(h=0, lty=3) ##---> start seeing noise ## Here, we see a LOT of noise : only in the first curve == pchisq() ! matplot(ks[-c(1:2,nk-1L,nk)], diff(Pn., differences= 4), type = "l", xlab = quote(k)) abline(h=0, lty=3) ## And zooming in to "zero" : log |.| scale matplot(ks[-c(1:2,nk-1L,nk)], abs(diff(Pn., differences= 4)), log = "y", type = "l", xlab = quote(k)) showProc.time() ##=== L 2. large ncp, df/ncp << 1 ==================== pchiTit <- function(twoE, df, ncp, fN = "pchisq*", xtr = "", ncN = "ncp") { substitute(FUN(list(q == mu(1 + k* 2^TWO_E)*XTR, mu == {nu+lambda == DF+NCP}, df == DF, NCN == NCP)), list(FUN = fN, TWO_E = twoE, XTR=xtr, DF=df, NCN=ncN, NCP=ncp)) ## sprintf("%s(q = μ(1 + k* 2^%g)%s, µ = ν+λ = df+ncp; df=%g, %s=%g)", ## fN, twoE, xtr, df, ncN, ncp) } pchiTit.1 <- function(twoE, df, ncp) pchiTit(twoE, df, ncp, fN = "pchi*", xtr = " - pchi.1") pchiTit.n.d <- function(twoE, df, ncp) pchiTit(twoE, df, ncp=ncp/df, ncN="ncp/df") ks <- c(-40, -20, -15, -10, -6:6, 10, 15, 20, 40) if(okR_Lrg) { ## R <= 3.6.1 gave an (almost ?) infinite loop here !! ncp <- 1e20; df <- 99 twoExp <- -35 ## === system.time(suppressWarnings( Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) )) ## ~ 0.3 print(Pn., digits=3) # ">>" pcPolKuz is *NOT* for this; R's pchisq is full wrong; other "coincide" ## pchisq pcAbdelA pcBolKuz pcPatnaik pcPearson pcSanka_d ## 0 2.93e-09 1 2.93e-09 2.93e-09 2.93e-09 ## 0 1.80e-03 1 1.80e-03 1.80e-03 1.80e-03 ## 0 1.45e-02 1 1.45e-02 1.45e-02 1.45e-02 ## 0 7.28e-02 1 7.28e-02 7.28e-02 7.28e-02 ## 0 1.91e-01 1 1.91e-01 1.91e-01 1.91e-01 ## 0 2.33e-01 1 2.33e-01 2.33e-01 2.33e-01 ## 0 2.80e-01 1 2.80e-01 2.80e-01 2.80e-01 ## 0 3.31e-01 1 3.31e-01 3.31e-01 3.31e-01 ## 0 3.86e-01 1 3.86e-01 3.86e-01 3.86e-01 ## 0 4.42e-01 1 4.42e-01 4.42e-01 4.42e-01 ## 0 5.00e-01 1 5.00e-01 5.00e-01 5.00e-01 ## 0 5.58e-01 1 5.58e-01 5.58e-01 5.58e-01 ## 0 6.14e-01 1 6.14e-01 6.14e-01 6.14e-01 ## 0 6.69e-01 1 6.69e-01 6.69e-01 6.69e-01 ## 0 7.20e-01 1 7.20e-01 7.20e-01 7.20e-01 ## 0 7.67e-01 1 7.67e-01 7.67e-01 7.67e-01 ## 0 8.09e-01 1 8.09e-01 8.09e-01 8.09e-01 ## 0 9.27e-01 1 9.27e-01 9.27e-01 9.27e-01 ## 0 9.85e-01 1 9.85e-01 9.85e-01 9.85e-01 ## 0 9.98e-01 1 9.98e-01 9.98e-01 9.98e-01 ## 1 1.00e+00 1 1.00e+00 1.00e+00 1.00e+00 matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main= pchiTit(twoExp,df,ncp)) if(doExtras) { ## less extreme, same phenomenom: ncp <- 1e9; df <- 99 ; twoExp <- -17 Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) print(Pn., digits=3) # ">>" pcPolKuz is *NOT* for this; R's pchisq is full wrong; other "coincide" matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main= pchiTit(twoExp,df,ncp)) ## less extreme, same phenomenom: still ncp <- 1e7; df <- 99 ; twoExp <- -14 Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) print(Pn., digits=3) # ">>" pcPolKuz is *NOT* for this; R's pchisq is full wrong; other "coincide" matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main= pchiTit(twoExp,df,ncp)) ## even less extreme, same phenomenom: still ncp <- 4e6; df <- 99 ; twoExp <- -13 ## --- Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) print(Pn., digits=3) # pchisq = Pearson = Sanka_d ~~ AbdelA, Patnaik matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main= pchiTit(twoExp,df,ncp)) } # only if(.X.) } # only if(okR..) showProc.time() ## Here pchisq() seems "perfect" ncp <- 1e6; df <- 99 ; twoExp <- -12 ## --- Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) print(Pn., digits=3) # pchisq = Pearson = Sanka_d ~~ AbdelA, Patnaik matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main= pchiTit(twoExp,df,ncp)) kk <- seq(min(ks), max(ks), length.out=401) qs <- (1 + kk * 2^twoExp)*(df+ncp) fq <- dchisq(qs, df, ncp) par(new=TRUE) plot(kk, fq, type="l", col=adjustcolor(2, 1/3), lwd=3, lty=3, axes=FALSE, ann=FALSE) showProc.time() ##=== df=1 and df=3 ==== here we have exact formula ! ================== ## 10'000 : "too small" for asymptotic approx: ncp <- 10000; df <- 1 ; twoExp <- -7 Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) print(Pn., digits=3) # ">>" pcPolKuz is *NOT* for this; R's pchisq is full wrong; other "coincide" matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main = pchiTit.1(twoExp,df,ncp)) ## now absolute *difference* to true pchi1sq() : print(Pn.[,-c(2,4)]-Pn.[,2], digits=3) matplot(ks, Pn.[,-c(2,4)]-Pn.[,2], type = "b", xlab = quote(k), ylab = "pchisq*(q, ..) - pchi1sq()", main = pchiTit.1(twoExp,df,ncp)) legend("topright", colnames(Pn.)[-c(2,4)], lty=1:5, col=1:5, bty="n") j.dr <- 2:5 # drop matplot(ks, Pn.[,-j.dr]-Pn.[,2], type = "b", xlab = quote(k), ylab = "pchisq*(q, ..) - pchi1sq()", main = pchiTit.1(twoExp,df,ncp)) legend("topright", colnames(Pn.)[-j.dr], lty=1:5, col=1:5, bty="n") j.d2 <- 2:6 # drop matplot(ks, Pn.[,-j.d2]-Pn.[,2], type = "b", xlab = quote(k), ylab = "pchisq*(q, ..) - pchi1sq()", main = pchiTit.1(twoExp,df,ncp)) legend("topright", colnames(Pn.)[-j.d2], lty=1:5, col=1:5, bty="n") showProc.time()# -- ## 1e6 : ??? ncp <- 1e6; df <- 1 ; twoExp <- -13 Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) showProc.time() print(Pn., digits=3) # ">>" pcPolKuz is *NOT* for this; R's pchisq is full wrong; other "coincide" matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main = pchiTit.1(twoExp,df,ncp)) ## now absolute *difference* to true pchi1sq() : print(Pn.[,-c(2,4)]-Pn.[,2], digits=3) matplot(ks, Pn.[,-c(2,4)]-Pn.[,2], type = "b", xlab = quote(k), ylab = "pchisq*(q, ..) - pchi1sq()", main = pchiTit.1(twoExp,df,ncp)) legend("topright", colnames(Pn.)[-c(2,4)], lty=1:5, col=1:5, bty="n") j.dr <- 2:5 # drop matplot(ks, Pn.[,-j.dr]-Pn.[,2], type = "b", xlab = quote(k), ylab = "pchisq*(q, ..) - pchi1sq()", main = pchiTit.1(twoExp,df,ncp)) legend("topright", colnames(Pn.)[-j.dr], lty=1:5, col=1:5, bty="n") ## Convincing: here, Sanka_d is *better* than R's pchisq() ! j.d2 <- 2:6 # drop matplot(ks, Pn.[,-j.d2]-Pn.[,2], type = "b", xlab = quote(k), ylab = "pchisq*(q, ..) - pchi1sq()", main = pchiTit.1(twoExp,df,ncp)) abline(h=0, lty=3, col=adjustcolor(1, 1/4)) legend("topright", colnames(Pn.)[-j.d2], lty=1:5, col=1:5, bty="n") showProc.time() if(okR_Lrg) { ## R <= 3.6.1 gave an (almost ?) infinite loop here !! ## vvvv ncp <- 1e9; df <- 3 ; twoExp <- -17 Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) print(Pn., digits=3) # ">>" pcPolKuz is *NOT* for this; R's pchisq is full wrong; other "coincide" matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main = pchiTit(twoExp,df,ncp)) showProc.time() } # only if(okR..) ##=== L 3. BOTH large ncp, large df ==================== if(okR_Lrg) { ## R <= 3.6.1 gave an (almost ?) infinite loop here !! df <- 1e9; ncp <- 1 * df ; twoExp <- -17 Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) print(Pn., digits=3) # ">>" pcPolKuz is *NOT* for this; R's pchisq is full wrong; other "coincide" matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main = pchiTit.n.d(twoExp,df,ncp)) if(doExtras) { ## because it's a bit costly here : df <- 1e8; ncp <- 2 * df ; twoExp <- -16 Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) print(Pn., digits=3) # ">>" pcPolKuz is *NOT* for this; R's pchisq is full wrong; other "coincide" matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main = pchiTit.n.d(twoExp,df,ncp)) df <- 1e7; ncp <- 1/2 * df ; twoExp <- -14 Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) print(Pn., digits=3) # ">>" pcPolKuz is *NOT* for this; R's pchisq is full wrong; other "coincide" matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main = pchiTit.n.d(twoExp,df,ncp)) ## pnchisq still "broken"; others good df <- 1e6; ncp <- 10 * df ; twoExp <- -14 Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) print(Pn., digits=3) # ">>" pcPolKuz is *NOT* for this; R's pchisq is full wrong; other "coincide" matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main = pchiTit.n.d(twoExp,df,ncp)) ## pchisq *NON*-monotone !! df <- 4e6; ncp <- .5 * df ; twoExp <- -14 Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) print(Pn., digits=3) # ">>" pcPolKuz is *NOT* for this; R's pchisq is full wrong; other "coincide" matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main = pchiTit.n.d(twoExp,df,ncp)) } # only if(.X.) showProc.time() ## pchisq ok df <- 2e6; ncp <- .5 * df ; twoExp <- -14 Pn. <- mkPnch(ks, df=df, ncp=ncp, twoExp=twoExp) print(Pn., digits=3) # ">>" pcPolKuz is *NOT* for this; R's pchisq is full wrong; other "coincide" matplot(ks, Pn., type = "b", xlab = quote(k), ylab = "pchisq*(q, ..)", main = pchiTit.n.d(twoExp,df,ncp)) } # only if(okR..) showProc.time() ### Part 3 : qchisq (non-central!) ### ------------------------------- if(!dev.interactive(orNone=TRUE)) { dev.off(); pdf("chisq-nonc-3.pdf") } ### Bug 875 {see also ~/R/r-devel/R/tests/d-p-q-r-tests.R (q49.7 <- qchisq(0.025, 31, ncp=1, lower.tail=FALSE))## now ok: 49.7766 pb <- pchisq(q49.7, 31, ncp=1, lower.tail=FALSE) all.equal(pb, 0.025, tol=0) # 2.058e-13 [Lnx 64b]; 2.0609e-13 [Win 32b] stopifnot(all.equal(pb, 0.025, tol= 1e-12)) ## Ensuing things I tried : x <- seq(0, 20, len = 101) plot(x, pnc <- pchisq(x, 5, ncp = 1.1), type = 'l', col = 'red') xx <- qchisq(pnc, 5, ncp = 1.1) stopifnot(all.equal(x, xx))#TRUE all.equal(x, xx, tol = 0) # 1.9012e-13, later 1.835842e-14 (Linux) plot(x, pncR <- pchisq(x, 5, ncp = 1.1, lower = FALSE), type = 'l', col = 'red') (pnc + pncR) - 1 stopifnot(all.equal(pnc + pncR, rep(1, length(pnc)))) xx0 <- qchisq(pncR, 5, ncp = 1.1, lower = FALSE) all.equal( x, xx0, tol = 0) # 1.877586e-13; then 1.8364e-14 all.equal(xx, xx0, tol = 0) # 5.942721e-13; then 6.2907e-13, 6.2172e-14 stopifnot(all.equal(x, xx0)) plot(x, LpncR <- pchisq(x, 5, ncp = 1.1, lower = FALSE, log = TRUE), type = 'l', col = 'red') Lxx0 <- qchisq(LpncR, 5, ncp = 1.1, lower = FALSE, log = TRUE) all.equal(x, Lxx0, tol = 0)# 1.8775..e-13; 1.8364e-14 all.equal(log(pncR), LpncR, tol = 0)# 0, now 2.246e-16 all.equal(log(1 - pnc), LpncR, tol = 0)# 4.661586e-16; now 2.246e-16 all.equal(log1p(- pnc), LpncR, tol = 0)# 4.626185e-16; now TRUE showProc.time() ## source("/u/maechler/R/MM/NUMERICS/dpq-functions/qnchisq.R")#-> qnchisq.appr*() ## The values from Johnson et al (1995), Table 29.2, p.464 p. <- c(0.95, 0.05) nu. <- c(2,4,7) lam <- c(1,4,16,25) str(pars <- expand.grid(ncp=lam, df=nu., p= p., KEEP.OUT.ATTRS=FALSE)[,3:1]) ## 'data.frame': 24 obs. of 3 variables: ## $ p : num 0.95 0.95 0.95 0.95 0.95 0.95 0.95 0.95 0.95 0.95 ... ## $ df : num 2 2 2 2 4 4 4 4 7 7 ... ## $ ncp: num 1 4 16 25 1 4 16 25 1 4 ... qch <- with(pars, qchisq(p=p, df=df, ncp=ncp)) p.q <- with(pars, pchisq(qch, df=df, ncp=ncp)) cbind(pars, qch, p.q, relE = signif(1 - p.q / pars$p, 4)) ## very accurate ## p df ncp qch p.q relE ## 1 0.95 2 1 8.6422039 0.95 3.331e-16 ## 2 0.95 2 4 14.6402116 0.95 3.442e-15 ## 3 0.95 2 16 33.0542126 0.95 -8.882e-15 ## 4 0.95 2 25 45.3082281 0.95 2.887e-15 ## 5 0.95 4 1 11.7072278 0.95 5.662e-15 ## 6 0.95 4 4 17.3093229 0.95 5.995e-15 ## 7 0.95 4 16 35.4270110 0.95 -1.199e-14 ## 8 0.95 4 25 47.6127674 0.95 8.771e-15 ## 9 0.95 7 1 16.0039003 0.95 -5.551e-15 ## 10 0.95 7 4 21.2280338 0.95 6.661e-15 ## 11 0.95 7 16 38.9700904 0.95 1.110e-14 ## 12 0.95 7 25 51.0605938 0.95 -1.488e-14 ## 13 0.05 2 1 0.1683911 0.05 -2.398e-14 ## 14 0.05 2 4 0.6455990 0.05 3.020e-14 ## 15 0.05 2 16 6.3216416 0.05 5.673e-14 ## 16 0.05 2 25 12.0802051 0.05 -1.477e-13 ## 17 0.05 4 1 0.9087447 0.05 5.385e-14 ## 18 0.05 4 4 1.7650116 0.05 6.106e-14 ## 19 0.05 4 16 7.8843284 0.05 -7.105e-15 ## 20 0.05 4 25 13.7329249 0.05 8.271e-14 ## 21 0.05 7 1 2.4937057 0.05 -6.128e-14 ## 22 0.05 7 4 3.6642526 0.05 -2.420e-14 ## 23 0.05 7 16 10.2573190 0.05 1.066e-14 ## 24 0.05 7 25 16.2267524 0.05 3.775e-14 all.equal(pars$p, p.q, tol=0)# Lnx 64b: 9.2987e-15; Win 32b: 9.25e-15 stopifnot(all.equal(pars$p, p.q, tol=1e-14)) showProc.time() ## now works fine : str(n.s3 <- newton(1, G= function(x,...) x^2 -3 , g = function(x,...) 2*x, eps = 8e-16)) with(n.s3, stopifnot(converged, all.equal(x^2, 3, tol = 1e-15))) ### New comparison -- particularly for right tail: ## upper tail "1 - p" p.qappr <- function(p, df, ncp, main = NULL, kind = c("raw", "diff", "abs.Err", "rel.Err"), nF = NULL, do.title= is.null(main), do.legend = TRUE, ylim.range = 0.4, ...) { ## Purpose: Plot comparison of different qchisq() approximations ## ---------------------------------------------------------------------- ## Arguments: ## ---------------------------------------------------------------------- ## Author: Dr. Martin Maechler, Date: 27 Feb 2004, 18:19 kind <- match.arg(kind) d.arg <- (l.d <- length(df)) > 1 n.arg <- (l.n <- length(ncp)) > 1 p.arg <- (l.p <- length(p)) > 1 if((p.arg && (d.arg || n.arg)) || (d.arg && n.arg)) stop("only one of the three argument should have length > 1") if(!(d.arg || n.arg || p.arg)) p.arg <- TRUE n <- max(l.d, l.n, l.p) Fns <- c("qchisq", "qnchisqPearson", "qchisqApprCF1", "qchisqApprCF2", "qnchisqPatnaik", "qchisqCappr.2", "qchisqAppr.0", "qchisqAppr.1", "qchisqAppr.2", "qchisqAppr.3" ) if(is.null(nF)) nF <- length(Fns) else if(is.numeric(nF) & nF > 1) Fns <- Fns[1:nF] else stop("invalid 'nF' argument") qmat <- matrix(NA, n, length(Fns), dimnames=list(NULL,Fns)) for(i.f in 1:nF) { fn <- Fns[i.f] F <- get(fn) qmat[,i.f] <- do.call(F, list(p=p, df=df, ncp=ncp, lower.tail=FALSE)) } cols <- 1:nF lwds <- c(2, rep(1,nF-1)) ltys <- rep(1:3, length = nF) if(kind != "raw") { cols <- cols[-1] lwds <- lwds[-1] ltys <- ltys[-1] Fns <- Fns[-1] ## Inf - Inf = 0 in the following : "%-%" <- function(x,y) ifelse(is.infinite(x) & is.infinite(y) & x==y, 0, x-y) qm <- qmat[,-1, drop=FALSE] %-% qmat[,1] if(kind != "diff") { qm <- abs(qm) if(kind == "rel.Err") qm <- qm / abs(qmat[,1]) } yl <- rrange(qm, r = ylim.range) } else { qm <- qmat yl <- range(qmat[,"qchisq"], rrange(qmat, r = ylim.range)) } if(do.title && is.null(main)) main <- deparse(match.call()) matplot(if(p.arg) p else if(d.arg) df else ncp, qm, type = 'l', xlab = if(p.arg) "1 - p" else if(d.arg) "df" else "ncp", ylim = yl, main = main, col = cols, lwd= lwds, lty= ltys, ...) if(do.title) mtext("different approximations to qchisq()", line = 0.4, cex = 1.25) ## drop "qn?chisq" from names, since have it above: if(do.legend) { Fns <- sub("^qn?chisq.","*",Fns) pu <- par("usr") legend(par("xaxp")[2], par("yaxp")[2], Fns, xjust = 1.02, yjust = 1.02, ncol = 3, col = cols, lwd=lwds, lty= ltys) } invisible(qmat) } ## end{ p.qappr() } pU <- seq(.5, 1, length= 201) pU <- seq( 0, 1, length= 501)[-c(1,501)] ## (I've lost the original 'pU' I had used ...) mystats <- function(x) c(M=mean(x), quantile(x)) sum.qappr <- function(r) { m <- t(apply(abs(r[,-1] - r[,1]), 2,mystats)) m[order(m[,"50%"]),] } op <- options(digits = 6, width = 110)# warn: immediate .. showProc.time() sum.qappr(p.qappr (pU, df= 1, ncp= 1)) sum.qappr(r <- p.qappr (pU, df=10, ncp= 10)) ## just different pictures: p.qappr (pU, df=10, ncp= 10, kind = "diff", ylim.r = 1) p.qappr (pU, df=10, ncp= 10, kind = "abs", ylim.r = 0.01) p.qappr (pU, df=10, ncp= 10, kind = "rel", log = 'y') showProc.time() sum.qappr(p.qappr (pU, df= 1, ncp= 10)) sum.qappr(p.qappr (pU, df= 1, ncp= 10, kind="rel")) showProc.time() if(doExtras) ## this takes CPU ! sum.qappr(p.qappr (pU, df= 10, ncp= 1e4, kind="rel")) showProc.time() # 2.9 sec ##--> CF2, Pea, CF1 and Patn are the four best ones overall ## --- --- --- ---- ### Now look at upper tail only: ----- even a more clear picture summary(pU <- 2^-seq(7,40, length=200)) sum.qappr(r <- p.qappr (pU, df= 1, ncp= 10)) p.qappr (pU, df= 1, ncp= 10, kind="rel") sum.qappr(r <- p.qappr (pU, df= 1, ncp= 100)) ## very small ncp: sum.qappr(r <- p.qappr (pU, df= 1, ncp= .01)) p.qappr (pU, df= 1, ncp= .01, kind="dif", log="x", nF =6) p.qappr (pU, df= 1, ncp= .01, kind="rel", log="xy",nF =6) # here, CF2 is "off" and the top is "Cappr.2", "Pea", "Patn" ("CF1", "appr.3") sum.qappr(r <- p.qappr (pU, df= 10, ncp= .01)) # shows noise in qchisq() itself !? p.qappr (pU, df= 10, ncp= .01, kind="rel", log="xy") # "CF2" +-ok; top is "Cappr.2", "Pea", "Patn" ("CF2", "appr.3") sum.qappr(r <- p.qappr (pU, df= 100, ncp= .01)) # shows noise in qchisq() itself !!! p.qappr (pU, df= 100, ncp= .01, kind="rel", log="xy") showProc.time() ## even smaller ncp: sum.qappr(r <- p.qappr (pU, df= 100, ncp= .001)) # shows noise in qchisq() itself !!! p.qappr (pU, df= 100, ncp= .001, kind="rel", log="xy") sum.qappr(r <- p.qappr (pU, df= 1, ncp= .1)) p.qappr (pU, df= 1, ncp= .1, kind="rel", log="y") summary(warnings()) showProc.time() sum.qappr(r <- p.qappr (pU, df= 20, ncp= 200)) p.qappr (pU, df= 20, ncp= 200, kind='rel', log='y') sum.qappr(r <- p.qappr (pU, df= .1, ncp= 500)) ## drop the appr. : they are bad p.qappr (pU, df= .1, ncp= 500, kind='dif', nF = 6) p.qappr (pU, df= .1, ncp= 500, kind='rel', nF = 6, log='y') sum.qappr(r <- p.qappr (pU, df= .1, ncp= 500, kind='dif', nF = 6)) p.qappr (pU, df= .1, ncp= 500, kind='rel', log='y', nF = 6) # order: CF2, CF1, Pea, Patn showProc.time() ### Very large ncp, df --- Sankaran_d and Pearson had failed ! op <- options(warn = 1)# immediate .. pp <- c(.001, .005, .01, .05, (1:9)/10, .95, .99, .995, .999) for(DF in 10^c(50, 100,150,200, 250, 300)) stopifnot(exprs = { qnchisqPearson (pp, df=DF, ncp=100) == DF qnchisqSankaran_d(pp, df=DF, ncp=100) == DF qnchisqPatnaik (pp, df=DF, ncp=100) == DF }) qtol <- if(doExtras) 3e-16 else 1e-15 ## Both large df & large ncp for(NCP in 10^c(50, 100,150,200, 250, 300)) stopifnot(exprs = { abs(1 - qnchisqPearson (pp, df=2*NCP, ncp=NCP) / (3*NCP)) < qtol abs(1 - qnchisqSankaran_d(pp, df=2*NCP, ncp=NCP) / (3*NCP)) < qtol abs(1 - qnchisqPatnaik (pp, df=2*NCP, ncp=NCP) / (3*NCP)) < qtol abs(1 - qnchisqPearson (pp, df=NCP, ncp=NCP) / (2*NCP)) < qtol abs(1 - qnchisqSankaran_d(pp, df=NCP, ncp=NCP) / (2*NCP)) < qtol abs(1 - qnchisqPatnaik (pp, df=NCP, ncp=NCP) / (2*NCP)) < qtol abs(1 - qnchisqPearson (pp, df=NCP/2, ncp=NCP) / (1.5*NCP)) < qtol abs(1 - qnchisqSankaran_d(pp, df=NCP/2, ncp=NCP) / (1.5*NCP)) < qtol abs(1 - qnchisqPatnaik (pp, df=NCP/2, ncp=NCP) / (1.5*NCP)) < qtol }) showProc.time() options(op) # revert DF <- 1e200 if(FALSE)## BUG (2019-08-31): system.time( qch <- qchisq(pp, df=DF, ncp=100) )## gives these warnings (immediately "warn = 1"), and then takes "forever" !! ## Warning in qchisq(pp, df = DF, ncp = 100) : ## pnchisq(x=1e+200, ..): not converged in 10000 iter. ## Warning in qchisq(pp, df = DF, ncp = 100) : ## pnchisq(x=1e+200, ..): not converged in 10000 iter. ## "forever": Timing stopped at: 3871 0.184 3878 > 1 hour showProc.time()