R Under development (unstable) (2023-11-02 r85465 ucrt) -- "Unsuffered Consequences" Copyright (C) 2023 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > #### Extreme-tail pnorm() and qnorm() approximations > #### =============================================== > ### notably pnormAsymp() and qnormAsymp() > ### ~~~~~~~~~~~~ ~~~~~~~~~~~~ > ## partly inspired by MM's NUMERICS/dpq-functions/qnorm-asymptotic.R > ## and also ../man/pnormAsymp.Rd > ## ../man/qnormAsymp.Rd > > library(DPQ) > library(sfsmisc)# {we import it in DPQ} > > p.ver <- function() mtext(R.version.string, cex=3/4, adj=1) > readUser <- function(i, max.i) { + if(interactive() && i != max.i) { + cat("[Enter] to continue: ") + cat(readLines(stdin(), n=1), "\n") + } + } > # "bisque4" , "coral2" .. > p.epsC <- function(col = "coral2", alpha = 3/4, lwd1 = 3) { + lg <- par("ylog") + mult <- if(lg) c(1/2, 1, 2) else -2:2 # multiplicities + is1 <- abs(mult) == 1 + abline(h = .Machine$double.eps * mult, + col = adjustcolor(col, alpha), + lty = ifelse(is1, 2, 5), + lwd = ifelse(is1, lwd1, 1)) + } > > absP <- function(re) pmax(abs(re), 2e-17) # not zero, so log-scale "shows" it > > > if(!dev.interactive(orNone=TRUE)) pdf("pqnorm-extreme.pdf") > > x <- sort(2 ^ c(seq(3, 10, by=1/16), seq(10.125, 17, by=1/8))) > str(x) num [1:169] 8 8.35 8.72 9.11 9.51 ... > summary(x) Min. 1st Qu. Median Mean 3rd Qu. Max. 8.00 49.35 304.44 9413.56 3444.31 131072.00 > > pn <- pnorm(x, lower.tail=FALSE, log.p=TRUE) > ## look at the asymptotic approximations: > k.s <- 0:5; nks <- paste0("k=", k.s); k. <- setNames(k.s, nks) > pnAsy <- sapply(k., function(k) pnormAsymp(x, k=k, lower.tail=FALSE, log.p=TRUE)) > > relEpnAsy <- apply(pnAsy, 2, relErrV, target = pn) > tit <- "relErrV(pnormAsym(x, k=k, lower.tail=FALSE, log.p=TRUE)\n vs pnorm(x, ..)" > leg.k <- function(x, y=NULL, ...) legend(x,y, legend=paste0("k=",0:5), lwd=2, col=1:6, lty=1:6, bty="n", ...) > matpl <- function(x, y, ...) { + matplot(x,y, type="l", lwd=2, log="x", main=tit, xaxt="n", ...) + eaxis(1, sub10=3); grid() + } > > matpl(x, relEpnAsy, ylim = c(-8,1)*5e-6 ); leg.k("center") > matpl(x, relEpnAsy, ylim = c(-1,1)*1e-8 ); leg.k("topright") > matpl(x, relEpnAsy, ylim = c(-1,1)*1e-10); leg.k("topright") > matpl(x, relEpnAsy, ylim = c(-1,1)*1e-12); leg.k("topright") > matpl(x, relEpnAsy, ylim = c(-1,1)*1e-14); leg.k("topright"); p.epsC() > matpl(x, relEpnAsy, ylim = c(-1,1)*1e-15); leg.k("topright"); p.epsC() > abline(v = 38581.371, col=adjustcolor(4, 1/2), lwd=3) # the Rmpfr maximal value (for "independent accurate pnorm") > > ## They seem to be of opposite sign - mostly (and apart from +/- epsC) > ## *but* k=5 (at the beginning; (and then +/- epsC) > > ## ?proof? --- use Rmpfr-version pnormAsymp() but there, > ## ....... pnorm(x, log.p=TRUE) *underflows* to -Inf as soon as x > 38581.371 > > ## Start with quantiles so we know the "truth according to pnorm()" : > qs <- c(2^seq(0, 35, by=1/256), Inf) # => s >= 1.84 --> no NaN in xs_5 etc > ## == the "true" qnorm() according to pnorm() > > lp <- pnorm(qs, lower.tail=FALSE, log.p=TRUE) > qnp <- qnorm(lp, lower.tail=FALSE, log.p=TRUE) > s <- -lp # = -log(1 - Phi(qs)) > summary(r <- sqrt(s)) Min. 1st Qu. Median Mean 3rd Qu. Max. 1 305 131250 Inf 56546354 Inf > ## store in 'dat' including current "inaccuracy" of qnorm(): > head(dat <- data.frame(l2q=log2(qs), qs, # lp, + s, # r = sqrt(s), t = log(s), + relE_qn = relErrV(qs, qnp))) l2q qs s relE_qn 1 0.00000000 1.000000 1.841022 0.000000e+00 2 0.00390625 1.002711 1.845160 2.220446e-16 3 0.00781250 1.005430 1.849315 0.000000e+00 4 0.01171875 1.008156 1.853487 0.000000e+00 5 0.01562500 1.010889 1.857677 4.440892e-16 6 0.01953125 1.013630 1.861884 4.440892e-16 > ##^^^^^^ will depend much on R version > stopifnot(dat[nrow(dat), "relE_qn"] == 0) > > ## relative Error (log x) > plot(relE_qn ~ s, dat, log="x", type="l", col = 2); p.ver() > > ## |rel.Error| ==> log-log scale > plot(abs(relE_qn) ~ s, dat, log="xy", type="l", col = 2, + axes=FALSE, main = "|relative Error| of qnorm(-s, log.p=TRUE, lower=F) wrt pnorm()") Warning message: In xy.coords(x, y, xlabel, ylabel, log) : 7284 y values <= 0 omitted from logarithmic plot > eaxis(1); eaxis(2); p.ver(); p.epsC() > > ## now look at the asymptotic approximations: > k.s <- 0:5; nks <- paste0("k=", k.s); k. <- setNames(k.s, nks) > qnAsy <- sapply(k., function(ord) qnormAsymp(lp=lp, order=ord)) > stopifnot(identical(qnAsy, + sapply(k., function(ord) qnormAsymp(p=lp, lower.tail=FALSE, log.p=TRUE, order=ord)) + )) > > relEAsy <- apply(qnAsy, 2, relErrV, target = qs) > > > matplot(sqrt(s), relEAsy, type="l", log="x", xlab = quote(r == sqrt(s)), + main = "relative Error of qnormAsymp(.., k=*)"); p.ver() > legend("top", nks, col=1:6, lty=1:5, bty="n") > > mp <- c(-1,1) > xL <- list(NULL, c(2, 1000), c(2, 1e5), c(5, 1e5), c(10, 1e8), c(10, 1e10)) > yL <- list(NULL, mp * 0.1, mp * 1e-4, mp * 1e-7, mp * 1e-11, mp * 2e-15) > stopifnot(length(xL) == length(yL)) > for(j in seq_along(xL)) { + matplot(sqrt(s), relEAsy, type="l", log="x", xaxt="n", lwd=2, + xlab = quote(r == sqrt(s)), + xlim = xL[[j]], ylim = yL[[j]], + main = "relative Error of qnormAsymp(.., k=*)") + eaxis(1, sub10=2) # not here: p.ver() + legend("top", nks, col=1:6, lty=1:5, lwd=2, bty="n") + readUser(j, length(xL)) + } > > for(j in seq_along(xL)) { + if(!is.null(yli <- yL[[j]])) yli <- c(2e-17, 100*yli[2]) + if(!is.null(xli <- xL[[j]])) xli[2] <- 10*xli[2] + cat("j=",j,"; rbind(xlim,ylim) = \n"); print(rbind(xlim=xli, ylim=yli)) + cols <- adjustcolor(1:6, 3/4) + ## abs() does not show 0 {-> -Inf); absP() will show them + matplot(sqrt(s), abs(relEAsy), type="l", log="xy", xaxt="n", yaxt="n", col=cols, lwd=2, + xlab = quote(r == sqrt(s)), + xlim = xli, ylim = yli, + main = "relative Error of qnormAsymp(.., k=*)") + matlines(sqrt(s), absP(relEAsy), col=adjustcolor(cols, 1/2)) + eaxis(1, sub10=2) # not here: p.ver() + eaxis(2, sub10=c(-3,2)) + legend("top", nks, col=cols, lwd=2, lty=1:5, bty="n") + readUser(j, length(xL)) + } j= 1 ; rbind(xlim,ylim) = NULL j= 2 ; rbind(xlim,ylim) = [,1] [,2] xlim 2e+00 10000 ylim 2e-17 10 j= 3 ; rbind(xlim,ylim) = [,1] [,2] xlim 2e+00 1e+06 ylim 2e-17 1e-02 j= 4 ; rbind(xlim,ylim) = [,1] [,2] xlim 5e+00 1e+06 ylim 2e-17 1e-05 j= 5 ; rbind(xlim,ylim) = [,1] [,2] xlim 1e+01 1e+09 ylim 2e-17 1e-09 j= 6 ; rbind(xlim,ylim) = [,1] [,2] xlim 1e+01 1e+11 ylim 2e-17 2e-13 There were 12 warnings (use warnings() to see them) > > ## Real tests : > table(relEAsy[s > 1e17,] * 2^52) -0.5 0 1 71 9434 125 > ## -0.5 0 1 > ## 71 9434 125 > > table(relEAsy[s > 1e16,] * 2^52) -0.5 0 1 2 3 4 5 71 11550 310 136 78 18 17 > ## -0.5 0 1 2 3 4 5 > ## 71 11550 311 135 78 18 17 > > > ## drop k = 0 from now on ------------- The following *is* platform dependent .. let's see > > re1 <- abs(relEAsy[s > 95e6, "k=1"]) > table(re1 * 2^52) 0 0.5 1 5264 83 94 > ## 0 0.5 1 > ## 5264 84 93 > > plot(abs(relEAsy[,"k=2"]) ~ s, subset = s > 1e5 & s < 1e8, log="xy", type="l", xaxt="n") Warning message: In xy.coords(x, y, xlabel, ylabel, log) : 1000 y values <= 0 omitted from logarithmic plot > eaxis(1); p.epsC() > > re2 <- abs(relEAsy[s > 2e5, "k=2"]) > table(re2 * 2^52) 0 0.5 1 6352 84 143 > ## 0 0.5 1 > ## 6353 85 141 > > plot(abs(relEAsy[,"k=3"]) ~ s, subset = s > 500 & s < 1e5, log="xy", type="l", xaxt="n") Warning message: In xy.coords(x, y, xlabel, ylabel, log) : 475 y values <= 0 omitted from logarithmic plot > eaxis(1); p.epsC() > > re3 <- abs(relEAsy[s > 4000, "k=3"]) > table(re3 * 2^52) 0 0.5 1 6952 123 227 > ## 0 0.5 1 > ## 6953 124 225 > > plot(abs(relEAsy[,"k=4"]) ~ s, subset = s > 300 & s < 2000, log="xy", type="l", axes=FALSE) Warning message: In xy.coords(x, y, xlabel, ylabel, log) : 60 y values <= 0 omitted from logarithmic plot > eaxis(1);eaxis(2); p.epsC() > > re4 <- abs(relEAsy[s > 1500, "k=4"]) > table(re4 * 2^52) 0 0.5 1 7152 144 188 > ## 0 0.5 1 > ## 7154 145 185 > > plot(abs(relEAsy[,"k=5"]) ~ s, subset = s > 200 & s < 1000, log="xy", type="l", axes=FALSE) Warning message: In xy.coords(x, y, xlabel, ylabel, log) : 105 y values <= 0 omitted from logarithmic plot > eaxis(1);eaxis(2); p.epsC() > r. <- pretty(sqrt( par("xaxp")[1:2] ), 10) > (rlab <- as.expression(lapply(r., function(rr) substitute(R^2, list(R = rr+0))))) expression(14^2, 16^2, 18^2, 20^2, 22^2, 24^2, 26^2, 28^2, 30^2, 32^2) > axis(3, at=r.^2, labels = rlab, col = 4, col.axis = 4, mgp = c(1.25,.5,0)) > > re5 <- abs(relEAsy[s > 700, "k=5"]) > table(re5 * 2^52) 0 0.5 1 7283 140 202 > ## 0 0.5 1 > ## 7285 141 199 > > stopifnot(exprs = { + abs(relEAsy[s > 1e17,]) <= 2^-52 # even for k=0 + re1 <= 2^-52 + re2 <= 2^-52 + re3 <= 2^-52 + re4 <= 2^-52 + re5 <= 2^-52 + }) > > > ### R code from vignette source '../vignettes/qnorm-asymp.Rnw' ==================== > ### > ### code chunk number 2: qnormLog-compute > ######################################### > qs <- 2^seq( 0, 29, by=1/256) # => s >= 1.84 > lp <- pnorm(qs, lower.tail=FALSE, log.p=TRUE) > s <- -lp # = -pnorm(..) = -log(1 - Phi(qs)) > 0 > ## > qnrm <- qnorm (-s, lower.tail=FALSE, log.p=TRUE) > qnrm405 <- qnormR(-s, lower.tail=FALSE, log.p=TRUE, version= "4.0.x") # R <= 4.0.5 > qnrm410 <- qnormR(-s, lower.tail=FALSE, log.p=TRUE, version= "2020-10-17") > Rver <- shortRversion() > if(getRversion() <= "4.0.5") { # our qnormR(.., version="4.0.x") + cat(sprintf("%s, \"4.0.5\",\n all.equal(*, tol=0): %s; identical(): %s\n", Rver, + all.equal(qnrm, qnrm405, tolerance=0), identical(qnrm, qnrm405))) + stopifnot(all.equal(qnrm, qnrm405, tolerance = 1e-12)) + } else if(getRversion() < "4.3") { # our qnormR(*, version="2020-10-17") matches: + cat(sprintf("%s, \"4.1.0\",\n all.equal(*, tol=0): %s; identical(): %s\n", Rver, + all.equal(qnrm, qnrm410, tolerance=0), identical(qnrm, qnrm410))) + ## see TRUE twice, for R 4.2.2, Linux{x86_64-pc-linux-gnu} *and* Windows{x86_64-w64-mingw32/x64} + # M1mac(aarch64-apple-darwin20, R 4.2.1 ptchd): 2.675587e-16 + stopifnot(all.equal(qnrm, qnrm410, tolerance = 1e-12)) + } else { # R version >= 4.3.x + qnrm43 <- qnormR(-s, lower.tail=FALSE, log.p=TRUE, version = "2022") + cat(sprintf("%s, >= 4.3.x,\n all.equal(*, tol=0): %s; identical(): %s\n", Rver, + all.equal(qnrm, qnrm43, tolerance=0), identical(qnrm, qnrm43))) + rE6 <- qnorm(-1e6, log.p=TRUE)/-1414.2077829910174 - 1 + cat(sprintf(" rE(-1e6) = %g\n", rE6)) + if(abs(rE6) < 7e-16) # have R-devel with new 2022 code: + stopifnot(all.equal(qnrm, qnrm43, tolerance = 1e-14)) + } R devel 2023-11-02 r85465 ucrt, >= 4.3.x, all.equal(*, tol=0): TRUE; identical(): TRUE rE(-1e6) = 0 > > source(system.file("extraR", "qnorm-asymp-utils.R", package="DPQ")) > if(!exists("r0", mode="numeric")) + q("no") > if(!dev.interactive(orNone=TRUE)) { + dev.off() + pdf("pqnorm-qnormAsy2.pdf", height=8*sqrt(2), width=8) # ~ 'A4' + } > > ################################################### > ### code chunk number 27: plot-qnormAsy2 > ################################################### > sfsmisc::mult.fig(5, main = "qnormAsymp(*, k) approximations in the 5 cutpoint regions") > r0 <- c(27, 55, 109, 840, 36000, 6.4e8) # <-- cutoffs <--> in ../R/norm_f.R > # use k = 5 4 3 2 1 0 e.g. k = 0 good for r >= 6.4e8 > for(ir in 2:length(r0)) + p.qnormAsy2(r0[ir], k = 5 +2-ir, cex.main = .90) Around r0 = 55 ; k = 4:5 r pq k=4 k=5 [1,] 27.50000 -756.2500 -1.443290e-15 0 [2,] 27.52015 -757.3587 -1.332268e-15 0 [3,] 27.54030 -758.4683 -1.443290e-15 0 [4,] 27.56045 -759.5786 -1.332268e-15 0 [5,] 27.58061 -760.6898 -1.443290e-15 0 [6,] 27.60076 -761.8018 -1.332268e-15 0 [7,] 27.62091 -762.9146 -1.332268e-15 0 [8,] 27.64106 -764.0282 -1.443290e-15 0 [9,] 27.66121 -765.1426 -1.221245e-15 0 .......... .......... r pq k=4 k=5 [2045,] 68.68955 -4718.254 0.000000e+00 0.000000e+00 [2046,] 68.70970 -4721.022 0.000000e+00 0.000000e+00 [2047,] 68.72985 -4723.792 2.220446e-16 2.220446e-16 [2048,] 68.75000 -4726.562 0.000000e+00 0.000000e+00 Around r0 = 109 ; k = 3:4 r pq k=3 k=4 [1,] 54.50000 -2970.250 1.110223e-15 0 [2,] 54.53994 -2974.605 8.881784e-16 0 [3,] 54.57987 -2978.963 8.881784e-16 0 [4,] 54.61981 -2983.324 8.881784e-16 0 [5,] 54.65975 -2987.688 8.881784e-16 0 [6,] 54.69968 -2992.055 8.881784e-16 0 [7,] 54.73962 -2996.426 8.881784e-16 0 [8,] 54.77956 -3000.800 1.110223e-15 0 [9,] 54.81949 -3005.177 8.881784e-16 0 .......... .......... r pq k=3 k=4 [2045,] 136.1302 -18531.43 0 0 [2046,] 136.1701 -18542.30 0 0 [2047,] 136.2101 -18553.18 0 0 [2048,] 136.2500 -18564.06 0 0 Around r0 = 840 ; k = 2:3 r pq k=2 k=3 [1,] 420.0000 -176400.0 2.220446e-16 0 [2,] 420.3078 -176658.6 2.220446e-16 0 [3,] 420.6155 -176917.4 2.220446e-16 0 [4,] 420.9233 -177176.4 2.220446e-16 0 [5,] 421.2311 -177435.6 0.000000e+00 0 [6,] 421.5388 -177695.0 2.220446e-16 0 [7,] 421.8466 -177954.6 2.220446e-16 0 [8,] 422.1544 -178214.3 2.220446e-16 0 [9,] 422.4621 -178474.3 2.220446e-16 0 .......... .......... r pq k=2 k=3 [2045,] 1049.077 -1100562 0 0 [2046,] 1049.384 -1101208 0 0 [2047,] 1049.692 -1101854 0 0 [2048,] 1050.000 -1102500 0 0 Around r0 = 36000 ; k = 1:2 r pq k=1 k=2 [1,] 18000.00 -324000000 0.000000e+00 0 [2,] 18013.19 -324475015 0.000000e+00 0 [3,] 18026.38 -324950378 0.000000e+00 0 [4,] 18039.57 -325426089 0.000000e+00 0 [5,] 18052.76 -325902149 0.000000e+00 0 [6,] 18065.95 -326378556 -1.110223e-16 0 [7,] 18079.14 -326855311 0.000000e+00 0 [8,] 18092.33 -327332413 0.000000e+00 0 [9,] 18105.52 -327809864 0.000000e+00 0 .......... .......... r pq k=1 k=2 [2045,] 44960.43 -2021440257 0 0 [2046,] 44973.62 -2022626490 0 0 [2047,] 44986.81 -2023813071 0 0 [2048,] 45000.00 -2025000000 0 0 Around r0 = 6.4e+08 ; k = 0:1 r pq k=0 k=1 [1,] 320000000 -1.024000e+17 2.220446e-16 0 [2,] 320234489 -1.025501e+17 2.220446e-16 0 [3,] 320468979 -1.027004e+17 2.220446e-16 0 [4,] 320703468 -1.028507e+17 0.000000e+00 0 [5,] 320937958 -1.030012e+17 2.220446e-16 0 [6,] 321172447 -1.031517e+17 2.220446e-16 0 [7,] 321406937 -1.033024e+17 0.000000e+00 0 [8,] 321641426 -1.034532e+17 0.000000e+00 0 [9,] 321875916 -1.036041e+17 0.000000e+00 0 .......... .......... r pq k=0 k=1 [2045,] 799296532 -6.388749e+17 0 0 [2046,] 799531021 -6.392499e+17 0 0 [2047,] 799765511 -6.396249e+17 0 0 [2048,] 800000000 -6.400000e+17 0 0 > > > proc.time() user system elapsed 4.92 0.29 5.21