R Under development (unstable) (2024-08-21 r87038 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> 
> #### Experiments about  accurate computation of  log4p1p(p) := log(4*p*(1-p))
> #### ------------------------------------------------------------------------
> #### ../R/norm_f.R  uses this in some qnorm() approximations (around p~=0)
> 
> require(DPQ) # log1mexp
Loading required package: DPQ
> if(!dev.interactive(orNone=TRUE)) pdf("log4p1p-exp.pdf")
> 
> ### log.p = TRUE: --------------------------------------------------
> lp <- seq(-800, -1/4, by=1/4)
> if(FALSE) ## no difference (between first two):
+     lp <- seq(-1e5, -1, by=100)
> c2 <- adjustcolor(2, 1/2)
> c3 <- adjustcolor(3, 1/2)
> plot(lp, log(4) + lp + log1mexp(-lp), type="l", lty=5)
> ## should be equivalent {as it always takes first branch in log1mexp()} -- but  faster
> lines(lp, log(-4*expm1(lp)) + lp, type="l", col=4, lty=3, lwd=4)
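> ## both above are better for lp < ~-700 than these two (which seem equivalent in the plot);
> ## NB: the first one lacks parentheses: 4*exp(lp)*1-exp(lp) is 3*exp(lp), *not* 4*p*(1-p):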
> lines(lp, log(4*exp(lp)*1-exp(lp)),  col=c2, lwd=3)
> lines(lp, log(4*exp(lp)*-expm1(lp)), col=c3, lwd=5)
> 
> ### log.p = FALSE -----------------------------------------------------------------------------
> p <- seq(0,1, by=2^-10)
> matplot(p, cbind(log(4*p*(1-p)), log(4)+log(p)+log1p(-p), log1p(-4*(p-1/2)^2)), type="l")
> abline(h=0,col="gray")
> ## relerr: problem only at p ~ 1/2
> matplot(p, cbind(log(4*p*(1-p)), log(4)+log(p)+log1p(-p))/log1p(-4*(p-1/2)^2) -1, type="l")
> 
> if(!require("Rmpfr")) quit("no")
Loading required package: Rmpfr
Loading required package: gmp

Attaching package: 'gmp'

The following objects are masked from 'package:base':

    %*%, apply, crossprod, matrix, tcrossprod

C code of R package 'Rmpfr': GMP using 64 bits per limb


Attaching package: 'Rmpfr'

The following object is masked from 'package:gmp':

    outer

The following object is masked from 'package:DPQ':

    log1mexp

The following objects are masked from 'package:stats':

    dbinom, dgamma, dnbinom, dnorm, dpois, dt, pnorm

The following objects are masked from 'package:base':

    cbind, pmax, pmin, rbind

> #---===============---====------------ Rmpfr needed from here -----------------------------
> 
> require("sfsmisc")# (*is* in strong dependencies of DPQ), for relErrV()
Loading required package: sfsmisc

Attaching package: 'sfsmisc'

The following objects are masked from 'package:gmp':

    factorize, is.whole

> 
> ### log.p = TRUE: --------------------------------------------------
> lpM <- mpfr(lp, 128) # (lp are exact)
> reM <- relErrV(target = log(-4*expm1(lpM)) + lpM,
+                current= cbind(log(4) + lp + log1mexp(-lp),
+                               log(-4*expm1(lp)) + lp,
+                               log(4*exp(lp)*-expm1(lp)),
+                               log(4*exp(lp)*1-exp(lp))))
> 
> matplot(lp, asNumeric(reM), type="l") # last one is catastrophe
> apply(abs(asNumeric(reM)), 2, summary)# the 3d one also under/overflows
                [,1]         [,2]         [,3]         [,4]
Min.    2.091899e-18 2.091899e-18 2.091899e-18 2.992614e-05
1st Qu. 6.194077e-18 6.194077e-18 7.202142e-18 5.293857e-04
Median  9.228916e-18 9.228916e-18 1.164250e-17 8.369209e-04
Mean    2.303258e-17 2.199923e-17          Inf          Inf
3rd Qu. 1.789898e-17 1.789898e-17 2.923046e-17 2.000555e-03
Max.    1.921179e-14 1.708064e-14          Inf          Inf
> matplot(lp, asNumeric(reM[,1:3]), type="l") # 3rd one (green) gets bad before overflow
> matplot(lp, asNumeric(reM[,1:2]), type="l") #
> ## where do the first two differ?  look closer to lp ~ 0, i.e., p ~ 1:
> lp <- seq(-4, -1/128, by=1/128)# <- at first ==> see it's around ln(0.5) = -0.6931472
> 
> lp <- -(44500:46500)*2^-16
> lpM <- mpfr(lp, 128) # (lp are exact)
> reM <- asNumeric(relErrV(target = log(-4*expm1(lpM)) + lpM,
+                          current= cbind(log(4) + lp + log1mexp(-lp)
+                                       , log(-4*expm1(lp)) + lp
+                                       # , log(4*exp(lp)*-expm1(lp))
+                                         )))
> matplot(lp, reM, type="l") # a "spike" around  log(p) = log(1/2) = -0.693147...
> 
> apply(abs(reM), 2, summary)
                [,1]         [,2]
Min.    2.796616e-15 3.998320e-16
1st Qu. 2.955857e-13 3.359716e-13
Median  6.954352e-13 8.272668e-13
Mean    2.817835e-08 1.094477e-09
3rd Qu. 2.709611e-12 3.165739e-12
Max.    5.568359e-05 1.285430e-06
> matplot(lp, abs(reM), type="l", log="y") #--> 2nd is slightly better!
> 
> ## smooth on log scale, weight large errors  and re-transform:
> psmspl <- function(x,y, ...) exp(predict(smooth.spline(x, y=log(y), w=sqrt(y), ...), x)$y)
> smE <- apply(abs(reM), 2, psmspl, x=lp, df=28)
> matlines(lp, smE, lwd = 4)
> ## -- the 2nd is slightly better in the worst region, i.e., close to lp = log(1/2) = -ln(2)
> 
> 
> 
> ## log.p = FALSE -----------------------------------------------------------------------------
> ## =============
> ## see above
> p <- seq(0,1, by=2^-10)
> ## matplot(p, cbind(log(4*p*(1-p)), ........)
> pM <- mpfr(p, 128)
> reM.<- relErrV(target = log(4*pM*(1-pM)),
+                current= cbind(simp = log(4*p*(1-p)),     sumL = log(4)+log(p)+log1p(-p),
+                               s2log= log(4*p)+log1p(-p), lg1p = log1p(-4*(p-1/2)^2)))
> matplot(p, abs(asNumeric(reM.)), type="l")
> ## --> *spike* at  p=1/2, the 2nd is *clearly* the worst
> apply(abs(asNumeric(reM.)), 2, summary)
                simp         sumL        s2log         lg1p
Min.    0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
1st Qu. 1.908159e-17 9.184463e-17 4.981183e-17 1.908159e-17
Median  3.872176e-17 2.286791e-16 1.236486e-16 3.872176e-17
Mean    4.017137e-17 6.107214e-14 2.547943e-14 4.017137e-17
3rd Qu. 5.784552e-17 8.281096e-16 5.486280e-16 5.784552e-17
Max.    1.093768e-16 2.425313e-11 4.851490e-12 1.093768e-16
> ##                 simp         sumL        s2log         lg1p
> ## Min.    0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
> ## 1st Qu. 1.908159e-17 9.297852e-17 5.160503e-17 1.908159e-17
> ## Median  3.872176e-17 2.318870e-16 1.269260e-16 3.908697e-17
> ## Mean    4.017137e-17 6.117178e-14 2.551975e-14 4.066526e-17
> ## 3rd Qu. 5.784552e-17 8.281096e-16 5.657260e-16 5.857990e-17
> ## Max.    1.093768e-16 2.425313e-11 4.851490e-12 1.117483e-16
> ##
> ## 'sumL' is the worst, 's2log' is close;
> ## drop "sumL" as worst -- zoom into the others
> matplot(p, abs(asNumeric(reM.)[,-2]), type="l")# again 2nd, i.e. "s2log"
> 
> ## only the 2 good ones,  simp, lg1p :
> matplot(p, abs(asNumeric(reM.)[,c(1,4)]), type="l", log="y")
Warning messages:
1: In xy.coords(x, y, xlabel, ylabel, log = log, recycle = TRUE) :
  6 y values <= 0 omitted from logarithmic plot
2: In xy.coords(x, y, xlabel, ylabel, log) :
  3 y values <= 0 omitted from logarithmic plot
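> ## if you look very carefully (and also by the commented 'summary' table above):
> ## ==> the first -- simple direct formula -- was even very slightly better than the log1p;
> ##     in the summary actually printed above, 'simp' and 'lg1p' agree in all digits shown.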
> 
> 
> proc.time()
   user  system elapsed 
   1.87    0.25    2.10