R Under development (unstable) (2023-12-07 r85661 ucrt) -- "Unsuffered Consequences" Copyright (C) 2023 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(bbmle) Loading required package: stats4 > old_opt <- options(digits=3) > ## source("../R/dists.R") > ## source("../R/mle.R") > > ## an attempt to sketch out by hand > ## how one would derive an analytic > ## gradient function for a formula-specified > ## likelihood and use it ... > > ## chain rule should be: > > ## deriv(probability distribution)/[prob params] * > ## deriv([prob params])/[model params] * > ## {OPTIONAL} deriv([model params])/[linear model params] > > set.seed(1001) > x <- rbinom(50,size=10,prob=0.4) > suppressWarnings(mle2(x~dbinom(prob=p,size=10),start=list(p=0.3),data=data.frame(x))) Call: mle2(minuslogl = x ~ dbinom(prob = p, size = 10), start = list(p = 0.3), data = data.frame(x)) Coefficients: p 0.396 Log-likelihood: -97.2 > > ## step 1: construct gradient function for simplest example > f <- sbinom(prob=0.1,size=1)$formula > > d1 <- deriv(parse(text=f),"prob",function.arg=TRUE) > > ## step 2: chain rule step #1 > mle2(x~dbinom(prob=plogis(logitp),size=10),start=list(logitp=-1), + data=data.frame(x)) Call: mle2(minuslogl = x ~ dbinom(prob = plogis(logitp), size = 10), start = list(logitp = -1), data = data.frame(x)) Coefficients: logitp -0.422 Log-likelihood: -97.2 > > f <- sbinom(prob=NA,size=NA)$formula > > ## note: plogis is not in derivatives table!! > ## will need to extend by text substitution ... > gsub("plogis(\\([^)]+\\))", + "(1+exp(\\1))^(-1)", + "plogis(logitprob)") [1] "(1+exp((logitprob)))^(-1)" > > f2 <- gsub("plogis(\\([^)]+\\))", + "(1+exp(\\1))^(-1)","plogis(logitp)") > > ## start with a single parameter (ignore 'size') > fun1 <- deriv(parse(text=f),c("prob"),function.arg=TRUE) > fun2 <- deriv(parse(text=f2),"logitp", function.arg=TRUE) > > size <- 10 > a1 <- attr(fun2(logitp=0),"gradient") > a2 <- attr(fun1(prob=plogis(0)),"gradient") > > ## compute gradient by variable and sum > colSums(apply(a1,2,"*",a2)) logitp 52 > ## rep(a1,length(x))*a2 > > > ## eventually we will want to do something tricky to > ## 'memoise' results because optim() requires the > ## objective function and gradient to be computed > ## *separately*. Not worth worrying about this in the > ## first pass! > options(old_opt) > > proc.time() user system elapsed 1.34 0.15 1.48