# test functions

# set the seed and flush the graph before running tests
if (check_tf_version()) {
  tensorflow::tf$compat$v1$reset_default_graph()
}

set.seed(2020 - 02 - 11)

rng_seed <- function() {
  get(".Random.seed", envir = .GlobalEnv)
}

# evaluate a greta_array, node, or tensor
grab <- function(x, dag = NULL) {
  if (inherits(x, "node")) {
    x <- as.greta_array(x)
  }

  if (inherits(x, "greta_array")) {
    node <- get_node(x)
    dag <- dag_class$new(list(x))
    dag$tf_environment$batch_size <- 1L
    node$define_tf(dag)
    x_name <- dag$tf_name(node)
    x <- dag$tf_environment[[x_name]]
  }

  # by this point x is assumed to be a tensor (either passed in directly, or
  # fetched from the dag above); convert it to an R array and drop the batch
  # dimension
  out <- as.array(x)
  drop_first_dim(out)
}

set_distribution <- function(dist, data) {
  # fix the value of dist
  dist_node <- get_node(dist)
  data_node <- get_node(data)
  distrib <- dist_node$distribution
  distrib$value(data_node$value())
  distrib$.fixed_value <- TRUE
  data_node$set_distribution(distrib)
  data_node$register()
}

# evaluate the (unadjusted) density of distribution greta array at some data
get_density <- function(distrib, data) {
  x <- as_data(data)
  distribution(x) <- distrib

  # create dag and define the density
  dag <- dag_class$new(list(x))
  get_node(x)$distribution$define_tf(dag)

  # get the log density as a vector
  tensor_name <- dag$tf_name(get_node(distrib)$distribution)
  tensor <- get(tensor_name, envir = dag$tf_environment)
  as.vector(grab(tensor, dag))
}

compare_distribution <- function(
  greta_fun,
  r_fun,
  parameters,
  x,
  dim = NULL,
  multivariate = FALSE,
  tolerance = 1e-4
) {
  # calculate the absolute difference in the log density of some data between
  # greta and an R benchmark.
  # 'greta_fun' is the greta distribution constructor function (e.g. normal())
  # 'r_fun' is the R density function, which must have argument 'log'
  # both of these functions must take the same parameters in the same order
  # 'parameters' is an optionally named list of numeric parameter values
  # 'x' is the vector of values at which to evaluate the log density

  # define greta distribution, with fixed values
  greta_log_density <- greta_density(
    greta_fun,
    parameters,
    x,
    dim,
    multivariate
  )

  # get R version
  r_log_density <- log(do.call(r_fun, c(list(x), parameters)))

  # return absolute difference
  compare_op(r_log_density, greta_log_density, tolerance)
}

# evaluate the log density of x, given 'parameters' and a distribution
# constructor function 'fun'
greta_density <- function(
  fun,
  parameters,
  x,
  dim = NULL,
  multivariate = FALSE
) {
  dim <- dim %||% NROW(x)

  # add the output dimension to the arguments list
  dim_list <- list(dim = dim)

  # if it's a multivariate distribution, name it n_realisations
  if (multivariate) {
    names(dim_list) <- "n_realisations"
  }

  # don't add it for wishart & lkj, which don't have multiple realisations
  is_wishart <- identical(names(parameters), c("df", "Sigma"))
  is_lkj <- identical(names(parameters), c("eta", "dimension"))
  if (is_wishart | is_lkj) {
    dim_list <- list()
  }

  parameters <- c(parameters, dim_list)

  # evaluate greta distribution
  dist <- do.call(fun, parameters)
  distrib_node <- get_node(dist)$distribution

  # set density
  x_ <- as.greta_array(x)
  distrib_node$remove_target()
  distrib_node$add_target(get_node(x_))

  # create dag
  dag <- dag_class$new(list(x_))
  dag$tf_environment$batch_size <- 1L
  distrib_node$define_tf(dag)

  # get the log density as a vector
  result <- dag$evaluate_density(distrib_node, get_node(x_))
  as.vector(result)
}
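# example usage of compare_distribution() (a sketch, kept commented so it does
# not run when this helper file is sourced; assumes greta's normal() and
# stats::dnorm() share the mean/sd parameterisation):
# compare_distribution(
#   normal,
#   dnorm,
#   parameters = list(mean = -2, sd = 3),
#   x = rnorm(100, -2, 3)
# )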
# execute a call via greta, swapping the objects named in 'swap' to greta
# arrays, then converting the result back to R. 'swap_scope' tells eval() how
# many environments to go up to get the objects for the swap; 1 would be the
# environment above the function, 2 would be the environment above that, etc.
with_greta <- function(call, swap = c("x"), swap_scope = 1) {
  swap_entries <- paste0(swap, " = as_data(", swap, ")")
  swap_text <- paste0(
    "list(",
    paste(swap_entries, collapse = ", "),
    ")"
  )
  swap_list <- eval(
    parse(text = swap_text),
    envir = parent.frame(n = swap_scope)
  )

  greta_result <- with(
    swap_list,
    eval(call)
  )
  result <- grab(greta_result)

  # account for the fact that greta outputs are 1D arrays; convert them back to
  # R vectors
  if (is.array(result) && length(dim(result)) == 2 && dim(result)[2] == 1) {
    result <- as.vector(result)
  }

  result
}

# check an expression is equivalent when done in R, and when done on greta
# arrays with results ported back to R
# e.g. check_expr(a[1:3], swap = 'a')
check_expr <- function(expr, swap = c("x"), tolerance = 1e-4) {
  call <- substitute(expr)
  r_out <- eval(expr)
  greta_out <- with_greta(
    call,
    swap = swap,
    swap_scope = 2
  )

  compare_op(r_out, greta_out, tolerance)
}

# generate a random string describing a binary operation on two variables:
# apply an op selected from 'ops' to an arg selected from 'args' and to 'init'
add_op_string <- function(
  init = "a",
  args = c("a", "b"),
  ops = c("+", "-", "*", "/")
) {
  op <- sample(ops, 1)
  arg <- sample(args, 1)
  sprintf("(%s %s %s)", arg, op, init)
}

# generate a random function that combines two variables together in a string
# of (n) increasingly bizarre operations
gen_opfun <- function(n, ops) {
  string <- "a"
  for (i in seq_len(n)) {
    string <- add_op_string(string, ops = ops)
  }
  fun_string <- sprintf("function(a, b) {%s}", string)
  eval(parse(text = fun_string))
}

# sample n values from a distribution by HMC, and check they all have the
# correct support; the greta array is defined as a stochastic in the call
sample_distribution <- function(
  greta_array,
  n = 10,
  lower = -Inf,
  upper = Inf,
  warmup = 1
) {
  m <- model(greta_array, precision = "double")
  draws <- mcmc(m, n_samples = n, warmup = warmup, verbose = FALSE)
  samples <- as.matrix(draws)
  vectorised <- length(lower) > 1 | length(upper) > 1

  if (vectorised) {
    above_lower <- sweep(samples, 2, lower, `>=`)
    below_upper <- sweep(samples, 2, upper, `<=`)
  } else {
    above_lower <- samples >= lower
    below_upper <- samples <= upper
  }

  expect_true(all(above_lower & below_upper))
}
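# example usage of check_expr(), comparing subsetting on an R matrix with the
# same expression on a greta array (a sketch; 'x' is illustrative data only):
# x <- matrix(rnorm(12), 4, 3)
# check_expr(x[2:3, ], swap = "x")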
compare_truncated_distribution <- function(
  greta_fun,
  which,
  parameters,
  truncation,
  tolerance = 1e-4
) {
  # calculate the absolute difference in the log density of some data between
  # greta and an R benchmark, for an implied truncated distribution.
  # 'greta_fun' is the greta distribution constructor function, 'which' names
  # the distribution as understood by the truncdist package (e.g. "norm"),
  # 'parameters' is a named list of parameter values, and 'truncation' gives
  # the lower and upper truncation bounds
  x <- do.call(
    truncdist::rtrunc,
    c(
      n = 100,
      spec = which,
      a = truncation[1],
      b = truncation[2],
      parameters
    )
  )

  # create truncated R function and evaluate it
  r_fun <- truncfun(which, parameters, truncation)
  r_log_density <- log(r_fun(x))

  greta_log_density <- greta_density(
    fun = greta_fun,
    parameters = c(parameters, list(truncation = truncation)),
    x = x,
    dim = 1
  )

  # return absolute difference
  compare_op(r_log_density, greta_log_density, tolerance)
}

# use the truncdist package to create a truncated distribution function for
# use in compare_truncated_distribution
truncfun <- function(which = "norm", parameters, truncation) {
  args <- c(
    spec = which,
    a = truncation[1],
    b = truncation[2],
    parameters
  )

  function(x) {
    arg_list <- c(x = list(x), args)
    do.call(truncdist::dtrunc, arg_list)
  }
}

# R distribution functions for the location-scale Student T distribution
dt_ls <- function(x, df, location, scale, log = FALSE) {
  ans <- stats::dt((x - location) / scale, df) / scale
  if (log) {
    ans <- log(ans)
  }
  ans
}

pt_ls <- function(q, df, location, scale, log.p = FALSE) {
  ans <- stats::pt((q - location) / scale, df)
  if (log.p) {
    ans <- log(ans)
  }
  ans
}

qt_ls <- function(p, df, location, scale, log.p = FALSE) {
  ans <- stats::qt(p, df) * scale + location
  if (log.p) {
    ans <- log(ans)
  }
  ans
}

# mock up the progress bar to force its output to stdout for testing
cpb <- eval(parse(text = capture.output(dput(create_progress_bar))))
mock_create_progress_bar <- function(...) {
  cpb(..., stream = stdout())
}

# capture messages in testthat block to get the output of the progress bar;
# copied from progress' test suite:
# https://github.com/r-lib/progress/blob/master/tests/testthat/helper.R
get_output <- function(expr) {
  msgs <- character()
  i <- 0
  suppressMessages(withCallingHandlers(
    expr,
    message = function(e) msgs[[i <<- i + 1]] <<- conditionMessage(e)
  ))
  paste0(msgs, collapse = "")
}

# mock up mcmc progress bar output for neurotic testing
mock_mcmc <- function(n_samples = 1010) {
  pb <- create_progress_bar(
    "sampling",
    c(0, n_samples),
    pb_update = 10,
    width = 50
  )
  iterate_progress_bar(pb, n_samples, rejects = 10, chains = 1)
}

# random lkj draws, code from the rethinking package (can't load the package
# because of Stan / Travis / compiler issues)
rlkjcorr <- function(n, eta = 1, dimension = 2) {
  k <- dimension
  stopifnot(is.numeric(k), k >= 2, k == as.integer(k))
  stopifnot(eta > 0)

  f <- function() {
    alpha <- eta + (k - 2) / 2
    r12 <- 2 * stats::rbeta(1, alpha, alpha) - 1
    r <- matrix(0, k, k)
    r[1, 1] <- 1
    r[1, 2] <- r12
    r[2, 2] <- sqrt(1 - r12^2)
    if (k > 2) {
      for (m in 2:(k - 1)) {
        alpha <- alpha - 0.5
        y <- rbeta(1, m / 2, alpha)
        z <- rnorm(m, 0, 1)
        z <- z / sqrt(crossprod(z)[1])
        r[1:m, m + 1] <- sqrt(y) * z
        r[m + 1, m + 1] <- sqrt(1 - y)
      }
    }
    crossprod(r)
  }

  r <- replicate(n, f())

  if (dim(r)[3] == 1) {
    r <- r[, , 1]
  } else {
    r <- aperm(r, c(3, 1, 2))
  }

  r
}
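# example usage of compare_truncated_distribution() for a truncated normal
# (a sketch; assumes both greta's normal() and truncdist's "norm" use the
# mean/sd parameterisation):
# compare_truncated_distribution(
#   normal,
#   which = "norm",
#   parameters = list(mean = 1, sd = 2),
#   truncation = c(0, Inf)
# )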
# normalising component of lkj (depends only on eta and dimension)
# NOTE
# we may need to find a better way to do the normalising of this / find a
# better reference equation; potentially our implementation here is incorrect.
# This is not impacting IID sampling or MCMC, but is currently causing some
# tests to fail where we are ensuring our densities are accurate
# (in test_distributions.R)
lkj_log_normalising <- function(eta, n) {
  log_pi <- log(pi)
  ans <- 0
  for (k in 1:(n - 1)) {
    ans <- ans + log_pi * (k / 2)
    ans <- ans + lgamma(eta + (n - 1 - k) / 2)
    ans <- ans - lgamma(eta + (n - 1) / 2)
  }
  ans
}

dlkj_correlation_unnormalised <- function(x, eta, log = FALSE, dimension = NULL) {
  res <- (eta - 1) * log(det(x))
  if (!log) {
    res <- exp(res)
  }
  res
}

# lkj density
dlkj_correlation <- function(x, eta, log = FALSE, dimension = NULL) {
  res <- (eta - 1) * log(det(x)) - lkj_log_normalising(eta, ncol(x))
  if (!log) {
    res <- exp(res)
  }
  res
}

# helper RNG functions
rmvnorm <- function(n, mean, Sigma) { # nolint
  mvtnorm::rmvnorm(n = n, mean = mean, sigma = Sigma)
}

rwish <- function(n, df, Sigma) { # nolint
  draws <- stats::rWishart(n = n, df = df, Sigma = Sigma)
  aperm(draws, c(3, 1, 2))
}

rmulti <- function(n, size, prob) {
  draws <- stats::rmultinom(n = n, size = size, prob = prob)
  t(draws)
}

rcat <- function(n, prob) {
  rmulti(n, 1, prob)
}

rtnorm <- function(n, mean, sd, truncation) {
  truncdist::rtrunc(
    n,
    "norm",
    a = truncation[1],
    b = truncation[2],
    mean = mean,
    sd = sd
  )
}

rtlnorm <- function(n, meanlog, sdlog, truncation) {
  truncdist::rtrunc(
    n,
    "lnorm",
    a = truncation[1],
    b = truncation[2],
    meanlog = meanlog,
    sdlog = sdlog
  )
}

rtweibull <- function(n, shape, scale, truncation) {
  truncdist::rtrunc(
    n,
    "weibull",
    a = truncation[1],
    b = truncation[2],
    shape = shape,
    scale = scale
  )
}

rtf <- function(n, df1, df2, truncation) {
  truncdist::rtrunc(
    n,
    "f",
    a = truncation[1],
    b = truncation[2],
    df1 = df1,
    df2 = df2
  )
}

# joint testing functions
joint_normals <- function(...) {
  params_list <- list(...)
  components <- lapply(params_list, function(par) do.call(normal, par))
  do.call(joint, components)
}

rjnorm <- function(n, ...) {
  params_list <- list(...)
  args_list <- lapply(params_list, function(par) c(n, par))
  sims <- lapply(args_list, function(par) do.call(stats::rnorm, par))
  do.call(cbind, sims)
}

rjtnorm <- function(n, ...) {
  params_list <- list(...)
  args_list <- lapply(params_list, function(par) c(n, par))
  sims <- lapply(args_list, function(par) do.call(rtnorm, par))
  do.call(cbind, sims)
}

# mixture testing functions
mixture_normals <- function(...) {
  args <- list(...)
  is_weights <- names(args) == "weights"
  params_list <- args[!is_weights]
  components <- lapply(params_list, function(par) do.call(normal, par))
  do.call(mixture, c(components, args[is_weights]))
}

mixture_multivariate_normals <- function(...) {
  args <- list(...)
  is_weights <- names(args) == "weights"
  params_list <- args[!is_weights]
  components <- lapply(
    params_list,
    function(par) {
      do.call(multivariate_normal, par)
    }
  )
  do.call(mixture, c(components, args[is_weights]))
}

rmixnorm <- function(n, ...) {
  args <- list(...)
  is_weights <- names(args) == "weights"
  params_list <- args[!is_weights]
  weights <- args[[which(is_weights)]]
  args_list <- lapply(params_list, function(par) c(n, par))
  sims <- lapply(args_list, function(par) do.call(rnorm, par))
  draws <- do.call(cbind, sims)
  components <- sample.int(length(sims), n, prob = weights, replace = TRUE)
  idx <- cbind(seq_len(n), components)
  draws[idx]
}
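# example usage of the joint and mixture constructors above (a sketch; the
# parameter values are illustrative only):
# z_joint <- joint_normals(list(0, 1), list(2, 0.5))
# z_mix <- mixture_normals(list(0, 1), list(2, 0.5), weights = c(0.3, 0.7))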
rmixtnorm <- function(n, ...) {
  args <- list(...)
  is_weights <- names(args) == "weights"
  params_list <- args[!is_weights]
  weights <- args[[which(is_weights)]]
  args_list <- lapply(params_list, function(par) c(n, par))
  sims <- lapply(args_list, function(par) do.call(rtnorm, par))
  draws <- do.call(cbind, sims)
  components <- sample.int(length(sims), n, prob = weights, replace = TRUE)
  idx <- cbind(seq_len(n), components)
  draws[idx]
}

rmixmvnorm <- function(n, ...) {
  args <- list(...)
  is_weights <- names(args) == "weights"
  params_list <- args[!is_weights]
  weights <- args[[which(is_weights)]]
  args_list <- lapply(params_list, function(par) c(n, par))
  sims <- lapply(args_list, function(par) do.call(rmvnorm, par))
  components <- sample.int(length(sims), n, prob = weights, replace = TRUE)

  # loop through the n observations, pulling out the corresponding slice
  draws_out <- array(NA, dim(sims[[1]]))
  for (i in seq_len(n)) {
    draws_out[i, ] <- sims[[components[i]]][i, ]
  }
  draws_out
}

# a form of two-sample chi squared test for discrete multivariate distributions
combined_chisq_test <- function(x, y) {
  stats::chisq.test(
    x = colSums(x),
    y = colSums(y)
  )
}

# flatten the unique part of a symmetric matrix
get_upper_tri <- function(x, diag) {
  x[upper.tri(x, diag = diag)]
}

# compare iid samples from a greta distribution (using calculate) against a
# comparison R RNG function
compare_iid_samples <- function(
  greta_fun,
  r_fun,
  parameters,
  nsim = 200,
  p_value_threshold = 0.001
) {
  greta_array <- do.call(greta_fun, parameters)

  # get information about the distribution
  distribution <- get_node(greta_array)$distribution
  multivariate <- distribution$multivariate
  discrete <- distribution$discrete
  name <- distribution$distribution_name

  greta_samples <- calculate(greta_array, nsim = nsim)[[1]]
  r_samples <- do.call(r_fun, c(n = nsim, parameters))

  # reshape to matrix or vector
  if (multivariate) {
    # if it's a symmetric matrix, take only a triangle and flatten it
    if (name %in% c("wishart", "lkj_correlation")) {
      include_diag <- name == "wishart"
      t_greta_samples <- apply(greta_samples, 1, get_upper_tri, include_diag)
      t_r_samples <- apply(r_samples, 1, get_upper_tri, include_diag)
      greta_samples <- t(t_greta_samples)
      r_samples <- t(t_r_samples)
    } else {
      dim <- dim(greta_samples)
      new_dim <- c(dim[1], dim[2] * dim[3])
      dim(greta_samples) <- new_dim
      dim(r_samples) <- new_dim
    }
  } else {
    greta_samples <- as.vector(greta_samples)
  }

  # find a vaguely appropriate test
  if (discrete) {
    test <- ifelse(multivariate, combined_chisq_test, stats::chisq.test)
  } else {
    test <- ifelse(multivariate, cramer::cramer.test, stats::ks.test)
  }

  # run the chosen two-sample test on the samples
  suppressWarnings(test_result <- test(greta_samples, r_samples))
  testthat::expect_gte(test_result$p.value, p_value_threshold)
}

# is this a release candidate?
skip_if_not_release <- function() {
  if (identical(Sys.getenv("RELEASE_CANDIDATE"), "true")) {
    return(invisible(TRUE))
  }
  skip("Not a Release Candidate")
}
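# example usage of compare_iid_samples() (a sketch; assumes greta's normal()
# and stats::rnorm() share the mean/sd parameterisation):
# compare_iid_samples(
#   normal,
#   rnorm,
#   parameters = list(mean = -2, sd = 3)
# )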
# run a geweke test on a greta model, providing: 'sampler' a greta sampler
# (e.g. hmc()), a greta 'model', a 'data' greta array used as the target in
# the model, the two IID random number generators for the data generating
# function ('p_theta' = generator for the prior, 'p_x_bar_theta' = generator
# for the likelihood), and 'niter' the number of MCMC samples to compare
check_geweke <- function(
  sampler,
  model,
  data,
  p_theta,
  p_x_bar_theta,
  niter = 2000,
  warmup = 1000,
  thin = 1
) {
  # sample independently
  target_theta <- p_theta(niter)

  # sample with Markov chain
  greta_theta <- p_theta_greta(
    niter = niter,
    model = model,
    data = data,
    p_theta = p_theta,
    p_x_bar_theta = p_x_bar_theta,
    sampler = sampler,
    warmup = warmup
  )

  geweke_checks <- list(
    target_theta = do_thinning(target_theta, thin),
    greta_theta = do_thinning(greta_theta, thin)
  )

  geweke_checks
}

geweke_qq <- function(geweke_checks, title) {
  # visualise correspondence
  quants <- (1:99) / 100
  q1 <- stats::quantile(geweke_checks$target_theta, quants)
  q2 <- stats::quantile(geweke_checks$greta_theta, quants)
  plot(q2, q1, main = title)
  graphics::abline(0, 1)
}

geweke_ks <- function(geweke_checks) {
  # do a formal hypothesis test
  suppressWarnings(
    stat <- stats::ks.test(
      geweke_checks$target_theta,
      geweke_checks$greta_theta
    )
  )
  stat
}

# sample from a prior on theta the long way round, for use in a Geweke test:
# Gibbs sampling the posterior p(theta | x) and the data generating function
# p(x | theta), retaining only the samples of theta from the joint distribution
p_theta_greta <- function(
  niter,
  model,
  data,
  p_theta,
  p_x_bar_theta,
  sampler = hmc(),
  warmup = 1000
) {
  # set up and initialize the trace
  theta <- rep(NA, niter)
  theta[1] <- p_theta(1)

  # set up and tune the sampler
  draws <- mcmc(
    model,
    warmup = warmup,
    n_samples = 1,
    chains = 1,
    sampler = sampler,
    verbose = FALSE
  )

  # set up a progress bar and do a first increment
  cli::cli_progress_bar("Geweke test iterations", total = niter)
  cli::cli_progress_update()

  # now loop through, sampling and updating x and returning theta
  for (i in 2:niter) {
    # update the progress bar
    cli::cli_progress_update()

    # sample x given theta
    x <- p_x_bar_theta(theta[i - 1])

    # replace x in the node
    dag <- model$dag
    x_node <- get_node(data)
    x_node$value(as.matrix(x))

    # rewrite the log prob tf function, and the tf function for the posterior
    # samples, now using this value of x (slow, but necessary in eager mode)
    dag$tf_log_prob_function <- NULL
    dag$define_tf_log_prob_function()
    sampler <- attr(draws, "model_info")$samplers[[1]]
    sampler$define_tf_evaluate_sample_batch()

    # take another sample
    draws <- extra_samples(
      draws,
      n_samples = 1,
      verbose = FALSE
    )

    # trace the sample
    theta[i] <- tail(as.numeric(draws[[1]]), 1)
  }

  # kill the progress bar
  cli::cli_progress_done()

  theta
}

# test mcmc for models with analytic posteriors
need_more_samples <- function(draws, target_samples = 5000) {
  neff <- coda::effectiveSize(draws)
  rhats <- coda::gelman.diag(
    x = draws,
    multivariate = FALSE,
    autoburnin = FALSE
  )
  rhats <- rhats$psrf[, 1]
  converged <- all(rhats < 1.01)
  enough_samples <- all(neff >= target_samples)
  !(converged & enough_samples)
}

new_samples <- function(draws, target_samples = 5000) {
  neff <- min(coda::effectiveSize(draws))
  efficiency <- neff / coda::niter(draws)
  1.2 * (target_samples - neff) / efficiency
}

still_have_time <- function(start_time, time_limit = 300) {
  elapsed <- Sys.time() - start_time
  elapsed < time_limit
}
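# example Geweke workflow (a sketch; the prior, likelihood, and model are
# illustrative assumptions, not drawn from a specific test):
# theta <- normal(0, 2)
# x <- as_data(rnorm(1))
# distribution(x) <- normal(theta, 1)
# m <- model(theta)
# checks <- check_geweke(
#   sampler = hmc(),
#   model = m,
#   data = x,
#   p_theta = function(n) rnorm(n, 0, 2),
#   p_x_bar_theta = function(theta) rnorm(1, theta, 1),
#   niter = 200
# )
# geweke_qq(checks, title = "Geweke QQ")
# geweke_ks(checks)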
get_enough_draws <- function(
  model,
  sampler,
  n_effective = 5000,
  time_limit = 300,
  verbose = TRUE,
  one_by_one = FALSE
) {
  start_time <- Sys.time()
  draws <- mcmc(
    model,
    sampler = sampler,
    verbose = verbose,
    one_by_one = one_by_one
  )

  while (need_more_samples(draws, n_effective) &&
    still_have_time(start_time, time_limit)) {
    n_samples <- new_samples(draws, n_effective)
    draws <- extra_samples(
      draws,
      n_samples,
      verbose = verbose,
      one_by_one = one_by_one
    )
  }

  if (need_more_samples(draws, n_effective)) {
    stop("could not draw enough effective samples within the time limit")
  }

  draws
}

# Monte Carlo standard error (using batch means)
mcse <- function(draws) {
  n <- nrow(draws)
  b <- floor(sqrt(n))
  a <- floor(n / b)

  group <- function(k) {
    idx <- ((k - 1) * b + 1):(k * b)
    colMeans(draws[idx, , drop = FALSE])
  }

  bm <- vapply(
    seq_len(a),
    group,
    draws[1, ]
  )

  if (is.null(dim(bm))) {
    bm <- t(bm)
  }

  mu_hat <- as.matrix(colMeans(draws))
  ss <- sweep(t(bm), 2, mu_hat, "-")^2
  var_hat <- b * colSums(ss) / (a - 1)
  sqrt(var_hat / n)
}

# absolute error of the estimate, scaled by the Monte Carlo standard error
scaled_error <- function(draws, expectation) {
  draws <- as.matrix(draws)
  se <- mcse(draws)
  est <- colMeans(draws)
  abs(est - expectation) / se
}
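# example usage of mcse() and scaled_error() (a sketch; the draws here are iid,
# so the batch-means standard error should be close to sd(x) / sqrt(n)):
# x <- matrix(rnorm(1e4), ncol = 1)
# mcse(x)
# scaled_error(x, expectation = 0)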
# given a sampler (e.g. hmc()) and a minimum number of effective samples,
# ensure that the sampler can draw correct samples from a bivariate normal
# distribution
check_mvn_samples <- function(sampler, n_effective = 3000) {
  # get multivariate normal samples
  mu <- as_data(t(rnorm(2, 0, 5)))
  sigma <- stats::rWishart(1, 3, diag(2))[, , 1]
  x <- multivariate_normal(mu, sigma)
  m <- model(x, precision = "single")

  draws <- get_enough_draws(
    m,
    sampler = sampler,
    n_effective = n_effective,
    verbose = FALSE
  )

  # get MCMC samples for statistics of the samples (value, variance and
  # correlation of error wrt mean)
  err <- x - mu
  var <- (err)^2
  corr <- prod(err) / prod(sqrt(diag(sigma)))
  err_var_corr <- c(err, var, corr)
  stat_draws <- calculate(err_var_corr, values = draws)

  # get the true values of these: on average the error should be 0, and the
  # variance and correlation of the errors should be encoded in sigma
  stat_truth <- c(
    rep(0, 2),
    diag(sigma),
    cov2cor(sigma)[1, 2]
  )

  # get absolute errors between posterior means and true values, and scale
  # them by time-series Monte Carlo standard errors (the expected amount of
  # uncertainty in the MCMC estimate), to give the number of standard errors
  # away from truth. There's a 1/100 chance of any one of these scaled errors
  # being greater than qnorm(0.99) if the sampler is correct
  errors <- scaled_error(stat_draws, stat_truth)
  errors
}

do_thinning <- function(x, thinning = 1) {
  idx <- seq(1, length(x), by = thinning)
  x[idx]
}

get_distribution_name <- function(x) {
  x_node <- get_node(x)
  if (inherits(x_node, "operation_node")) {
    dist_name <- x_node$parents[[1]]$distribution$distribution_name
  } else {
    dist_name <- get_node(x)$distribution$distribution_name
  }
  dist_name
}

# sample values of greta array 'x' (which must follow a distribution), and
# compare the samples with iid samples returned by iid_function (which takes
# the number of samples as its sole argument), returning both sets of samples
# in a list for use with qqplot_checked_samples() and ks_test_mcmc_vs_iid()
check_samples <- function(
  x,
  iid_function,
  sampler = hmc(),
  n_effective = 3000,
  title = NULL,
  one_by_one = FALSE,
  time_limit = 300
) {
  m <- model(x, precision = "single")
  draws <- get_enough_draws(
    model = m,
    sampler = sampler,
    n_effective = n_effective,
    verbose = TRUE,
    one_by_one = one_by_one,
    time_limit = time_limit
  )

  neff <- coda::effectiveSize(draws)
  iid_samples <- iid_function(neff)
  mcmc_samples <- as.matrix(draws)

  thin_amount <- find_thinning(draws)
  mcmc_samples <- do_thinning(mcmc_samples, thin_amount)
  iid_samples <- do_thinning(iid_samples, thin_amount)

  list(
    mcmc_samples = mcmc_samples,
    iid_samples = iid_samples,
    distrib = get_distribution_name(x),
    sampler_name = class(sampler)[1]
  )
}

qqplot_checked_samples <- function(checked_samples, title = NULL) {
  distrib <- checked_samples$distrib
  sampler_name <- checked_samples$sampler_name
  title <- title %||% paste(distrib, "with", sampler_name)

  mcmc_samples <- checked_samples$mcmc_samples
  iid_samples <- checked_samples$iid_samples

  stats::qqplot(
    x = mcmc_samples,
    y = iid_samples,
    main = title
  )
  graphics::abline(0, 1)
}

## helpers for running Kolmogorov-Smirnov test for MCMC samples vs IID samples
ks_test_mcmc_vs_iid <- function(checked_samples) {
  # do a formal hypothesis test
  suppressWarnings(
    stat <- ks.test(
      checked_samples$mcmc_samples,
      checked_samples$iid_samples
    )
  )
  stat
}

## helpers for looping through optimisers
run_opt <- function(
  m,
  optmr,
  max_iterations = 200
) {
  opt(
    m,
    optimiser = optmr(),
    max_iterations = max_iterations
  )
}

possibly_run_opt <- purrr::possibly(.f = run_opt, otherwise = "error")

opt_df_run <- function(optimisers, m, x) {
  opt_df <- tibble::enframe(
    x = optimisers,
    name = "opt",
    value = "opt_fn"
  ) %>%
    dplyr::mutate(
      result = lapply(opt_fn, possibly_run_opt, m = m),
      x_val = list(x)
    )

  opt_df
}

tidy_optimisers <- function(opt_df, tolerance = 1e-2) {
  opt_df %>%
    dplyr::select(-opt_fn) %>%
    tidyr::unnest_wider(col = c(result)) %>%
    dplyr::mutate(
      par = unname(purrr::flatten(par)),
      par_x_diff = purrr::map2(
        .x = par,
        .y = x_val,
        .f = function(.x, .y) {
          abs(.y - .x)
        }
      ),
      close_to_truth = purrr::map_lgl(
        par_x_diff,
        function(x) all(x < tolerance)
      )
    ) %>%
    dplyr::relocate(
      close_to_truth,
      par_x_diff,
      iterations,
      convergence,
      .after = opt
    )
}
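# example usage of check_samples() and its companions (a sketch; the lognormal
# distribution and parameter values are illustrative assumptions):
# x <- lognormal(0, 1)
# checked <- check_samples(x, function(n) rlnorm(n, 0, 1))
# qqplot_checked_samples(checked)
# ks_test_mcmc_vs_iid(checked)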
# find a thinning rate that sufficiently reduces autocorrelation in the samples
# example
# x <- normal(0, 1)
# m <- model(x)
# draws <- mcmc(m)
# find_thinning(draws)
find_thinning <- function(draws, max_thin = 100, autocorr_threshold = 0.01) {
  autocorr_list <- coda::autocorr(draws, lags = seq_len(max_thin))
  autocorrs <- do.call(cbind, autocorr_list)
  mean_autocorr <- rowMeans(autocorrs)
  smallest_thin <- which(mean_autocorr < autocorr_threshold)[1]
  if (is.na(smallest_thin)) {
    smallest_thin <- max_thin
    cli::cli_warn(
      c(
        "Could not find a thinning value that reduces mean autocorrelation \\
        below the threshold, {.val {autocorr_threshold}}.",
        "Using the maximum thinning amount: {.val {max_thin}}"
      )
    )
  }
  smallest_thin
}