# 13th April 2022
# Code to implement NP Bayesian Calibration and Summaristion of Related 14C Determinations
# Original code written by TJHeaton and available at https://github.com/TJHeaton/NonparametricCalibration
# This code is adaped from NPCalibrationWalker to test that the R Package is giving the correct result
seednum <- 8
set.seed(seednum)

# Read in IntCal20 curve
calcurve <- read.table("tests/testthat/fixtures/helpers/intcal20.14c", sep = ",", header=FALSE, skip=11)
names(calcurve) <- c("calage", "c14age", "c14sig", "Delta14C", "DeltaSigma")

# Read in the necessary functions
source("tests/testthat/fixtures/helpers/SliceUpdateFunsFinal.R")
source('tests/testthat/fixtures/helpers/WalkerDirichletMixtureUpdateFunsFinal.R') # This also reads in the slice sampling SliceUpdateFuns.R
source('tests/testthat/fixtures/helpers/NealDirichletMixtureMasterFunctionsFinal.R')
source("tests/testthat/fixtures/helpers/WalkerMasterFunctionFinal.R")
source("tests/testthat/fixtures/helpers/SimStudyFuncsFinal.R")

# Read in data
# x - c14ages
# xsig - corresponding 1 sigma
Kerr <- read.csv("tests/testthat/fixtures/helpers/kerr2014sss_sup.csv", header = FALSE, sep =  ",")
x <- Kerr[,3]
xsig <- Kerr[,4]

# Only choose the first 100 points for speed
x <- x[1:100]
xsig <- xsig[1:100]
#############################################################################
# Now choose hyperparameters
############################################################
# Prior on the concentration parameter
# Place  a gamma prior on alpha
# alpha ~ Gamma(alphaprshape, alphaprrate)
# A small alpha means more concentrated (i.e. few clusters)
# Large alpha not concentrated (many clusters)
cprshape <- alphaprshape <- 1
cprrate <- alphaprrate <- 1

#### Updated adaptive version
# Prior on mu theta for DP - very uninformative based on observed data
IntCalyrgrid <- FindCal(1:50000, calcurve$c14ag, calcurve$calage, calcurve$c14sig)
initprobs <- mapply(calibind, x, xsig, MoreArgs = list(calmu = IntCalyrgrid$mu, calsig = IntCalyrgrid$sigma))
inittheta <- apply(initprobs, 2, which.max)
# Choose A and B from range of theta
A <- median(inittheta)
B <- 1 / (max(inittheta) - min(inittheta))^2
maxrange <- max(inittheta) - min(inittheta)

# Parameters for sigma2 (sigma^2 ~ InvGamma(nu1, nu2))
# E[tau] = (1/100)^2 Var[tau] = (1/100)^4
# Interval for sigma2 is approx 1/ c(nu2/nu1 - 2*nu2^2/nu1, nu2/nu1 + 2*nu2^2/nu1)
tempspread <- 0.1 * mad(inittheta)
tempprec <- 1/(tempspread)^2
nu1 <- 0.25
nu2 <- nu1 / tempprec

# Setup the NP method
lambda <- (100/maxrange)^2  # Each muclust ~ N(mutheta, sigma2/lambda)


# Choose number of iterations for sampler
niter <- 1e5
nthin <- 10 # Don't choose too high, after burn-in we have (niter/nthin)/2 samples from posterior to potentially use
npostsum <- 5000 # Current number of samples it will draw from this posterior to estimate fhat (possibly repeats)

slicew <- max(1000, diff(range(x))/2)
m <- 10
kstar <- 10

save(x, xsig, lambda, nu1, nu2, A, B, cprshape, cprrate, niter, nthin, inittheta, slicew, m, kstar, seednum, file="tests/testthat/fixtures/walker_input.rda")

WalkerTemp <- WalkerBivarDirichlet(x = x, xsig = xsig,
                                   lambda = lambda, nu1 = nu1, nu2 = nu2,
                                   A = A, B = B,
                                   cprshape = cprshape, cprrate = cprrate,
                                   niter = niter, nthin = nthin, theta = inittheta,
                                   slicew = slicew, m = m, calcurve = calcurve, kstar = kstar)

##############################
# Also find the SPD estimate to plot alongside  -> New function CreateSPD
##############################
# Find the independent calibration probabilities
yrange <- floor(range(WalkerTemp$theta))
yfromto <- seq(max(0,yrange[1]-400), min(50000, yrange[2]+400), by = 1)

# Find the calibration curve mean and sd over the yrange
CurveR <- FindCalCurve(yfromto, calcurve)

# Now we want to apply to each radiocarbon determination
# Matrix where each column represents the posterior probability of each theta in yfromto
indprobs <- mapply(calibind, x, xsig, MoreArgs = list(calmu = CurveR$curvemean, calsig = CurveR$curvesd))

# Find the SPD estimate (save as dataframe)
SPD <- data.frame(calage = yfromto,
                  prob = apply(indprobs, 1, sum)/dim(indprobs)[2])

####### End of new function

# To plot the predictive distribution then you run
seednum <- 14
set.seed(seednum)
source("tests/testthat/fixtures/helpers/WalkerPostProcessingFinal.R")

save(x, xsig, postdenCI, postden, tempx, file = "tests/testthat/fixtures/walker_output.rda")

# To access the posterior calendar age estimate for individual determination then you can look at:
# WalkerTemp$theta[,10] # MCMC chain for 10th determination (will need to remove burn in)

# If we want to plot e.g. the posterior calendar age density against the curve then we can run the below
# ident is the determination you want to calibrate
ident <- 10
resolution <- 10
indpost <- plotindpost(WalkerTemp, ident = ident, y = x, er = xsig, calcurve = calcurve, resolution = resolution)