# Helper fns for testthat

########################### SIMULATE META-ANALYSIS DATA (FROM MRM) ###########################

# potentially with clustering
# notes:
# - number of actually generated clusters could be less than m
#  because we randomly draw from the m clusters with replacement (to allow for k not divisible by m)
# - if bc = bb = 0 (i.e., moderators have no effect), then mu will be static within clusters and equal to zeta1 + b0
# see helper_MRM_sanity_checks.R for extensive sanity checks of this function

sim_data2 = function( k, # total number of studies
                      m = k, # number of clusters (m=k implies no clustering)
                      b0, # intercept
                      bc, # effect of continuous moderator
                      bb, # effect of binary moderator 
                      V,  # TOTAL residual heterogeneity after conditioning on moderators (including within- and between-cluster variance)
                      Vzeta = 0, # between-cluster variance (must be less than V)
                      muN, 
                      minN,
                      sd.w, 
                      true.effect.dist) {
  
  # # @test for m=k case
  # # TEST ONLY
  # k = 3
  # m = 3
  # b0 = 0 # intercept
  # bc = 0 # effect of continuous moderator
  # bb = 0 # effect of binary moderator
  # V = .5
  # Vzeta = 0.25
  # muN = 100
  # minN = 50
  # sd.w = 1
  # true.effect.dist = "expo"
  
  
  if ( Vzeta > V ) stop( "Vzeta must be less than or equal to V" )
  
  
  # assign studies to clusters
  # each in own cluster:
  if (m == k) cluster = 1:k  # each in its own cluster
  # k not divisble by m:
  # fine if k isn't divisible by m (number of clusters); clusters will just be unbalanced and actual number of cluster might be less than m 
  #if ( m < k ) cluster = sample( 1:m, size = k, replace = TRUE )
  # k not divisible by m: assign each observation to a cluster chosen at random (unbalanced clusters)
  if ( m < k & (k %% m != 0) ) cluster = sample( 1:m, size = k, replace = TRUE )
  # k divisible by m: assign observations to clusters in a balanced way
  if ( m < k & (k %% m == 0) ) cluster = rep(1:m, each = k/m)
  
  if (m > k) stop("m must be <= k")
  
  cluster = sort(cluster)
  
  # generate cluster random intercepts (zeta)
  # these are normal even when true effect dist is exponential
  zeta1 = rnorm( n = m, mean = 0, sd = sqrt( Vzeta ) )  # one entry per cluster
  zeta1i = zeta1[cluster]  # one entry per study
  
  
  # simulate k studies
  for (i in 1:k) {
    study = sim_one_study2( b0, # intercept
                            bc, # effect of continuous moderator
                            bb, # effect of binary moderator
                            V = V, 
                            Vzeta = Vzeta, 
                            zeta1 = zeta1i[i], # cluster random intercept for this study's cluster
                            muN = muN,
                            minN = minN,
                            sd.w = sd.w,
                            true.effect.dist = true.effect.dist)
    
    if ( i == 1 ) d = study else d = rbind( d, study )
  }
  
  # add cluster indicator
  d = d %>% mutate( cluster, .before = 1) 
  
  # ICC of study population effects within clusters (will be static for dataset)
  d$icc = ICCbareF( x = as.factor(d$cluster), 
                    y = d$Mi )  
  
  return(d)
}


# potentially with clustering
sim_one_study2 = function(b0, # intercept
                          bc, # effect of continuous moderator
                          bb, # effect of binary moderator
                          V, 
                          Vzeta, # used to calcuate within-cluster variance
                          zeta1,  # scalar cluster random intercept for this study's cluster
                          muN,
                          minN,
                          sd.w,
                          true.effect.dist = "normal"
) {
  
  # # @test for m=1 case
  # # TEST ONLY
  # b0 = 0.5 # intercept
  # bc = 0.5 # effect of continuous moderator
  # bb = 1 # effect of binary moderator
  # V = .5
  # Vzeta = 0.25
  # zeta1 = -0.2
  # muN = 100
  # minN = 50
  # sd.w = 1
  # true.effect.dist = "normal"
  
  if( !true.effect.dist %in% c("normal", "expo") ) stop("True effect dist not recognized")
  
  ##### Simulate Sample Size and Fixed Design Matrix for This Study #####
  # simulate total N for this study
  N = round( runif( n = 1, min = minN, max = minN + 2*( muN - minN ) ) ) # draw from uniform centered on muN
  
  # simulate study-level moderators (each a scalar)
  Zc = rnorm( n = 1, mean = 0, sd = 1)
  Zb = rbinom( n = 1, size = 1, prob = 0.5)
  
  # mean (i.e., linear predictor) conditional on the moderators and cluster membership
  mu = b0 + zeta1 + bc*Zc + bb*Zb
  # all that follows is that same as in NPPhat, except incorporating clustering as in SAPB
  
  ##### Draw a Single Population True Effect for This Study #####
  if ( true.effect.dist == "normal" ) {
    Mi = rnorm( n=1, mean=mu, sd=sqrt(V - Vzeta) )
  }
  if ( true.effect.dist == "expo" ) {
    # within-cluster variance = total - between
    Vwithin = V - Vzeta
    # set the rate so the heterogeneity is correct
    Mi = rexp( n = 1, rate = sqrt(1/Vwithin) )
    # now the mean is sqrt(V) rather than mu
    # shift to have the correct mean (in expectation)
    Mi = Mi + (mu - sqrt(Vwithin))
  }
  
  ###### Simulate Data For Individual Subjects ######
  # group assignments
  X = c( rep( 0, N/2 ), rep( 1, N/2 ) )
  
  # simulate continuous outcomes
  # 2-group study of raw mean difference with means 0 and Mi in each group
  # and same SD
  Y = c( rnorm( n = N/2, mean = 0, sd = sd.w ),
         rnorm( n = N/2, mean = Mi, sd = sd.w ) )
  
  # calculate ES for this study using metafor (see Viechtbauer "Conducting...", pg 10)
  require(metafor)
  ES = escalc( measure="SMD",   
               n1i = N/2, 
               n2i = N/2,
               m1i = mean( Y[X==1] ),
               m2i = mean( Y[X==0] ),
               sd1i = sd( Y[X==1] ),
               sd2i = sd( Y[X==0] ) ) 
  yi = ES$yi
  vyi = ES$vi
  
  return( data.frame( Mi, # study's true effect size; if within-cluster heterogeneity is zero, will be equal to mu
                      mu, # study's linear predictor conditional on the moderators and cluster membership
                      zeta1,
                      Zc,
                      Zb,
                      yi,
                      vyi ) )
}


# return the threshold q that is the TheoryP^th quantile 
#  when moderators are set to zc.star and zb.star
calculate_theory_p = function(true.effect.dist, 
                              q,
                              b0,
                              bc,
                              bb,
                              zc,   
                              zb,
                              V){
  
  # get the mean for this combination of moderators
  mu = b0 + bc*zc + bb*zb
  
  if ( true.effect.dist == "normal" ) {
    return( 1 - pnorm( q = q,
                       mean = mu,
                       sd = sqrt(V) ) )
  }
  
  if ( true.effect.dist == "expo" ) {
    # we generate from a exponential, then shift to achieve the correct mean, 
    #  so q is the threshold BEFORE shifting
    # here is the data generation code from sim_data:
    # Mi = rexp( n = 1, rate = sqrt(1/V) )
    # Mi = Mi + (mu - sqrt(V))
    # is this first line right?
    #stop("Not tested/written yet")
    q0 = q - ( mu - sqrt(V) )
    return( pexp( q = q0,
                  rate = sqrt(1/V),
                  lower.tail = FALSE) )
  }
  
  if ( true.effect.dist == "unif2" ) {
    stop("unif2 method not tested/written yet")
    # return( qunif2( p = 1 - TheoryP, 
    #                 mu = mu, 
    #                 V = V) )
  }
  
  if (true.effect.dist == "t.scaled") {
    stop("t.scaled method not tested/written yet")
    # from metRology package
    # return( qt.scaled(p = 1 - TheoryP,
    #                   df = 3,
    #                   mean = mu,
    #                   sd = sqrt(V) ) )
  }
  
  else stop("true.effect.dist not recognized.")
}