context("distributions") test_succeeds("Normal distribution works", { d <- tfd_normal(loc = c(1, 2), scale = c(11, 22)) x <- d %>% tfd_sample(c(2, 2)) expect_equal(d$batch_shape$as_list(), 2) }) test_succeeds("MultivariateNormalDiag distribution works", { d <- tfd_multivariate_normal_diag(loc = c(1,-1), scale_diag = c(1, 2)) x <- d %>% tfd_sample() expect_equal(d$batch_shape$as_list(), list()) }) test_succeeds("Independent distribution works", { d <- tfd_normal( loc = c(-1., 1, 5, 2), scale = c(0.1, 0.5, 1.4, 6) ) i <- tfd_independent( distribution = d, reinterpreted_batch_ndims = 1 ) i expect_equal(i$event_shape$as_list(), 4) # Make independent distribution from a 28*28-batch Bernoulli. d <- tfd_bernoulli( probs = matrix(rep(0.5, 28 * 28), ncol = 28) ) i <- tfd_independent( distribution = d, reinterpreted_batch_ndims = 2 ) i expect_equal(i$event_shape$ndims, 2) }) test_succeeds("Transformed distribution works", { d <- tfd_transformed_distribution( distribution = tfd_normal(loc = 0, scale = 1), bijector = tfb_exp() ) expect_equal(d$event_shape$ndims, 0) }) test_succeeds("Relaxed one hot categorical distribution works", { s <- tfd_relaxed_one_hot_categorical(temperature = 0, logits = c(1, 1)) %>% tfd_sample(10) %>% tensor_value() expect_equal(s, array(NaN, c(10, 2))) s <- tfd_relaxed_one_hot_categorical(temperature = 1e-10, logits = c(1e5, -1e5)) %>% tfd_sample(10) %>% tensor_value() expect_equal(s, array(c(rep(1, 10), rep(0, 10)), dim = c(10, 2))) s <- tfd_relaxed_one_hot_categorical(temperature = 1e-10, probs = c(0, 1)) %>% tfd_sample(10) %>% tensor_value() expect_equal(s, array(c(rep(0, 10), rep(1, 10)), dim = c(10, 2))) }) test_succeeds("One hot categorical distribution works", { s <- tfd_one_hot_categorical(logits = c(-1e5, 1e5)) %>% tfd_sample(10) %>% tensor_value() expect_equal(s, array(c(rep(0, 10), rep(1, 10)), dim = c(10, 2))) s <- tfd_one_hot_categorical(probs = c(1, 0)) %>% tfd_sample(10) %>% tensor_value() expect_equal(s, array(c(rep(1, 10), rep(0, 10)), dim = c(10, 2))) s <- tfd_one_hot_categorical(probs = c(0.5, 0.5)) %>% tfd_sample(10) %>% tensor_value() expect_identical(s %in% c(0, 1), rep(TRUE, 20)) }) test_succeeds("Relaxed Bernoulli distribution works", { s <- tfd_relaxed_bernoulli(temperature = 1e-10, logits = c(1e5, -1e5)) %>% tfd_sample(10) %>% tensor_value() expect_equal(s, array(c(rep(1, 10), rep(0, 10)), dim = c(10, 2))) s <- tfd_relaxed_bernoulli(temperature = 1e-10, probs = c(0, 1)) %>% tfd_sample(10) %>% tensor_value() expect_equal(s, array(c(rep(0, 10), rep(1, 10)), dim = c(10, 2))) }) test_succeeds("Zipf distribution works", { skip_if(tfp_version() >= "0.17") # TypeError: got an unexpected keyword argument 'interpolate_nondiscrete' batch_size <- 12 power <- rep(3, batch_size) x <- c(-3, -0.5, 0, 2, 2.2, 3, 3.1, 4, 5, 5.5, 6, 7.2) zipf <- tfd_zipf(power = power, interpolate_nondiscrete = FALSE) log_pmf <- zipf %>% tfd_log_prob(x) expect_equal(log_pmf$get_shape()$as_list(), batch_size) }) test_succeeds("Wishart distribution works", { skip_if_tfp_above("0.8") s <- matrix(c(1, 2, 2, 5), ncol = 2, byrow = TRUE) df <- 4 d <- tfd_wishart(df = df, scale_tril = tf$linalg$cholesky(s)) expect_equal(tfd_mean(d) %>% tensor_value(), df * s) }) test_succeeds("VonMisesFisher distribution works", { mean_dirs <- tf$nn$l2_normalize( matrix(c(1, 1, -2, 1, 0, -1), ncol =2, byrow = TRUE), axis = -1L) concentration <- matrix(c(0, 0.1, 2, 40, 1000)) d = tfd_von_mises_fisher( mean_direction = mean_dirs, concentration = concentration, #validate_args = TRUE, ### this does not work allow_nan_stats = FALSE) expect_equal(d$batch_shape$as_list(), c(5, 3)) expect_equal(d$event_shape$as_list(), c(2)) }) test_succeeds("VonMises distribution works", { x <- c(2, 3, 4, 5, 6, 7) d <- tfd_von_mises(0.1, 0) log_prob <- d %>% tfd_log_prob(x) expect_equivalent(log_prob %>% tensor_value(), rep(-log(2 * pi), 6), tol = 1e-6) }) test_succeeds("VectorSinhArcsinhDiag distribution works", { skip_if_tfp_above("0.9") n <- 10 scale_diag <- runif(n) scale_identity_multiplier <- 1 loc = rnorm(n) norm = tfd_multivariate_normal_diag( loc = loc, scale_diag = scale_diag, validate_args = TRUE) vsad = tfd_vector_sinh_arcsinh_diag( loc = loc, scale_diag = scale_diag, validate_args = TRUE) x <- matrix(rnorm(5 * n), ncol = n) normal_prob <- norm %>% tfd_prob(x) vsad_prob <- vsad %>% tfd_prob(x) expect_equal(normal_prob %>% tensor_value(), vsad_prob %>% tensor_value()) }) test_succeeds("VectorLaplaceLinearOperator distribution works", { skip_if_tfp_above("0.9") mu <- c(1, 2, 3) cov <- matrix( c(0.36, 0.12, 0.06, 0.12, 0.29,-0.13, 0.06,-0.13, 0.26), nrow = 3, byrow = TRUE ) scal <- tf$linalg$cholesky(cov) %>% tf$cast(tf$float32) vla <- tfd_vector_laplace_linear_operator( loc = mu, scal = tf$linalg$LinearOperatorLowerTriangular(scal / tf$sqrt(2)) ) vla %>% tfd_covariance() %>% tensor_value() expect_equal(vla %>% tfd_covariance() %>% tensor_value(), cov, tol = 1e-6) }) test_succeeds("VectorLaplaceDiag distribution works", { skip_if_tfp_above("0.9") d <- tfd_vector_laplace_diag(loc = matrix(rep(0, 6), ncol =3)) expect_equivalent(d %>% tfd_stddev() %>% tensor_value(), rep(sqrt(2), 3), tol = 1e-8) }) test_succeeds("VectorExponentialDiag distribution works", { skip_if(tfp_version() >= "0.17") # AttributeError: module 'tensorflow_probability.python.distributions' has no attribute 'VectorExponentialDiag' d <- tfd_vector_exponential_diag(loc = c(-1, 1),scale_diag = c(1, -5)) expect_equivalent(d %>% tfd_mean() %>% tensor_value(), c(-1 + 1, 1 - 5), tol = 1e-8) }) test_succeeds("VectorExponentialDiag distribution works", { skip_if(tfp_version() >= "0.17") # AttributeError: module 'tensorflow_probability.python.distributions' has no attribute 'vector_exponential_linear_operator' s <- matrix(c(1, 0.1, 0.1, 1), ncol = 2) d <- tfd_vector_exponential_linear_operator(scale = tf$linalg$LinearOperatorFullMatrix(s)) skip("Batch dim behavior changed") expect_equivalent(d %>% tfd_mean() %>% tensor_value(), c(1.1, 1.1), tol = 1e-8) }) test_succeeds("VectorDiffeoMixture distribution works", { skip_if_tfp_above("0.10") dims <- 5L d <- tfd_vector_diffeomixture( mix_loc = list(c(0, 1)), temperature = list(1), distribution = tfd_normal(loc = 0, scale = 1), loc = list(NULL, rep(2, 5)), scale = list( tf$linalg$LinearOperatorScaledIdentity( num_rows = dims, multiplier = 1.1, is_positive_definite = TRUE), tf$linalg$LinearOperatorDiag( diag = seq(2.5, 3.5, length.out = 5), is_positive_definite = TRUE))) expect_equal((d %>% tfd_mean())$get_shape()$as_list(), c(1, 5)) }) test_succeeds("VariationalGaussianProcess distribution works", { # Important: # This test only creates the distribution and does not train it. # Consider enhancing as per examples in # https://github.com/tensorflow/probability/blob/master/tensorflow_probability/python/distributions/variational_gaussian_process.py # Create kernel with trainable parameters, and trainable observation noise # variance variable. Each of these is constrained to be positive. amplitude <- tf$nn$softplus(tf$Variable(-1, name = 'amplitude')) length_scale <- 1e-5 + tf$nn$softplus(tf$Variable(-3, name = 'length_scale')) kernel = (get_psd_kernels())$ExponentiatedQuadratic(amplitude = amplitude, length_scale = length_scale) observation_noise_variance <- tf$nn$softplus(tf$Variable(0, name = 'observation_noise_variance')) # Create trainable inducing point locations and variational parameters. num_inducing_points <- 20L initial_inducing_points <- matrix(seq(-13, 13, length.out = num_inducing_points), nrow = num_inducing_points) %>% tf$cast(tf$float32) inducing_index_points <- tf$Variable(initial_inducing_points, name = 'inducing_index_points') variational_inducing_observations_loc <- tf$Variable(rep(0, num_inducing_points) %>% tf$cast(tf$float32), name = 'variational_inducing_observations_loc') variational_inducing_observations_scale <- tf$Variable(diag(num_inducing_points) %>% tf$cast(tf$float32), name = 'variational_inducing_observations_scale') # These are the index point locations over which we'll construct the # (approximate) posterior predictive distribution. num_predictive_index_points <- 500 index_points <- matrix(seq(-13, 13, length.out = num_predictive_index_points), nrow = num_predictive_index_points) %>% tf$cast(tf$float32) # Construct our variational GP Distribution instance. vgp = tfd_variational_gaussian_process( kernel, index_points = index_points, inducing_index_points = inducing_index_points, variational_inducing_observations_loc = variational_inducing_observations_loc, variational_inducing_observations_scale = variational_inducing_observations_scale, observation_noise_variance = observation_noise_variance ) }) test_succeeds("Uniform distribution works", { d <- tfd_uniform(low = 3, high = c(5, 6, 7)) expect_equivalent(d %>% tfd_mean() %>% tensor_value(), c(4, 4.5, 5), tol = 1e-6) }) test_succeeds("Truncated normal distribution works", { d <- tfd_truncated_normal(loc = c(0, 1), scale = 1, low = c(-1, 0), high = c(1, 1)) m <- d %>% tfd_mean() expect_equal(m$get_shape()$as_list(), 2) }) test_succeeds("Triangular distribution works", { d <- tfd_triangular(low = 3, high = 7, peak = 5) expect_equivalent(d %>% tfd_mean() %>% tensor_value(), 5) }) test_succeeds("Student T distribution works", { d <- tfd_student_t(df = c(5,6), loc = 0, scale = 1) sds <- d %>% tfd_stddev() %>% tensor_value() expect_gt(sds[1], sds[2]) }) test_succeeds("Student T process works", { num_points <- 100 index_points <- seq(-1., 1., length.out = num_points) %>% matrix(nrow = num_points) kernel <- (get_psd_kernels())$ExponentiatedQuadratic() d <- tfd_student_t_process(df = 3, kernel = kernel, index_points = index_points) noisy_samples <- d %>% tfd_sample(10) expect_equal(noisy_samples$get_shape()$as_list(), c(10, 100)) }) test_succeeds("SinhArcsinh distribution works", { n <- 10 scale <- runif(n) loc <- rnorm(n) norm = tfd_normal( loc = loc, scale = scale) vsad = tfd_sinh_arcsinh( loc = loc, scale = scale) x <- matrix(rnorm(5 * n), ncol = n) normal_prob <- norm %>% tfd_prob(x) vsad_prob <- vsad %>% tfd_prob(x) expect_equal(normal_prob %>% tensor_value(), vsad_prob %>% tensor_value(), tol = 1e-6) }) test_succeeds("Quantized distribution works", { scale <- 1 loc <- 0 q = tfd_quantized(tfd_normal( loc = loc, scale = scale)) x <- c(0.1, 0.4, 1.2) q_prob <- q %>% tfd_cdf(x) expect_equal(q_prob %>% tensor_value() %>% which.max(), 3) }) test_succeeds("Poisson distribution works", { skip_if(tfp_version() >= "0.17") # TypeError: got an unexpected keyword argument 'interpolate_nondiscrete' lambda <- c(1, 3, 2.5) d <- tfd_poisson(rate = lambda) expect_equivalent(d %>% tfd_stddev() %>% tensor_value(), sqrt(lambda), tol = 1e-7) }) test_succeeds("PoissonLogNormalQuadratureCompound distribution works", { d <- tfd_poisson_log_normal_quadrature_compound( loc = c(0.,-0.5), scale = 1, quadrature_size = 10 ) expect_equal((d %>% tfd_stddev())$get_shape()$as_list(), 2) }) test_succeeds("Pareto distribution works", { d <- tfd_pareto(2) expect_equal(d %>% tfd_mode() %>% tensor_value(), 1) }) test_succeeds("NegativeBinomial distribution works", { d <- tfd_negative_binomial(total_count = 23, probs = 0.1) nb_mean <- function(r, p) r * p /(1 - p) expect_equal(d %>% tfd_mean() %>% tensor_value(), nb_mean(23, 0.1), tol = 1e-7) }) test_succeeds("MultivariateNormalTriL distribution works", { mu <- c(1, 2, 3) cov <- matrix(c(0.36, 0.12, 0.06, 0.12, 0.29, -0.13, 0.06, -0.13, 0.26), nrow = 3, byrow =TRUE) scale <- tf$linalg$cholesky(cov) d <- tfd_multivariate_normal_tri_l(loc = mu, scale_tril = scale) expect_equivalent(d %>% tfd_mean() %>% tensor_value(), mu) }) test_succeeds("MultivariateNormalLinearOperator distribution works", { mu <- c(1, 2, 3) cov <- matrix(c(0.36, 0.12, 0.06, 0.12, 0.29, -0.13, 0.06, -0.13, 0.26), nrow = 3, byrow =TRUE) scale <- tf$linalg$cholesky(cov) d <- tfd_multivariate_normal_linear_operator(loc = mu, scale = tf$linalg$LinearOperatorLowerTriangular(scale)) expect_equivalent(d %>% tfd_covariance() %>% tensor_value(), cov) }) test_succeeds("MultivariateNormalFullCovariance distribution works", { mu <- c(1, 2, 3) cov <- matrix(c(0.36, 0.12, 0.06, 0.12, 0.29, -0.13, 0.06, -0.13, 0.26), nrow = 3, byrow =TRUE) d <- tfd_multivariate_normal_full_covariance(loc = mu, covariance_matrix = cov) expect_equivalent(d %>% tfd_mean() %>% tensor_value(), mu) }) test_succeeds("MultivariateNormalDiagPlusLowRank distribution works", { # Initialize a single 3-variate Gaussian with covariance `cov = S @ S.T`, # `S = diag(d) + U @ diag(m) @ U.T`. The perturbation, `U @ diag(m) @ U.T`, is # a rank-2 update. mu <- c(-0.5, 0, 0.5) d <- c(1.5, 0.5, 2) U <- matrix(c(1, 2, -1, 1, 2, -0.5), nrow = 3, byrow = TRUE) m <- c(4, 5) d <- tfd_multivariate_normal_diag_plus_low_rank(loc = mu, scale_diag = d, scale_perturb_factor = U, scale_perturb_diag = m) expect_equal((d %>% tfd_prob(c(-1, 0, 1)))$get_shape()$as_list(), list()) }) test_succeeds("MultivariateStudentTLinearOperator distribution works", { df <- 3 loc <- c(1, 2, 3) scale <- matrix(c(0.6, 0, 0, 0.2, 0.5, 0, 0.1, -0.3, 0.4), nrow = 3, byrow = TRUE) sigma = tf$matmul(scale, scale, adjoint_b = TRUE) d <- tfd_multivariate_student_t_linear_operator( df = df, loc = loc, scale = tf$linalg$LinearOperatorLowerTriangular(scale)) expect_equivalent(d %>% tfd_covariance() %>% tensor_value(), cov * 3) }) test_succeeds("Multinomial distribution works", { p <- list(c(.1, .2, .7), c(.3, .3, .4)) total_count <- c(4, 5) d <- tfd_multinomial(total_count = total_count, probs = p) counts <- list(c(2, 1, 1), c(3, 1, 1)) expect_equal((d %>% tfd_prob(counts))$get_shape()$as_list(), 2) expect_equal((d %>% tfd_sample(5))$get_shape()$as_list(), c(5, 2, 3)) }) test_succeeds("Mixture distribution works", { mix <- 0.3 d <- tfd_mixture( cat = tfd_categorical(probs = c(mix, 1 -mix)), components = list( tfd_normal(loc = -1, scale = 0.1), tfd_normal(loc = 1, scale = 0.5))) expect_equal((d %>% tfd_sample(5))$get_shape()$as_list(), 5) }) test_succeeds("Categorical distribution works", { d <- tfd_categorical(logits = log(c(0.1, 0.5, 0.4))) empirical_prob <- tf$cast(tf$histogram_fixed_width(d %>% tfd_sample(1e4), c(0L, 2L), nbins = 3L), dtype = tf$float32) / 1e4 expect_equal(which.max(empirical_prob %>% tensor_value()), 2) }) test_succeeds("Mixture same family distribution works", { d <- tfd_mixture_same_family( mixture_distribution = tfd_categorical(probs = c(0.3, 0.7)), components_distribution = tfd_normal(loc = c(-1, 1), scale = c(0.1, 0.5)) ) expect_equal(d %>% tfd_mean() %>% tensor_value(), 0.4, tol = 1e-7) }) test_succeeds("Log normal distribution works", { mu <- 1.3 v <- 0.5 d <- tfd_log_normal(loc = mu, scale = sqrt(v)) expected_mean <- exp(mu + v/2) expect_equal(d %>% tfd_mean() %>% tensor_value(), expected_mean) }) test_succeeds("Logistic distribution works", { d <- tfd_logistic(loc = c(1, 2), scale = c(11, 22)) d %>% tfd_prob(c(0, 1.5)) expect_equal((d %>% tfd_prob(c(0, 1.5)))$get_shape()$as_list(), 2) }) test_succeeds("LKJ distribution works", { d <- tfd_lkj(dimension = 3, concentration = 1.5) prob <- d %>% tfd_prob(array(rnorm(2 * 3 * 3), dim = c(2, 3, 3))) expect_equal(prob$get_shape()$as_list(), 2) }) test_succeeds("CholeskyLKJ distribution works", { skip_if_tfp_below("0.9") d <- tfd_cholesky_lkj(dimension = 3, concentration = 1.5) prob <- d %>% tfd_prob(array(rnorm(2 * 3 * 3), dim = c(2, 3, 3))) expect_equal(prob$get_shape()$as_list(), 2) }) test_succeeds("LinearGaussianStateSpaceModel distribution works", { ndims <- 2L step_std <- 1 noise_std <- 5 d <- tfd_linear_gaussian_state_space_model( num_timesteps = 100, transition_matrix = tf$linalg$LinearOperatorIdentity(ndims), transition_noise = tfd_multivariate_normal_diag( scale_diag = step_std^2 * tf$ones(list(ndims))), observation_matrix = tf$linalg$LinearOperatorIdentity(ndims), observation_noise = tfd_multivariate_normal_diag( scale_diag = noise_std^2 * tf$ones(list(ndims))), initial_state_prior = tfd_multivariate_normal_diag( scale_diag = noise_std^2 * tf$ones(list(ndims)))) expect_equal((d %>% tfd_sample())$get_shape()$as_list(), c(100, 2)) }) test_succeeds("Kumaraswamy distribution works", { alpha <- c(1, 2, 3) beta <- c(1, 2, 3) d <- tfd_kumaraswamy(alpha, beta) x <- matrix(c(0.1, 0.4, 0.5, 0.2, 0.3, 0.5), ncol = 3) prob <- d %>% tfd_prob(x) expect_equal(prob$get_shape()$as_list(), c(2, 3)) }) test_succeeds("Exponential distribution works", { d <- tfd_exponential(rate = 0.1) expect_equal(d %>% tfd_mean() %>% tensor_value(), 10) }) test_succeeds("Gamma distribution works", { d <- tfd_gamma(concentration = 1, rate = 2) expect_equal(d %>% tfd_mean() %>% tensor_value(), 0.5) }) test_succeeds("JointDistributionSequential distribution works", { skip_if_tfp_below("0.9") d <- tfd_joint_distribution_sequential( list( # e tfd_independent(tfd_exponential(rate = c(100, 120)), 1, name = "e"), # g function(e) tfd_gamma(concentration = e[1], rate = e[2], name = "g"), # n tfd_normal(loc = 0, scale = 2, name = "n1"), # m function(n1, g) tfd_normal(loc = n1, scale = g, name = "n2"), # x function(n2) tfd_sample_distribution(tfd_bernoulli(logits = n2), 12, name = "s") )) x <- d %>% tfd_sample() expect_equal(length(x), 5) expect_equal((d %>% tfd_log_prob(x))$get_shape()$as_list(), list()) expect_equal(d$resolve_graph() %>% length(), 5) }) test_succeeds("Track JointDistributionSequential broadcasting/reshaping changes", { df <- cbind(iris[1:100, 1:4], as.integer(iris[1:100, 5]) - 1) model <- tfd_joint_distribution_sequential( list(# b1 tfd_normal(loc = 0, scale = 0.5), # b2 tfd_normal(loc = 0, scale = 0.5), # b3 tfd_normal(loc = 0, scale = 0.5), # b4 tfd_normal(loc = 0, scale = 0.5), function(b4, b3, b2, b1) tfd_independent( tfd_binomial( total_count = 1, logits = tf$expand_dims(b1, axis = -1L) * df$Sepal.Length + tf$expand_dims(b2, axis = -1L) * df$Sepal.Width + tf$expand_dims(b3, axis = -1L) * df$Petal.Length + tf$expand_dims(b4, axis = -1L) * df$Petal.Width ), reinterpreted_batch_ndims = 1 ) ) ) # this SHOULD work as well but does not # model <- tfd_joint_distribution_sequential( # list(# b1 # tfd_normal(loc = 0, scale = 0.5), # # b2 # tfd_normal(loc = 0, scale = 0.5), # # b3 # tfd_normal(loc = 0, scale = 0.5), # # b4 # tfd_normal(loc = 0, scale = 0.5), # function(b4, b3, b2, b1) # tfd_independent( # tfd_binomial( # total_count = 1, # logits = # b1[reticulate::py_ellipsis(), tf$newaxis] * df$Sepal.Length + # b2[reticulate::py_ellipsis(), tf$newaxis] * df$Sepal.Width + # b3[reticulate::py_ellipsis(), tf$newaxis] * df$Petal.Length + # b4[reticulate::py_ellipsis(), tf$newaxis] * df$Petal.Width # ), # reinterpreted_batch_ndims = 1 # ) # ) # ) samples <- model %>% tfd_sample(3) model %>% tfd_log_prob(samples) }) test_succeeds("JointDistributionNamed distribution works", { skip_if_tfp_below("0.9") d <- tfd_joint_distribution_named( list( e = tfd_independent(tfd_exponential(rate = c(100, 120)), 1), g = function(e) tfd_gamma(concentration = e[1], rate = e[2]), n = tfd_normal(loc = 0, scale = 2), m = function(n, g) tfd_normal(loc = n, scale = g), x = function(m) tfd_sample_distribution(tfd_bernoulli(logits = m), 12) )) x <- d %>% tfd_sample() expect_equal(length(x), 5) expect_equal((d %>% tfd_log_prob(x))$get_shape()$as_list(), list()) expect_equal(d$resolve_graph() %>% length(), 5) }) test_succeeds("Inverse Gaussian distribution works", { d <- tfd_inverse_gaussian(loc = 1, concentration = 1) expect_equal(d %>% tfd_mean() %>% tensor_value(), 1) }) test_succeeds("Inverse Gamma distribution works", { d <- tfd_inverse_gamma(concentration = 1.5, scale = 2) expect_equal(d %>% tfd_mean() %>% tensor_value(), 2/(1.5-1)) }) test_succeeds("Horseshoe distribution works", { d <- tfd_horseshoe(scale = 2) expect_equal(d %>% tfd_mean() %>% tensor_value(), 0) }) test_succeeds("Hidden Markov Model distribution works", { # Represent a cold day with 0 and a hot day with 1. # Suppose the first day of a sequence has a 0.8 chance of being cold. # We can model this using the categorical distribution: initial_distribution <- tfd_categorical(probs = c(0.8, 0.2)) # Suppose a cold day has a 30% chance of being followed by a hot day # and a hot day has a 20% chance of being followed by a cold day. # We can model this as: transition_distribution <- tfd_categorical(probs = matrix(c(0.7, 0.3, 0.2, 0.8), nrow = 2, byrow = TRUE) %>% tf$cast(tf$float32)) # Suppose additionally that on each day the temperature is # normally distributed with mean and standard deviation 0 and 5 on # a cold day and mean and standard deviation 15 and 10 on a hot day. # We can model this with: observation_distribution <- tfd_normal(loc = c(0, 15), scale = c(5, 10)) # We can combine these distributions into a single week long # hidden Markov model with: d <- tfd_hidden_markov_model( initial_distribution = initial_distribution, transition_distribution = transition_distribution, observation_distribution = observation_distribution, num_steps = 7) # The expected temperatures for each day are given by: d %>% tfd_mean() %>% tensor_value() # shape [7], elements approach 9.0 # The log pdf of a week of temperature 0 is: d %>% tfd_log_prob(rep(0, 7)) %>% tensor_value() }) test_succeeds("Half normal distribution works", { d <- tfd_half_normal(scale = 1) expect_equal(d %>% tfd_mean() %>% tensor_value(), 1 * sqrt(2) / sqrt(pi), tol = 1e-7) }) test_succeeds("Half cauchy distribution works", { d <- tfd_half_cauchy(loc = 1, scale = 1) expect_equal(d %>% tfd_mode() %>% tensor_value(), 1) }) test_succeeds("Beta distribution works", { d <- tfd_beta(concentration1 = 3, concentration0 = 1) expect_equal(d %>% tfd_mean() %>% tensor_value(), 3/4) }) test_succeeds("Binomial distribution works", { d <- tfd_binomial(total_count = 7, probs = 0.3) expect_equal(d %>% tfd_mean() %>% tensor_value(), 7 * 0.3, tol = 1e-7) }) test_succeeds("Cauchy distribution works", { d <- tfd_cauchy(loc = 1, scale = 1) expect_equal(d %>% tfd_entropy() %>% tensor_value(), log(4 * pi * 1)) }) test_succeeds("Gamma-Gamma distribution works", { d <- tfd_gamma_gamma(concentration = 1, mixing_concentration = 2, mixing_rate = 0.5) expect_equal(d %>% tfd_mean() %>% tensor_value(), 0.5) }) test_succeeds("Chi distribution works", { d <- tfd_chi(df = 1) expect_equal((d %>% tfd_prob(c(0, 1)))$get_shape()$as_list() , 2) }) test_succeeds("Chi2 distribution works", { d <- tfd_chi2(df = 1) expect_equal(d %>% tfd_mean() %>% tensor_value() , 1) }) test_succeeds("Gumbel distribution works", { d <- tfd_gumbel(loc = 3, scale = 1.5) expect_equal(d %>% tfd_variance() %>% tensor_value() , pi^2/6 * 1.5^2, tol = 1e-7) }) test_succeeds("Geometric distribution works", { d <- tfd_geometric(probs = 0.6) expect_equal(d %>% tfd_variance() %>% tensor_value() , (1 - 0.6)/0.6^2, tol = 1e-6) }) test_succeeds("Dirichlet distribution works", { d <- tfd_dirichlet(concentration = c(1, 2, 3)) expect_equivalent(d %>% tfd_mean() %>% tensor_value() , c(1/6, 1/3, 1/2)) }) test_succeeds("DirichletMultinomial distribution works", { d <- tfd_dirichlet_multinomial(total_count = 2, concentration = c(1, 2, 3)) expect_equivalent(d %>% tfd_mean() %>% tensor_value() , c(1/3, 2/3, 1), tol = 1e-7) }) test_succeeds("Deterministic distribution works", { d <- tfd_deterministic(loc = 1.22) expect_equal(d %>% tfd_prob(1.23) %>% tensor_value() ,0) }) test_succeeds("Empirical distribution works", { d <- tfd_empirical(samples = c(0, 1, 1, 2)) expect_equal(d %>% tfd_cdf(1) %>% tensor_value(), 0.75) }) test_succeeds("BatchReshape distribution works", { dims <- 2 new_batch_shape <- c(1, 2, -1) old_batch_shape <- 6 scale <- matrix(rep(1, old_batch_shape * dims), nrow = old_batch_shape) mvn <- tfd_multivariate_normal_diag(scale_diag = scale) d <- tfd_batch_reshape( distribution = mvn, batch_shape = new_batch_shape, validate_args = TRUE) expect_equal(d$batch_shape$as_list() %>% length(), 3) }) test_succeeds("Autoregressive distribution works", { if (tfp_version() >= "0.8") { fill_triangular <- tfp$math$fill_triangular } else { fill_triangular <- tfp$distributions$fill_triangular } normal_fn <- function(event_size) { n <- event_size * (event_size + 1) / 2 %>% trunc() n <- tf$cast(n, tf$int32) p <- 2 * tfd_uniform()$sample(n) - 1 affine <- tfb_affine(scale_tril = fill_triangular(0.25 * p)) fn <- function(samples) { scale <- tf$exp(affine %>% tfb_forward(samples)) tfd_independent(tfd_normal(loc = 0, scale = scale, validate_args = TRUE), reinterpreted_batch_ndims = 1) } fn } batch_and_event_shape <- c(3L, 2L, 4L) sample0 <- tf$zeros(batch_and_event_shape, dtype = tf$int32) skip("bijectors.Affine removed, test needs updating") ar <- tfd_autoregressive(normal_fn(batch_and_event_shape[3]), sample0) x <- ar %>% tfd_sample(c(6, 5)) expect_equal(x$get_shape()$as_list(), c(6, 5, 3, 2, 4)) }) test_succeeds("BatchReshape distribution works", { num_points <- 100 kernel <- (get_psd_kernels())$ExponentiatedQuadratic() index_points <- matrix(seq(-1, 1, length.out = num_points), nrow = num_points) %>% tf$cast(tf$float32) gp <- tfd_gaussian_process(kernel, index_points, observation_noise_variance = .05) samples <- gp %>% tfd_sample(10) expect_equal(samples$get_shape()$as_list(), c(10, 100)) }) test_succeeds("Sample distribution works", { d <- tfd_sample_distribution( tfd_independent(tfd_normal(loc = tf$zeros(list(3L, 2L)), scale = 1), reinterpreted_batch_ndims = 1), sample_shape = list(5, 4)) samples <- d %>% tfd_sample(list(6, 1)) expect_equal(samples$get_shape()$as_list(), c(6, 1, 3, 5, 4, 2)) logprob <- d %>% tfd_log_prob(samples) expect_equal(logprob$get_shape()$as_list(), c(6, 1, 3)) }) test_succeeds("Blockwise distribution works", { d <- tfd_blockwise( list( tfd_multinomial(total_count = 5, probs = c(0.1, 0.2, 0.3)), tfd_multivariate_normal_diag(loc = c(0,0,0,0)) ) ) expect_equal(tfd_sample(d)$get_shape()$as_list(), 7) }) test_succeeds("Vector Deterministic distribution works", { d <- tfd_vector_deterministic( loc = c(1,1,1) ) expect_equal(tfd_sample(d)$get_shape()$as_list(), 3) }) test_succeeds("Gaussian Process Regression Model works", { skip_if_tfp_below("0.8") d <- tfd_gaussian_process_regression_model( kernel = (get_psd_kernels())$MaternFiveHalves(), index_points = matrix(seq(-1, 1, length.out = 100), ncol = 1), observation_index_points = matrix(runif(50), ncol = 1), observations = rnorm(50), observation_noise_variance = 0.5 ) expect_equal(tfd_sample(d)$get_shape()$as_list(), 100) }) test_succeeds("Exponential Relaxed One Hot Categorical distribution works", { d <- tfd_exp_relaxed_one_hot_categorical( temperature = 0.5, probs = c(0.1, 0.5, 0.4) ) expect_equal(tfd_sample(d)$get_shape()$as_list(), 3) }) test_succeeds("Doublesided Maxwell works", { skip_if_tfp_below("0.9") d <- tfd_doublesided_maxwell( loc = 0, scale = c(0.5, 1, 2) ) expect_equal(tfd_sample(d)$get_shape()$as_list(), 3) }) test_succeeds("Finite discrete works", { skip_if_tfp_below("0.9") d <- tfd_finite_discrete( c(1, 2, 3), probs = c(0.5, 0.2, 0.3) ) expect_equal(d %>% tfd_prob(2) %>% tensor_value(), 0.2) }) test_succeeds("Generalized Pareto discrete works", { skip_if_tfp_below("0.9") scale <- 2 conc <- 0.5 genp <- tfd_generalized_pareto(loc = 0, scale = scale, concentration = conc) mean1 <- genp %>% tfd_sample(1000) %>% tf$reduce_mean() jd <- tfd_joint_distribution_named( list( rate = tfd_gamma(1 / genp$concentration, genp$scale / genp$concentration), x = function(rate) tfd_exponential(rate))) mean2 <- jd %>% tfd_sample(1000) %>% .$x %>% tf$reduce_mean() expect_equal(mean1 %>% tensor_value(), mean2 %>% tensor_value(), tolerance = 1) }) test_succeeds("Logit-normal works", { skip_if_tfp_below("0.9") t <- tfd_transformed_distribution( tfd_normal(0, 1), tfb_sigmoid() ) sample_mean_t <- t %>% tfd_sample(10000) %>% tf$reduce_mean() d <- tfd_logit_normal(0, 1) sample_mean_d <- d %>% tfd_sample(10000) %>% tf$reduce_mean() expect_equal(sample_mean_t %>% tensor_value(), sample_mean_d %>% tensor_value(), tolerance = 0.1) }) test_succeeds("Log-normal works", { skip_if_tfp_below("0.9") t <- tfd_transformed_distribution( tfd_normal(0, 1), tfb_exp() ) sample_mean_t <- t %>% tfd_sample(10000) %>% tf$reduce_mean() d <- tfd_log_normal(0, 1) sample_mean_d <- d %>% tfd_sample(10000) %>% tf$reduce_mean() expect_equal(sample_mean_t %>% tensor_value(), sample_mean_d %>% tensor_value(), tolerance = 0.1) }) test_succeeds("Pert works", { skip_if_tfp_below("0.9") d1 <- tfd_pert(1, 7, 11, 4) d2 <- tfd_pert(1, 7, 11, 20) p1 <- d1 %>% tfd_prob(7) p2 <- d2 %>% tfd_prob(7) expect_lt(p1 %>% tensor_value(), p2 %>% tensor_value()) }) test_succeeds("PlackettLuce works", { skip_if_tfp_below("0.9") scores <- c(0.1, 2, 5) d <- tfd_plackett_luce(scores) permutations <- list(list(2, 1, 0), list(1, 0, 2)) expect_equal(d %>% tfd_prob(permutations) %>% tensor_value() %>% length(), 2) }) test_succeeds("ProbitBernoulli works", { skip_if_tfp_below("0.9") probits <- c(1, 0) d <- tfd_probit_bernoulli(probits = probits) samples <- d %>% tfd_sample(10) expect_equal(samples$get_shape() %>% length(), 2) }) test_succeeds("WishartTriL distribution works", { skip_if_tfp_below("0.9") s <- matrix(c(1, 2, 2, 5), ncol = 2, byrow = TRUE) df <- 4 d <- tfd_wishart_tri_l(df = df, scale_tril = tf$linalg$cholesky(s)) expect_equal(tfd_mean(d) %>% tensor_value(), df * s) }) test_succeeds("PixelCNN distribution works", { skip_if_tfp_below("0.10") library(tfdatasets) library(keras) mnist <- dataset_mnist() train_data <- mnist$train preprocess <- function(record) { record$x <- tf$cast(record$x, tf$float32) %>% tf$expand_dims(axis = -1L) record$y <- tf$cast(record$y, tf$float32) list(tuple(record$x, record$y)) } batch_size <- 32 train_ds <- tensor_slices_dataset(train_data) train_ds <- train_ds %>% dataset_take(32) %>% dataset_map(preprocess) %>% dataset_batch(batch_size) dist <- tfd_pixel_cnn( image_shape = c(28, 28, 1), conditional_shape = list(), num_resnet = 1, num_hierarchies = 1, num_filters = 2, num_logistic_mix = 1, dropout_p = .3 ) image_input <- layer_input(shape = c(28, 28, 1)) label_input <- layer_input(shape = list()) log_prob <- dist %>% tfd_log_prob(image_input, conditional_input = label_input) model <- keras_model(inputs = list(image_input, label_input), outputs = log_prob) model$add_loss(tf$negative(tf$reduce_mean(log_prob))) model$compile(optimizer = optimizer_adam(.001)) model %>% fit(train_ds, epochs = 1) samples <- dist %>% tfd_sample(1, conditional_input = 1) expect_equal(dim(samples), c(1, 28, 28, 1)) }) test_succeeds("BetaBinomial distribution works", { skip_if_tfp_below("0.10") d <- tfd_beta_binomial( total_count = c(5, 10, 20), concentration1 = c(.5, 2, 3), concentration0 = c(4, 3, 2)) expect_equal(d %>% tfd_log_prob(1) %>% tensor_value() %>% length(), 3) }) test_succeeds("JointDistributionSequentialAutoBatched distribution works", { skip_if_tfp_below("0.11") d <- tfd_joint_distribution_sequential_auto_batched( # e ~ Exponential(rate=[100,120]) # g ~ Gamma(concentration=e[0], rate=e[1]) # n ~ Normal(loc=0, scale=2.) # m ~ Normal(loc=n, scale=g) # for i = 1, ..., 12: x[i] ~ Bernoulli(logits=m) list( # e tfd_exponential(rate = c(100, 120)), # g function(e) tfd_gamma(concentration = e[1], rate = e[2], name = "g"), # n tfd_normal(loc = 0, scale = 2, name = "n1"), # m function(n1, g) tfd_normal(loc = n1, scale = g, name = "n2"), # x function(n2) tfd_sample_distribution(tfd_bernoulli(logits = n2), 12, name = "s") )) x <- d %>% tfd_sample() expect_equal(length(x), 5) expect_equal((d %>% tfd_log_prob(x))$get_shape()$as_list(), list()) expect_equal(d$resolve_graph() %>% length(), 5) }) test_succeeds("JointDistributionNamedAutoBatched distribution works", { skip_if_tfp_below("0.11") d <- tfd_joint_distribution_named_auto_batched( list( e = tfd_exponential(rate = c(100, 120)), g = function(e) tfd_gamma(concentration = e[1], rate = e[2]), n = tfd_normal(loc = 0, scale = 2), m = function(n, g) tfd_normal(loc = n, scale = g), x = function(m) tfd_sample_distribution(tfd_bernoulli(logits = m), 12) )) x <- d %>% tfd_sample() expect_equal(length(x), 5) expect_equal((d %>% tfd_log_prob(x))$get_shape()$as_list(), list()) expect_equal(d$resolve_graph() %>% length(), 5) }) test_succeeds("Weibull distribution works", { skip_if_tfp_below("0.11") d <- tfd_weibull(scale = c(1, 3, 45), concentration = c(2.5, 22, 7)) expect_equal(d %>% tfd_cdf(1) %>% tensor_value() %>% length(), 3) }) test_succeeds("TruncatedCauchy distribution works", { skip_if_tfp_below("0.11") d <- tfd_truncated_cauchy( loc = c(0, 1), scale = 1, low = c(-1, 0), high = c(1, 1) ) expect_equal(d %>% tfd_cdf(1) %>% tensor_value() %>% length(), 2) }) test_succeeds("SphericalUniform distribution works", { skip_if_tfp_below("0.11") d <- tfd_spherical_uniform( dimension = 4, batch_shape = list(3) ) expect_equal(d %>% tfd_sample(5) %>% tensor_value() %>% dim(), c(5, 3, 4)) }) test_succeeds("PowerSpherical distribution works", { skip_if_tfp_below("0.11") d <- tfd_power_spherical( mean_direction = list(list(0, 1, 0), list(1, 0, 0)), concentration = c(1, 2) ) x <- rbind(c(0, 0, 1), c(0, 1, 0)) expect_equal(d %>% tfd_prob(x) %>% tensor_value() %>% length(), 2) }) test_succeeds("LogLogistic distribution works", { skip_if_tfp_below("0.11") d1 <- tfd_log_logistic( loc = 1, scale = 1 ) d2 <- tfd_transformed_distribution( distribution = tfd_logistic(loc = 1, scale = 1), bijector = tfb_exp() ) x <- c(1, 10, 100) expect_equal(d1 %>% tfd_prob(x) %>% tensor_value(), d2 %>% tfd_prob(x) %>% tensor_value(), tol = 1e-6 ) }) test_succeeds("Bates distribution works", { skip_if_tfp_below("0.11") counts <- c(1, 2, 5) low <- c(0, 5, 10) high <- 100 d <- tfd_bates(total_count = counts, low = low, high = high) expect_equal(d %>% tfd_sample() %>% tensor_value() %>% length(), 3) }) test_succeeds("GeneralizedNormal distribution works", { skip_if_tfp_below("0.11") d <- tfd_generalized_normal(loc = c(1, 2), scale = c(11, 22), power = c(1, 1)) x <- d %>% tfd_sample(c(2, 2)) expect_equal(d$batch_shape$as_list(), 2) }) test_succeeds("JohnsonSU distribution works", { skip_if_tfp_below("0.11") d <- tfd_johnson_s_u(skewness = -2, tailweight = 2, loc = 1.1, scale = 1.5) expect_equal(d %>% tfd_log_prob(1) %>% tensor_value() %>% length(), 1) }) test_succeeds("ContinuousBernoulli distribution works", { skip_if_tfp_below("0.11") d <- tfd_continuous_bernoulli(logits = c(0.1, 0.9)) expect_equal(d %>% tfd_log_prob(1) %>% tensor_value() %>% length(), 2) }) test_succeeds("Skellam distribution works", { skip_if_tfp_below("0.12") d <- tfd_skellam(rate1 = 0.1, rate2 = 0.7) expect_equal(d %>% tfd_log_prob(1) %>% tensor_value() %>% length(), 1) }) test_succeeds("ExpGamma distribution works", { skip_if_tfp_below("0.12") d <- tfd_exp_gamma(concentration = 1, rate = 1) expect_equal(d %>% tfd_mean() %>% tensor_value() %>% length(), 1) }) test_succeeds("ExpInverseGamma distribution works", { skip_if_tfp_below("0.12") d <- tfd_exp_inverse_gamma(concentration = 1, scale = 1) expect_equal(d %>% tfd_mean() %>% tensor_value() %>% length(), 1) })