get_dummy_net <- function(n_nodes, n_blocks, seed = 1234) { bigergm_formula <- g ~ edges + nodematch("x") nodes_data <- tibble::tibble( node_id = 1:n_nodes, x = sample(1:2, size = n_nodes, replace = T), block = sample(1:n_blocks, size = n_nodes, replace = T) ) g <- network::network.initialize(n = n_nodes,directed = FALSE) g%v% "block" <- nodes_data$block network::set.vertex.attribute(g, "x", nodes_data$x) preprocessed_features <- get_features(g, bigergm_formula) coef_between_block <- c(-3, 1) coef_within_block <- c(-2, 0.1) sim_control_within <- ergm::control.simulate.formula( MCMC.burnin = 400, MCMC.interval = 200 ) g <- bigergm::simulate_bigergm( formula = bigergm_formula, coef_between = coef_between_block, coef_within = coef_within_block, control_within = sim_control_within ) bigergm_res <- bigergm::bigergm( g ~ edges + nodematch("x"), n_blocks = n_blocks, n_MM_step_max = 2, estimate_parameters = TRUE, use_infomap_python = FALSE, clustering_with_features = TRUE ) list( bigergm_res = bigergm_res, g = g, nodes_data = nodes_data, K = n_blocks, preprocessed_features = preprocessed_features, vertex_id_var = "node_id", block_id_var = "block", control_within = sim_control_within ) } sim <- get_dummy_net(n_nodes = 50,n_blocks = 2, seed = 1234) get_dummy_net_nocov <- function(n_nodes, n_blocks, seed = 1234) { bigergm_formula <- g ~ edges nodes_data <- tibble::tibble( node_id = 1:n_nodes, block = sample(1:n_blocks, size = n_nodes, replace = T) ) g <- network::network.initialize(n = n_nodes,directed = FALSE) g%v% "block" <- nodes_data$block preprocessed_features <- get_features(g, bigergm_formula) coef_between_block <- c(-3) coef_within_block <- c(-2) sim_control_within <- ergm::control.simulate.formula( MCMC.burnin = 400, MCMC.interval = 200 ) g <- bigergm::simulate_bigergm( formula = bigergm_formula, coef_between = coef_between_block, coef_within = coef_within_block, control_within = sim_control_within ) bigergm_res <- bigergm::bigergm( g ~ edges , n_blocks = n_blocks, n_MM_step_max = 2, estimate_parameters = TRUE, use_infomap_python = FALSE, clustering_with_features = FALSE ) list( bigergm_res = bigergm_res, g = g, nodes_data = nodes_data, K = n_blocks, preprocessed_features = preprocessed_features, vertex_id_var = "node_id", block_id_var = "block", control_within = sim_control_within ) } sim_nocov <- get_dummy_net_nocov(n_nodes = 50,n_blocks = 2, seed = 1234) test_that("Returned GOF dataframe has the correct fields", { g <- sim$g test_gof_res <- gof( sim$bigergm_res, control_within = sim$control_within, nsim = 3, compute_geodesic_distance = FALSE ) for (stat_type in c("original", "simulated")) { stats <- test_gof_res[[stat_type]] expect_false(is.null(stats)) for (stat in c("network_stats", "degree_dist", "esp_dist")) { expect_false(is.null(stats[[stat]])) } expect_true(is.null(stats[["geodesic_dist"]])) } g <- sim_nocov$g test_gof_res <- gof( sim_nocov$bigergm_res, control_within = sim_nocov$control_within, nsim = 3, compute_geodesic_distance = FALSE ) for (stat_type in c("original", "simulated")) { stats <- test_gof_res[[stat_type]] expect_false(is.null(stats)) for (stat in c("network_stats", "degree_dist", "esp_dist")) { expect_false(is.null(stats[[stat]])) } expect_true(is.null(stats[["geodesic_dist"]])) } }) test_that("GOF network stats have the right fields and terms", { g <- sim$g test_gof_res <- gof( sim$bigergm_res, control_within = sim$control_within, nsim = 3 ) expected_terms <- ergm::ergm_model(sim$bigergm_res$est_within$formula)$terms %>% purrr::map(function(t) { `$`(t, name) }) for (stat_type in c("original", "simulated")) { stat_type_df <- test_gof_res[[stat_type]] actual_terms <- colnames(stat_type_df$network_stats) actual_terms[stringr::str_detect(actual_terms, "nsim", negate = TRUE)] %>% setdiff(c("value", "stat")) %>% length() %>% expect_equal(0) stat_type_df$network_stats$stat %>% unique() %>% stringr::str_replace("[.].*", "") %>% setdiff(expected_terms) %>% length() %>% expect_equal(0) } g <- sim_nocov$g test_gof_res <- gof( sim_nocov$bigergm_res, control_within = sim$control_within, nsim = 3 ) expected_terms <- ergm::ergm_model(sim_nocov$bigergm_res$est_within$formula)$terms %>% purrr::map(function(t) { `$`(t, name) }) for (stat_type in c("original", "simulated")) { stat_type_df <- test_gof_res[[stat_type]] actual_terms <- colnames(stat_type_df$network_stats) actual_terms[stringr::str_detect(actual_terms, "nsim", negate = TRUE)] %>% setdiff(c("value", "stat")) %>% length() %>% expect_equal(0) stat_type_df$network_stats$stat %>% unique() %>% stringr::str_replace("[.].*", "") %>% setdiff(expected_terms) %>% length() %>% expect_equal(0) } }) test_that("GOF degree stats have the right fields and terms", { g <- sim$g test_gof_res <- gof( sim$bigergm_res, control_within = sim$control_within, nsim = 3 ) for (stat_type in c("original", "simulated")) { stat_type_df <- test_gof_res[[stat_type]] actual_terms <- colnames(stat_type_df$degree_dist) actual_terms[stringr::str_detect(actual_terms, "nsim", negate = TRUE)] %>% setdiff(c("degree", "share")) %>% length() %>% expect_equal(0) expect_lte(max(stat_type_df$degree_dist$degree), g$gal$n) expect( min(stat_type_df$degree_dist$share) >= 0 && max(stat_type_df$degree_dist$share) <= 1, failure_message = "Some degree shares are out of bounds" ) } g <- sim_nocov$g test_gof_res <- gof( sim_nocov$bigergm_res, control_within = sim_nocov$control_within, nsim = 3 ) for (stat_type in c("original", "simulated")) { stat_type_df <- test_gof_res[[stat_type]] actual_terms <- colnames(stat_type_df$degree_dist) actual_terms[stringr::str_detect(actual_terms, "nsim", negate = TRUE)] %>% setdiff(c("degree", "share")) %>% length() %>% expect_equal(0) expect_lte(max(stat_type_df$degree_dist$degree), g$gal$n) expect( min(stat_type_df$degree_dist$share) >= 0 && max(stat_type_df$degree_dist$share) <= 1, failure_message = "Some degree shares are out of bounds" ) } }) test_that("GOF esp stats have the right fields and terms", { g <- sim$g test_gof_res <- gof( sim$bigergm_res, control_within = sim$control_within, nsim = 3 ) for (stat_type in c("original", "simulated")) { stat_type_df <- test_gof_res[[stat_type]] actual_terms <- colnames(stat_type_df$esp_dist) actual_terms[stringr::str_detect(actual_terms, "nsim", negate = TRUE)] %>% setdiff(c("label", "esp")) %>% length() %>% expect_equal(0) expect_lte(max(stat_type_df$esp_dist$label), min(g$gal$n, 10)) expect( min(stat_type_df$esp_dist$esp) >= 0 && max(stat_type_df$esp_dist$esp) <= (g$gal$n^2), failure_message = "Some esp counts are out of bounds." ) } g <- sim_nocov$g test_gof_res <- gof( sim_nocov$bigergm_res, control_within = sim_nocov$control_within, nsim = 3 ) for (stat_type in c("original", "simulated")) { stat_type_df <- test_gof_res[[stat_type]] actual_terms <- colnames(stat_type_df$esp_dist) actual_terms[stringr::str_detect(actual_terms, "nsim", negate = TRUE)] %>% setdiff(c("label", "esp")) %>% length() %>% expect_equal(0) expect_lte(max(stat_type_df$esp_dist$label), min(g$gal$n, 10)) expect( min(stat_type_df$esp_dist$esp) >= 0 && max(stat_type_df$esp_dist$esp) <= (g$gal$n^2), failure_message = "Some esp counts are out of bounds." ) } }) test_that("GOF geodesic distance is returned when requested", { g <- sim$g test_gof_res <- gof( sim$bigergm_res, control_within = sim$control_within, nsim = 3, compute_geodesic_distance = TRUE ) for (stat_type in c("original", "simulated")) { stat_type_df <- test_gof_res[[stat_type]] actual_terms <- colnames(stat_type_df$geodesic_dist) actual_terms[stringr::str_detect(actual_terms, "nsim", negate = TRUE)] %>% setdiff(c("dist", "pairs")) %>% length() %>% expect_equal(0) # Some of the distances will be Inf, and that's ok (that's how ergm returns them). non_inf <- stat_type_df$geodesic_dist$dist[!is.infinite(stat_type_df$geodesic_dist$dist)] expect_lte(max(non_inf), g$gal$n) expect( (min(stat_type_df$geodesic_dist$pairs) >= 0) && (max(stat_type_df$geodesic_dist$pairs) <= (g$gal$n^2)), failure_message = "Some geodesic distance pairs are out of bounds." ) } g <- sim_nocov$g test_gof_res <- gof( sim_nocov$bigergm_res, control_within = sim_nocov$control_within, nsim = 3, compute_geodesic_distance = TRUE ) for (stat_type in c("original", "simulated")) { stat_type_df <- test_gof_res[[stat_type]] actual_terms <- colnames(stat_type_df$geodesic_dist) actual_terms[stringr::str_detect(actual_terms, "nsim", negate = TRUE)] %>% setdiff(c("dist", "pairs")) %>% length() %>% expect_equal(0) # Some of the distances will be Inf, and that's ok (that's how ergm returns them). non_inf <- stat_type_df$geodesic_dist$dist[!is.infinite(stat_type_df$geodesic_dist$dist)] expect_lte(max(non_inf), g$gal$n) expect( (min(stat_type_df$geodesic_dist$pairs) >= 0) && (max(stat_type_df$geodesic_dist$pairs) <= (g$gal$n^2)), failure_message = "Some geodesic distance pairs are out of bounds." ) } }) test_that("Return GOF statistics including only within-block connections", { g <- sim$g test_gof_res <-gof( sim$bigergm_res, control_within = sim$control_within, nsim = 3, type = "within", compute_geodesic_distance = FALSE ) # check that the network stats belong to the within-block sub network only edgelist <- network::as.edgelist(g) %>% as.data.frame colnames(edgelist) <- c('src', 'dst') nodes_with_blocks <- data.frame(id = 1:length(network::network.vertex.names(g)), block=sim$bigergm_res$block) actual_within_conns <- edgelist %>% dplyr::left_join(nodes_with_blocks, by = c('src' = 'id')) %>% dplyr::left_join(nodes_with_blocks, by = c('dst' = 'id'), suffix=c('.src', '.dst')) %>% dplyr::filter(block.src == block.dst) %>% nrow within_conns_from_gof <- (test_gof_res$original$network_stats %>% dplyr::filter(stat == 'edges'))[, 2] expect_equal(within_conns_from_gof, actual_within_conns) for (stat_type in c("original", "simulated")) { stats <- test_gof_res[[stat_type]] expect_false(is.null(stats)) for (stat in c("network_stats", "degree_dist", "esp_dist")) { expect_false(is.null(stats[[stat]])) } expect_true(is.null(stats[["geodesic_dist"]])) } g <- sim_nocov$g test_gof_res <-gof( sim_nocov$bigergm_res, control_within = sim_nocov$control_within, nsim = 3, type = "within", compute_geodesic_distance = FALSE ) # check that the network stats belong to the within-block sub network only edgelist <- network::as.edgelist(g) %>% as.data.frame colnames(edgelist) <- c('src', 'dst') nodes_with_blocks <- data.frame(id = 1:length(network::network.vertex.names(g)), block=sim_nocov$bigergm_res$block) actual_within_conns <- edgelist %>% dplyr::left_join(nodes_with_blocks, by = c('src' = 'id')) %>% dplyr::left_join(nodes_with_blocks, by = c('dst' = 'id'), suffix=c('.src', '.dst')) %>% dplyr::filter(block.src == block.dst) %>% nrow within_conns_from_gof <- (test_gof_res$original$network_stats %>% dplyr::filter(stat == 'edges'))[, 2] expect_equal(within_conns_from_gof, actual_within_conns) for (stat_type in c("original", "simulated")) { stats <- test_gof_res[[stat_type]] expect_false(is.null(stats)) for (stat in c("network_stats", "degree_dist", "esp_dist")) { expect_false(is.null(stats[[stat]])) } expect_true(is.null(stats[["geodesic_dist"]])) } }) test_that("Within-connections GOF can be started from the observed network", { g <- sim$g control_within <- ergm::control.simulate.formula( MCMC.burnin = 0, MCMC.interval = 1 ) test_gof_res <-gof( sim$bigergm_res, control_within = control_within, start_from_observed = TRUE, nsim = 2, type = "within" ) first_simulation_stats <- test_gof_res$simulated$network_stats %>% dplyr::filter(nsim == 1) %>% dplyr::select(-nsim) original_network_stats <- test_gof_res$original$network_stats expect_equal(original_network_stats,first_simulation_stats) g <- sim_nocov$g control_within <- ergm::control.simulate.formula( MCMC.burnin = 0, MCMC.interval = 1 ) test_gof_res <-gof( sim_nocov$bigergm_res, control_within = control_within, start_from_observed = TRUE, nsim = 2, type = "within" ) first_simulation_stats <- test_gof_res$simulated$network_stats %>% dplyr::filter(nsim == 1) %>% dplyr::select(-nsim) original_network_stats <- test_gof_res$original$network_stats expect_equal(original_network_stats,first_simulation_stats) }) test_that("Full GOF can be started from the observed network", { sim <- get_dummy_net(100, 4) g <- sim$g control_within <- ergm::control.simulate.formula( MCMC.burnin = 0, MCMC.interval = 1 ) test_gof_res <- gof( sim$bigergm_res, type = 'full', control_within = control_within, nsim = 2, start_from_observed = TRUE ) first_simulation_stats <-test_gof_res$simulated$network_stats %>% dplyr::filter(nsim == 1) # If it starts from the observed network, the stats should not be zero expect_true(all(first_simulation_stats['value'] > 0)) sim <- get_dummy_net_nocov(100, 4) g <- sim$g control_within <- ergm::control.simulate.formula( MCMC.burnin = 0, MCMC.interval = 1 ) test_gof_res <- gof( sim$bigergm_res, type = 'full', control_within = control_within, nsim = 2, start_from_observed = TRUE ) first_simulation_stats <-test_gof_res$simulated$network_stats %>% dplyr::filter(nsim == 1) # If it starts from the observed network, the stats should not be zero expect_true(all(first_simulation_stats['value'] > 0)) })