# Tests for atlas_counts(): ungrouped and grouped counts, species counts,
# pagination, piped (galah_call) usage and spatial filtering.
# HTTP traffic is recorded and replayed through vcr cassettes; apart from the
# first test (where the cassette wraps the whole test_that() call), only the
# request is made inside use_cassette() and expectations run afterwards.
context("Test atlas_counts")

# Turn off galah's verbose option for the rest of this file.
galah_config(verbose = FALSE)

# Here the cassette wraps the whole test, so anything requested while
# run_checks is switched on is captured in the same cassette.
vcr::use_cassette("atlas_counts_startup", {
  test_that("atlas_counts checks group_by field", {
    galah_config(run_checks = TRUE)
    expect_warning(atlas_counts(group_by = galah_group_by("invalid")))
    # Switch checks back off so later tests run without them.
    galah_config(run_checks = FALSE)
  })
})

test_that("atlas_counts works with no arguments", {
  vcr::use_cassette("atlas_counts_no_args", {
    count <- atlas_counts()
  })
  # atlas_counts with no arguments gives the total number of records in the ALA
  expect_gt(count, 0)
})

test_that("atlas_counts returns expected output", {
  vcr::use_cassette("atlas_counts_identify", {
    counts <- atlas_counts(identify = galah_identify("Mammalia"))
  })
  expect_type(counts$count, "integer")
})

test_that("grouped atlas_counts returns expected output", {
  vcr::use_cassette("atlas_counts_group_by", {
    counts <- atlas_counts(
      identify = galah_identify("Mammalia"),
      group_by = galah_group_by(basisOfRecord)
    )
  })
  expect_s3_class(counts, c("tbl_df", "tbl", "data.frame"))
  expect_equal(names(counts), c("basisOfRecord", "count"))
})

test_that("grouped atlas_counts returns expected output when limit != NULL", {
  vcr::use_cassette("atlas_counts_group_by_with_limit", {
    counts <- atlas_counts(
      identify = galah_identify("Mammalia"),
      group_by = galah_group_by(basisOfRecord),
      limit = 3
    )
  })
  expect_s3_class(counts, c("tbl_df", "tbl", "data.frame"))
  expect_equal(names(counts), c("basisOfRecord", "count"))
  expect_equal(nrow(counts), 3)
})

test_that("atlas_counts returns all counts if no limit is provided", {
  vcr::use_cassette("atlas_counts_no_limit", {
    counts <- atlas_counts(group_by = galah_group_by(month), limit = NULL)
  })
  expect_s3_class(counts, c("tbl_df", "tbl", "data.frame"))
  # Grouping by month with no limit should give one row per calendar month.
  expect_equal(nrow(counts), 12)
})

test_that("atlas_counts returns species counts", {
  vcr::use_cassette("atlas_counts_type_species", {
    counts <- atlas_counts(type = "species")
  })
  expect_type(counts$count, "integer")
  # Compare the count column itself rather than the whole data frame.
  expect_gt(counts$count, 0)
})

test_that("grouped atlas_counts for species returns expected output", {
  vcr::use_cassette("atlas_counts_type_species_group_by", {
    counts <- atlas_counts(
      identify = galah_identify("Mammalia"),
      filter = galah_filter(year == 2020),
      group_by = galah_group_by(month),
      type = "species"
    )
  })
  expect_s3_class(counts, c("tbl_df", "tbl", "data.frame"))
  expect_equal(names(counts), c("month", "count"))
})

test_that("atlas_counts handles pagination", {
  vcr::use_cassette("atlas_counts_pagination", {
    # NOTE(review): 101 presumably sits just above a page size of 100 so that
    # a second page has to be fetched - confirm against the implementation.
    counts <- atlas_counts(group_by = galah_group_by(year), limit = 101)
  })
  expect_s3_class(counts, c("tbl_df", "tbl", "data.frame"))
  expect_equal(nrow(counts), 101)
  expect_equal(names(counts), c("year", "count"))
})

# Caching tests below are currently disabled.
# test_that("atlas_counts caches as expected", {
#   skip_on_cran()
#   galah_config(caching = TRUE, verbose = TRUE)
#   filters <- galah_filter(basisOfRecord == "FossilSpecimen")
#   counts <- atlas_counts(filter = filters,
#                          group_by = galah_group_by(year),
#                          limit = 100)
#   expect_message(
#     counts2 <- atlas_counts(
#       filter = filters,
#       group_by = galah_group_by(year),
#       limit = 100
#     ),
#     "Using cached file"
#   )
#   expect_equal(nrow(counts), nrow(counts2))
# })
#
# test_that("atlas_counts returns consistent data from cached/non-cached calls", {
#   skip_on_cran()
#   galah_config(caching = TRUE, verbose = TRUE)
#   counts1 <- atlas_counts(group_by = galah_group_by(year))
#   counts2 <- atlas_counts(group_by = galah_group_by(year))
#   expect_equal(
#     class(counts1$year),
#     class(counts2$year)
#   )
# })

test_that("atlas_counts handles multiple 'group by' variables", {
  vcr::use_cassette("atlas_counts_multiple_group_by", {
    counts <- atlas_counts(
      filter = galah_filter(year >= 2018),
      group_by = galah_group_by(year, basisOfRecord)
    )
  })
  expect_s3_class(counts, c("tbl_df", "tbl", "data.frame"))
  expect_true(all(names(counts) %in% c("year", "basisOfRecord", "count")))
})

test_that("atlas_counts handles 'species' as a 'group by' variable", {
  vcr::use_cassette("atlas_counts_type_species_group_by_2", {
    counts <- galah_call() |>
      galah_identify("perameles") |>
      galah_filter(year > 2010) |>
      galah_group_by(species, year) |>
      atlas_counts()
  })
  expect_s3_class(counts, c("tbl_df", "tbl", "data.frame"))
  expect_true(all(names(counts) %in% c("year", "species", "count")))
})

test_that("atlas_counts handles 'taxonConceptID' as a 'group by' variable", {
  vcr::use_cassette("atlas_counts_group_by_taxonConceptID", {
    counts <- galah_call() |>
      galah_identify("perameles") |>
      galah_filter(year > 2010) |>
      galah_group_by(taxonConceptID, year) |>
      atlas_counts()
  })
  expect_s3_class(counts, c("tbl_df", "tbl", "data.frame"))
  expect_true(all(names(counts) %in% c("year", "taxonConceptID", "count")))
})

test_that("atlas_counts handles piping", {
  vcr::use_cassette("atlas_counts_piped_1", {
    counts <- galah_call() |>
      galah_filter(year >= 2018) |>
      galah_group_by(year, basisOfRecord) |>
      atlas_counts()
  })
  expect_s3_class(counts, c("tbl_df", "tbl", "data.frame"))
  expect_true(all(names(counts) %in% c("year", "basisOfRecord", "count")))
})

test_that("atlas_counts ignores superflueous piped arguments", {
  vcr::use_cassette("atlas_counts_piped_2", {
    # galah_down_to() and galah_select() are not relevant to counts; the
    # expectations below check that only the group_by column is returned.
    counts <- galah_call() |>
      galah_filter(year >= 2018) |>
      galah_group_by(year) |>
      galah_down_to(species) |>
      galah_select(taxonConceptID) |>
      atlas_counts()
  })
  expect_s3_class(counts, c("tbl_df", "tbl", "data.frame"))
  expect_equal(names(counts), c("year", "count"))
  expect_gt(nrow(counts), 0)
})

test_that("atlas_counts works for three groups", {
  vcr::use_cassette("atlas_counts_piped_3_groups", {
    # NOTE(review): the test and cassette names say "three groups", but four
    # fields are passed to galah_group_by() - confirm which is intended.
    counts <- galah_call() |>
      galah_identify("cacatuidae") |>
      galah_filter(year >= 2020) |>
      galah_group_by(biome, year, basisOfRecord, stateProvince) |>
      atlas_counts()
  })
  expect_s3_class(counts, c("tbl_df", "tbl", "data.frame"))
  expect_gt(nrow(counts), 1)
  expect_true(all(names(counts) %in% c("basisOfRecord", "biome", "year", "stateProvince", "count")))
})

test_that("atlas_counts filters correctly with galah_geolocate/galah_polygon", {
  vcr::use_cassette("atlas_counts_piped_polygon", {
    # The WKT literal stays on a single line so the string is unchanged.
    wkt <- "POLYGON ((146.5425 -42.63203, 146.8312 -43.13203, 147.4085 -43.13203, 147.6972 -42.63203, 147.4085 -42.13203, 146.8312 -42.13203, 146.5425 -42.63203))" |>
      sf::st_as_sfc()
    base_query <- galah_call() |>
      galah_identify("dasyurus") |>
      galah_filter(year >= 2020)
    counts <- base_query |>
      atlas_counts()
    counts_filtered <- base_query |>
      galah_geolocate(wkt) |>
      atlas_counts()
  })
  # Restricting the query to the polygon must reduce the record count.
  count_1 <- counts_filtered$count[1]
  count_2 <- counts$count[1]
  expect_lt(count_1, count_2)
})

test_that("atlas_counts filters correctly with galah_geolocate/galah_bbox", {
  vcr::use_cassette("atlas_counts_piped_bbox", {
    # The same polygon as the previous test, passed with type = "bbox".
    # NOTE(review): presumably galah derives the bounding box from it - confirm.
    wkt <- "POLYGON ((146.5425 -42.63203, 146.8312 -43.13203, 147.4085 -43.13203, 147.6972 -42.63203, 147.4085 -42.13203, 146.8312 -42.13203, 146.5425 -42.63203))" |>
      sf::st_as_sfc()
    base_query <- galah_call() |>
      galah_identify("dasyurus") |>
      galah_filter(year >= 2020)
    counts <- base_query |>
      atlas_counts()
    counts_filtered <- base_query |>
      galah_geolocate(wkt, type = "bbox") |>
      atlas_counts()
  })
  # Restricting the query spatially must reduce the record count.
  count_1 <- counts_filtered$count[1]
  count_2 <- counts$count[1]
  expect_lt(count_1, count_2)
})