# tests in this file are run automatically in CI and require
# an actual Elasticsearch cluster to be up and running. For details,
# see: https://github.com/uptake/uptasticsearch/blob/main/.github/workflows/ci.yml

# Sample data from:
# - https://www.elastic.co/guide/en/kibana/current/tutorial-load-dataset.html

# Configure logger (suppress all logs in testing).
# The threshold that was active before this file ran is remembered in
# origLogThreshold so that the tear-down at the bottom can restore it.
loggerOptions <- futile.logger::logger.options()
if (!identical(loggerOptions, list())) {
    origLogThreshold <- loggerOptions[[1]][["threshold"]]
} else {
    origLogThreshold <- futile.logger::INFO
}
futile.logger::flog.threshold(0)

# Address of the Elasticsearch cluster under test. Every request in this
# file goes through this constant, so it is the only place to change.
ES_HOST <- "http://127.0.0.1:9200"

#--- es_search

# search request
test_that("es_search works as expected for a simple search request", {
    testthat::skip_on_cran()

    outDT <- es_search(
        es_host = ES_HOST
        , es_index = "shakespeare"
        , max_hits = 100
        , size = 100
    )
    expect_true(data.table::is.data.table(outDT))
})

test_that("es_search works when you have to scroll", {
    testthat::skip_on_cran()

    outDT <- es_search(
        es_host = ES_HOST
        , es_index = "shakespeare"
        , max_hits = 30
        , size = 2
    )
    expect_true(data.table::is.data.table(outDT))
    expect_true(nrow(outDT) == 30)
})

test_that("es_search works in single-threaded mode", {
    testthat::skip_on_cran()

    outDT <- es_search(
        es_host = ES_HOST
        , es_index = "shakespeare"
        , max_hits = 30
        , size = 2
        , n_cores = 1
    )
    expect_true(data.table::is.data.table(outDT))
    expect_true(nrow(outDT) == 30)
})

test_that("es_search rejects scrolls longer than 1 hour", {
    testthat::skip_on_cran()

    expect_error({
        outDT <- es_search(
            es_host = ES_HOST
            , es_index = "shakespeare"
            , max_hits = 100
            , size = 100
            , scroll = "2h"
        )
    }, regexp = "By default, this function does not permit scroll requests which keep the scroll")
})

test_that("es_search warns and readjusts size if max_hits less than 10000", {
    testthat::skip_on_cran()

    expect_warning({
        outDT <- es_search(
            es_host = ES_HOST
            , es_index = "shakespeare"
            , max_hits = 9999
        )
    }, regexp = "You requested a maximum of 9999 hits and a page size of 10000")
    expect_true(data.table::is.data.table(outDT))
})

test_that("es_search warns when max hits is not a clean multiple of size", {
    testthat::skip_on_cran()

    expect_warning({
        outDT <- es_search(
            es_host = ES_HOST
            , es_index = "shakespeare"
            , max_hits = 12
            , size = 7
        )
    }, regexp = "When max_hits is not an exact multiple of size, it is possible to get a few more than max_hits results back")
    expect_true(data.table::is.data.table(outDT))
})

test_that("es_search works as expected for search requests that return nothing", {
    testthat::skip_on_cran()

    # NOTE: Creating an intentionally empty index is the safest way to test
    #       this functionality. Any other test would involve writing a query
    #       and I want to avoid exposing our tests to changes in the query DSL
    expect_warning({
        outDT <- es_search(
            es_host = ES_HOST
            , es_index = "empty_index"
        )
    }, regexp = "Query is syntactically valid but 0 documents were matched")
    expect_null(outDT)
})

# aggregation request
test_that("es_search works as expected for a simple aggregation request", {
    testthat::skip_on_cran()

    outDT <- es_search(
        es_host = ES_HOST
        , es_index = "shakespeare"
        , max_hits = 100
        , query = '{"aggs": {"thing": {"terms": {"field": "speaker", "size": 12}}}}'  # nolint[quotes]
    )
    expect_true(data.table::is.data.table(outDT))

    # the number of rows expected back depends on the major version of
    # the cluster being tested: 4 by default, 3 for versions >= 7
    num_expected_levels <- 4
    major_version <- .major_version(
        .get_es_version(ES_HOST)
    )
    if (as.integer(major_version) >= 7) {
        num_expected_levels <- 3
    }
    expect_true(nrow(outDT) == num_expected_levels)
    expect_named(
        outDT
        , c("thing", "doc_count")
        , ignore.case = FALSE
        , ignore.order = TRUE
    )
    expect_true(is.numeric(outDT[, doc_count]))
    expect_true(is.character(outDT[, thing]))
    expect_true(all(outDT[, doc_count > 0]))
})

test_that("es_search respects the names you assign to aggregation results", {
    testthat::skip_on_cran()

    outDT <- es_search(
        es_host = ES_HOST
        , es_index = "shakespeare"
        , max_hits = 100
        , query = '{"aggs": {"name_i_picked": {"terms": {"field": "speaker", "size": 12}}}}'  # nolint[quotes]
    )

    # main test
    expect_named(
        outDT
        , c("name_i_picked", "doc_count")
        , ignore.case = FALSE
        , ignore.order = TRUE
    )

    # the stuff we might as well test
    expect_true(data.table::is.data.table(outDT))
    expect_true(is.numeric(outDT[, doc_count]))
    expect_true(is.character(outDT[, name_i_picked]))
    expect_true(all(outDT[, doc_count > 0]))
})

# We have tests on static empty results, but this test will catch
# changes across versions in the way Elasticsearch actually responds to
# aggs results that return nothing
test_that("es_search correctly handles empty bucketed aggregation result", {
    testthat::skip_on_cran()

    outDT <- es_search(
        es_host = ES_HOST
        , es_index = "shakespeare"
        , max_hits = 100
        , query = '{"aggs": {"blegh": {"terms": {"field": "nonsense_field"}}}}'  # nolint[quotes]
    )
    expect_null(outDT)
})

#--- .get_es_version

test_that(".get_es_version works", {
    testthat::skip_on_cran()

    ver <- uptasticsearch:::.get_es_version(es_host = ES_HOST)

    # is a string
    expect_true(.is_string(ver), info = paste0("returned version: ", ver))

    # Decided to check that it's coercible to an integer instead of
    # hard-coding known Elasticsearch versions so this test won't require
    # attention or break builds if/when Elasticsearch 7 or whatever the
    # next major version is comes out
    expect_true(!is.na(as.integer(ver)), info = paste0("returned version: ", ver))
})

#--- get_fields and .get_aliases

# Helper for manipulating aliases on the "shakespeare" index.
#
# POSTs a single alias action to the cluster's "_aliases" endpoint and
# raises (via .stop_for_status) if the request did not succeed.
#
# action:     which alias action to perform. Valid actions are
#             "add" and "remove"
# alias_name: name of the alias to add to / remove from "shakespeare"
#
# Returns NULL, invisibly.
.alias_action <- function(action, alias_name) {
    res <- .request(
        verb = "POST"
        , url = paste0(ES_HOST, "/_aliases")
        , body = sprintf(
            '{"actions": [{"%s": {"index": "shakespeare", "alias": "%s"}}]}'  # nolint[quotes]
            , action
            , alias_name
        )
    )
    .stop_for_status(res)
    return(invisible(NULL))
}

test_that(".get_aliases returns NULL when no aliases have been created in the cluster", {
    testthat::skip_on_cran()

    result <- .get_aliases(
        es_host = ES_HOST
    )
    expect_null(result)
})

test_that("get_fields works on an actual running Elasticsearch cluster with no aliases", {
    testthat::skip_on_cran()

    fieldDT <- get_fields(
        es_host = ES_HOST
        , es_indices = "_all"
    )
    expect_true(data.table::is.data.table(fieldDT))
    expect_true(nrow(fieldDT) > 0)
    expect_named(
        fieldDT
        , c("index", "type", "field", "data_type")
        , ignore.order = TRUE
        , ignore.case = FALSE
    )
    expect_true("shakespeare" %in% fieldDT[, unique(index)])
    expect_true(is.character(fieldDT$index))
    expect_true(is.character(fieldDT$type))
    expect_true(is.character(fieldDT$field))
    expect_true(is.character(fieldDT$data_type))
    expect_true(sum(is.na(fieldDT[, .(index, field, data_type)])) == 0)
})

test_that(".get_aliases and get_fields work as expected when exactly one alias exists for one index in the cluster", {
    testthat::skip_on_cran()

    # create an alias
    .alias_action("add", "the_test_alias")

    # get_aliases should work
    resultDT <- .get_aliases(ES_HOST)
    expect_true(data.table::is.data.table(resultDT))
    expect_true(nrow(resultDT) == 1)
    expect_named(
        resultDT
        , c("alias", "index")
        , ignore.case = FALSE
        , ignore.order = TRUE
    )
    expect_identical(resultDT[, index], "shakespeare")
    expect_identical(resultDT[, alias], "the_test_alias")

    # get_fields should work
    fieldDT <- get_fields(
        es_host = ES_HOST
        , es_indices = "_all"
    )
    expect_true(data.table::is.data.table(fieldDT))
    expect_true(nrow(fieldDT) > 0)
    expect_named(
        fieldDT
        , c("index", "type", "field", "data_type")
        , ignore.order = TRUE
        , ignore.case = FALSE
    )
    expect_true(is.character(fieldDT$index))
    expect_true(is.character(fieldDT$type))
    expect_true(is.character(fieldDT$field))
    expect_true(is.character(fieldDT$data_type))
    expect_true(sum(is.na(fieldDT[, .(index, field, data_type)])) == 0)

    # get_fields should replace index names with their aliases
    expect_true(fieldDT[, sum(index == "the_test_alias")] > 0)
    expect_true(
        fieldDT[, sum(index == "shakespeare")] == 0
        , info = "get_fields didn't replace index names with their aliases"
    )

    # delete the alias we created (to keep tests self-contained)
    .alias_action("remove", "the_test_alias")

    # confirm that it's gone
    resultDT <- .get_aliases(ES_HOST)
    expect_null(resultDT)
})

test_that(".get_aliases and get_fields work as expected when more than one alias exists for one index in the cluster", {
    testthat::skip_on_cran()

    # create several aliases, all pointing at the same index
    .alias_action("add", "the_test_alias")
    .alias_action("add", "the_best_alias")
    .alias_action("add", "the_nest_alias")

    # get_aliases should work
    resultDT <- .get_aliases(ES_HOST)
    expect_true(data.table::is.data.table(resultDT))
    expect_true(nrow(resultDT) == 3)
    expect_named(
        resultDT
        , c("alias", "index")
        , ignore.case = FALSE
        , ignore.order = TRUE
    )
    expect_identical(resultDT[, index], rep("shakespeare", 3))
    expect_true(resultDT[, all(c("the_best_alias", "the_nest_alias", "the_test_alias") %in% alias)])

    # get_fields should work for "_all" indices
    # NOTE: this was deprecated in Elasticsearch 6 and removed in
    #       Elasticsearch 7, but we use it here so that old uptasticsearch code
    #       continues to work
    fieldDT <- get_fields(
        es_host = ES_HOST
        , es_indices = "_all"
    )
    expect_true(data.table::is.data.table(fieldDT))
    expect_true(nrow(fieldDT) > 0)
    expect_named(
        fieldDT
        , c("index", "type", "field", "data_type")
        , ignore.order = TRUE
        , ignore.case = FALSE
    )
    expect_true(is.character(fieldDT$index))
    expect_true(is.character(fieldDT$type))
    expect_true(is.character(fieldDT$field))
    expect_true(is.character(fieldDT$data_type))
    expect_true(sum(is.na(fieldDT[, .(index, field, data_type)])) == 0)

    # get_fields should replace index names with their aliases
    expect_true(fieldDT[, all(c("the_best_alias", "the_nest_alias", "the_test_alias") %in% index)])
    expect_true(
        fieldDT[, sum(index == "shakespeare")] == 0
        , info = "get_fields didn't replace index names with their aliases"
    )

    # since we aliased the same index three times, the subsections should all be identical
    expect_true(identical(
        fieldDT[index == "the_best_alias", .(type, field, data_type)]
        , fieldDT[index == "the_nest_alias", .(type, field, data_type)]
    ))
    expect_true(identical(
        fieldDT[index == "the_best_alias", .(type, field, data_type)]
        , fieldDT[index == "the_test_alias", .(type, field, data_type)]
    ))

    # get_fields should work targeting a specific index with aliases
    fieldDT <- get_fields(
        es_host = ES_HOST
        , es_indices = "shakespeare"
    )
    expect_true(data.table::is.data.table(fieldDT))
    expect_true(nrow(fieldDT) > 0)
    expect_named(
        fieldDT
        , c("index", "type", "field", "data_type")
        , ignore.order = TRUE
        , ignore.case = FALSE
    )
    expect_true(is.character(fieldDT$index))
    expect_true(is.character(fieldDT$type))
    expect_true(is.character(fieldDT$field))
    expect_true(is.character(fieldDT$data_type))
    expect_true(sum(is.na(fieldDT[, .(index, field, data_type)])) == 0)

    # get_fields should replace index names with their aliases
    expect_true(fieldDT[, all(c("the_best_alias", "the_nest_alias", "the_test_alias") %in% index)])
    expect_true(
        fieldDT[, sum(index == "shakespeare")] == 0
        , info = "get_fields didn't replace index names with their aliases"
    )

    # since we aliased the same index three times, the subsections should all be identical
    expect_true(identical(
        fieldDT[index == "the_best_alias", .(type, field, data_type)]
        , fieldDT[index == "the_nest_alias", .(type, field, data_type)]
    ))
    expect_true(identical(
        fieldDT[index == "the_best_alias", .(type, field, data_type)]
        , fieldDT[index == "the_test_alias", .(type, field, data_type)]
    ))

    # delete the aliases we created (to keep tests self-contained)
    .alias_action("remove", "the_test_alias")
    .alias_action("remove", "the_best_alias")
    .alias_action("remove", "the_nest_alias")

    # confirm that they're gone
    resultDT <- .get_aliases(ES_HOST)
    expect_null(resultDT)
})

test_that("get_fields works when you target a single index with no aliases", {
    testthat::skip_on_cran()

    fieldDT <- get_fields(
        es_host = ES_HOST
        , es_indices = "empty_index"
    )
    expect_true(data.table::is.data.table(fieldDT))
    expect_true(nrow(fieldDT) > 0)
    expect_named(
        fieldDT
        , c("index", "type", "field", "data_type")
        , ignore.order = TRUE
        , ignore.case = FALSE
    )
    expect_true(is.character(fieldDT$index))
    expect_true(is.character(fieldDT$type))
    expect_true(is.character(fieldDT$field))
    expect_true(is.character(fieldDT$data_type))
    expect_true(sum(is.na(fieldDT[, .(index, field, data_type)])) == 0)

    # should only give us back records on the one index we requested
    expect_true(fieldDT[, all(index == "empty_index")])
})

test_that("get_fields works when you pass a vector of index names", {
    testthat::skip_on_cran()

    fieldDT <- get_fields(
        es_host = ES_HOST
        , es_indices = c("empty_index", "shakespeare")
    )
    expect_true(data.table::is.data.table(fieldDT))
    expect_true(nrow(fieldDT) > 0)
    expect_named(
        fieldDT
        , c("index", "type", "field", "data_type")
        , ignore.order = TRUE
        , ignore.case = FALSE
    )
    expect_true(is.character(fieldDT$index))
    expect_true(is.character(fieldDT$type))
    expect_true(is.character(fieldDT$field))
    expect_true(is.character(fieldDT$data_type))
    expect_true(sum(is.na(fieldDT[, .(index, field, data_type)])) == 0)

    # should only give us back records on indexes we requested
    expect_true(fieldDT[, any(index == "empty_index")])
    expect_true(fieldDT[, length(unique(index))] >= 2)
})

#--- HTTP request helpers

test_that(".request() works for requests without a body", {
    testthat::skip_on_cran()

    response <- uptasticsearch:::.request(
        verb = "POST"
        , url = "https://httpbin.org/status/201"
        , body = NULL
    )
    expect_true(response$method == "POST")
    expect_true(response$status_code == 201L)
    expect_true(response$url == "https://httpbin.org/status/201")
})

test_that(".request() works for requests with a body", {
    testthat::skip_on_cran()

    response <- uptasticsearch:::.request(
        verb = "POST"
        , url = "https://httpbin.org/anything"
        , body = '{"data": {"cool_numbers": [312, 708, 773]}}'
    )
    expect_true(response$method == "POST")
    expect_true(response$status_code == 200L)
    expect_true(response$url == "https://httpbin.org/anything")

    # the endpoint's response is parsed and its "json" element is
    # compared against the body that was sent
    response_content <- jsonlite::fromJSON(rawToChar(response$content))
    expect_true(identical(response_content[["json"]][["data"]][["cool_numbers"]], c(312L, 708L, 773L)))
})

test_that("retry logic works as expected", {
    testthat::skip_on_cran()

    # temporarily turn logging up so the retry messages can be captured
    futile.logger::flog.threshold(futile.logger::DEBUG)
    log_lines <- testthat::capture_output({
        response <- .request(
            verb = "GET"
            , url = "https://httpbin.org/status/502"
            , body = NULL
        )
    })
    futile.logger::flog.threshold(0)

    # should log the failures and sleep times
    expect_true(grepl("DEBUG.*Request failed.*status code 502.*Sleeping for", log_lines))

    # should perform retry with backoff
    expect_true(grepl(".*Sleeping for 1\\.[0-9]+ seconds.*Sleeping for 2\\.[0-9]+ seconds", log_lines))

    # should return the response
    expect_true(response$method == "GET")
    expect_true(response$status_code == 502L)
    expect_true(response$url == "https://httpbin.org/status/502")
})

test_that("retry logic works as expected for requests with a body", {
    testthat::skip_on_cran()

    # temporarily turn logging up so the retry messages can be captured
    futile.logger::flog.threshold(futile.logger::DEBUG)
    log_lines <- testthat::capture_output({
        response <- .request(
            verb = "POST"
            , url = "https://httpbin.org/status/429"
            , body = '{"some_key": 708}'
        )
    })
    futile.logger::flog.threshold(0)

    # should log the failures and sleep times
    expect_true(grepl("DEBUG.*Request failed.*status code 429.*Sleeping for", log_lines))

    # should perform retry with backoff
    expect_true(grepl(".*Sleeping for 1\\.[0-9]+ seconds.*Sleeping for 2\\.[0-9]+ seconds", log_lines))

    # should return the response
    expect_true(response$method == "POST")
    expect_true(response$status_code == 429L)
    expect_true(response$url == "https://httpbin.org/status/429")
})

##### TEST TEAR DOWN #####
futile.logger::flog.threshold(origLogThreshold)