# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# The test_arrow_altrep_*() helpers must reject vectors that are not Arrow
# ALTREP: the materialization query yields NA and the others raise an error.
test_that("altrep test functions do not include base altrep", {
  expect_false(is_arrow_altrep(1:10))
  expect_identical(test_arrow_altrep_is_materialized(1:10), NA)
  expect_error(
    test_arrow_altrep_force_materialize(1:10),
    "is not arrow ALTREP"
  )
  expect_error(
    test_arrow_altrep_copy_by_element(1:10),
    "is not arrow ALTREP"
  )
  expect_error(
    test_arrow_altrep_copy_by_region(1:10, 1024),
    "is not arrow ALTREP"
  )
  expect_error(
    test_arrow_altrep_copy_by_dataptr(1:10),
    "is not arrow ALTREP"
  )
})

# The inspect output is checked before and after forcing materialization.
test_that(".Internal(inspect()) prints out Arrow altrep info", {
  withr::local_options(list(arrow.use_altrep = TRUE))
  alt <- as.vector(Array$create(1:1000))

  expect_output(.Internal(inspect(alt)), "\\] arrow::array_int_vector")
  expect_true(test_arrow_altrep_force_materialize(alt))
  expect_output(
    .Internal(inspect(alt)),
    "materialized arrow::array_int_vector"
  )
})

test_that("altrep vectors from int32 and dbl arrays with no nulls", {
  withr::local_options(list(arrow.use_altrep = TRUE))
  v_int <- Array$create(1:1000)
  v_dbl <- Array$create(as.numeric(1:1000))
  c_int <- ChunkedArray$create(1:1000)
  c_dbl <- ChunkedArray$create(as.numeric(1:1000))

  expect_true(is_arrow_altrep(as.vector(v_int)))
  expect_true(is_arrow_altrep(as.vector(v_int$Slice(1))))
  expect_true(is_arrow_altrep(as.vector(v_dbl)))
  expect_true(is_arrow_altrep(as.vector(v_dbl$Slice(1))))

  expect_equal(c_int$num_chunks, 1L)
  expect_true(is_arrow_altrep(as.vector(c_int)))
  expect_true(is_arrow_altrep(as.vector(c_int$Slice(1))))

  expect_equal(c_dbl$num_chunks, 1L)
  expect_true(is_arrow_altrep(as.vector(c_dbl)))
  expect_true(is_arrow_altrep(as.vector(c_dbl$Slice(1))))

  # an unset option is expected to behave like TRUE
  withr::local_options(list(arrow.use_altrep = NULL))
  expect_true(is_arrow_altrep(as.vector(v_int)))
  expect_true(is_arrow_altrep(as.vector(v_int$Slice(1))))
  expect_true(is_arrow_altrep(as.vector(v_dbl)))
  expect_true(is_arrow_altrep(as.vector(v_dbl$Slice(1))))

  # explicitly disabling the option must turn ALTREP off
  withr::local_options(list(arrow.use_altrep = FALSE))
  expect_false(is_arrow_altrep(as.vector(v_int)))
  expect_false(is_arrow_altrep(as.vector(v_int$Slice(1))))
  expect_false(is_arrow_altrep(as.vector(v_dbl)))
  expect_false(is_arrow_altrep(as.vector(v_dbl$Slice(1))))
})

test_that("element access methods for int32 ALTREP with no nulls", {
  withr::local_options(list(arrow.use_altrep = TRUE))
  original <- 1:1000
  v_int <- Array$create(original)
  altrep <- as.vector(v_int)

  expect_false(test_arrow_altrep_is_materialized(altrep))

  # altrep-aware iterating should not materialize
  expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
  expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
  expect_false(test_arrow_altrep_is_materialized(altrep))

  # because there are no nulls, DATAPTR() does not materialize
  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
  expect_false(test_arrow_altrep_is_materialized(altrep))

  # test element access after forcing materialization
  expect_true(test_arrow_altrep_force_materialize(altrep))
  expect_true(test_arrow_altrep_is_materialized(altrep))
  expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
  expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
})

test_that("element access methods for double ALTREP with no nulls", {
  withr::local_options(list(arrow.use_altrep = TRUE))
  original <- as.double(1:1000)
  v_dbl <- Array$create(original)
  altrep <- as.vector(v_dbl)

  expect_false(test_arrow_altrep_is_materialized(altrep))

  # altrep-aware iterating should not materialize
  expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
  expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
  expect_false(test_arrow_altrep_is_materialized(altrep))

  # because there are no nulls, DATAPTR() does not materialize
  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
  expect_false(test_arrow_altrep_is_materialized(altrep))

  # test element access after forcing materialization
  expect_true(test_arrow_altrep_force_materialize(altrep))
  expect_true(test_arrow_altrep_is_materialized(altrep))
  expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
  expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
})

test_that("altrep vectors from int32 and dbl arrays with nulls", {
  withr::local_options(list(arrow.use_altrep = TRUE))
  v_int <- Array$create(c(1L, NA, 3L))
  v_dbl <- Array$create(c(1, NA, 3))
  c_int <- ChunkedArray$create(c(1L, NA, 3L))
  c_dbl <- ChunkedArray$create(c(1, NA, 3))

  expect_true(is_arrow_altrep(as.vector(v_int)))
  expect_true(is_arrow_altrep(as.vector(v_int$Slice(1))))
  expect_true(is_arrow_altrep(as.vector(v_dbl)))
  expect_true(is_arrow_altrep(as.vector(v_dbl$Slice(1))))
  expect_true(is_arrow_altrep(as.vector(c_int)))
  expect_true(is_arrow_altrep(as.vector(c_int$Slice(1))))
  expect_true(is_arrow_altrep(as.vector(c_dbl)))
  expect_true(is_arrow_altrep(as.vector(c_dbl$Slice(1))))

  expect_true(is_arrow_altrep(as.vector(v_int$Slice(2))))
  expect_true(is_arrow_altrep(as.vector(v_dbl$Slice(2))))
  expect_true(is_arrow_altrep(as.vector(c_int$Slice(2))))
  expect_true(is_arrow_altrep(as.vector(c_dbl$Slice(2))))

  # same checks on chunked arrays made of two chunks
  c_int <- ChunkedArray$create(0L, c(1L, NA, 3L))
  c_dbl <- ChunkedArray$create(0, c(1, NA, 3))
  expect_equal(c_int$num_chunks, 2L)
  expect_equal(c_dbl$num_chunks, 2L)

  expect_true(is_arrow_altrep(as.vector(c_int)))
  expect_true(is_arrow_altrep(as.vector(c_dbl)))
  expect_true(is_arrow_altrep(as.vector(c_int$Slice(3))))
  expect_true(is_arrow_altrep(as.vector(c_dbl$Slice(3))))
})

test_that("element access methods for int32 ALTREP with nulls", {
  withr::local_options(list(arrow.use_altrep = TRUE))
  original <- c(NA, 1:1000)
  v_int <- Array$create(original)
  altrep <- as.vector(v_int)

  expect_false(test_arrow_altrep_is_materialized(altrep))

  # altrep-aware iterating should not materialize
  expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
  expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
  expect_false(test_arrow_altrep_is_materialized(altrep))

  # because there are nulls, DATAPTR() is expected to materialize
  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
  expect_true(test_arrow_altrep_is_materialized(altrep))

  # test element access after materialization
  expect_true(test_arrow_altrep_is_materialized(altrep))
  expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
  expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
})

test_that("element access methods for double ALTREP with nulls", {
  withr::local_options(list(arrow.use_altrep = TRUE))
  original <- as.double(c(NA, 1:1000))
  v_dbl <- Array$create(original)
  altrep <- as.vector(v_dbl)

  expect_false(test_arrow_altrep_is_materialized(altrep))

  # altrep-aware iterating should not materialize
  expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
  expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
  expect_false(test_arrow_altrep_is_materialized(altrep))

  # because there are nulls, DATAPTR() is expected to materialize
  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
  expect_true(test_arrow_altrep_is_materialized(altrep))

  # test element access after materialization
  expect_true(test_arrow_altrep_is_materialized(altrep))
  expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
  expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
})

test_that("altrep vectors from string arrays", {
  withr::local_options(list(arrow.use_altrep = TRUE))
  v_chr <- Array$create(c("one", NA, "three"))
  c_chr <- ChunkedArray$create(c("one", NA, "three"))

  expect_true(is_arrow_altrep(as.vector(v_chr)))
  expect_true(is_arrow_altrep(as.vector(v_chr$Slice(1))))
  expect_true(is_arrow_altrep(as.vector(c_chr)))
  expect_true(is_arrow_altrep(as.vector(c_chr$Slice(1))))

  expect_true(is_arrow_altrep(as.vector(v_chr$Slice(2))))
  expect_true(is_arrow_altrep(as.vector(c_chr$Slice(2))))

  # same checks on a chunked array made of two chunks
  c_chr <- ChunkedArray$create("zero", c("one", NA, "three"))
  expect_equal(c_chr$num_chunks, 2L)

  expect_true(is_arrow_altrep(as.vector(c_chr)))
  expect_true(is_arrow_altrep(as.vector(c_chr$Slice(3))))
})

test_that("can't SET_STRING_ELT() on character ALTREP", {
  withr::local_options(list(arrow.use_altrep = TRUE))
  alt <- as.vector(Array$create(c("one", "two", "three")))
  expect_error(
    test_arrow_altrep_set_string_elt(alt, 0, "value"),
    "are immutable"
  )
})

test_that("element access methods for character ALTREP", {
  withr::local_options(list(arrow.use_altrep = TRUE))
  original <- as.character(c(NA, 1:1000))
  v_chr <- Array$create(original)
  altrep <- as.vector(v_chr)

  expect_false(test_arrow_altrep_is_materialized(altrep))

  # altrep-aware iterating should not materialize
  expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
  expect_false(test_arrow_altrep_is_materialized(altrep))

  # DATAPTR() should always materialize for strings
  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
  expect_true(test_arrow_altrep_is_materialized(altrep))

  # test element access after materialization
  expect_true(test_arrow_altrep_is_materialized(altrep))
  expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
})

test_that("element access methods for character ALTREP from large_utf8()", {
  withr::local_options(list(arrow.use_altrep = TRUE))
  original <- as.character(c(NA, 1:1000))
  v_chr <- Array$create(original, type = large_utf8())
  altrep <- as.vector(v_chr)

  expect_false(test_arrow_altrep_is_materialized(altrep))

  # altrep-aware iterating should not materialize
  expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
  expect_false(test_arrow_altrep_is_materialized(altrep))

  # DATAPTR() should always materialize for strings
  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
  expect_true(test_arrow_altrep_is_materialized(altrep))

  # test element access after materialization
  expect_true(test_arrow_altrep_is_materialized(altrep))
  expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
  expect_identical(test_arrow_altrep_copy_by_dataptr(altrep), original)
})

test_that("empty vectors are not altrep", {
  withr::local_options(list(arrow.use_altrep = TRUE))
  v_int <- Array$create(integer())
  v_dbl <- Array$create(numeric())
  v_str <- Array$create(character())

  expect_false(is_arrow_altrep(as.vector(v_int)))
  expect_false(is_arrow_altrep(as.vector(v_dbl)))
  expect_false(is_arrow_altrep(as.vector(v_str)))
})

test_that("ChunkedArray with 0 chunks are not altrep", {
  # pin the option so the outcome does not depend on the ambient session value
  withr::local_options(list(arrow.use_altrep = TRUE))
  z_int <- ChunkedArray$create(type = int32())
  z_dbl <- ChunkedArray$create(type = float64())
  z_str <- ChunkedArray$create(type = utf8())

  expect_false(is_arrow_altrep(as.vector(z_int)))
  expect_false(is_arrow_altrep(as.vector(z_dbl)))
  expect_false(is_arrow_altrep(as.vector(z_str)))
})

test_that("chunked array become altrep", {
  # pin the option so the outcome does not depend on the ambient session value
  withr::local_options(list(arrow.use_altrep = TRUE))
  s1 <- c("un", "deux", NA)
  s2 <- c("quatre", "cinq")
  a <- Array$create(s1)
  v <- a$as_vector()
  expect_equal(v, s1)
  expect_true(is_arrow_altrep(v))

  ca <- ChunkedArray$create(s1, s2)
  cv <- ca$as_vector()
  expect_equal(cv, c(s1, s2))
  expect_true(is_arrow_altrep(cv))

  # chunked array with 2 chunks
  c_int <- ChunkedArray$create(0L, c(1L, NA, 3L))
  c_dbl <- ChunkedArray$create(0, c(1, NA, 3))
  expect_equal(c_int$num_chunks, 2L)
  expect_equal(c_dbl$num_chunks, 2L)

  expect_true(is_arrow_altrep(as.vector(c_int)))
  expect_true(is_arrow_altrep(as.vector(c_dbl)))
})

test_that("as.data.frame(