# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. test_that("as_nanoarrow_array() works for nanoarrow_array", { array <- as_nanoarrow_array(1:10) expect_identical(as_nanoarrow_array(array), array) array <- as_nanoarrow_array(1:10, schema = na_int32()) expect_identical(as_nanoarrow_array(array), array) skip_if_not_installed("arrow") casted <- as_nanoarrow_array(array, schema = na_int64()) expect_identical(infer_nanoarrow_schema(casted)$format, "l") expect_identical(convert_array(casted), as.double(1:10)) }) test_that("as_nanoarrow_array() works for logical() -> na_bool()", { # Without nulls array <- as_nanoarrow_array(c(TRUE, FALSE, TRUE, FALSE), schema = na_bool()) expect_identical(infer_nanoarrow_schema(array)$format, "b") expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical(array$offset, 0L) expect_identical(array$null_count, 0L) expect_identical( as.raw(array$buffers[[2]]), as.raw(packBits(c(TRUE, FALSE, TRUE, FALSE, rep(FALSE, 4)))) ) # With nulls array <- as_nanoarrow_array(c(TRUE, FALSE, NA), schema = na_bool()) expect_identical(infer_nanoarrow_schema(array)$format, "b") expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), packBits(c(rep(TRUE, 2), FALSE, rep(FALSE, 5))) ) expect_identical( as.raw(array$buffers[[2]]), as.raw(packBits(c(TRUE, FALSE, FALSE, rep(FALSE, 5)))) ) }) test_that("as_nanoarrow_array() errors for bad logical() creation", { skip_if_not_installed("arrow") expect_snapshot_error( as_nanoarrow_array(TRUE, schema = na_string()) ) }) test_that("as_nanoarrow_array() works for logical() -> na_int32()", { # Without nulls array <- as_nanoarrow_array(c(TRUE, FALSE, TRUE, FALSE), schema = na_int32()) expect_identical(infer_nanoarrow_schema(array)$format, "i") expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical(array$offset, 0L) expect_identical(array$null_count, 0L) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(c(TRUE, FALSE, TRUE, FALSE))) ) # With nulls array <- as_nanoarrow_array(c(TRUE, FALSE, NA), schema = na_int32()) expect_identical(infer_nanoarrow_schema(array)$format, "i") expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), packBits(c(rep(TRUE, 2), FALSE, rep(FALSE, 5))) ) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(c(TRUE, FALSE, NA))) ) }) test_that("as_nanoarrow_array() works for integer() -> na_int32()", { # Without nulls array <- as_nanoarrow_array(1:10) expect_identical(infer_nanoarrow_schema(array)$format, "i") expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical(array$offset, 0L) expect_identical(array$null_count, 0L) expect_identical(as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(1:10))) # With nulls array <- as_nanoarrow_array(c(1:10, NA)) expect_identical(infer_nanoarrow_schema(array)$format, "i") expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), packBits(c(rep(TRUE, 10), FALSE, rep(FALSE, 5))) ) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(c(1:10, NA))) ) }) test_that("as_nanoarrow_array() works for integer -> na_int64()", { skip_if_not_installed("arrow") casted <- as_nanoarrow_array(1:10, schema = na_int64()) expect_identical(infer_nanoarrow_schema(casted)$format, "l") expect_identical(convert_array(casted), as.double(1:10)) }) test_that("as_nanoarrow_array() works for double() -> na_double()", { # Without nulls array <- as_nanoarrow_array(as.double(1:10)) expect_identical(infer_nanoarrow_schema(array)$format, "g") expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical(array$offset, 0L) expect_identical(array$null_count, 0L) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(as.double(1:10))) ) # With nulls array <- as_nanoarrow_array(c(1:10, NA_real_)) expect_identical(infer_nanoarrow_schema(array)$format, "g") expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), packBits(c(rep(TRUE, 10), FALSE, rep(FALSE, 5))) ) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(c(1:10, NA_real_))) ) }) test_that("as_nanoarrow_array() works for double() -> na_int32()", { # Without nulls array <- as_nanoarrow_array(as.double(1:10), schema = na_int32()) expect_identical(infer_nanoarrow_schema(array)$format, "i") expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical(array$offset, 0L) expect_identical(array$null_count, 0L) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(1:10)) ) # With nulls array <- as_nanoarrow_array(c(1:10, NA_real_), schema = na_int32()) expect_identical(infer_nanoarrow_schema(array)$format, "i") expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), packBits(c(rep(TRUE, 10), FALSE, rep(FALSE, 5))) ) # The last element here is 0 because (int)nan is undefined behaviour expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(c(1:10, 0L))) ) # With overflow expect_warning( as_nanoarrow_array(.Machine$integer.max + as.double(1:5), schema = na_int32()), class = "nanoarrow_warning_lossy_conversion" ) }) test_that("as_nanoarrow_array() works for double() -> na_int64()", { # Without nulls array <- as_nanoarrow_array(as.double(1:10), schema = na_int64()) expect_identical(infer_nanoarrow_schema(array)$format, "l") expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical(array$offset, 0L) expect_identical(array$null_count, 0L) # This *is* how we create int64 buffers, so just check the roundtrip expect_identical(convert_array(array), as.double(1:10)) # With nulls array <- as_nanoarrow_array(c(1:10, NA_real_), schema = na_int64()) expect_identical(infer_nanoarrow_schema(array)$format, "l") expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), packBits(c(rep(TRUE, 10), FALSE, rep(FALSE, 5))) ) expect_identical(convert_array(array), as.double(c(1:10, NA_real_))) }) test_that("as_nanoarrow_array() works for integer64() -> na_int32()", { skip_if_not_installed("bit64") # Without nulls array <- as_nanoarrow_array(bit64::as.integer64(1:10), schema = na_int32()) expect_identical(infer_nanoarrow_schema(array)$format, "i") expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical(array$offset, 0L) expect_identical(array$null_count, 0L) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(1:10)) ) # With nulls array <- as_nanoarrow_array(bit64::as.integer64(c(1:10, NA_real_)), schema = na_int32()) expect_identical(infer_nanoarrow_schema(array)$format, "i") expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), packBits(c(rep(TRUE, 10), FALSE, rep(FALSE, 5))) ) # The last element here is 0 because (int)nan is undefined behaviour expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(c(1:10, 0L))) ) }) test_that("as_nanoarrow_array() works for integer64() -> na_int64()", { skip_if_not_installed("bit64") # Default roundtrip array <- as_nanoarrow_array(bit64::as.integer64(1:10)) expect_identical(convert_array(array, double()), as.double(1:10)) # Without nulls array <- as_nanoarrow_array(bit64::as.integer64(1:10), schema = na_int64()) expect_identical(infer_nanoarrow_schema(array)$format, "l") expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical(array$offset, 0L) expect_identical(array$null_count, 0L) # This *is* how we create int64 buffers, so just check the roundtrip expect_identical(convert_array(array, double()), as.double(1:10)) # With nulls array <- as_nanoarrow_array(bit64::as.integer64(c(1:10, NA_real_)), schema = na_int64()) expect_identical(infer_nanoarrow_schema(array)$format, "l") expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), packBits(c(rep(TRUE, 10), FALSE, rep(FALSE, 5))) ) expect_identical(convert_array(array, double()), as.double(c(1:10, NA_real_))) }) test_that("as_nanoarrow_array() works for double -> na_int8()", { skip_if_not_installed("arrow") casted <- as_nanoarrow_array(as.double(1:10), schema = na_int8()) expect_identical(infer_nanoarrow_schema(casted)$format, "c") expect_identical(convert_array(casted), 1:10) }) test_that("as_nanoarrow_array() works for character() -> na_string()", { # Without nulls array <- as_nanoarrow_array(letters) expect_identical(infer_nanoarrow_schema(array)$format, "u") expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical(array$offset, 0L) expect_identical(array$null_count, 0L) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(0:26)) ) expect_identical( as.raw(array$buffers[[3]]), as.raw(as_nanoarrow_buffer(paste(letters, collapse = ""))) ) # With nulls array <- as_nanoarrow_array(c(letters, NA)) expect_identical(infer_nanoarrow_schema(array)$format, "u") expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), packBits(c(rep(TRUE, 26), FALSE, rep(FALSE, 5))) ) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(c(0:26, 26L))) ) expect_identical( as.raw(array$buffers[[3]]), as.raw(as_nanoarrow_buffer(paste(letters, collapse = ""))) ) }) test_that("as_nanoarrow_array() works for character() -> na_large_string()", { skip_if_not_installed("arrow") # Without nulls array <- as_nanoarrow_array(letters, schema = na_large_string()) expect_identical(infer_nanoarrow_schema(array)$format, "U") expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical(array$offset, 0L) expect_identical(array$null_count, 0L) expect_identical( as.raw(array$buffers[[3]]), as.raw(as_nanoarrow_buffer(paste(letters, collapse = ""))) ) # With nulls array <- as_nanoarrow_array(c(letters, NA), schema = na_large_string()) expect_identical(infer_nanoarrow_schema(array)$format, "U") expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), packBits(c(rep(TRUE, 26), FALSE, rep(FALSE, 5))) ) expect_identical( as.raw(array$buffers[[3]]), as.raw(as_nanoarrow_buffer(paste(letters, collapse = ""))) ) }) test_that("as_nanoarrow_array() works for factor() -> na_dictionary()", { array <- as_nanoarrow_array( factor(letters), schema = na_dictionary(na_string(), na_int32()) ) expect_identical(infer_nanoarrow_schema(array)$format, "i") expect_identical(infer_nanoarrow_schema(array$dictionary)$format, "u") expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(0:25)) ) expect_identical( as.raw(array$dictionary$buffers[[3]]), charToRaw(paste0(letters, collapse = "")) ) }) test_that("as_nanoarrow_array() works for factor() -> na_string()", { array <- as_nanoarrow_array( factor(letters), schema = na_string() ) expect_identical(infer_nanoarrow_schema(array)$format, "u") expect_null(array$dictionary) expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(0:26)) ) expect_identical( as.raw(array$buffers[[3]]), charToRaw(paste0(letters, collapse = "")) ) }) test_that("as_nanoarrow_array() works for data.frame() -> na_struct()", { array <- as_nanoarrow_array(data.frame(x = 1:10)) expect_identical(array$length, 10L) expect_identical(array$offset, 0L) expect_identical(array$null_count, 0L) expect_identical(infer_nanoarrow_schema(array)$format, "+s") expect_identical(infer_nanoarrow_schema(array$children$x)$format, "i") expect_identical(as.raw(array$children$x$buffers[[2]]), as.raw(as_nanoarrow_buffer(1:10))) }) test_that("as_nanoarrow_array() errors for bad data.frame() -> na_struct()", { expect_error( as_nanoarrow_array(data.frame(x = 1:10), schema = na_struct()), "Expected 1 schema children" ) skip_if_not_installed("arrow") expect_snapshot_error( as_nanoarrow_array(data.frame(x = 1:10), schema = na_int32()) ) }) test_that("as_nanoarrow_array() works for Date -> na_date32()", { array <- as_nanoarrow_array(as.Date(c("2000-01-01", "2023-02-03", NA))) expect_identical(infer_nanoarrow_schema(array)$format, "tdD") expect_identical(array$length, 3L) expect_identical(array$null_count, 1L) expect_identical(as.raw(array$buffers[[1]]), as.raw(0x03)) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(c(10957L, 19391L, NA))) ) }) test_that("as_nanoarrow_array() works for Date -> na_date64()", { array <- as_nanoarrow_array( as.Date(c("2000-01-01", "2023-02-03", NA)), schema = na_date64() ) expect_identical(infer_nanoarrow_schema(array)$format, "tdm") expect_identical(array$length, 3L) expect_identical(array$null_count, 1L) expect_identical(as.raw(array$buffers[[1]]), as.raw(0x03)) storage <- as_nanoarrow_array( c(10957L, 19391L, NA) * 86400000, schema = na_int64() ) expect_identical( as.raw(array$buffers[[2]]), as.raw(storage$buffers[[2]]) ) }) test_that("as_nanoarrow_array() works for POSIXct -> na_timestamp()", { array <- as_nanoarrow_array( as.POSIXct(c("2000-01-01", "2023-02-03", NA), tz = "UTC"), schema = na_timestamp("ms", timezone = "UTC") ) expect_identical(infer_nanoarrow_schema(array)$format, "tsm:UTC") expect_identical(array$length, 3L) expect_identical(array$null_count, 1L) expect_identical(as.raw(array$buffers[[1]]), as.raw(0x03)) storage <- as_nanoarrow_array( c(10957L, 19391L, NA) * 86400000, schema = na_int64() ) expect_identical( as.raw(array$buffers[[2]]), as.raw(storage$buffers[[2]]) ) }) test_that("as_nanoarrow_array() works for difftime -> na_duration()", { array <- as_nanoarrow_array( as.difftime(c(1:5, NA), units = "secs"), schema = na_duration("ms") ) expect_identical(infer_nanoarrow_schema(array)$format, "tDm") expect_identical(array$length, 6L) expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), packBits(c(rep(TRUE, 5), FALSE, rep(FALSE, 2))) ) storage <- as_nanoarrow_array( c(1:5, NA) * 1000, schema = na_int64() ) expect_identical( as.raw(array$buffers[[2]]), as.raw(storage$buffers[[2]]) ) }) test_that("as_nanoarrow_array() works for blob::blob() -> na_binary()", { skip_if_not_installed("blob") # Without nulls array <- as_nanoarrow_array(blob::as_blob(letters)) expect_identical(infer_nanoarrow_schema(array)$format, "z") expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical(array$offset, 0L) expect_identical(array$null_count, 0L) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(0:26)) ) expect_identical( as.raw(array$buffers[[3]]), as.raw(as_nanoarrow_buffer(paste(letters, collapse = ""))) ) # With nulls array <- as_nanoarrow_array(blob::as_blob(c(letters, NA))) expect_identical(infer_nanoarrow_schema(array)$format, "z") expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), packBits(c(rep(TRUE, 26), FALSE, rep(FALSE, 5))) ) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(c(0:26, 26L))) ) expect_identical( as.raw(array$buffers[[3]]), as.raw(as_nanoarrow_buffer(paste(letters, collapse = ""))) ) }) test_that("as_nanoarrow_array() works for blob::blob() -> na_large_binary()", { skip_if_not_installed("arrow") # Without nulls array <- as_nanoarrow_array(blob::as_blob(letters), schema = na_large_binary()) expect_identical(infer_nanoarrow_schema(array)$format, "Z") expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical(array$offset, 0L) expect_identical(array$null_count, 0L) expect_identical( as.raw(array$buffers[[3]]), as.raw(as_nanoarrow_buffer(paste(letters, collapse = ""))) ) # With nulls array <- as_nanoarrow_array( blob::as_blob(c(letters, NA)), schema = na_large_binary() ) expect_identical(infer_nanoarrow_schema(array)$format, "Z") expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), packBits(c(rep(TRUE, 26), FALSE, rep(FALSE, 5))) ) expect_identical( as.raw(array$buffers[[3]]), as.raw(as_nanoarrow_buffer(paste(letters, collapse = ""))) ) }) test_that("as_nanoarrow_array() works for list(raw()) -> na_binary()", { # Without nulls array <- as_nanoarrow_array(lapply(letters, charToRaw)) expect_identical(infer_nanoarrow_schema(array)$format, "z") expect_identical(as.raw(array$buffers[[1]]), raw()) expect_identical(array$offset, 0L) expect_identical(array$null_count, 0L) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(0:26)) ) expect_identical( as.raw(array$buffers[[3]]), as.raw(as_nanoarrow_buffer(paste(letters, collapse = ""))) ) # With nulls array <- as_nanoarrow_array(c(lapply(letters, charToRaw), list(NULL))) expect_identical(infer_nanoarrow_schema(array)$format, "z") expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), packBits(c(rep(TRUE, 26), FALSE, rep(FALSE, 5))) ) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(c(0:26, 26L))) ) expect_identical( as.raw(array$buffers[[3]]), as.raw(as_nanoarrow_buffer(paste(letters, collapse = ""))) ) }) test_that("as_nanoarrow_array() works for list(NULL) -> na_list(na_na())", { array <- as_nanoarrow_array(list(NULL)) expect_identical(infer_nanoarrow_schema(array)$format, "+l") expect_identical(array$length, 1L) expect_identical(array$null_count, 1L) expect_identical( as.raw(array$buffers[[1]]), as.raw(as_nanoarrow_array(FALSE)$buffers[[2]]) ) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(c(0L, 0L))) ) expect_identical(infer_nanoarrow_schema(array$children[[1]])$format, "n") expect_identical(array$children[[1]]$length, 0L) }) test_that("as_nanoarrow_array() works for list(integer()) -> na_list(na_int32())", { array <- as_nanoarrow_array(list(1:5, 6:10), schema = na_list(na_int32())) expect_identical(infer_nanoarrow_schema(array)$format, "+l") expect_identical(array$length, 2L) expect_identical(array$null_count, 0L) expect_identical( as.raw(array$buffers[[1]]), as.raw(as_nanoarrow_array(c(TRUE, TRUE))$buffers[[2]]) ) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(c(0L, 5L, 10L))) ) expect_identical(infer_nanoarrow_schema(array$children[[1]])$format, "i") expect_identical(array$children[[1]]$length, 10L) }) test_that("as_nanoarrow_array() works for unspecified() -> na_na()", { skip_if_not_installed("vctrs") array <- as_nanoarrow_array(vctrs::unspecified(5)) expect_identical(infer_nanoarrow_schema(array)$format, "n") expect_identical(array$length, 5L) expect_identical(array$null_count, 5L) }) test_that("as_nanoarrow_array() works for bad unspecified() create", { skip_if_not_installed("vctrs") skip_if_not_installed("arrow") expect_snapshot_error( as_nanoarrow_array(vctrs::unspecified(5), schema = na_interval_day_time()) ) }) test_that("as_nanoarrow_array() can convert data.frame() to sparse_union()", { # Features: At least one element with more than one non-NA value, # one element with all NA values. test_df <- data.frame( lgl = c(TRUE, NA, NA, NA, NA, FALSE), int = c(NA, 123L, NA, NA, NA, NA), dbl = c(NA, NA, 456, NA, NA, NA), chr = c(NA, NA, NA, "789", NA, NA), stringsAsFactors = FALSE ) array <- as_nanoarrow_array( test_df, schema = na_sparse_union(lapply(test_df, infer_nanoarrow_schema)) ) expect_identical(infer_nanoarrow_schema(array)$format, "+us:0,1,2,3") expect_identical(array$length, 6L) expect_identical(array$null_count, 0L) expect_identical( as.raw(array$buffers[[1]]), as.raw(as_nanoarrow_buffer(as.raw(c(0L, 1L, 2L, 3L, 0L, 0L)))) ) expect_identical( lapply(array$children, convert_array), lapply(test_df, identity) ) expect_identical(convert_array(array), test_df) }) test_that("as_nanoarrow_array() can convert data.frame() to sparse_union()", { test_df <- data.frame( lgl = c(TRUE, NA, NA, NA, NA, FALSE), int = c(NA, 123L, NA, NA, NA, NA), dbl = c(NA, NA, 456, NA, NA, NA), chr = c(NA, NA, NA, "789", NA, NA), stringsAsFactors = FALSE ) array <- as_nanoarrow_array( test_df, schema = na_dense_union(lapply(test_df, infer_nanoarrow_schema)) ) expect_identical(infer_nanoarrow_schema(array)$format, "+ud:0,1,2,3") expect_identical(array$length, 6L) expect_identical(array$null_count, 0L) expect_identical( as.raw(array$buffers[[1]]), as.raw(as_nanoarrow_buffer(as.raw(c(0L, 1L, 2L, 3L, 0L, 0L)))) ) expect_identical( as.raw(array$buffers[[2]]), as.raw(as_nanoarrow_buffer(c(0L, 0L, 0L, 0L, 1L, 2L))) ) expect_identical( lapply(array$children, convert_array), list( lgl = c(TRUE, NA, FALSE), int = 123L, dbl = 456, chr = "789" ) ) expect_identical(convert_array(array), test_df) }) test_that("as_nanoarrow_array() for union type errors for unsupported objects", { expect_error( as_nanoarrow_array(data.frame(), schema = na_dense_union()), "Can't convert data frame with 0 columns" ) })