From fc5ed2a9558e2fb6ada822b9f066712a67fb31f8 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 7 Oct 2024 16:54:30 -0500 Subject: [PATCH] feat(r): Add float16 support for R bindings (#650) I'd forgotten to do this after the addition of float16 <-> double conversion in the C library! ``` r library(nanoarrow) array <- as_nanoarrow_array(1.23 + 1:5, schema = na_half_float()) convert_array(array) #> [1] 2.228516 3.228516 4.226562 5.226562 6.226562 ``` Created on 2024-10-07 with [reprex v2.1.1](https://reprex.tidyverse.org) --- r/src/as_array.c | 21 +++++++++++++- r/src/infer_ptype.c | 1 + r/src/materialize_dbl.h | 1 + r/tests/testthat/test-as-array.R | 40 +++++++++++++++++++++++++++ r/tests/testthat/test-convert-array.R | 1 + 5 files changed, 63 insertions(+), 1 deletion(-) diff --git a/r/src/as_array.c b/r/src/as_array.c index 4825033d8..aeb4930d2 100644 --- a/r/src/as_array.c +++ b/r/src/as_array.c @@ -188,6 +188,8 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr // (mostly so that we can support date/time types with various units) switch (schema_view->type) { case NANOARROW_TYPE_DOUBLE: + case NANOARROW_TYPE_FLOAT: + case NANOARROW_TYPE_HALF_FLOAT: case NANOARROW_TYPE_INT64: case NANOARROW_TYPE_INT32: break; @@ -228,7 +230,7 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr buffer->size_bytes = len * sizeof(int64_t); - } else { + } else if (schema_view->type == NANOARROW_TYPE_INT32) { // double -> int32_t struct ArrowBuffer* buffer = ArrowArrayBuffer(array, 1); result = ArrowBufferReserve(buffer, len * sizeof(int32_t)); @@ -257,6 +259,23 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr } buffer->size_bytes = len * sizeof(int32_t); + } else { + result = ArrowArrayStartAppending(array); + if (result != NANOARROW_OK) { + Rf_error("ArrowArrayStartAppending() failed"); + } + + result = ArrowArrayReserve(array, len); + if (result != NANOARROW_OK) { + Rf_error("ArrowArrayReserve() failed"); + } + + for (int64_t i = 0; i < len; i++) { + result = ArrowArrayAppendDouble(array, x_data[i]); + if (result != NANOARROW_OK) { + Rf_error("ArrowArrayAppendDouble() failed"); + } + } } // Set the array fields diff --git a/r/src/infer_ptype.c b/r/src/infer_ptype.c index 2a3527586..37ef6e331 100644 --- a/r/src/infer_ptype.c +++ b/r/src/infer_ptype.c @@ -49,6 +49,7 @@ enum VectorType nanoarrow_infer_vector_type(enum ArrowType type) { case NANOARROW_TYPE_UINT32: case NANOARROW_TYPE_INT64: case NANOARROW_TYPE_UINT64: + case NANOARROW_TYPE_HALF_FLOAT: case NANOARROW_TYPE_FLOAT: case NANOARROW_TYPE_DOUBLE: case NANOARROW_TYPE_DECIMAL128: diff --git a/r/src/materialize_dbl.h b/r/src/materialize_dbl.h index a69b4eb70..f24ff07de 100644 --- a/r/src/materialize_dbl.h +++ b/r/src/materialize_dbl.h @@ -69,6 +69,7 @@ static inline int nanoarrow_materialize_dbl(struct RConverter* converter) { case NANOARROW_TYPE_UINT16: case NANOARROW_TYPE_INT32: case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_HALF_FLOAT: case NANOARROW_TYPE_FLOAT: // No need to bounds check these types for (R_xlen_t i = 0; i < dst->length; i++) { diff --git a/r/tests/testthat/test-as-array.R b/r/tests/testthat/test-as-array.R index ed65ecd1d..1a95b643f 100644 --- a/r/tests/testthat/test-as-array.R +++ b/r/tests/testthat/test-as-array.R @@ -197,6 +197,46 @@ test_that("as_nanoarrow_array() works for double() -> na_int64()", { expect_identical(convert_array(array), as.double(c(1:10, NA_real_))) }) +test_that("as_nanoarrow_array() works for double() -> na_float()", { + # Without nulls + array <- as_nanoarrow_array(as.double(1:10), schema = na_float()) + expect_identical(infer_nanoarrow_schema(array)$format, "f") + expect_identical(as.raw(array$buffers[[1]]), raw()) + expect_identical(array$offset, 0L) + expect_identical(array$null_count, 0L) + expect_identical(convert_array(array), as.double(1:10)) + + # With nulls + array <- as_nanoarrow_array(c(1:10, NA_real_), schema = na_float()) + expect_identical(infer_nanoarrow_schema(array)$format, "f") + expect_identical(array$null_count, 1L) + expect_identical( + as.raw(array$buffers[[1]]), + packBits(c(rep(TRUE, 10), FALSE, rep(FALSE, 5))) + ) + expect_identical(convert_array(array), c(1:10, NA_real_)) +}) + +test_that("as_nanoarrow_array() works for double() -> na_half_float()", { + # Without nulls + array <- as_nanoarrow_array(as.double(1:10), schema = na_half_float()) + expect_identical(infer_nanoarrow_schema(array)$format, "e") + expect_identical(as.raw(array$buffers[[1]]), raw()) + expect_identical(array$offset, 0L) + expect_identical(array$null_count, 0L) + expect_identical(convert_array(array), as.double(1:10)) + + # With nulls + array <- as_nanoarrow_array(c(1:10, NA_real_), schema = na_half_float()) + expect_identical(infer_nanoarrow_schema(array)$format, "e") + expect_identical(array$null_count, 1L) + expect_identical( + as.raw(array$buffers[[1]]), + packBits(c(rep(TRUE, 10), FALSE, rep(FALSE, 5))) + ) + expect_identical(convert_array(array), c(1:10, NA_real_)) +}) + test_that("as_nanoarrow_array() works for integer64() -> na_int32()", { skip_if_not_installed("bit64") diff --git a/r/tests/testthat/test-convert-array.R b/r/tests/testthat/test-convert-array.R index bf7f45c95..42cea423b 100644 --- a/r/tests/testthat/test-convert-array.R +++ b/r/tests/testthat/test-convert-array.R @@ -560,6 +560,7 @@ test_that("convert to vector works for valid double()", { uint32 = arrow::uint32(), int64 = arrow::int64(), uint64 = arrow::uint64(), + float16 = arrow::float16(), float32 = arrow::float32(), float64 = arrow::float64() )