Skip to content

Commit bed1949

Browse files
authored
feat: Add support for Decimal32/64 to R package (#717)
This PR adds support for decimal32, 64, and 256, and migrates support for decimal128 to use nanoarrow's internals rather than call Arrow. This removes the last dependency on arrow for conversion from Arrow -> R, for which some (undiagnosed as of yet) bug results in very slow conversions when many batches are involved ( apache/arrow-adbc#2508 ). Decimal conversions show up in database land quite lot because decimal types are common in database output (notably: Snowflake). To support the types that the arrow package does not yet support (Decimal32, 64, and 256), this PR also implements conversion from R -> Arrow for decimal types. This conversion is a little hacky because it involves strings, but the basic idea is to use the fact that R has all the functionality needed (rounding, appending adding zeroes, etc.). This code path is probably rare because one has to explicitly opt-in to a decimal type but I put it in because it's helpful to ensure we (and others) can create test data or specific typed input for a database append if required. ``` r library(nanoarrow) # Can create a decimal array from integer or double (dec_array <- as_nanoarrow_array(-10:10, schema = na_decimal32(9, 2))) #> <nanoarrow_array decimal32(9, 2)[21]> #> $ length : int 21 #> $ null_count: int 0 #> $ offset : int 0 #> $ buffers :List of 2 #> ..$ :<nanoarrow_buffer validity<bool>[null] `` #> ..$ :<nanoarrow_buffer data<decimal32>[21][84 b]> `-1000 -900 -800 -700 -6...` #> $ dictionary: NULL #> $ children : list() # Default conversion to R is a double, which matches what arrow does # (and what nanoarrow did before this change for decimal128) str(convert_array(dec_array)) #> num [1:21] -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 ... # One can also convert to character(), which keeps better fidelity for # out-of-range types. str(convert_array(dec_array, character())) #> chr [1:21] "-10.00" "-9.00" "-8.00" "-7.00" "-6.00" "-5.00" "-4.00" ... # Print method for decimal buffers! dec_array$buffers[[2]] #> <nanoarrow_buffer data<decimal32>[21][84 b]> `-1000 -900 -800 -700 -600 -500...` ``` <sup>Created on 2025-03-15 with [reprex v2.1.1](https://reprex.tidyverse.org)</sup>
1 parent 48c6564 commit bed1949

22 files changed

+579
-71
lines changed

r/NAMESPACE

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ S3method(as_nanoarrow_schema,nanoarrow_schema)
6363
S3method(as_nanoarrow_schema,nanoarrow_vctr)
6464
S3method(c,nanoarrow_vctr)
6565
S3method(convert_array,default)
66-
S3method(convert_array,double)
6766
S3method(convert_array,factor)
6867
S3method(convert_array,nanoarrow_vctr)
6968
S3method(convert_array,vctrs_partial_frame)
@@ -155,6 +154,8 @@ export(na_date32)
155154
export(na_date64)
156155
export(na_decimal128)
157156
export(na_decimal256)
157+
export(na_decimal32)
158+
export(na_decimal64)
158159
export(na_dense_union)
159160
export(na_dictionary)
160161
export(na_double)
@@ -173,8 +174,10 @@ export(na_interval_month_day_nano)
173174
export(na_interval_months)
174175
export(na_large_binary)
175176
export(na_large_list)
177+
export(na_large_list_view)
176178
export(na_large_string)
177179
export(na_list)
180+
export(na_list_view)
178181
export(na_map)
179182
export(na_na)
180183
export(na_sparse_union)

r/R/as-array.R

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,3 +475,29 @@ as_nanoarrow_array_from_c <- function(x, schema) {
475475

476476
result
477477
}
478+
479+
# Helper to allow us to use nanoarrow's string parser, which parses integers
480+
# to set decimal storage but not the slightly more useful case of parsing
481+
# things with decimal points yet.
482+
storage_integer_for_decimal <- function(numbers, scale) {
483+
rounded_formatted <- storage_decimal_for_decimal(numbers, scale)
484+
gsub(".", "", rounded_formatted, fixed = TRUE)
485+
}
486+
487+
storage_decimal_for_decimal <- function(numbers, scale) {
488+
if (scale > 0) {
489+
rounded_formatted <- sprintf("%0.*f", scale, numbers)
490+
rounded_formatted[is.na(numbers)] <- NA_character_
491+
} else {
492+
rounded <- round(numbers, scale)
493+
is_zero <- !is.na(rounded) & rounded == 0
494+
rounded_formatted <- as.character(rounded)
495+
rounded_formatted[!is_zero] <- gsub(
496+
paste0("0{", -scale, "}$"),
497+
"",
498+
rounded_formatted[!is_zero]
499+
)
500+
}
501+
502+
rounded_formatted
503+
}

r/R/buffer.R

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,24 @@ as_nanoarrow_array.nanoarrow_buffer <- function(x, ..., schema = NULL) {
225225
buffers = list(NULL, offsets, x)
226226
)
227227
)
228+
} else if (data_type %in% c("decimal32", "decimal64", "decimal128", "decimal256")) {
229+
# Create an array with max precision and scale 0, which results in the
230+
# decimal integer value getting displayed.
231+
array <- nanoarrow_array_init(
232+
na_type(
233+
data_type,
234+
precision = max_decimal_precision(data_type),
235+
scale = 0
236+
)
237+
)
238+
nanoarrow_array_modify(
239+
array,
240+
list(
241+
length = logical_length,
242+
null_count = 0,
243+
buffers = list(NULL, x)
244+
)
245+
)
228246
} else if (data_type %in% c("string_view", "binary_view")) {
229247
stop("Can't convert buffer of type string_view or binary_view to array")
230248
} else {

r/R/convert-array.R

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -155,26 +155,6 @@ convert_array.nanoarrow_vctr <- function(array, to, ...) {
155155
new_nanoarrow_vctr(list(array), schema, class(to))
156156
}
157157

158-
#' @export
159-
convert_array.double <- function(array, to, ...) {
160-
# Handle conversion from decimal128 via arrow
161-
schema <- infer_nanoarrow_schema(array)
162-
parsed <- nanoarrow_schema_parse(schema)
163-
if (parsed$type == "decimal128") {
164-
assert_arrow_installed(
165-
sprintf(
166-
"convert %s array to object of type double",
167-
nanoarrow_schema_formatted(schema)
168-
)
169-
)
170-
171-
arrow_array <- as_arrow_array.nanoarrow_array(array)
172-
arrow_array$as_vector()
173-
} else {
174-
NextMethod()
175-
}
176-
}
177-
178158
#' @export
179159
convert_array.vctrs_partial_frame <- function(array, to, ...) {
180160
ptype <- infer_nanoarrow_ptype(array)

r/R/type.R

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
#' na_struct(list(col1 = na_int32()))
6666
#'
6767
na_type <- function(type_name, byte_width = NULL, unit = NULL, timezone = NULL,
68+
precision = NULL, scale = NULL,
6869
column_types = NULL, item_type = NULL, key_type = NULL,
6970
value_type = NULL, index_type = NULL, ordered = NULL,
7071
list_size = NULL, keys_sorted = NULL, storage_type = NULL,
@@ -76,6 +77,8 @@ na_type <- function(type_name, byte_width = NULL, unit = NULL, timezone = NULL,
7677
byte_width = byte_width,
7778
unit = unit,
7879
timezone = timezone,
80+
precision = precision,
81+
scale = scale,
7982
column_types = column_types,
8083
item_type = item_type,
8184
key_type = key_type,
@@ -307,6 +310,30 @@ na_timestamp <- function(unit = c("us", "ns", "s", "ms"), timezone = "", nullabl
307310
)
308311
}
309312

313+
#' @rdname na_type
314+
#' @export
315+
na_decimal32 <- function(precision, scale, nullable = TRUE) {
316+
.Call(
317+
nanoarrow_c_schema_init_decimal,
318+
NANOARROW_TYPE$DECIMAL32,
319+
as.integer(precision)[1],
320+
as.integer(scale)[1],
321+
isTRUE(nullable)
322+
)
323+
}
324+
325+
#' @rdname na_type
326+
#' @export
327+
na_decimal64 <- function(precision, scale, nullable = TRUE) {
328+
.Call(
329+
nanoarrow_c_schema_init_decimal,
330+
NANOARROW_TYPE$DECIMAL64,
331+
as.integer(precision)[1],
332+
as.integer(scale)[1],
333+
isTRUE(nullable)
334+
)
335+
}
336+
310337
#' @rdname na_type
311338
#' @export
312339
na_decimal128 <- function(precision, scale, nullable = TRUE) {
@@ -371,6 +398,22 @@ na_large_list <- function(item_type, nullable = TRUE) {
371398
schema
372399
}
373400

401+
#' @rdname na_type
402+
#' @export
403+
na_list_view <- function(item_type, nullable = TRUE) {
404+
schema <- .Call(nanoarrow_c_schema_init, NANOARROW_TYPE$LIST_VIEW, isTRUE(nullable))
405+
schema$children[[1]] <- item_type
406+
schema
407+
}
408+
409+
#' @rdname na_type
410+
#' @export
411+
na_large_list_view <- function(item_type, nullable = TRUE) {
412+
schema <- .Call(nanoarrow_c_schema_init, NANOARROW_TYPE$LARGE_LIST_VIEW, isTRUE(nullable))
413+
schema$children[[1]] <- item_type
414+
schema
415+
}
416+
374417
#' @rdname na_type
375418
#' @export
376419
na_fixed_size_list <- function(item_type, list_size, nullable = TRUE) {
@@ -430,6 +473,17 @@ time_unit_id <- function(time_unit) {
430473
match(time_unit, c("s", "ms", "us", "ns")) - 1L
431474
}
432475

476+
max_decimal_precision <- function(type) {
477+
switch(
478+
type,
479+
decimal32 = 9,
480+
decimal64 = 18,
481+
decimal128 = 38,
482+
decimal256 = 76,
483+
stop(sprintf("non-decimal type name: %s", type))
484+
)
485+
}
486+
433487
# These values aren't guaranteed to stay stable between nanoarrow versions,
434488
# so we keep them internal but use them in these functions to simplify the
435489
# number of C functions we need to build all the types.
@@ -475,7 +529,11 @@ NANOARROW_TYPE <- list(
475529
INTERVAL_MONTH_DAY_NANO = 38L,
476530
RUN_END_ENCODED = 39L,
477531
BINARY_VIEW = 40L,
478-
STRING_VIEW = 41L
532+
STRING_VIEW = 41L,
533+
DECIMAL32 = 42L,
534+
DECIMAL64 = 43L,
535+
LIST_VIEW = 44L,
536+
LARGE_LIST_VIEW = 45L
479537
)
480538

481539
ARROW_FLAG <- list(

r/man/na_type.Rd

Lines changed: 18 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/nanoarrow.Rproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
Version: 1.0
2+
ProjectId: 66584247-a2ea-4bae-bcbd-86d4e09bc627
23

34
RestoreWorkspace: No
45
SaveWorkspace: No

r/src/as_array.c

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,29 @@ static void call_as_nanoarrow_array(SEXP x_sexp, struct ArrowArray* array,
4646
UNPROTECT(3);
4747
}
4848

49+
static SEXP call_storage_integer_for_decimal(SEXP x_sexp, int scale) {
50+
SEXP scale_sexp = PROTECT(Rf_ScalarInteger(scale));
51+
SEXP fun = PROTECT(Rf_install("storage_integer_for_decimal"));
52+
SEXP call = PROTECT(Rf_lang3(fun, x_sexp, scale_sexp));
53+
SEXP result = PROTECT(Rf_eval(call, nanoarrow_ns_pkg));
54+
UNPROTECT(4);
55+
return result;
56+
}
57+
58+
static void as_decimal_array(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr,
59+
struct ArrowSchemaView* schema_view,
60+
struct ArrowError* error);
61+
4962
static void as_array_int(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr,
5063
struct ArrowSchemaView* schema_view, struct ArrowError* error) {
5164
// Consider integer -> numeric types that are easy to implement
5265
switch (schema_view->type) {
66+
case NANOARROW_TYPE_DECIMAL32:
67+
case NANOARROW_TYPE_DECIMAL64:
68+
case NANOARROW_TYPE_DECIMAL128:
69+
case NANOARROW_TYPE_DECIMAL256:
70+
as_decimal_array(x_sexp, array, schema_xptr, schema_view, error);
71+
return;
5372
case NANOARROW_TYPE_DOUBLE:
5473
case NANOARROW_TYPE_FLOAT:
5574
case NANOARROW_TYPE_HALF_FLOAT:
@@ -215,6 +234,12 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr
215234
// Consider double -> na_double() and double -> na_int64()/na_int32()
216235
// (mostly so that we can support date/time types with various units)
217236
switch (schema_view->type) {
237+
case NANOARROW_TYPE_DECIMAL32:
238+
case NANOARROW_TYPE_DECIMAL64:
239+
case NANOARROW_TYPE_DECIMAL128:
240+
case NANOARROW_TYPE_DECIMAL256:
241+
as_decimal_array(x_sexp, array, schema_xptr, schema_view, error);
242+
return;
218243
case NANOARROW_TYPE_DOUBLE:
219244
case NANOARROW_TYPE_FLOAT:
220245
case NANOARROW_TYPE_HALF_FLOAT:
@@ -346,6 +371,58 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr
346371
}
347372
}
348373

374+
static void as_decimal_array(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr,
375+
struct ArrowSchemaView* schema_view,
376+
struct ArrowError* error) {
377+
// Use R to generate the input we need for ArrowDecimalSetDigits()
378+
SEXP x_digits_sexp =
379+
PROTECT(call_storage_integer_for_decimal(x_sexp, schema_view->decimal_scale));
380+
381+
struct ArrowDecimal item;
382+
ArrowDecimalInit(&item, schema_view->decimal_bitwidth, schema_view->decimal_precision,
383+
schema_view->decimal_scale);
384+
385+
int result = ArrowArrayInitFromType(array, schema_view->type);
386+
if (result != NANOARROW_OK) {
387+
Rf_error("ArrowArrayInitFromType() failed");
388+
}
389+
390+
result = ArrowArrayStartAppending(array);
391+
if (result != NANOARROW_OK) {
392+
Rf_error("ArrowArrayStartAppending() failed");
393+
}
394+
395+
int64_t len = Rf_xlength(x_sexp);
396+
result = ArrowArrayReserve(array, len);
397+
if (result != NANOARROW_OK) {
398+
Rf_error("ArrowArrayReserve() failed");
399+
}
400+
401+
struct ArrowStringView item_digits_view;
402+
for (int64_t i = 0; i < len; i++) {
403+
SEXP item_sexp = STRING_ELT(x_digits_sexp, i);
404+
if (item_sexp == NA_STRING) {
405+
result = ArrowArrayAppendNull(array, 1);
406+
} else {
407+
item_digits_view.data = CHAR(item_sexp);
408+
item_digits_view.size_bytes = Rf_length(item_sexp);
409+
ArrowDecimalSetDigits(&item, item_digits_view);
410+
result = ArrowArrayAppendDecimal(array, &item);
411+
}
412+
413+
if (result != NANOARROW_OK) {
414+
Rf_error("ArrowArrayAppendDecimal() failed");
415+
}
416+
}
417+
418+
UNPROTECT(1);
419+
420+
result = ArrowArrayFinishBuildingDefault(array, error);
421+
if (result != NANOARROW_OK) {
422+
Rf_error("ArrowArrayFinishBuildingDefault(): %s", error->message);
423+
}
424+
}
425+
349426
static void as_array_chr(SEXP x_sexp, struct ArrowArray* array, SEXP schema_xptr,
350427
struct ArrowSchemaView* schema_view, struct ArrowError* error) {
351428
switch (schema_view->type) {

r/src/convert_array.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323

2424
#include "altrep.h"
2525
#include "array.h"
26-
#include "array_view.h"
2726
#include "convert.h"
2827
#include "util.h"
2928

0 commit comments

Comments
 (0)