Skip to content

Commit 100ded6

Browse files
authored
feat(r): Add zstd decompression support to R package (#733)
This PR adds support for ZSTD decompression when reading IPC streams. This works on R >= 4.0, although ZSTD is not supported on 32-bit Windows, nor on Linux when libzstd was not installed before the package was installed. Another approach would be to vendor zstd if the system library isn't found, but I'll leave that as a battle for another day (if somebody ever requests that support).

``` r
library(nanoarrow)

url <- "https://github.com/geoarrow/geoarrow-data/releases/download/v0.2.0-rc6/ns-water_water-point_wkb.arrows"
read_nanoarrow(url) |> tibble::as_tibble()
#> # A tibble: 44,690 × 8
#> OBJECTID FEAT_CODE ZVALUE PT_CLASS NAMEID_1 NAME_1 HID geometry
#> <dbl> <chr> <dbl> <int> <chr> <chr> <chr> <blob>
#> 1 1055 WARK60 -0.5 4 <NA> <NA> 252C345D59374D… <raw 29 B>
#> 2 1023 WARK60 0.6 4 <NA> <NA> 1DAB1D800FB84E… <raw 29 B>
#> 3 1021 WARK60 0.5 4 <NA> <NA> 838438F1BBE745… <raw 29 B>
#> 4 985 WARK60 0 4 <NA> <NA> 0A4BE2AB03D845… <raw 29 B>
#> 5 994 WARK60 1.9 4 <NA> <NA> 6ACD71128B6B49… <raw 29 B>
#> 6 995 WARK60 1.4 4 <NA> <NA> B10B26FA32FB44… <raw 29 B>
#> 7 997 WARK60 1.1 4 <NA> <NA> 28E47E22D71549… <raw 29 B>
#> 8 993 WARK60 1.9 4 <NA> <NA> FC9A29123BEF4A… <raw 29 B>
#> 9 1003 WARK60 0.7 4 <NA> <NA> 3C7CA3CD0E8840… <raw 29 B>
#> 10 1001 WARK60 0.7 4 <NA> <NA> A6F508B066DC4A… <raw 29 B>
#> # ℹ 44,680 more rows
```

<sup>Created on 2025-03-24 with [reprex v2.1.1](https://reprex.tidyverse.org)</sup>
1 parent 945fa61 commit 100ded6

19 files changed

+250
-38
lines changed

.github/workflows/r-check.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ jobs:
4040
matrix:
4141
config:
4242
- {os: macOS-latest, r: 'release'}
43-
- {os: windows-latest, r: '4.0'}
43+
- {os: windows-latest, r: '4.1'}
44+
- {os: windows-latest, r: '4.2'}
4445
- {os: windows-latest, r: 'release'}
4546
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
4647
- {os: ubuntu-latest, r: 'release'}
@@ -83,3 +84,8 @@ jobs:
8384
with:
8485
upload-snapshots: true
8586
working-directory: r
87+
88+
- name: Show install output
89+
if: always()
90+
run: find r/check -name '00install.out*' -exec cat '{}' \; || true
91+
shell: bash

r/.Rbuildignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,5 @@
1212
^bootstrap\.R$
1313
^\.cache$
1414
^compile_commands\.json$
15+
^src/Makevars$
1516
^configure\.win$
16-
^configure$

r/DESCRIPTION

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,6 @@ Suggests:
3434
tibble,
3535
vctrs,
3636
withr
37+
SystemRequirements: libzstd (optional)
3738
Config/testthat/edition: 3
3839
Config/build/bootstrap: TRUE

r/NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ export(nanoarrow_schema_modify)
215215
export(nanoarrow_schema_parse)
216216
export(nanoarrow_vctr)
217217
export(nanoarrow_version)
218+
export(nanoarrow_with_zstd)
218219
export(read_nanoarrow)
219220
export(register_nanoarrow_extension)
220221
export(resolve_nanoarrow_extension)

r/R/ipc.R

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,20 @@ write_nanoarrow.character <- function(data, x, ...) {
145145
write_nanoarrow(data, con)
146146
}
147147

148-
#' @rdname read_nanoarrow
148+
#' Example Arrow IPC Data
149+
#'
150+
#' An example stream that can be used for testing or examples.
151+
#'
152+
#' @param compression One of "none" or "zstd"
153+
#'
154+
#' @return A raw vector that can be passed to [read_nanoarrow()]
149155
#' @export
150-
example_ipc_stream <- function() {
156+
#'
157+
#' @examples
158+
#' as.data.frame(read_nanoarrow(example_ipc_stream()))
159+
example_ipc_stream <- function(compression = c("none", "zstd")) {
160+
compression <- match.arg(compression)
161+
151162
# data.frame(some_col = c(1L, 2L, 3L)) as a serialized schema/batch
152163
schema <- as.raw(c(
153164
0xff, 0xff, 0xff, 0xff, 0x10, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -172,20 +183,40 @@ example_ipc_stream <- function() {
172183
0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
173184
))
174185

175-
batch <- as.raw(c(
176-
0xff, 0xff, 0xff, 0xff, 0x88, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00,
177-
0x00, 0x00, 0x0c, 0x00, 0x16, 0x00, 0x06, 0x00, 0x05, 0x00, 0x08, 0x00, 0x0c, 0x00,
178-
0x0c, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x10, 0x00,
179-
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x18, 0x00, 0x0c, 0x00,
180-
0x04, 0x00, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x10, 0x00,
181-
0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
182-
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
183-
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
184-
0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
185-
0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
186-
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00,
187-
0x00, 0x00, 0x00, 0x00, 0x00, 0x00
188-
))
186+
if (identical(compression, "zstd")) {
187+
batch <- as.raw(c(
188+
0xff, 0xff, 0xff, 0xff, 0xa0, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00,
189+
0x00, 0x00, 0x0c, 0x00, 0x18, 0x00, 0x06, 0x00, 0x05, 0x00, 0x08, 0x00, 0x0c, 0x00,
190+
0x0c, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x20, 0x00,
191+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x1e, 0x00,
192+
0x10, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x50, 0x00,
193+
0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
194+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00,
195+
0x07, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00,
196+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
197+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
198+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00,
199+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
200+
0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0xb5, 0x2f, 0xfd, 0x20, 0x0c,
201+
0x61, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00,
202+
0x00, 0x00, 0x00, 0x00
203+
))
204+
} else {
205+
batch <- as.raw(c(
206+
0xff, 0xff, 0xff, 0xff, 0x88, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00,
207+
0x00, 0x00, 0x0c, 0x00, 0x16, 0x00, 0x06, 0x00, 0x05, 0x00, 0x08, 0x00, 0x0c, 0x00,
208+
0x0c, 0x00, 0x00, 0x00, 0x00, 0x03, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x10, 0x00,
209+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x18, 0x00, 0x0c, 0x00,
210+
0x04, 0x00, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x10, 0x00,
211+
0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
212+
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
213+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
214+
0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
215+
0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
216+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00,
217+
0x00, 0x00, 0x00, 0x00, 0x00, 0x00
218+
))
219+
}
189220

190221
c(schema, batch)
191222
}

r/R/nanoarrow-package.R

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,17 @@ NULL
3535
#'
3636
#' @examples
3737
#' nanoarrow_version()
38-
#'
38+
#' nanoarrow_with_zstd()
3939
nanoarrow_version <- function(runtime = TRUE) {
4040
if (runtime) {
4141
.Call(nanoarrow_c_version_runtime)
4242
} else {
4343
.Call(nanoarrow_c_version)
4444
}
4545
}
46+
47+
#' @rdname nanoarrow_version
48+
#' @export
49+
nanoarrow_with_zstd <- function() {
50+
.Call(nanoarrow_c_with_zstd)
51+
}

r/configure

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,37 @@ fi
2323

2424
if [ -f "src/nanoarrow.h" ] && [ -f "src/nanoarrow.c" ]; then
2525
echo "Found vendored nanoarrow"
26-
exit 0
26+
else
27+
echo "Vendored src/nanoarrow.h and/or src/nanoarrow.c are missing"
28+
echo "This source tarball was built incorrectly."
29+
exit 1
2730
fi
2831

29-
echo "Vendored src/nanoarrow.h and/or src/nanoarrow.c are missing"
30-
echo "This source tarball was built incorrectly."
31-
exit 1
32+
# Check to see if a zstd test file compiles and links without any help from pkg-config
33+
ZSTD_FOUND=""
34+
PKG_CPPFLAGS="$PKG_CPPFLAGS" PKG_LIBS="$PKG_LIBS -lzstd" \
35+
$R_HOME/bin/R CMD SHLIB tools/test_zstd.c -o test_zstd >test_zstd.log 2>&1
36+
if [ $? -eq 0 ]; then
37+
echo "tools/test_zstd.c compiled without error"
38+
PKG_CPPFLAGS="$PKG_CPPFLAGS -DNANOARROW_IPC_WITH_ZSTD"
39+
PKG_LIBS="$PKG_LIBS -lzstd"
40+
ZSTD_FOUND="yes"
41+
fi
42+
43+
# If the direct link test did not succeed, fall back to pkg-config to locate libzstd (when available)
44+
if [ -z "$ZSTD_FOUND" ] && pkg-config libzstd --exists >/dev/null 2>&1; then
45+
PKG_CPPFLAGS="`pkg-config libzstd --cflags` -DNANOARROW_IPC_WITH_ZSTD $PKG_CPPFLAGS"
46+
PKG_LIBS="`pkg-config libzstd --libs` $PKG_LIBS"
47+
echo "Using pkg-config libzstd"
48+
ZSTD_FOUND="yes"
49+
fi
50+
51+
rm -f tools/test_zstd.o test_zstd test_zstd.log || true
52+
53+
echo "Using PKG_CPPFLAGS=$PKG_CPPFLAGS"
54+
echo "Using PKG_LIBS=$PKG_LIBS"
55+
56+
sed \
57+
-e "s|@cppflags@|$PKG_CPPFLAGS|" \
58+
-e "s|@libs@|$PKG_LIBS|" \
59+
src/Makevars.in > src/Makevars

r/man/example_ipc_stream.Rd

Lines changed: 20 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/man/nanoarrow_version.Rd

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/man/read_nanoarrow.Rd

Lines changed: 0 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/src/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ nanoarrow.h
2323
nanoarrow_ipc.h
2424
nanoarrow_ipc.c
2525
flatcc*
26+
Makevars

r/src/Makevars renamed to r/src/Makevars.in

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,6 @@
1919
# that do not provide aligned_alloc. Allocating flatbuffers memory is not
2020
# performance-critical for what we do in the nanoarrow R package (and may not
2121
# occur at all until IPC write support is added)
22-
PKG_CPPFLAGS=-I../inst/include -I../src -DFLATCC_USE_GENERIC_ALIGNED_ALLOC
22+
23+
PKG_CPPFLAGS=-I../inst/include -I../src -DFLATCC_USE_GENERIC_ALIGNED_ALLOC @cppflags@
24+
PKG_LIBS=@libs@

r/src/Makevars.ucrt

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# This Makevars handles R >= 4.2 on Windows (pkg-config is available on all such versions)
19+
20+
ifeq (,$(shell pkg-config libzstd --libs 2>/dev/null))
21+
ZSTD_CFLAGS =
22+
ZSTD_LIB_FLAGS =
23+
else
24+
ZSTD_CFLAGS = $(shell pkg-config --cflags libzstd) -DNANOARROW_IPC_WITH_ZSTD
25+
ZSTD_LIB_FLAGS = $(shell pkg-config --libs libzstd)
26+
endif
27+
28+
PKG_CPPFLAGS = -I../inst/include -I../src -DFLATCC_USE_GENERIC_ALIGNED_ALLOC $(ZSTD_CFLAGS)
29+
PKG_LIBS = $(ZSTD_LIB_FLAGS)

r/src/Makevars.win

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# This Makevars handles R 4.0 and 4.1 on Windows. Packages for these R versions are no longer built
19+
# by CRAN, but on GitHub Actions pkg-config seems to be able to resolve and link a
20+
# zstd that works on 64-bit (but not 32-bit).
21+
22+
ifeq (,$(shell pkg-config libzstd --libs 2>/dev/null))
23+
ZSTD_CFLAGS =
24+
ZSTD_LIB_FLAGS =
25+
else
26+
ifeq "$(WIN)" "64"
27+
ZSTD_CFLAGS = $(shell pkg-config --cflags libzstd) -DNANOARROW_IPC_WITH_ZSTD
28+
ZSTD_LIB_FLAGS = $(shell pkg-config --libs libzstd)
29+
else
30+
ZSTD_CFLAGS =
31+
ZSTD_LIB_FLAGS =
32+
endif
33+
endif
34+
35+
PKG_CPPFLAGS = -I../inst/include -I../src -DFLATCC_USE_GENERIC_ALIGNED_ALLOC $(ZSTD_CFLAGS)
36+
PKG_LIBS = $(ZSTD_LIB_FLAGS)

r/src/as_array.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -406,12 +406,14 @@ static void as_decimal_array(SEXP x_sexp, struct ArrowArray* array, SEXP schema_
406406
} else {
407407
item_digits_view.data = CHAR(item_sexp);
408408
item_digits_view.size_bytes = Rf_length(item_sexp);
409-
ArrowDecimalSetDigits(&item, item_digits_view);
410-
result = ArrowArrayAppendDecimal(array, &item);
409+
result = ArrowDecimalSetDigits(&item, item_digits_view);
410+
if (result == NANOARROW_OK) {
411+
result = ArrowArrayAppendDecimal(array, &item);
412+
}
411413
}
412414

413415
if (result != NANOARROW_OK) {
414-
Rf_error("ArrowArrayAppendDecimal() failed");
416+
Rf_error("ArrowArrayAppendDecimal() or ArrowDecimalSetDigits() failed");
415417
}
416418
}
417419

r/src/init.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ extern SEXP nanoarrow_c_vctr_chunk_resolve(SEXP indices_sexp, SEXP offsets_sexp)
9898
extern SEXP nanoarrow_c_vctr_as_slice(SEXP indices_sexp);
9999
extern SEXP nanoarrow_c_version(void);
100100
extern SEXP nanoarrow_c_version_runtime(void);
101+
extern SEXP nanoarrow_c_with_zstd(void);
101102

102103
static const R_CallMethodDef CallEntries[] = {
103104
{"nanoarrow_c_make_altrep_chr", (DL_FUNC)&nanoarrow_c_make_altrep_chr, 1},
@@ -178,6 +179,7 @@ static const R_CallMethodDef CallEntries[] = {
178179
{"nanoarrow_c_vctr_as_slice", (DL_FUNC)&nanoarrow_c_vctr_as_slice, 1},
179180
{"nanoarrow_c_version", (DL_FUNC)&nanoarrow_c_version, 0},
180181
{"nanoarrow_c_version_runtime", (DL_FUNC)&nanoarrow_c_version_runtime, 0},
182+
{"nanoarrow_c_with_zstd", (DL_FUNC)&nanoarrow_c_with_zstd, 0},
181183
{NULL, NULL, 0}};
182184
/* end generated by tools/make-callentries.R */
183185

r/src/version.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,11 @@
2424
SEXP nanoarrow_c_version(void) { return Rf_mkString(NANOARROW_VERSION); }
2525

2626
SEXP nanoarrow_c_version_runtime(void) { return Rf_mkString(ArrowNanoarrowVersion()); }
27+
28+
SEXP nanoarrow_c_with_zstd(void) {
29+
#if defined(NANOARROW_IPC_WITH_ZSTD)
30+
return Rf_ScalarLogical(1);
31+
#else
32+
return Rf_ScalarLogical(0);
33+
#endif
34+
}

0 commit comments

Comments
 (0)