Skip to content

Commit 29bffa1

Browse files
committed
Add Arrow Table support
1 parent 3cd7cfd commit 29bffa1

18 files changed

+245
-1026
lines changed

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/GraalPythonModuleBuiltins.java

+18-12
Original file line numberDiff line numberDiff line change
@@ -91,17 +91,20 @@
9191
import com.oracle.graal.python.builtins.PythonBuiltinClassType;
9292
import com.oracle.graal.python.builtins.PythonBuiltins;
9393
import com.oracle.graal.python.builtins.modules.GraalPythonModuleBuiltinsFactory.DebugNodeFactory;
94+
import com.oracle.graal.python.builtins.modules.cext.PythonCextCapsuleBuiltins;
9495
import com.oracle.graal.python.builtins.objects.PNone;
9596
import com.oracle.graal.python.builtins.objects.array.PArray;
9697
import com.oracle.graal.python.builtins.objects.bytes.PBytes;
9798
import com.oracle.graal.python.builtins.objects.bytes.PBytesLike;
99+
import com.oracle.graal.python.builtins.objects.capsule.PyCapsule;
98100
import com.oracle.graal.python.builtins.objects.cext.PythonAbstractNativeObject;
99101
import com.oracle.graal.python.builtins.objects.cext.capi.CApiContext;
100102
import com.oracle.graal.python.builtins.objects.cext.capi.PySequenceArrayWrapper.ToNativeStorageNode;
101103
import com.oracle.graal.python.builtins.objects.cext.capi.PythonNativeWrapper;
102104
import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions;
103105
import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.PythonObjectReference;
104106
import com.oracle.graal.python.builtins.objects.cext.capi.transitions.GetNativeWrapperNode;
107+
import com.oracle.graal.python.builtins.objects.cext.common.CArrayWrappers;
105108
import com.oracle.graal.python.builtins.objects.cext.copying.NativeLibraryLocator;
106109
import com.oracle.graal.python.builtins.objects.code.CodeNodes;
107110
import com.oracle.graal.python.builtins.objects.code.PCode;
@@ -137,9 +140,6 @@
137140
import com.oracle.graal.python.nodes.PRaiseNode;
138141
import com.oracle.graal.python.nodes.arrow.ArrowArray;
139142
import com.oracle.graal.python.nodes.arrow.ArrowSchema;
140-
import com.oracle.graal.python.nodes.arrow.capsule.CreateArrowPyCapsuleNode;
141-
import com.oracle.graal.python.nodes.arrow.vector.VectorToArrowArrayNode;
142-
import com.oracle.graal.python.nodes.arrow.vector.VectorToArrowSchemaNode;
143143
import com.oracle.graal.python.nodes.builtins.FunctionNodes.GetCallTargetNode;
144144
import com.oracle.graal.python.nodes.bytecode.PBytecodeRootNode;
145145
import com.oracle.graal.python.nodes.call.CallNode;
@@ -1333,18 +1333,24 @@ Object invokeMember(String member, Object[] arguments) throws UnsupportedMessage
13331333
}
13341334
}
13351335

1336-
@Builtin(name = "export_arrow_vector", minNumOfPositionalArgs = 1)
1336+
@Builtin(name = "create_arrow_py_capsule", minNumOfPositionalArgs = 2)
13371337
@GenerateNodeFactory
1338-
public abstract static class ExportArrowVector extends PythonUnaryBuiltinNode {
1338+
public abstract static class CreateArrowPyCapsule extends PythonBinaryBuiltinNode {
1339+
13391340
@Specialization
1340-
static PTuple doExport(Object vector,
1341+
static PTuple doCreate(long arrowArrayAddr, long arrowSchemaAddr,
13411342
@Bind("this") Node inliningTarget,
1342-
@Cached VectorToArrowArrayNode exportArray,
1343-
@Cached VectorToArrowSchemaNode exportSchema,
1344-
@Cached CreateArrowPyCapsuleNode createArrowCapsuleNode) {
1345-
ArrowArray arrowArray = exportArray.execute(inliningTarget, vector);
1346-
ArrowSchema arrowSchema = exportSchema.execute(inliningTarget, vector);
1347-
return createArrowCapsuleNode.execute(inliningTarget, arrowArray, arrowSchema);
1343+
@Cached PythonCextCapsuleBuiltins.PyCapsuleNewNode pyCapsuleNewNode) {
1344+
var ctx = getContext(inliningTarget);
1345+
1346+
long arrayDestructor = ctx.arrowSupport.getArrowArrayDestructor();
1347+
var arrayCapsuleName = new CArrayWrappers.CByteArrayWrapper(ArrowArray.CAPSULE_NAME);
1348+
PyCapsule arrowArrayCapsule = pyCapsuleNewNode.execute(inliningTarget, arrowArrayAddr, arrayCapsuleName, arrayDestructor);
1349+
1350+
long schemaDestructor = ctx.arrowSupport.getArrowSchemaDestructor();
1351+
var schemaCapsuleName = new CArrayWrappers.CByteArrayWrapper(ArrowSchema.CAPSULE_NAME);
1352+
PyCapsule arrowSchemaCapsule = pyCapsuleNewNode.execute(inliningTarget, arrowSchemaAddr, schemaCapsuleName, schemaDestructor);
1353+
return PFactory.createTuple(ctx.getLanguage(inliningTarget), new Object[]{arrowSchemaCapsule, arrowArrayCapsule});
13481354
}
13491355
}
13501356
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/arrow/ArrowArray.java

+23-49
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -89,67 +89,41 @@ public class ArrowArray {
8989
private static final long RELEASE_CALLBACK_INDEX = 8 * POINTER_SIZE;
9090
private static final long PRIVATE_DATA_INDEX = 9 * POINTER_SIZE;
9191

92-
public final long memoryAddr;
92+
private final long memoryAddr;
9393

9494
private ArrowArray(long memoryAddr) {
9595
this.memoryAddr = memoryAddr;
9696
}
9797

98-
public static ArrowArray allocate() {
99-
var arrowArray = new ArrowArray(unsafe.allocateMemory(SIZE_OF));
100-
arrowArray.markReleased();
101-
return arrowArray;
98+
public static ArrowArray allocate(long length, long nullCount, long offset, long nBuffers, long nChildren, long buffers, long children, long dictionary, long release, long privateData) {
99+
var memoryAddr = unsafe.allocateMemory(SIZE_OF);
100+
unsafe.putLong(memoryAddr + LENGTH_INDEX, length);
101+
unsafe.putLong(memoryAddr + NULL_COUNT_INDEX, nullCount);
102+
unsafe.putLong(memoryAddr + OFFSET_INDEX, offset);
103+
unsafe.putLong(memoryAddr + N_BUFFERS_INDEX, nBuffers);
104+
unsafe.putLong(memoryAddr + N_CHILDREN_INDEX, nChildren);
105+
unsafe.putLong(memoryAddr + BUFFERS_INDEX, buffers);
106+
unsafe.putLong(memoryAddr + CHILDREN_INDEX, children);
107+
unsafe.putLong(memoryAddr + DICTIONARY_INDEX, dictionary);
108+
unsafe.putLong(memoryAddr + RELEASE_CALLBACK_INDEX, release);
109+
unsafe.putLong(memoryAddr + PRIVATE_DATA_INDEX, privateData);
110+
111+
return new ArrowArray(memoryAddr);
102112
}
103113

104-
public static ArrowArray allocateFromSnapshot(Snapshot snapshot) {
105-
var arrowArray = new ArrowArray(unsafe.allocateMemory(SIZE_OF));
106-
arrowArray.load(snapshot);
107-
return arrowArray;
114+
public long memoryAddress() {
115+
return memoryAddr;
108116
}
109117

110-
public static ArrowArray wrap(long arrowArrayPointer) {
111-
return new ArrowArray(arrowArrayPointer);
118+
public long releaseCallback() {
119+
return unsafe.getLong(memoryAddr + RELEASE_CALLBACK_INDEX);
112120
}
113121

114-
public void markReleased() {
115-
unsafe.putLong(memoryAddr + RELEASE_CALLBACK_INDEX, NULL);
122+
public static ArrowArray wrap(long arrowArrayPointer) {
123+
return new ArrowArray(arrowArrayPointer);
116124
}
117125

118126
public boolean isReleased() {
119-
return unsafe.getLong(memoryAddr + RELEASE_CALLBACK_INDEX) == NULL;
120-
}
121-
122-
public long getBuffers() {
123-
return unsafe.getLong(memoryAddr + BUFFERS_INDEX);
124-
}
125-
126-
public long getValueBuffer() {
127-
return unsafe.getLong(getBuffers() + POINTER_SIZE);
128-
}
129-
130-
private void load(Snapshot snapshot) {
131-
unsafe.putLong(memoryAddr + LENGTH_INDEX, snapshot.length);
132-
unsafe.putLong(memoryAddr + NULL_COUNT_INDEX, snapshot.null_count);
133-
unsafe.putLong(memoryAddr + OFFSET_INDEX, snapshot.offset);
134-
unsafe.putLong(memoryAddr + N_BUFFERS_INDEX, snapshot.n_buffers);
135-
unsafe.putLong(memoryAddr + N_CHILDREN_INDEX, snapshot.n_children);
136-
unsafe.putLong(memoryAddr + BUFFERS_INDEX, snapshot.buffers);
137-
unsafe.putLong(memoryAddr + CHILDREN_INDEX, snapshot.children);
138-
unsafe.putLong(memoryAddr + DICTIONARY_INDEX, snapshot.dictionary);
139-
unsafe.putLong(memoryAddr + RELEASE_CALLBACK_INDEX, snapshot.release);
140-
unsafe.putLong(memoryAddr + PRIVATE_DATA_INDEX, snapshot.private_data);
141-
}
142-
143-
public static class Snapshot {
144-
public long length = 0L;
145-
public long null_count = 0L;
146-
public long offset = 0L;
147-
public long n_buffers = 0L;
148-
public long n_children = 0L;
149-
public long buffers = 0L;
150-
public long children = 0L;
151-
public long dictionary = 0L;
152-
public long release = 0L;
153-
public long private_data = 0L;
127+
return releaseCallback() == NULL;
154128
}
155129
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/arrow/ArrowSchema.java

+20-45
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -86,64 +86,39 @@ public class ArrowSchema {
8686
private static final long RELEASE_CALLBACK_INDEX = 7 * POINTER_SIZE;
8787
private static final long PRIVATE_DATA_INDEX = 8 * POINTER_SIZE;
8888

89-
public final long memoryAddr;
89+
private final long memoryAddr;
9090

9191
private ArrowSchema(long memoryAddr) {
9292
this.memoryAddr = memoryAddr;
9393
}
9494

95-
public static ArrowSchema allocate() {
96-
var arrowSchema = new ArrowSchema(unsafe.allocateMemory(SIZE_OF));
97-
arrowSchema.markRelease();
98-
return arrowSchema;
99-
}
100-
101-
public static ArrowSchema allocateFromSnapshot(Snapshot snapshot) {
102-
var arrowSchema = new ArrowSchema(unsafe.allocateMemory(SIZE_OF));
103-
arrowSchema.load(snapshot);
104-
return arrowSchema;
95+
public static ArrowSchema allocate(long format, long name, long metadata, long flags, long nChildren, long children, long dictionary, long release, long privateData) {
96+
var memoryAddr = unsafe.allocateMemory(SIZE_OF);
97+
unsafe.putLong(memoryAddr + FORMAT_INDEX, format);
98+
unsafe.putLong(memoryAddr + NAME_INDEX, name);
99+
unsafe.putLong(memoryAddr + METADATA_INDEX, metadata);
100+
unsafe.putLong(memoryAddr + FLAGS_INDEX, flags);
101+
unsafe.putLong(memoryAddr + N_CHILDREN_INDEX, nChildren);
102+
unsafe.putLong(memoryAddr + CHILDREN_INDEX, children);
103+
unsafe.putLong(memoryAddr + DICTIONARY_INDEX, dictionary);
104+
unsafe.putLong(memoryAddr + RELEASE_CALLBACK_INDEX, release);
105+
unsafe.putLong(memoryAddr + PRIVATE_DATA_INDEX, privateData);
106+
return new ArrowSchema(memoryAddr);
105107
}
106108

107109
public static ArrowSchema wrap(long arrowSchemaPointer) {
108110
return new ArrowSchema(arrowSchemaPointer);
109111
}
110112

111-
public long getFormat() {
112-
return unsafe.getLong(memoryAddr);
113-
}
114-
115-
public boolean isReleased() {
116-
return unsafe.getLong(memoryAddr + RELEASE_CALLBACK_INDEX) == NULL;
113+
public long memoryAddress() {
114+
return memoryAddr;
117115
}
118116

119-
public void markRelease() {
120-
unsafe.putLong(memoryAddr + RELEASE_CALLBACK_INDEX, NULL);
117+
public long releaseCallback() {
118+
return unsafe.getLong(memoryAddr + RELEASE_CALLBACK_INDEX);
121119
}
122120

123-
public void load(Snapshot snapshot) {
124-
unsafe.putLong(memoryAddr + FORMAT_INDEX, snapshot.format);
125-
unsafe.putLong(memoryAddr + NAME_INDEX, snapshot.name);
126-
unsafe.putLong(memoryAddr + METADATA_INDEX, snapshot.metadata);
127-
unsafe.putLong(memoryAddr + FLAGS_INDEX, snapshot.flags);
128-
unsafe.putLong(memoryAddr + N_CHILDREN_INDEX, snapshot.n_children);
129-
unsafe.putLong(memoryAddr + CHILDREN_INDEX, snapshot.children);
130-
unsafe.putLong(memoryAddr + DICTIONARY_INDEX, snapshot.dictionary);
131-
unsafe.putLong(memoryAddr + RELEASE_CALLBACK_INDEX, snapshot.release);
132-
unsafe.putLong(memoryAddr + PRIVATE_DATA_INDEX, snapshot.private_data);
133-
}
134-
135-
public static class Snapshot {
136-
137-
public long format = 0L;
138-
public long name = 0L;
139-
public long metadata = 0L;
140-
public long flags = 0L;
141-
public long n_children = 0L;
142-
public long children = 0L;
143-
public long dictionary = 0L;
144-
public long release = 0L;
145-
public long private_data = 0L;
146-
121+
public boolean isReleased() {
122+
return releaseCallback() == NULL;
147123
}
148-
149124
}
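graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/arrow/InvokeArrowReleaseCallbackNode.java

+41-26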
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, 2025, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -38,48 +38,63 @@
3838
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
3939
* SOFTWARE.
4040
*/
41-
package com.oracle.graal.python.nodes.arrow.vector;
41+
package com.oracle.graal.python.nodes.arrow;
4242

43+
import com.oracle.graal.python.builtins.objects.cext.common.NativePointer;
4344
import com.oracle.graal.python.nodes.PNodeWithContext;
44-
import com.oracle.graal.python.nodes.arrow.ArrowSchema;
45-
import com.oracle.graal.python.runtime.arrow.ArrowVectorSupport;
4645
import com.oracle.graal.python.runtime.PythonContext;
46+
import com.oracle.graal.python.runtime.arrow.ArrowUtil;
4747
import com.oracle.truffle.api.CompilerDirectives;
4848
import com.oracle.truffle.api.dsl.Bind;
4949
import com.oracle.truffle.api.dsl.Cached;
50-
import com.oracle.truffle.api.dsl.Fallback;
5150
import com.oracle.truffle.api.dsl.GenerateCached;
5251
import com.oracle.truffle.api.dsl.GenerateInline;
52+
import com.oracle.truffle.api.dsl.GenerateUncached;
5353
import com.oracle.truffle.api.dsl.Specialization;
54+
import com.oracle.truffle.api.library.CachedLibrary;
5455
import com.oracle.truffle.api.nodes.Node;
56+
import com.oracle.truffle.nfi.api.SignatureLibrary;
5557

56-
import static com.oracle.graal.python.nodes.arrow.ArrowArray.NULL;
58+
@GenerateCached
59+
@GenerateInline(inlineByDefault = true)
60+
@GenerateUncached
61+
public abstract class InvokeArrowReleaseCallbackNode extends PNodeWithContext {
5762

58-
@GenerateCached(false)
59-
@GenerateInline
60-
public abstract class VectorToArrowSchemaNode extends PNodeWithContext {
63+
public abstract void execute(Node inliningTarget, long releaseCallback, long baseStructure);
6164

62-
public abstract ArrowSchema execute(Node inliningTarget, Object vector);
65+
public final void executeCached(long releaseCallback, long baseStructure) {
66+
execute(this, releaseCallback, baseStructure);
67+
}
6368

64-
@Specialization(guards = "arrowVectorSupport.isFixedWidthVector(vector)")
65-
static ArrowSchema doIntVector(Node inliningTarget, Object vector,
69+
@Specialization
70+
static void doIt(Node inliningTarget, long releaseCallback, long baseStructure,
6671
@Bind("getContext(inliningTarget)") PythonContext ctx,
67-
@Bind("ctx.arrowVectorSupport") ArrowVectorSupport arrowVectorSupport,
68-
@Cached GetFormatFromVectorNode formatNode) {
69-
Object hostVector = ctx.getEnv().asHostObject(vector);
70-
var unsafe = ctx.getUnsafe();
71-
var snapshot = new ArrowSchema.Snapshot();
72-
// + 1 NULL terminator
73-
snapshot.format = unsafe.allocateMemory(2);
74-
unsafe.putByte(snapshot.format, formatNode.execute(inliningTarget, hostVector));
75-
unsafe.putByte(snapshot.format + 1, NULL);
76-
snapshot.release = ctx.arrowSupport.getArrowSchemaReleaseCallback();
72+
@Cached(value = "createReleaseCallbackSignature(ctx)", allowUncached = true) Object callbackSignature,
73+
@CachedLibrary(limit = "1") SignatureLibrary signatureLibrary) {
74+
try {
75+
signatureLibrary.call(callbackSignature, new NativePointer(releaseCallback), baseStructure);
76+
} catch (Exception e) {
77+
throw CompilerDirectives.shouldNotReachHere("Unable to call release callback. Error:", e);
78+
}
79+
}
7780

78-
return ArrowSchema.allocateFromSnapshot(snapshot);
81+
static Object createReleaseCallbackSignature(PythonContext context) {
82+
return ArrowUtil.createNfiSignature("(UINT64):VOID", context);
7983
}
8084

81-
@Fallback
82-
static ArrowSchema doError(Object object) {
83-
throw CompilerDirectives.shouldNotReachHere();
85+
@GenerateCached(false)
86+
@GenerateInline
87+
@GenerateUncached
88+
public abstract static class Lazy extends Node {
89+
public final InvokeArrowReleaseCallbackNode get(Node inliningTarget) {
90+
return execute(inliningTarget);
91+
}
92+
93+
abstract InvokeArrowReleaseCallbackNode execute(Node inliningTarget);
94+
95+
@Specialization
96+
static InvokeArrowReleaseCallbackNode doIt(@Cached(inline = false) InvokeArrowReleaseCallbackNode node) {
97+
return node;
98+
}
8499
}
85100
}

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/nodes/arrow/capsule/ArrowArrayCapsuleDestructor.java

+7-8
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* The Universal Permissive License (UPL), Version 1.0
@@ -45,7 +45,7 @@
4545
import com.oracle.graal.python.builtins.objects.cext.capi.transitions.CApiTransitions.NativeToPythonNode;
4646
import com.oracle.graal.python.builtins.objects.cext.common.CArrayWrappers;
4747
import com.oracle.graal.python.nodes.arrow.ArrowArray;
48-
import com.oracle.graal.python.nodes.arrow.release_callback.ArrowArrayReleaseCallbackNode;
48+
import com.oracle.graal.python.nodes.arrow.InvokeArrowReleaseCallbackNode;
4949
import com.oracle.graal.python.runtime.PythonContext;
5050
import com.oracle.truffle.api.CompilerDirectives;
5151
import com.oracle.truffle.api.dsl.Bind;
@@ -72,15 +72,14 @@ static class Execute {
7272

7373
@Specialization(guards = "isPointer(args, interopLib)")
7474
static Object doRelease(ArrowArrayCapsuleDestructor self, Object[] args,
75-
@CachedLibrary(limit = "1") InteropLibrary interopLib,
7675
@Bind("$node") Node inliningTarget,
76+
@CachedLibrary(limit = "1") InteropLibrary interopLib,
7777
@Cached NativeToPythonNode nativeToPythonNode,
7878
@Cached PyCapsuleGetPointerNode capsuleGetPointerNode,
79-
@Cached ArrowArrayReleaseCallbackNode arrayReleaseNode) {
79+
@Cached InvokeArrowReleaseCallbackNode.Lazy invokeReleaseCallbackNode) {
8080
Object capsule = nativeToPythonNode.execute(args[0]);
8181
var capsuleName = new CArrayWrappers.CByteArrayWrapper(ArrowArray.CAPSULE_NAME);
82-
var arrowArrayPointer = (long) capsuleGetPointerNode.execute(inliningTarget, capsule, capsuleName);
83-
var arrowArray = ArrowArray.wrap(arrowArrayPointer);
82+
var arrowArray = ArrowArray.wrap((long) capsuleGetPointerNode.execute(inliningTarget, capsule, capsuleName));
8483
/*
8584
* The exported PyCapsules should have a destructor that calls the release callback of
8685
* the Arrow struct, if it is not already null. This prevents a memory leak in case the
@@ -91,9 +90,9 @@ static Object doRelease(ArrowArrayCapsuleDestructor self, Object[] args,
9190
* semantics
9291
*/
9392
if (!arrowArray.isReleased()) {
94-
arrayReleaseNode.execute(inliningTarget, arrowArray);
93+
invokeReleaseCallbackNode.get(inliningTarget).executeCached(arrowArray.releaseCallback(), arrowArray.memoryAddress());
9594
}
96-
PythonContext.get(inliningTarget).getUnsafe().freeMemory(arrowArrayPointer);
95+
PythonContext.get(inliningTarget).getUnsafe().freeMemory(arrowArray.memoryAddress());
9796
return PNone.NO_VALUE;
9897
}
9998

0 commit comments

Comments
 (0)