Skip to content

Commit a712b05

Browse files
committed
add record batch import/export
1 parent 28bb49c commit a712b05

File tree

8 files changed

+189
-2
lines changed

8 files changed

+189
-2
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
// Licensed to the Apache Software Foundation (ASF) under one
19+
// or more contributor license agreements. See the NOTICE file
20+
// distributed with this work for additional information
21+
// regarding copyright ownership. The ASF licenses this file
22+
// to you under the Apache License, Version 2.0 (the
23+
// "License"); you may not use this file except in compliance
24+
// with the License. You may obtain a copy of the License at
25+
//
26+
// http://www.apache.org/licenses/LICENSE-2.0
27+
//
28+
// Unless required by applicable law or agreed to in writing,
29+
// software distributed under the License is distributed on an
30+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
31+
// KIND, either express or implied. See the License for the
32+
// specific language governing permissions and limitations
33+
// under the License.
34+
35+
#include "arrow/record_batch.h"
36+
#include "arrow/c/bridge.h"
37+
38+
#include "arrow/matlab/c/proxy/record_batch_importer.h"
39+
#include "arrow/matlab/tabular/proxy/record_batch.h"
40+
#include "arrow/matlab/error/error.h"
41+
42+
#include "libmexclass/proxy/ProxyManager.h"
43+
44+
namespace arrow::matlab::c::proxy {
45+
46+
RecordBatchImporter::RecordBatchImporter() {
  // Expose importFromC so it can be invoked by name through the proxy.
  REGISTER_METHOD(RecordBatchImporter, importFromC);
}
50+
51+
// Factory entry point used to create a RecordBatchImporter proxy.
//
// The constructor arguments are not read: the importer is default-constructed.
libmexclass::proxy::MakeResult RecordBatchImporter::make(
    const libmexclass::proxy::FunctionArguments& /*constructor_arguments*/) {
  return std::make_shared<RecordBatchImporter>();
}
54+
55+
// Imports an arrow::RecordBatch from a pair of C Data Interface structs.
//
// context.inputs[0] is read as a struct whose "ArrowArrayAddress" and
// "ArrowSchemaAddress" fields hold uint64 values; these are reinterpreted as
// pointers to an ArrowArray and an ArrowSchema respectively. On success the ID
// of a newly managed tabular RecordBatch proxy is written to
// context.outputs[0]. If the import fails, the error is reported through the
// context with the ID "arrow:c:ImportFailed".
void RecordBatchImporter::importFromC(libmexclass::proxy::method::Context& context) {
  namespace mda = ::matlab::data;
  using RecordBatchProxy = arrow::matlab::tabular::proxy::RecordBatch;

  // Pull the two raw addresses out of the input struct.
  mda::StructArray opts = context.inputs[0];
  const mda::TypedArray<uint64_t> array_addr_mda = opts[0]["ArrowArrayAddress"];
  const mda::TypedArray<uint64_t> schema_addr_mda = opts[0]["ArrowSchemaAddress"];

  // NOTE(review): the addresses are trusted as-is; presumably the caller
  // guarantees they point at live, initialized C structs -- confirm.
  auto* c_array =
      reinterpret_cast<struct ArrowArray*>(static_cast<uint64_t>(array_addr_mda[0]));
  auto* c_schema =
      reinterpret_cast<struct ArrowSchema*>(static_cast<uint64_t>(schema_addr_mda[0]));

  MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(std::shared_ptr<arrow::RecordBatch> imported_batch,
                                      arrow::ImportRecordBatch(c_array, c_schema),
                                      context, "arrow:c:ImportFailed");

  // Wrap the imported batch in a proxy and hand its ID back to the caller.
  auto batch_proxy = std::make_shared<RecordBatchProxy>(imported_batch);
  const auto batch_proxy_id =
      libmexclass::proxy::ProxyManager::manageProxy(batch_proxy);

  mda::ArrayFactory factory;
  context.outputs[0] = factory.createScalar(batch_proxy_id);
}
81+
82+
} // namespace arrow::matlab::c::proxy
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#pragma once
19+
20+
#include "libmexclass/proxy/Proxy.h"
21+
22+
namespace arrow::matlab::c::proxy {
23+
24+
class RecordBatchImporter : public libmexclass::proxy::Proxy {
25+
public:
26+
RecordBatchImporter();
27+
28+
~RecordBatchImporter() = default;
29+
30+
static libmexclass::proxy::MakeResult make(
31+
const libmexclass::proxy::FunctionArguments& constructor_arguments);
32+
33+
protected:
34+
void importFromC(libmexclass::proxy::method::Context& context);
35+
};
36+
37+
} // namespace arrow::matlab::c::proxy

matlab/src/cpp/arrow/matlab/proxy/factory.cc

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "arrow/matlab/buffer/proxy/buffer.h"
2828
#include "arrow/matlab/c/proxy/array_c_struct.h"
2929
#include "arrow/matlab/c/proxy/array_importer.h"
30+
#include "arrow/matlab/c/proxy/record_batch_importer.h"
3031
#include "arrow/matlab/c/proxy/schema_c_struct.h"
3132
#include "arrow/matlab/error/error.h"
3233
#include "arrow/matlab/io/csv/proxy/table_reader.h"
@@ -106,6 +107,7 @@ libmexclass::proxy::MakeResult Factory::make_proxy(
106107
REGISTER_PROXY(arrow.c.proxy.ArrayCStruct , arrow::matlab::c::proxy::ArrayCStruct);
107108
REGISTER_PROXY(arrow.c.proxy.SchemaCStruct , arrow::matlab::c::proxy::SchemaCStruct);
108109
REGISTER_PROXY(arrow.c.proxy.ArrayImporter , arrow::matlab::c::proxy::ArrayImporter);
110+
REGISTER_PROXY(arrow.c.proxy.RecordBatchImporter, arrow::matlab::c::proxy::RecordBatchImporter);
109111
// clang-format on
110112
return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID,
111113
"Did not find matching C++ proxy for " + class_name};

matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc

+15
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
#include "libmexclass/proxy/ProxyManager.h"
1919

20+
#include "arrow/c/bridge.h"
21+
2022
#include "arrow/matlab/array/proxy/array.h"
2123
#include "arrow/matlab/array/proxy/wrap.h"
2224

@@ -66,6 +68,7 @@ RecordBatch::RecordBatch(std::shared_ptr<arrow::RecordBatch> record_batch)
6668
REGISTER_METHOD(RecordBatch, getColumnByName);
6769
REGISTER_METHOD(RecordBatch, getSchema);
6870
REGISTER_METHOD(RecordBatch, getRowAsString);
71+
REGISTER_METHOD(RecordBatch, exportToC);
6972
}
7073

7174
std::shared_ptr<arrow::RecordBatch> RecordBatch::unwrap() { return record_batch; }
@@ -259,4 +262,16 @@ void RecordBatch::getRowAsString(libmexclass::proxy::method::Context& context) {
259262
context.outputs[0] = factory.createScalar(row_str_utf16);
260263
}
261264

265+
// Exports the wrapped record batch through the Arrow C Data Interface.
//
// context.inputs[0] is read as a struct whose "ArrowArrayAddress" and
// "ArrowSchemaAddress" fields hold uint64 values; these are reinterpreted as
// pointers to the ArrowArray and ArrowSchema structs passed to
// arrow::ExportRecordBatch. A non-OK status is reported through the context
// with the ID "arrow:c:export:Failed".
void RecordBatch::exportToC(libmexclass::proxy::method::Context& context) {
  namespace mda = ::matlab::data;

  mda::StructArray args = context.inputs[0];
  const mda::TypedArray<uint64_t> array_addr_mda = args[0]["ArrowArrayAddress"];
  const mda::TypedArray<uint64_t> schema_addr_mda = args[0]["ArrowSchemaAddress"];

  const auto array_addr = static_cast<uint64_t>(array_addr_mda[0]);
  const auto schema_addr = static_cast<uint64_t>(schema_addr_mda[0]);

  // NOTE(review): the addresses are trusted as-is; presumably the caller owns
  // the destination structs and keeps them alive -- confirm.
  auto* c_array = reinterpret_cast<struct ArrowArray*>(array_addr);
  auto* c_schema = reinterpret_cast<struct ArrowSchema*>(schema_addr);

  MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT(
      arrow::ExportRecordBatch(*record_batch, c_array, c_schema), context,
      "arrow:c:export:Failed");
}
276+
262277
} // namespace arrow::matlab::tabular::proxy

matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ class RecordBatch : public libmexclass::proxy::Proxy {
4343
void getColumnByName(libmexclass::proxy::method::Context& context);
4444
void getSchema(libmexclass::proxy::method::Context& context);
4545
void getRowAsString(libmexclass::proxy::method::Context& context);
46-
46+
void exportToC(libmexclass::proxy::method::Context& context);
4747
std::shared_ptr<arrow::RecordBatch> record_batch;
4848
};
4949

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
%RECORDBATCHIMPORTER Creates arrow record batches using the C Data Interface Format.
2+
3+
% Licensed to the Apache Software Foundation (ASF) under one or more
4+
% contributor license agreements. See the NOTICE file distributed with
5+
% this work for additional information regarding copyright ownership.
6+
% The ASF licenses this file to you under the Apache License, Version
7+
% 2.0 (the "License"); you may not use this file except in compliance
8+
% with the License. You may obtain a copy of the License at
9+
%
10+
% http://www.apache.org/licenses/LICENSE-2.0
11+
%
12+
% Unless required by applicable law or agreed to in writing, software
13+
% distributed under the License is distributed on an "AS IS" BASIS,
14+
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
15+
% implied. See the License for the specific language governing
16+
% permissions and limitations under the License.
17+
classdef RecordBatchImporter

    properties (Hidden, SetAccess=private, GetAccess=public)
        % Proxy object created for "arrow.c.proxy.RecordBatchImporter".
        Proxy
    end

    methods
        function obj = RecordBatchImporter()
            %RECORDBATCHIMPORTER Create the importer and its backing proxy.
            proxyName = "arrow.c.proxy.RecordBatchImporter";
            proxy = arrow.internal.proxy.create(proxyName, struct());
            obj.Proxy = proxy;
        end

        function recordBatch = import(obj, cArray, cSchema)
            %IMPORT Create an arrow.tabular.RecordBatch from C Data Interface structs.
            %
            %   cArray  - arrow.c.ArrayCStruct whose Address is passed to the proxy
            %             as ArrowArrayAddress.
            %   cSchema - arrow.c.SchemaCStruct whose Address is passed to the proxy
            %             as ArrowSchemaAddress.
            %
            %   Returns an arrow.tabular.RecordBatch wrapping the proxy whose ID
            %   is returned by the importFromC proxy method.
            arguments
                % Validate against this class. The previous validation
                % required an arrow.c.internal.ArrayImporter, which a
                % RecordBatchImporter instance can never satisfy.
                obj(1, 1) arrow.c.internal.RecordBatchImporter
                cArray(1, 1) arrow.c.ArrayCStruct
                cSchema(1, 1) arrow.c.SchemaCStruct
            end
            args = struct(ArrowArrayAddress=cArray.Address, ...
                          ArrowSchemaAddress=cSchema.Address);
            proxyID = obj.Proxy.importFromC(args);
            proxyName = "arrow.tabular.proxy.RecordBatch";
            proxy = libmexclass.proxy.Proxy(Name=proxyName, ID=proxyID);
            recordBatch = arrow.tabular.RecordBatch(proxy);
        end
    end
end

matlab/src/matlab/+arrow/+tabular/RecordBatch.m

+6
Original file line numberDiff line numberDiff line change
@@ -142,4 +142,10 @@ function displayScalarObject(obj)
142142
recordBatch = arrow.tabular.RecordBatch(proxy);
143143
end
144144
end
145+
146+
methods(Hidden)
147+
function exportToC(obj, cArrayAddress, cSchemaAddress)
    %EXPORTTOC Export this record batch through the Arrow C Data Interface.
    %
    %   cArrayAddress  - address forwarded to the proxy as ArrowArrayAddress.
    %   cSchemaAddress - address forwarded to the proxy as ArrowSchemaAddress.
    %
    %   The method needs the object as its first input because it uses
    %   obj.Proxy. The proxy method reads a single struct with uint64 fields
    %   named ArrowArrayAddress and ArrowSchemaAddress, so the two addresses
    %   are packed into that struct rather than passed positionally.
    arguments
        obj(1, 1) arrow.tabular.RecordBatch
        cArrayAddress(1, 1) uint64
        cSchemaAddress(1, 1) uint64
    end
    args = struct(ArrowArrayAddress=cArrayAddress, ...
                  ArrowSchemaAddress=cSchemaAddress);
    obj.Proxy.exportToC(args);
end
150+
end
145151
end

matlab/tools/cmake/BuildMatlabArrowInterface.cmake

+2-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a
7878
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer/proxy/buffer.cc"
7979
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array_c_struct.cc"
8080
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/schema_c_struct.cc"
81-
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array_importer.cc")
81+
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array_importer.cc"
82+
"${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/record_batch_importer.cc")
8283

8384

8485
set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy")

0 commit comments

Comments
 (0)