forked from alibaba/paimon-cpp
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrecord_batch_test.cpp
More file actions
119 lines (107 loc) · 5.14 KB
/
record_batch_test.cpp
File metadata and controls
119 lines (107 loc) · 5.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/*
* Copyright 2024-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "paimon/record_batch.h"
#include <utility>
#include "arrow/array/array_base.h"
#include "arrow/array/array_nested.h"
#include "arrow/array/builder_binary.h"
#include "arrow/array/builder_nested.h"
#include "arrow/array/builder_primitive.h"
#include "arrow/c/abi.h"
#include "arrow/c/bridge.h"
#include "arrow/ipc/json_simple.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "gtest/gtest.h"
#include "paimon/result.h"
#include "paimon/status.h"
#include "paimon/testing/utils/testharness.h"
namespace paimon::test {
// Exercises RecordBatchBuilder on a 10-row struct array: one Finish() call that must fail,
// one that must succeed after MoveData(), and move construction of the resulting RecordBatch.
TEST(RecordBatchTest, TestSimple) {
    // prepare an arrow array with struct<col1:string,col2:int32,col3:int64,col4:bool>
    auto string_field = arrow::field("col1", arrow::utf8());
    auto int_field = arrow::field("col2", arrow::int32());
    auto long_field = arrow::field("col3", arrow::int64());
    auto bool_field = arrow::field("col4", arrow::boolean());
    auto struct_type = arrow::struct_({string_field, int_field, long_field, bool_field});
    arrow::StructBuilder struct_builder(
        struct_type, arrow::default_memory_pool(),
        {std::make_shared<arrow::StringBuilder>(), std::make_shared<arrow::Int32Builder>(),
         std::make_shared<arrow::Int64Builder>(), std::make_shared<arrow::BooleanBuilder>()});
    // The child builders are owned by struct_builder; the casts match the builder types
    // passed to the constructor above, in the same order.
    auto string_builder = static_cast<arrow::StringBuilder*>(struct_builder.field_builder(0));
    auto int_builder = static_cast<arrow::Int32Builder*>(struct_builder.field_builder(1));
    auto long_builder = static_cast<arrow::Int64Builder*>(struct_builder.field_builder(2));
    auto bool_builder = static_cast<arrow::BooleanBuilder*>(struct_builder.field_builder(3));
    // Fill 10 rows: col1 and col2 are constant, col3 increases by one per row and col4
    // alternates false/true.
    for (int32_t i = 0; i < 10; ++i) {
        ASSERT_TRUE(struct_builder.Append().ok());
        ASSERT_TRUE(string_builder->Append("20240813").ok());
        ASSERT_TRUE(int_builder->Append(23).ok());
        ASSERT_TRUE(long_builder->Append(static_cast<int64_t>(1722848484308ll + i)).ok());
        ASSERT_TRUE(bool_builder->Append(static_cast<bool>(i % 2)).ok());
    }
    std::shared_ptr<arrow::Array> array;
    ASSERT_TRUE(struct_builder.Finish(&array).ok());

    // Export through the Arrow C data interface and hand the C struct to the builder.
    ::ArrowArray arrow_array;
    ASSERT_TRUE(arrow::ExportArray(*array, &arrow_array).ok());
    RecordBatchBuilder batch_builder(&arrow_array);
    std::map<std::string, std::string> partition = {{"col1", "20240813"}, {"col2", "23"}};

    // Only two row kinds are supplied for ten rows; Finish() is expected to fail
    // (presumably because of that count mismatch -- confirm against RecordBatchBuilder).
    ASSERT_NOK(batch_builder.SetPartition(partition)
                   .SetRowKinds({RecordBatch::RowKind::INSERT, RecordBatch::RowKind::INSERT})
                   .Finish());

    // Export a second C struct for the retry; the first one was already given to the builder.
    // NOTE(review): row kinds are not set again here, so this path relies on the builder not
    // reusing the mismatched row kinds from the failed attempt -- confirm.
    ::ArrowArray arrow_array2;
    ASSERT_TRUE(arrow::ExportArray(*array, &arrow_array2).ok());
    ASSERT_OK_AND_ASSIGN(std::unique_ptr<RecordBatch> batch2,
                         batch_builder.MoveData(&arrow_array2).SetPartition(partition).Finish());

    // The partition must survive two successive move constructions.
    RecordBatch batch3 = std::move(*batch2);
    ASSERT_EQ(batch3.GetPartition(), partition);
    RecordBatch batch4(std::move(batch3));
    ASSERT_EQ(batch4.GetPartition(), partition);
}
// Checks RecordBatch move assignment in both directions: after each assignment the target
// reports bucket 1 and the moved-from batch no longer exposes data.
TEST(RecordBatchTest, TestAssignAndMove) {
    arrow::FieldVector columns = {arrow::field("f0", arrow::boolean()),
                                  arrow::field("f1", arrow::int8())};
    std::map<std::string, std::string> partition = {{"f1", "1"}};

    // Parses a JSON literal into a struct<f0:bool,f1:int8> array; ValueOrDie() aborts the
    // test process if the literal cannot be parsed.
    auto parse_rows = [&columns](const char* json) {
        return std::dynamic_pointer_cast<arrow::StructArray>(
            arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(columns), json)
                .ValueOrDie());
    };

    // First batch: one row, bucket 0.
    auto first_array = parse_rows(R"([
[true, 1]
])");
    ::ArrowArray first_c_array;
    ASSERT_TRUE(arrow::ExportArray(*first_array, &first_c_array).ok());
    RecordBatch first_batch(partition, /*bucket=*/0, {RecordBatch::RowKind::INSERT},
                            &first_c_array);

    // Second batch: one row, bucket 1.
    auto second_array = parse_rows(R"([
[false, 1]
])");
    ::ArrowArray second_c_array;
    ASSERT_TRUE(arrow::ExportArray(*second_array, &second_c_array).ok());
    RecordBatch second_batch(partition, /*bucket=*/1, {RecordBatch::RowKind::INSERT},
                             &second_c_array);

    // Move-assign over an already populated batch.
    first_batch = std::move(second_batch);
    ASSERT_EQ(first_batch.GetBucket(), 1);
    // Deliberately inspect the moved-from object, hence the lint suppression.
    ASSERT_FALSE(
        second_batch.GetData());  // NOLINT(bugprone-use-after-move, clang-analyzer-cplusplus.Move)

    // Move-assign back into the moved-from batch.
    second_batch = std::move(first_batch);
    ASSERT_EQ(second_batch.GetBucket(), 1);
    ASSERT_FALSE(
        first_batch.GetData());  // NOLINT(bugprone-use-after-move, clang-analyzer-cplusplus.Move)
}
} // namespace paimon::test