Skip to content

Commit 9a9f428

Browse files
committed
add test
1 parent 466e105 commit 9a9f428

18 files changed

+438
-78
lines changed

include/paimon/data/blob.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,10 @@ class PAIMON_EXPORT Blob {
9494
/// It automatically injects Paimon-specific metadata to identify the field as a BLOB.
9595
///
9696
/// @param field_name The name of the Arrow field.
97-
/// @param nullable Whether the field can contain null values (defaults to false).
9897
/// @param metadata A map of key-value metadata to be attached to the field.
9998
/// @return A result containing a unique pointer to the generated `ArrowSchema` or an error.
10099
static Result<std::unique_ptr<::ArrowSchema>> ArrowField(
101-
const std::string& field_name, bool nullable = false,
102-
std::unordered_map<std::string, std::string> metadata = {});
100+
const std::string& field_name, std::unordered_map<std::string, std::string> metadata = {});
103101

104102
private:
105103
class Impl;

src/paimon/common/data/abstract_binary_writer.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ void AbstractBinaryWriter::WriteSegmentsToVarLenPart(int32_t pos,
163163
if (segments.size() == 1) {
164164
segments[0].CopyTo(offset, &segment_, cursor_, size);
165165
} else {
166+
assert(false);
167+
// Currently a BinarySection is always stored in a single segment.
166168
WriteMultiSegmentsToVarLenPart(segments, offset, size);
167169
}
168170
SetOffsetAndSize(pos, cursor_, size);
@@ -172,6 +174,8 @@ void AbstractBinaryWriter::WriteSegmentsToVarLenPart(int32_t pos,
172174

173175
void AbstractBinaryWriter::WriteMultiSegmentsToVarLenPart(
174176
const std::vector<MemorySegment>& segments, int32_t offset, int32_t size) {
177+
// Currently a BinarySection is always stored in a single segment.
178+
assert(false);
175179
// Write the bytes to the variable length portion.
176180
int32_t need_copy = size;
177181
int32_t from_offset = offset;

src/paimon/common/data/binary_array_test.cpp

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ TEST(BinaryArrayTest, TestSetAndGet) {
183183
}
184184
// timestamp
185185
{
186+
// not compact (precision > 3)
186187
std::vector<Timestamp> arr = {Timestamp(0, 0), Timestamp(12345, 1)};
187188
BinaryArray array;
188189
BinaryArrayWriter writer = BinaryArrayWriter(&array, arr.size(), 8, pool.get());
@@ -193,6 +194,18 @@ TEST(BinaryArrayTest, TestSetAndGet) {
193194
ASSERT_EQ(arr[0], array.GetTimestamp(0, 9));
194195
ASSERT_EQ(arr[1], array.GetTimestamp(1, 9));
195196
}
197+
{
198+
// compact (precision <= 3)
199+
std::vector<Timestamp> arr = {Timestamp(0, 0), Timestamp(12345, 0)};
200+
BinaryArray array;
201+
BinaryArrayWriter writer = BinaryArrayWriter(&array, arr.size(), 8, pool.get());
202+
for (size_t i = 0; i < arr.size(); i++) {
203+
writer.WriteTimestamp(i, arr[i], 3);
204+
}
205+
writer.Complete();
206+
ASSERT_EQ(arr[0], array.GetTimestamp(0, 3));
207+
ASSERT_EQ(arr[1], array.GetTimestamp(1, 3));
208+
}
196209
// binary
197210
{
198211
std::vector<Bytes> arr;
@@ -379,4 +392,124 @@ TEST(BinaryArrayTest, TestReset) {
379392
ASSERT_EQ(arr, array.ToLongArray().value());
380393
}
381394

395+
TEST(BinaryArrayTest, TestGetElementSize) {
396+
ASSERT_EQ(sizeof(bool), BinaryArrayWriter::GetElementSize(arrow::Type::type::BOOL));
397+
ASSERT_EQ(sizeof(int8_t), BinaryArrayWriter::GetElementSize(arrow::Type::type::INT8));
398+
ASSERT_EQ(sizeof(int16_t), BinaryArrayWriter::GetElementSize(arrow::Type::type::INT16));
399+
ASSERT_EQ(sizeof(int32_t), BinaryArrayWriter::GetElementSize(arrow::Type::type::INT32));
400+
ASSERT_EQ(sizeof(int32_t), BinaryArrayWriter::GetElementSize(arrow::Type::type::DATE32));
401+
ASSERT_EQ(sizeof(int64_t), BinaryArrayWriter::GetElementSize(arrow::Type::type::INT64));
402+
ASSERT_EQ(sizeof(float), BinaryArrayWriter::GetElementSize(arrow::Type::type::FLOAT));
403+
ASSERT_EQ(sizeof(double), BinaryArrayWriter::GetElementSize(arrow::Type::type::DOUBLE));
404+
// default cases: variable-length types use 8 bytes (offset + length)
405+
ASSERT_EQ(8, BinaryArrayWriter::GetElementSize(arrow::Type::type::STRING));
406+
ASSERT_EQ(8, BinaryArrayWriter::GetElementSize(arrow::Type::type::BINARY));
407+
ASSERT_EQ(8, BinaryArrayWriter::GetElementSize(arrow::Type::type::TIMESTAMP));
408+
ASSERT_EQ(8, BinaryArrayWriter::GetElementSize(arrow::Type::type::DECIMAL128));
409+
}
410+
411+
TEST(BinaryArrayTest, TestSetNullAtWithArrowType) {
412+
auto pool = GetDefaultPool();
413+
414+
{
415+
// BOOL
416+
BinaryArray array;
417+
BinaryArrayWriter writer(&array, 2, sizeof(bool), pool.get());
418+
writer.WriteBoolean(0, true);
419+
writer.SetNullAt(1, arrow::Type::type::BOOL);
420+
writer.Complete();
421+
ASSERT_FALSE(array.IsNullAt(0));
422+
ASSERT_TRUE(array.GetBoolean(0));
423+
ASSERT_TRUE(array.IsNullAt(1));
424+
}
425+
{
426+
// INT8
427+
BinaryArray array;
428+
BinaryArrayWriter writer(&array, 2, sizeof(int8_t), pool.get());
429+
writer.WriteByte(0, 42);
430+
writer.SetNullAt(1, arrow::Type::type::INT8);
431+
writer.Complete();
432+
ASSERT_FALSE(array.IsNullAt(0));
433+
ASSERT_EQ(42, array.GetByte(0));
434+
ASSERT_TRUE(array.IsNullAt(1));
435+
}
436+
{
437+
// INT16
438+
BinaryArray array;
439+
BinaryArrayWriter writer(&array, 2, sizeof(int16_t), pool.get());
440+
writer.WriteShort(0, 1000);
441+
writer.SetNullAt(1, arrow::Type::type::INT16);
442+
writer.Complete();
443+
ASSERT_FALSE(array.IsNullAt(0));
444+
ASSERT_EQ(1000, array.GetShort(0));
445+
ASSERT_TRUE(array.IsNullAt(1));
446+
}
447+
{
448+
// INT32
449+
BinaryArray array;
450+
BinaryArrayWriter writer(&array, 2, sizeof(int32_t), pool.get());
451+
writer.WriteInt(0, 100000);
452+
writer.SetNullAt(1, arrow::Type::type::INT32);
453+
writer.Complete();
454+
ASSERT_FALSE(array.IsNullAt(0));
455+
ASSERT_EQ(100000, array.GetInt(0));
456+
ASSERT_TRUE(array.IsNullAt(1));
457+
}
458+
{
459+
// DATE32
460+
BinaryArray array;
461+
BinaryArrayWriter writer(&array, 2, sizeof(int32_t), pool.get());
462+
writer.WriteInt(0, 19000);
463+
writer.SetNullAt(1, arrow::Type::type::DATE32);
464+
writer.Complete();
465+
ASSERT_FALSE(array.IsNullAt(0));
466+
ASSERT_EQ(19000, array.GetDate(0));
467+
ASSERT_TRUE(array.IsNullAt(1));
468+
}
469+
{
470+
// INT64
471+
BinaryArray array;
472+
BinaryArrayWriter writer(&array, 2, sizeof(int64_t), pool.get());
473+
writer.WriteLong(0, 123456789L);
474+
writer.SetNullAt(1, arrow::Type::type::INT64);
475+
writer.Complete();
476+
ASSERT_FALSE(array.IsNullAt(0));
477+
ASSERT_EQ(123456789L, array.GetLong(0));
478+
ASSERT_TRUE(array.IsNullAt(1));
479+
}
480+
{
481+
// FLOAT
482+
BinaryArray array;
483+
BinaryArrayWriter writer(&array, 2, sizeof(float), pool.get());
484+
writer.WriteFloat(0, 3.14f);
485+
writer.SetNullAt(1, arrow::Type::type::FLOAT);
486+
writer.Complete();
487+
ASSERT_FALSE(array.IsNullAt(0));
488+
ASSERT_FLOAT_EQ(3.14f, array.GetFloat(0));
489+
ASSERT_TRUE(array.IsNullAt(1));
490+
}
491+
{
492+
// DOUBLE
493+
BinaryArray array;
494+
BinaryArrayWriter writer(&array, 2, sizeof(double), pool.get());
495+
writer.WriteDouble(0, 2.718);
496+
writer.SetNullAt(1, arrow::Type::type::DOUBLE);
497+
writer.Complete();
498+
ASSERT_FALSE(array.IsNullAt(0));
499+
ASSERT_DOUBLE_EQ(2.718, array.GetDouble(0));
500+
ASSERT_TRUE(array.IsNullAt(1));
501+
}
502+
{
503+
// STRING (default path, uses 8-byte null)
504+
BinaryArray array;
505+
BinaryArrayWriter writer(&array, 2, 8, pool.get());
506+
writer.WriteString(0, BinaryString::FromString("hello", pool.get()));
507+
writer.SetNullAt(1, arrow::Type::type::STRING);
508+
writer.Complete();
509+
ASSERT_FALSE(array.IsNullAt(0));
510+
ASSERT_EQ("hello", std::string(array.GetStringView(0)));
511+
ASSERT_TRUE(array.IsNullAt(1));
512+
}
513+
}
514+
382515
} // namespace paimon::test

src/paimon/common/data/blob.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,8 @@ Result<PAIMON_UNIQUE_PTR<Bytes>> Blob::ToData(const std::shared_ptr<FileSystem>&
106106
}
107107

108108
Result<std::unique_ptr<ArrowSchema>> Blob::ArrowField(
109-
const std::string& field_name, bool nullable,
110-
std::unordered_map<std::string, std::string> metadata) {
111-
auto blob_field = BlobUtils::ToArrowField(field_name, nullable, metadata);
109+
const std::string& field_name, std::unordered_map<std::string, std::string> metadata) {
110+
auto blob_field = BlobUtils::ToArrowField(field_name, /*nullable=*/false, metadata);
112111
auto field = std::make_unique<::ArrowSchema>();
113112
PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportField(*blob_field, field.get()));
114113
return field;

src/paimon/common/data/blob_test.cpp

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@
1616

1717
#include "paimon/data/blob.h"
1818

19+
#include "arrow/api.h"
20+
#include "arrow/c/bridge.h"
1921
#include "gtest/gtest.h"
2022
#include "paimon/fs/local/local_file_system.h"
2123
#include "paimon/memory/memory_pool.h"
22-
#include "paimon/status.h"
2324
#include "paimon/testing/utils/testharness.h"
2425

2526
namespace paimon::test {
@@ -142,4 +143,43 @@ TEST_F(BlobTest, TestNewInputStreamWithDynamicLength) {
142143
ASSERT_EQ("cdefghijklmn", buffer);
143144
}
144145

146+
TEST_F(BlobTest, TestArrowField) {
147+
{
148+
// basic: field name only; the generated field is always non-nullable
149+
ASSERT_OK_AND_ASSIGN(auto schema, Blob::ArrowField("my_blob"));
150+
ASSERT_NE(schema, nullptr);
151+
152+
// import back to arrow::Field to verify
153+
auto field_result = arrow::ImportField(schema.get());
154+
ASSERT_TRUE(field_result.ok());
155+
auto field = field_result.ValueUnsafe();
156+
157+
ASSERT_EQ(field->name(), "my_blob");
158+
ASSERT_EQ(field->type()->id(), arrow::Type::LARGE_BINARY);
159+
ASSERT_FALSE(field->nullable());
160+
ASSERT_TRUE(field->HasMetadata());
161+
auto extension_type = field->metadata()->Get("paimon.extension.type");
162+
ASSERT_TRUE(extension_type.ok());
163+
ASSERT_EQ(extension_type.ValueUnsafe(), "paimon.type.blob");
164+
}
165+
{
166+
// with custom metadata
167+
std::unordered_map<std::string, std::string> custom_metadata = {
168+
{"custom_key", "custom_value"}};
169+
ASSERT_OK_AND_ASSIGN(auto schema, Blob::ArrowField("meta_blob", custom_metadata));
170+
auto field = arrow::ImportField(schema.get()).ValueUnsafe();
171+
ASSERT_EQ(field->name(), "meta_blob");
172+
ASSERT_FALSE(field->nullable());
173+
ASSERT_TRUE(field->HasMetadata());
174+
// blob extension metadata should be present
175+
auto extension_type = field->metadata()->Get("paimon.extension.type");
176+
ASSERT_TRUE(extension_type.ok());
177+
ASSERT_EQ(extension_type.ValueUnsafe(), "paimon.type.blob");
178+
// custom metadata should also be present
179+
auto custom_val = field->metadata()->Get("custom_key");
180+
ASSERT_TRUE(custom_val.ok());
181+
ASSERT_EQ(custom_val.ValueUnsafe(), "custom_value");
182+
}
183+
}
184+
145185
} // namespace paimon::test

src/paimon/common/data/columnar/columnar_row_test.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,4 +386,69 @@ TEST(ColumnarRowTest, TestDataLifeCycle) {
386386
ASSERT_EQ(result_row->GetLong(3), 5);
387387
}
388388

389+
TEST(ColumnarRowTest, TestColumnarRowRefGetBinary) {
390+
auto pool = GetDefaultPool();
391+
std::shared_ptr<arrow::DataType> target_type = arrow::struct_({
392+
arrow::field("f0", arrow::binary()),
393+
arrow::field("f1", arrow::binary()),
394+
});
395+
auto f0 =
396+
arrow::ipc::internal::json::ArrayFromJSON(arrow::binary(), R"(["hello", "world", null])")
397+
.ValueOrDie();
398+
auto f1 = arrow::ipc::internal::json::ArrayFromJSON(arrow::binary(), R"(["abc", "", "xyz"])")
399+
.ValueOrDie();
400+
auto data = arrow::StructArray::Make({f0, f1}, target_type->fields()).ValueOrDie();
401+
402+
auto ctx = std::make_shared<ColumnarBatchContext>(data->fields(), pool);
403+
404+
{
405+
ColumnarRowRef row(ctx, 0);
406+
auto binary = row.GetBinary(0);
407+
ASSERT_TRUE(binary);
408+
ASSERT_EQ(std::string(binary->data(), binary->size()), "hello");
409+
410+
auto binary1 = row.GetBinary(1);
411+
ASSERT_TRUE(binary1);
412+
ASSERT_EQ(std::string(binary1->data(), binary1->size()), "abc");
413+
}
414+
{
415+
ColumnarRowRef row(ctx, 1);
416+
auto binary = row.GetBinary(0);
417+
ASSERT_TRUE(binary);
418+
ASSERT_EQ(std::string(binary->data(), binary->size()), "world");
419+
420+
auto binary1 = row.GetBinary(1);
421+
ASSERT_TRUE(binary1);
422+
ASSERT_EQ(binary1->size(), 0);
423+
}
424+
{
425+
ColumnarRowRef row(ctx, 2);
426+
ASSERT_TRUE(row.IsNullAt(0));
427+
428+
auto binary1 = row.GetBinary(1);
429+
ASSERT_TRUE(binary1);
430+
ASSERT_EQ(std::string(binary1->data(), binary1->size()), "xyz");
431+
}
432+
}
433+
434+
TEST(ColumnarRowTest, TestColumnarRowRefToString) {
435+
auto pool = GetDefaultPool();
436+
std::shared_ptr<arrow::DataType> target_type =
437+
arrow::struct_({arrow::field("f0", arrow::int32())});
438+
auto f0 =
439+
arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), R"([1, 2, 3])").ValueOrDie();
440+
auto data = arrow::StructArray::Make({f0}, target_type->fields()).ValueOrDie();
441+
442+
auto ctx = std::make_shared<ColumnarBatchContext>(data->fields(), pool);
443+
444+
{
445+
ColumnarRowRef row(ctx, 0);
446+
ASSERT_EQ(row.ToString(), "ColumnarRowRef, row_id 0");
447+
}
448+
{
449+
ColumnarRowRef row(ctx, 2);
450+
ASSERT_EQ(row.ToString(), "ColumnarRowRef, row_id 2");
451+
}
452+
}
453+
389454
} // namespace paimon::test

src/paimon/common/data/data_define.h

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -128,18 +128,12 @@ class DataDefine {
128128
case arrow::Type::type::DOUBLE:
129129
return Literal(GetVariantValue<double>(value));
130130
case arrow::Type::type::STRING: {
131-
auto binary_string_ptr = GetVariantPtr<BinaryString>(value);
132-
if (binary_string_ptr == nullptr) {
133-
return Status::Invalid(
134-
"VariantValueToLiteral failed, cannot get BinaryString from VariantType, "
135-
"input value maybe string view");
136-
}
137-
auto str = binary_string_ptr->ToString();
138-
return Literal(FieldType::STRING, str.data(), str.size());
131+
auto view = GetStringView(value);
132+
return Literal(FieldType::STRING, view.data(), view.size());
139133
}
140134
case arrow::Type::type::BINARY: {
141-
auto bytes = GetVariantValue<std::shared_ptr<Bytes>>(value);
142-
return Literal(FieldType::BINARY, bytes->data(), bytes->size());
135+
auto view = GetStringView(value);
136+
return Literal(FieldType::BINARY, view.data(), view.size());
143137
}
144138
case arrow::Type::type::TIMESTAMP:
145139
return Literal(GetVariantValue<Timestamp>(value));

0 commit comments

Comments
 (0)