Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/user_guide/data_types.rst
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ and `Arrow DataTypes <https://arrow.apache.org/docs/format/Columnar.html#data-ty

This type fills the gap between time zone free and time zone mandatory
timestamp types by allowing the interpretation of UTC timestamps according
to the configured session time zone. A conversion from and to int describes
the number of seconds since epoch. A conversion from and to long describes
the number of milliseconds since epoch.

* - ``ARRAY<t>``
Expand Down
4 changes: 2 additions & 2 deletions src/paimon/common/utils/string_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,11 @@ Result<int32_t> StringUtils::StringToDate(const std::string& str) {
std::istringstream ss(str);
ss >> std::get_time(&timeinfo, "%Y-%m-%d");
if (ss.fail()) {
return Status::Invalid(fmt::format("failed to convert string {} to date", str));
return Status::Invalid(fmt::format("failed to convert string '{}' to date", str));
}
std::time_t time = timegm(&timeinfo);
if (time == -1) {
return Status::Invalid(fmt::format("failed to convert string {} to date", str));
return Status::Invalid(fmt::format("failed to convert string '{}' to date", str));
}
static const int64_t SECONDS_PER_DAY = 86400l; // = 24 * 60 * 60
return time / SECONDS_PER_DAY;
Expand Down
40 changes: 21 additions & 19 deletions src/paimon/core/casting/cast_executor_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -837,7 +837,7 @@ TEST_F(CastExecutorTest, TestStringToBooleanCastExecutorCastLiteral) {
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::boolean());
ASSERT_TRUE(
msg.find("StringToBooleanCastExecutor cast failed: STRING cannot cast to BOOLEAN") !=
msg.find("StringToBooleanCastExecutor cast failed: STRING '' cannot cast to BOOLEAN") !=
std::string::npos);
}
{
Expand All @@ -847,7 +847,7 @@ TEST_F(CastExecutorTest, TestStringToBooleanCastExecutorCastLiteral) {
src_data, arrow::boolean());
ASSERT_TRUE(
msg.find(
"StringToBooleanCastExecutor cast failed: STRING ttrue cannot cast to BOOLEAN") !=
"StringToBooleanCastExecutor cast failed: STRING 'ttrue' cannot cast to BOOLEAN") !=
std::string::npos);
}
}
Expand All @@ -864,7 +864,7 @@ TEST_F(CastExecutorTest, TestStringToBooleanCastExecutorCastArray) {
auto msg =
CheckArrayInvalidResult(cast_executor, arrow::utf8(), arrow::boolean(), R"([""])");
ASSERT_TRUE(
msg.find("StringToBooleanCastExecutor cast failed: STRING cannot cast to BOOLEAN") !=
msg.find("StringToBooleanCastExecutor cast failed: STRING '' cannot cast to BOOLEAN") !=
std::string::npos);
}
{
Expand All @@ -873,7 +873,7 @@ TEST_F(CastExecutorTest, TestStringToBooleanCastExecutorCastArray) {
R"(["true", "ttrue"])");
ASSERT_TRUE(
msg.find(
"StringToBooleanCastExecutor cast failed: STRING ttrue cannot cast to BOOLEAN") !=
"StringToBooleanCastExecutor cast failed: STRING 'ttrue' cannot cast to BOOLEAN") !=
std::string::npos);
}
}
Expand Down Expand Up @@ -940,57 +940,57 @@ TEST_F(CastExecutorTest, TestStringToNumericPrimitiveCastExecutorCastLiteral) {
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::int8());
ASSERT_TRUE(msg.find("cast literal in StringToNumericPrimitiveCastExecutor failed: cannot "
"cast 128 from STRING to TINYINT") != std::string::npos);
"cast '128' from STRING to TINYINT") != std::string::npos);
}
{
std::string src_data = "-129";
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::int8());
ASSERT_TRUE(msg.find("cast literal in StringToNumericPrimitiveCastExecutor failed: cannot "
"cast -129 from STRING to TINYINT") != std::string::npos);
"cast '-129' from STRING to TINYINT") != std::string::npos);
}
{
std::string src_data = "32768";
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::int16());
ASSERT_TRUE(msg.find("cast literal in StringToNumericPrimitiveCastExecutor failed: cannot "
"cast 32768 from STRING to SMALLINT") != std::string::npos);
"cast '32768' from STRING to SMALLINT") != std::string::npos);
}
{
std::string src_data = "-32769";
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::int16());
ASSERT_TRUE(msg.find("cast literal in StringToNumericPrimitiveCastExecutor failed: cannot "
"cast -32769 from STRING to SMALLINT") != std::string::npos);
"cast '-32769' from STRING to SMALLINT") != std::string::npos);
}
{
std::string src_data = "2147483648";
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::int32());
ASSERT_TRUE(msg.find("cast literal in StringToNumericPrimitiveCastExecutor failed: cannot "
"cast 2147483648 from STRING to INT") != std::string::npos);
"cast '2147483648' from STRING to INT") != std::string::npos);
}
{
std::string src_data = "-2147483649";
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::int32());
ASSERT_TRUE(msg.find("cast literal in StringToNumericPrimitiveCastExecutor failed: cannot "
"cast -2147483649 from STRING to INT") != std::string::npos);
"cast '-2147483649' from STRING to INT") != std::string::npos);
}
{
std::string src_data = "9223372036854775808";
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::int64());
ASSERT_TRUE(msg.find("cast literal in StringToNumericPrimitiveCastExecutor failed: cannot "
"cast 9223372036854775808 from STRING to BIGINT") !=
"cast '9223372036854775808' from STRING to BIGINT") !=
std::string::npos);
}
{
std::string src_data = "-9223372036854775809";
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::int64());
ASSERT_TRUE(msg.find("cast literal in StringToNumericPrimitiveCastExecutor failed: cannot "
"cast -9223372036854775809 from STRING to BIGINT") !=
"cast '-9223372036854775809' from STRING to BIGINT") !=
std::string::npos);
}
{
Expand All @@ -1011,14 +1011,14 @@ TEST_F(CastExecutorTest, TestStringToNumericPrimitiveCastExecutorCastLiteral) {
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::int16());
ASSERT_TRUE(msg.find("cast literal in StringToNumericPrimitiveCastExecutor failed: cannot "
"cast from STRING to SMALLINT") != std::string::npos);
"cast '' from STRING to SMALLINT") != std::string::npos);
}
{
std::string src_data = "abc";
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::int32());
ASSERT_TRUE(msg.find("cast literal in StringToNumericPrimitiveCastExecutor failed: cannot "
"cast abc from STRING to INT") != std::string::npos);
"cast 'abc' from STRING to INT") != std::string::npos);
}
}

Expand Down Expand Up @@ -1421,36 +1421,38 @@ TEST_F(CastExecutorTest, TestStringToDateCastExecutorCastLiteral) {
std::string src_data = "9223372036854775807";
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::date32());
ASSERT_TRUE(msg.find("failed to convert string 9223372036854775807 to date") !=
ASSERT_TRUE(msg.find("failed to convert string '9223372036854775807' to date") !=
std::string::npos);
}
{
// invalid date str
std::string src_data = "11970-01-02";
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::date32());
ASSERT_TRUE(msg.find("failed to convert string 11970-01-02 to date") != std::string::npos);
ASSERT_TRUE(msg.find("failed to convert string '11970-01-02' to date") !=
std::string::npos);
}
{
// invalid date str
std::string src_data = "-1970-01-02";
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::date32());
ASSERT_TRUE(msg.find("failed to convert string -1970-01-02 to date") != std::string::npos);
ASSERT_TRUE(msg.find("failed to convert string '-1970-01-02' to date") !=
std::string::npos);
}
{
// invalid date str
std::string src_data = "";
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::date32());
ASSERT_TRUE(msg.find("failed to convert string to date") != std::string::npos);
ASSERT_TRUE(msg.find("failed to convert string '' to date") != std::string::npos);
}
{
// invalid date str
std::string src_data = "0x1";
auto msg = CheckLiteralInvalidResult<std::string>(cast_executor, FieldType::STRING,
src_data, arrow::date32());
ASSERT_TRUE(msg.find("failed to convert string 0x1 to date") != std::string::npos);
ASSERT_TRUE(msg.find("failed to convert string '0x1' to date") != std::string::npos);
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/paimon/core/casting/string_to_boolean_cast_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ Result<Literal> StringToBooleanCastExecutor::Cast(
std::optional<bool> bool_value = StringUtils::StringToValue<bool>(value);
if (bool_value == std::nullopt) {
return Status::Invalid(fmt::format(
"StringToBooleanCastExecutor cast failed: STRING {} cannot cast to BOOLEAN", value));
"StringToBooleanCastExecutor cast failed: STRING '{}' cannot cast to BOOLEAN", value));
}
return Literal(bool_value.value());
}
Expand All @@ -71,7 +71,7 @@ Result<std::shared_ptr<arrow::Array>> StringToBooleanCastExecutor::Cast(
StringUtils::StringToValue<bool>(string_array->GetString(i));
if (bool_value == std::nullopt) {
return Status::Invalid(fmt::format(
"StringToBooleanCastExecutor cast failed: STRING {} cannot cast to BOOLEAN",
"StringToBooleanCastExecutor cast failed: STRING '{}' cannot cast to BOOLEAN",
string_array->GetString(i)));
}
PAIMON_RETURN_NOT_OK_FROM_ARROW(bool_builder->Append(bool_value.value()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ Result<Literal> StringToNumericPrimitiveCastExecutor::CastLiteral(const Literal&
if (!success) {
return Status::Invalid(
fmt::format("cast literal in StringToNumericPrimitiveCastExecutor failed: cannot "
"cast {} from STRING to {}",
"cast '{}' from STRING to {}",
value, FieldTypeUtils::FieldTypeToString(target_type)));
}
return Literal(out);
Expand All @@ -80,7 +80,7 @@ Result<Literal> StringToNumericPrimitiveCastExecutor::CastLiteral(const Literal&
if (!casted_value) {
return Status::Invalid(
fmt::format("cast literal in StringToNumericPrimitiveCastExecutor failed: cannot "
"cast {} from STRING to {}",
"cast '{}' from STRING to {}",
value, FieldTypeUtils::FieldTypeToString(target_type)));
}
return Literal(casted_value.value());
Expand Down
2 changes: 1 addition & 1 deletion src/paimon/core/io/complete_row_tracking_fields_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ Status CompleteRowTrackingFieldsBatchReader::SetReadSchema(
int32_t sequence_id_idx = arrow_schema->GetFieldIndex(SpecialFields::SequenceNumber().Name());
if (sequence_id_idx != -1 &&
file_schema->GetFieldIndex(SpecialFields::SequenceNumber().Name()) == -1) {
// read special fields but file not exist, remove special fields to format reader
// read special fields but file not exist, remove special fields to format reader
PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(arrow_schema, arrow_schema->RemoveField(sequence_id_idx));
}
ArrowSchema c_schema;
Expand Down
2 changes: 1 addition & 1 deletion src/paimon/core/io/row_to_arrow_array_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#include "paimon/memory/memory_pool.h"
#include "paimon/reader/batch_reader.h"
namespace paimon {
// Convert row T to output R (R may be BatchReader::ReadBatch or KeyValueBatch).
template <typename T, typename R>
class RowToArrowArrayConverter {
public:
Expand Down
1 change: 1 addition & 0 deletions src/paimon/format/avro/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ if(PAIMON_ENABLE_AVRO)
if(PAIMON_BUILD_TESTS)
add_paimon_test(avro_format_test
SOURCES
avro_direct_encoder_decoder_test.cpp
avro_file_batch_reader_test.cpp
avro_file_format_test.cpp
avro_format_writer_test.cpp
Expand Down
1 change: 1 addition & 0 deletions src/paimon/format/avro/avro_direct_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@ Status DecodeFieldToBuilder(const ::avro::NodePtr& avro_node,

const auto& branch_node = avro_node->leafAt(branch_index);
if (branch_node->type() == ::avro::AVRO_NULL) {
decoder->decodeNull();
PAIMON_RETURN_NOT_OK_FROM_ARROW(array_builder->AppendNull());
return Status::OK();
} else {
Expand Down
17 changes: 7 additions & 10 deletions src/paimon/format/avro/avro_direct_encoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ Result<UnionBranches> ValidateUnion(const ::avro::NodePtr& union_node) {
return UnionBranches{.null_index = 0, .value_index = 1, .value_node = branch_1};
}
if (branch_1->type() == ::avro::AVRO_NULL && branch_0->type() != ::avro::AVRO_NULL) {
return UnionBranches{.null_index = 1, .value_index = 0, .value_node = branch_0};
return Status::Invalid(
"Unexpected: In paimon, we expect the null branch to be the first branch in a union.");
}
return Status::Invalid("Union must have exactly one null branch");
}
Expand Down Expand Up @@ -92,10 +93,6 @@ Status AvroDirectEncoder::EncodeArrowToAvro(const ::avro::NodePtr& avro_node,
}

switch (avro_node->type()) {
case ::avro::AVRO_NULL:
encoder->encodeNull();
return Status::OK();

case ::avro::AVRO_BOOL: {
const auto& bool_array =
arrow::internal::checked_cast<const arrow::BooleanArray&>(array);
Expand Down Expand Up @@ -230,9 +227,7 @@ Status AvroDirectEncoder::EncodeArrowToAvro(const ::avro::NodePtr& avro_node,
const auto& binary_array =
arrow::internal::checked_cast<const arrow::BinaryArray&>(array);
std::string_view value = binary_array.GetView(row_index);
// TODO(jinli.zjw): need to copy to ctx?
ctx->assign(value.begin(), value.end());
encoder->encodeBytes(ctx->data(), ctx->size());
encoder->encodeBytes(reinterpret_cast<const uint8_t*>(value.data()), value.size());
return Status::OK();
}

Expand Down Expand Up @@ -294,7 +289,7 @@ Status AvroDirectEncoder::EncodeArrowToAvro(const ::avro::NodePtr& avro_node,
element_node->leaves() != 2)) {
return Status::Invalid(
fmt::format("Expected AVRO_RECORD for map key-value pair, got {}",
::avro::toString(element_node->type())));
AvroUtils::ToString(avro_node)));
}

const auto& map_array =
Expand Down Expand Up @@ -366,9 +361,11 @@ Status AvroDirectEncoder::EncodeArrowToAvro(const ::avro::NodePtr& avro_node,
return Status::OK();
}

case ::avro::AVRO_NULL:
case ::avro::AVRO_UNION:
// Already handled above
return Status::Invalid("Unexpected union handling");
return Status::Invalid(fmt::format("Unexpected Avro type handling: {}",
::avro::toString(avro_node->type())));
default:
return Status::Invalid(
fmt::format("Unsupported Avro type: {}", ::avro::toString(avro_node->type())));
Expand Down
Loading
Loading