|
47 | 47 | #include "paimon/predicate/literal.h" |
48 | 48 | #include "paimon/predicate/predicate_builder.h" |
49 | 49 | #include "paimon/scan_context.h" |
| 50 | +#include "paimon/testing/utils/binary_row_generator.h" |
50 | 51 | #include "paimon/testing/utils/testharness.h" |
51 | 52 |
|
52 | 53 | namespace arrow { |
@@ -323,4 +324,81 @@ TEST_F(KeyValueFileStoreScanTest, TestNoOverlapping) { |
323 | 324 | ASSERT_FALSE(KeyValueFileStoreScan::NoOverlapping(generate_manifest_entries({0, 1, 1}))); |
324 | 325 | ASSERT_FALSE(KeyValueFileStoreScan::NoOverlapping(generate_manifest_entries({2, 1, 1}))); |
325 | 326 | } |
| 327 | + |
| 328 | +TEST_F(KeyValueFileStoreScanTest, TestFilterByValueFilterWithValueStatsCols) { |
| 329 | + std::string table_path = |
| 330 | + paimon::test::GetDataDir() + "orc/pk_table_with_mor.db/pk_table_with_mor"; |
| 331 | + std::vector<std::map<std::string, std::string>> partition_filters = {}; |
| 332 | + |
| 333 | + // Scenario: with the value filter enabled, FilterByStats is expected to prune on `v0 > 30.1` using per-file stats listed via value_stats_cols; `v0` is at index 6 in schema-0 of pk_table_with_mor. |
| 334 | + auto greater_than = PredicateBuilder::GreaterThan(/*field_index=*/6, /*field_name=*/"v0", |
| 335 | + FieldType::DOUBLE, Literal(30.1)); |
| 336 | + auto scan_filter = std::make_shared<ScanFilter>(/*predicate=*/greater_than, |
| 337 | + /*partition_filters=*/partition_filters, |
| 338 | + /*bucket_filter=*/0, |
| 339 | + /*vector_search=*/nullptr); |
| 340 | + ASSERT_OK_AND_ASSIGN(std::unique_ptr<KeyValueFileStoreScan> scan, |
| 341 | + CreateFileStoreScan(table_path, scan_filter, |
| 342 | + /*table_schema_id=*/0, /*snapshot_id=*/1)); |
| 343 | + scan->EnableValueFilter(); |
| 344 | + |
| 345 | + // Build dense stats for only one column `v0` (min=10.0, max=20.0, null={0}); `value_stats_cols` below names that same single column. |
| 346 | + auto pool = GetDefaultPool(); |
| 347 | + SimpleStats value_stats = BinaryRowGenerator::GenerateStats( |
| 348 | + /*min=*/{10.0}, /*max=*/{20.0}, /*null=*/{0}, pool.get()); |
| 349 | + std::vector<std::string> value_stats_cols = {"v0"}; |
| 350 | + ManifestEntry entry( |
| 351 | + /*kind=*/FileKind::Add(), /*partition=*/BinaryRow::EmptyRow(), /*bucket=*/0, |
| 352 | + /*total_buckets=*/1, |
| 353 | + std::make_shared<DataFileMeta>( |
| 354 | + /*file_name=*/"name", /*file_size=*/1024, /*row_count=*/10, |
| 355 | + /*min_key=*/BinaryRow::EmptyRow(), /*max_key=*/BinaryRow::EmptyRow(), |
| 356 | + /*key_stats=*/SimpleStats::EmptyStats(), |
| 357 | + /*value_stats=*/value_stats, |
| 358 | + /*min_sequence_number=*/0, |
| 359 | + /*max_sequence_number=*/10, |
| 360 | + /*schema_id=*/0, |
| 361 | + /*level=*/1, |
| 362 | + /*extra_files=*/std::vector<std::optional<std::string>>(), |
| 363 | + /*creation_time=*/Timestamp(0, 0), |
| 364 | + /*delete_row_count=*/std::nullopt, |
| 365 | + /*embedded_index=*/nullptr, |
| 366 | + /*file_source=*/FileSource::Append(), |
| 367 | + /*value_stats_cols=*/value_stats_cols, |
| 368 | + /*external_path=*/std::nullopt, |
| 369 | + /*first_row_id=*/std::nullopt, |
| 370 | + /*write_cols=*/std::nullopt)); |
| 371 | + |
| 372 | + // `entry_keep` repeats `entry` except for its file name and stats (min=40.0, max=50.0): max(v0)=50 > 30.1, should be kept. |
| 373 | + SimpleStats value_stats_keep = BinaryRowGenerator::GenerateStats( |
| 374 | + /*min=*/{40.0}, /*max=*/{50.0}, /*null=*/{0}, pool.get()); |
| 375 | + ManifestEntry entry_keep( |
| 376 | + /*kind=*/FileKind::Add(), /*partition=*/BinaryRow::EmptyRow(), /*bucket=*/0, |
| 377 | + /*total_buckets=*/1, |
| 378 | + std::make_shared<DataFileMeta>( |
| 379 | + /*file_name=*/"name_keep", /*file_size=*/1024, /*row_count=*/10, |
| 380 | + /*min_key=*/BinaryRow::EmptyRow(), /*max_key=*/BinaryRow::EmptyRow(), |
| 381 | + /*key_stats=*/SimpleStats::EmptyStats(), |
| 382 | + /*value_stats=*/value_stats_keep, |
| 383 | + /*min_sequence_number=*/0, |
| 384 | + /*max_sequence_number=*/10, |
| 385 | + /*schema_id=*/0, |
| 386 | + /*level=*/1, |
| 387 | + /*extra_files=*/std::vector<std::optional<std::string>>(), |
| 388 | + /*creation_time=*/Timestamp(0, 0), |
| 389 | + /*delete_row_count=*/std::nullopt, |
| 390 | + /*embedded_index=*/nullptr, |
| 391 | + /*file_source=*/FileSource::Append(), |
| 392 | + /*value_stats_cols=*/value_stats_cols, |
| 393 | + /*external_path=*/std::nullopt, |
| 394 | + /*first_row_id=*/std::nullopt, |
| 395 | + /*write_cols=*/std::nullopt)); |
| 396 | + |
| 397 | + // `entry`: max(v0)=20 <= 30.1, should be filtered out; `entry_keep` is checked right after and is expected to pass the filter. |
| 398 | + ASSERT_OK_AND_ASSIGN(bool keep, scan->FilterByStats(entry)); |
| 399 | + ASSERT_FALSE(keep); |
| 400 | + |
| 401 | + ASSERT_OK_AND_ASSIGN(keep, scan->FilterByStats(entry_keep)); |
| 402 | + ASSERT_TRUE(keep); |
| 403 | +} |
326 | 404 | } // namespace paimon::test |
0 commit comments