@@ -419,4 +419,59 @@ TEST_F(LanceFileReaderWriterTest, TestTimestampType) {
419419 CheckResult (src_chunk_array, schema, /* enable_tz=*/ false );
420420}
421421
422+ TEST_F (LanceFileReaderWriterTest, TestPreviousBatchFirstRowNumber) {
423+ arrow::FieldVector fields = {arrow::field (" f1" , arrow::int32 ()),
424+ arrow::field (" f2" , arrow::utf8 ())};
425+ auto schema = arrow::schema (fields);
426+ auto array = std::dynamic_pointer_cast<arrow::StructArray>(
427+ arrow::ipc::internal::json::ArrayFromJSON (arrow::struct_ ({fields}), R"( [
428+ [1, "Hello"],
429+ [2, "World"],
430+ [3, "apple"],
431+ [4, "Alice"],
432+ [5, "Bob"],
433+ [6, "Lucy"]
434+ ])" )
435+ .ValueOrDie ());
436+ auto src_chunk_array = std::make_shared<arrow::ChunkedArray>(arrow::ArrayVector ({array}));
437+
438+ auto dir = paimon::test::UniqueTestDirectory::Create ();
439+ ASSERT_TRUE (dir);
440+ std::string file_path = dir->Str () + " /test.lance" ;
441+ WriteFile (file_path, src_chunk_array, schema);
442+ ASSERT_OK_AND_ASSIGN (
443+ std::unique_ptr<LanceFileBatchReader> reader,
444+ LanceFileBatchReader::Create (file_path, /* batch_size=*/ 4 , /* batch_readahead=*/ 2 ));
445+ ASSERT_EQ (std::numeric_limits<uint64_t >::max (),
446+ reader->GetPreviousBatchFirstRowNumber ().value ());
447+
448+ // first batch row 0-3
449+ ASSERT_OK_AND_ASSIGN (auto read_batch, reader->NextBatch ());
450+ ASSERT_OK_AND_ASSIGN (auto read_array,
451+ paimon::test::ReadResultCollector::GetArray (std::move (read_batch)));
452+ ASSERT_TRUE (read_array->Equals (array->Slice (0 , 4 )));
453+ ASSERT_EQ (0 , reader->GetPreviousBatchFirstRowNumber ().value ());
454+
455+ // second batch 4-5
456+ ASSERT_OK_AND_ASSIGN (read_batch, reader->NextBatch ());
457+ ASSERT_OK_AND_ASSIGN (read_array,
458+ paimon::test::ReadResultCollector::GetArray (std::move (read_batch)));
459+ ASSERT_TRUE (read_array->Equals (array->Slice (4 , 2 )));
460+ ASSERT_EQ (4 , reader->GetPreviousBatchFirstRowNumber ().value ());
461+
462+ // eof
463+ ASSERT_OK_AND_ASSIGN (read_batch, reader->NextBatch ());
464+ ASSERT_TRUE (BatchReader::IsEofBatch (read_batch));
465+ ASSERT_EQ (6 , reader->GetPreviousBatchFirstRowNumber ().value ());
466+
467+ // test with bitmap pushdown
468+ ArrowSchema c_read_schema;
469+ ASSERT_TRUE (arrow::ExportSchema (*schema, &c_read_schema).ok ());
470+ ASSERT_OK (reader->SetReadSchema (&c_read_schema, /* predicate=*/ nullptr ,
471+ /* selection_bitmap=*/ RoaringBitmap32::From ({0 , 3 })));
472+ ASSERT_NOK_WITH_MSG (
473+ reader->GetPreviousBatchFirstRowNumber (),
474+ " Cannot call GetPreviousBatchFirstRowNumber in LanceFileBatchReader because, after bitmap "
475+ " pushdown, rows in the array returned by NextBatch are no longer contiguous." );
476+ }
422477} // namespace paimon::lance::test
0 commit comments