2222#include " paimon/testing/utils/testharness.h"
2323
2424namespace paimon ::lucene::test {
25- TEST (LuceneInterfaceTest, TestSimple) {
25+ class LuceneInterfaceTest : public ::testing::Test {
26+ public:
27+ void SetUp () override {}
28+ void TearDown () override {}
29+
30+ class TestDocIdSetIterator : public Lucene ::DocIdSetIterator {
31+ public:
32+ explicit TestDocIdSetIterator (const std::vector<int32_t >& ids)
33+ : Lucene::DocIdSetIterator(), ids_(ids) {}
34+
35+ int32_t advance (int32_t target) override {
36+ int32_t doc_id = nextDoc ();
37+ while (doc_id < target) {
38+ doc_id = nextDoc ();
39+ }
40+ return doc_id;
41+ }
42+ int32_t docID () override {
43+ return ids_[cursor_];
44+ }
45+ int32_t nextDoc () override {
46+ if (cursor_ == ids_.size ()) {
47+ return Lucene::DocIdSetIterator::NO_MORE_DOCS;
48+ }
49+ return ids_[cursor_++];
50+ }
51+
52+ private:
53+ size_t cursor_ = 0 ;
54+ std::vector<int32_t > ids_;
55+ };
56+
57+ class TestDocIdSet : public Lucene ::DocIdSet {
58+ public:
59+ explicit TestDocIdSet (const std::vector<int32_t >& ids) : DocIdSet(), ids_(ids) {}
60+
61+ Lucene::DocIdSetIteratorPtr iterator () override {
62+ return Lucene::newLucene<TestDocIdSetIterator>(ids_);
63+ }
64+ bool isCacheable () override {
65+ return true ;
66+ }
67+
68+ private:
69+ std::vector<int32_t > ids_;
70+ };
71+
72+ class TestFilter : public Lucene ::Filter {
73+ public:
74+ explicit TestFilter (const std::vector<int32_t >& ids) : ids_(ids) {}
75+
76+ Lucene::DocIdSetPtr getDocIdSet (const Lucene::IndexReaderPtr& reader) override {
77+ return Lucene::newLucene<TestDocIdSet>(ids_);
78+ }
79+
80+ private:
81+ std::vector<int32_t > ids_;
82+ };
83+ };
84+
85+ TEST_F (LuceneInterfaceTest, TestSimple) {
2686 auto dir = paimon::test::UniqueTestDirectory::Create (" local" );
2787 std::string index_path = dir->Str () + " /lucene_test" ;
2888 auto lucene_dir = Lucene::FSDirectory::open (LuceneUtils::StringToWstring (index_path),
@@ -68,10 +128,17 @@ TEST(LuceneInterfaceTest, TestSimple) {
68128 parser->setAllowLeadingWildcard (true );
69129
70130 auto search = [&](const std::wstring& query_str, int32_t limit,
131+ const std::optional<std::vector<int32_t >> selected_id,
71132 const std::vector<int32_t >& expected_doc_id_vec,
72133 const std::vector<std::wstring>& expected_doc_id_content_vec) {
73134 Lucene::QueryPtr query = parser->parse (query_str);
74- Lucene::TopDocsPtr results = searcher->search (query, limit);
135+ Lucene::TopDocsPtr results;
136+ if (selected_id) {
137+ Lucene::FilterPtr lucene_filter = Lucene::newLucene<TestFilter>(selected_id.value ());
138+ results = searcher->search (query, lucene_filter, limit);
139+ } else {
140+ results = searcher->search (query, limit);
141+ }
75142 ASSERT_EQ (expected_doc_id_vec.size (), results->scoreDocs .size ());
76143
77144 std::vector<int32_t > resule_doc_id_vec;
@@ -86,18 +153,29 @@ TEST(LuceneInterfaceTest, TestSimple) {
86153 };
87154
88155 // result is sorted by tf-idf score
89- search (L" document" , /* limit=*/ 10 , std::vector<int32_t >({2 , 1 , 0 }),
156+ search (L" document" , /* limit=*/ 10 , /* selected_id= */ std:: nullopt , std::vector<int32_t >({2 , 1 , 0 }),
90157 std::vector<std::wstring>({L" 2" , L" 1" , L" 0" }));
91- search (L" document" , /* limit=*/ 1 , std::vector<int32_t >({2 }), std::vector<std::wstring>({L" 2" }));
92- search (L" test AND document" , /* limit=*/ 10 , std::vector<int32_t >({2 , 0 }),
93- std::vector<std::wstring>({L" 2" , L" 0" }));
94- search (L" test OR new" , /* limit=*/ 10 , std::vector<int32_t >({1 , 0 , 2 }),
95- std::vector<std::wstring>({L" 1" , L" 0" , L" 2" }));
96- search (L" \" test document\" " , /* limit=*/ 10 , std::vector<int32_t >({0 }),
97- std::vector<std::wstring>({L" 0" }));
98- search (L" unordered" , /* limit=*/ 10 , std::vector<int32_t >({3 }),
158+ search (L" document" , /* limit=*/ 1 , /* selected_id=*/ std::nullopt , std::vector<int32_t >({2 }),
159+ std::vector<std::wstring>({L" 2" }));
160+ search (L" test AND document" , /* limit=*/ 10 , /* selected_id=*/ std::nullopt ,
161+ std::vector<int32_t >({2 , 0 }), std::vector<std::wstring>({L" 2" , L" 0" }));
162+ search (L" test OR new" , /* limit=*/ 10 , /* selected_id=*/ std::nullopt ,
163+ std::vector<int32_t >({1 , 0 , 2 }), std::vector<std::wstring>({L" 1" , L" 0" , L" 2" }));
164+ search (L" \" test document\" " , /* limit=*/ 10 , /* selected_id=*/ std::nullopt ,
165+ std::vector<int32_t >({0 }), std::vector<std::wstring>({L" 0" }));
166+ search (L" unordered" , /* limit=*/ 10 , /* selected_id=*/ std::nullopt , std::vector<int32_t >({3 }),
99167 std::vector<std::wstring>({L" 5" }));
100- search (L" *orDer*" , /* limit=*/ 10 , std::vector<int32_t >({3 }), std::vector<std::wstring>({L" 5" }));
168+ search (L" *orDer*" , /* limit=*/ 10 , /* selected_id=*/ std::nullopt , std::vector<int32_t >({3 }),
169+ std::vector<std::wstring>({L" 5" }));
170+
171+ // test filter
172+ search (L" document" , /* limit=*/ 10 , /* selected_id=*/ std::vector<int32_t >({0 , 1 }),
173+ std::vector<int32_t >({1 , 0 }), std::vector<std::wstring>({L" 1" , L" 0" }));
174+ search (L" document OR unordered" , /* limit=*/ 10 ,
175+ /* selected_id=*/ std::vector<int32_t >({0 , 1 , 3 }), std::vector<int32_t >({3 , 1 , 0 }),
176+ std::vector<std::wstring>({L" 5" , L" 1" , L" 0" }));
177+ search (L" unordered" , /* limit=*/ 10 , /* selected_id=*/ std::vector<int32_t >({0 }),
178+ std::vector<int32_t >(), std::vector<std::wstring>());
101179
102180 reader->close ();
103181 lucene_dir->close ();
0 commit comments