Skip to content

Commit f7aadf2

Browse files
committed
HSEARCH-5305 - adjust vector search max dimensions for Lucene backend
Signed-off-by: Jan Schatteman <[email protected]>
1 parent fbfd3be commit f7aadf2

File tree

6 files changed

+13
-13
lines changed

6 files changed

+13
-13
lines changed

backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/codec/impl/HibernateSearchKnnVectorsFormat.java

+1-3
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,7 @@
1919

2020
public class HibernateSearchKnnVectorsFormat extends KnnVectorsFormat {
2121
// OpenSearch has a limit of 16000
22-
// Elasticsearch has a limit of 4096
23-
// We'll keep it at 4096 for now as well:
24-
public static final int DEFAULT_MAX_DIMENSIONS = 4096;
22+
public static final int DEFAULT_MAX_DIMENSIONS = 16000;
2523
private static final KnnVectorsFormat DEFAULT_KNN_VECTORS_FORMAT = new HibernateSearchKnnVectorsFormat();
2624

2725
public static KnnVectorsFormat defaultFormat() {

documentation/src/main/asciidoc/migration/index.adoc

+4
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ in Hibernate Search {hibernateSearchVersion}
7777
are backward-compatible with Hibernate Search {hibernateSearchPreviousStableVersionShort}:
7878
no database schema update is necessary for these tables.
7979

80+
[[vectorsize]]
81+
=== Vector search max dimension
82+
The maximum vector dimension supported by the Lucene backend has been increased from 4096 to 16000. Be aware that indexing and searching larger vectors requires more JVM memory.
83+
8084
[[configuration]]
8185
== Configuration
8286

documentation/src/main/asciidoc/public/reference/_mapping-directfieldmapping.adoc

+1-1
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,7 @@ include::../components/_incubating-warning.adoc[]
372372
The size of the stored vectors. This is a required field. This size should match the vector size of the vectors produced by
373373
the model used to convert the data into vector representation.
374374
It is expected to be a positive integer value. Maximum accepted value is backend-specific.
375-
For the <<backend-lucene, Lucene backend>> the dimension must be in `[1, 4096]` range.
375+
For the <<backend-lucene, Lucene backend>> the dimension must be in `[1, 16000]` range.
376376
As for the <<backend-elasticsearch, Elasticsearch backend>> the range depends on the distribution.
377377
See the link:{elasticsearchDocUrl}/dense-vector.html#dense-vector-params[Elasticsearch]/link:{openSearchDocUrl}/search-plugins/knn/approximate-knn/#get-started-with-approximate-k-nn[OpenSearch]
378378
specific documentation to learn about the vector types of these distributions.

integrationtest/backend/lucene/src/test/java/org/hibernate/search/integrationtest/backend/lucene/mapping/LuceneVectorFieldIT.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ class LuceneVectorFieldIT {
2020
public final SearchSetupHelper setupHelper = SearchSetupHelper.create();
2121

2222
@ParameterizedTest
23-
@ValueSource(ints = { -1, -1000, 4097, 10000, Integer.MAX_VALUE, Integer.MIN_VALUE })
23+
@ValueSource(ints = { -1, -1000, 16001, Integer.MAX_VALUE, Integer.MIN_VALUE })
2424
void assertDimension(int dimension) {
25-
test( dimension, 5, 10, "dimension", dimension, 4096 );
25+
test( dimension, 5, 10, "dimension", dimension, 16000 );
2626
}
2727

2828
@ParameterizedTest

integrationtest/mapper/pojo-standalone-realbackend/src/test/java/org/hibernate/search/integrationtest/mapper/pojo/standalone/realbackend/mapping/VectorFieldIT.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@
5252
class VectorFieldIT {
5353

5454
private static final String INDEX_NAME = "IndexName";
55-
private static final int BATCHES = 20;
56-
private static final int BATCH_SIZE = 1_000;
55+
private static final int BATCHES = 100;
56+
private static final int BATCH_SIZE = 200;
5757

5858
@RegisterExtension
5959
public StandalonePojoMappingSetupHelper setupHelper = StandalonePojoMappingSetupHelper.withSingleBackend(
@@ -78,7 +78,7 @@ static void beforeAll() {
7878
@RetryExtension.TestWithRetry
7979
void vectorSizeLimits_max_allowed_dimension_with_lots_of_documents() {
8080
// OpenSearch 2.12 allows up to 16000 dimensions, which will lead to an OOM in this particular test (NOTE(review): the cap below is 16000 as well -- confirm it still prevents the OOM):
81-
int maxDimension = Math.min( 4096, maxDimension() );
81+
int maxDimension = Math.min( 16000, maxDimension() );
8282
@Indexed(index = INDEX_NAME)
8383
class IndexedEntity {
8484
@DocumentId
@@ -212,7 +212,7 @@ class IndexedEntity {
212212

213213
private static int maxDimension() {
214214
if ( BackendConfiguration.isLucene() ) {
215-
return 4096;
215+
return 16000;
216216
}
217217
else {
218218
ElasticsearchVersion actualVersion = ElasticsearchTestDialect.getActualVersion();

lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/codec/impl/HibernateSearchKnnVectorsFormat.java

+1-3
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,7 @@
1919

2020
public class HibernateSearchKnnVectorsFormat extends KnnVectorsFormat {
2121
// OpenSearch has a limit of 16000
22-
// Elasticsearch has a limit of 4096
23-
// We'll keep it at 4096 for now as well:
24-
public static final int DEFAULT_MAX_DIMENSIONS = 4096;
22+
public static final int DEFAULT_MAX_DIMENSIONS = 16000;
2523
private static final KnnVectorsFormat DEFAULT_KNN_VECTORS_FORMAT = new HibernateSearchKnnVectorsFormat();
2624

2725
public static KnnVectorsFormat defaultFormat() {

0 commit comments

Comments
 (0)