Skip to content

Commit 2181b8e

Browse files
committed
HSEARCH-4950 Add new knn option to the documentation
1 parent 51a87e9 commit 2181b8e

File tree

4 files changed

+88
-6
lines changed

4 files changed

+88
-6
lines changed

documentation/src/main/asciidoc/public/reference/_mapping-directfieldmapping.adoc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,6 @@ which generally gives much more flexibility.
136136
+
137137
include::../components/_incubating-warning.adoc[]
138138
+
139-
WARNING: Vector fields are only supported by the <<backend-lucene, Lucene backend>> for now.
140-
+
141139
Specific field type for vector fields to be used in a <<search-dsl-predicate-knn,vector search>>.
142140
+
143141
Vector fields accept values of type `float[]` or `byte[]` and *require* that

documentation/src/main/asciidoc/public/reference/_search-dsl-predicate.adoc

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1504,6 +1504,40 @@ include::{sourcedir}/org/hibernate/search/documentation/search/predicate/Predica
15041504
----
15051505
====
15061506

1507+
[[search-dsl-predicate-knn-limitations]]
1508+
=== Backend specifics and limitations
1509+
1510+
With the Elasticsearch backend a `knn` predicate can only be added as a top-level predicate,
1511+
i.e. a predicate directly passed to the `where` clause of a search query. Alternatively, if a disjunction of multiple `knn` predicates is required,
1512+
they can be supplied as `should` clauses of a top-level <<search-dsl-predicate-boolean,`boolean` predicate>>
1513+
(the same can be achieved by using an <<search-dsl-predicate-or,`or` predicate>>).
1514+
Any other usage of a `knn` predicate with this backend will lead to an exception being thrown.
1515+
1516+
.Multiple `knn` predicates added via `should` clauses
1517+
====
1518+
[source, JAVA, indent=0, subs="+callouts"]
1519+
----
1520+
include::{sourcedir}/org/hibernate/search/documentation/search/predicate/PredicateDslIT.java[tags=knn-should]
1521+
----
1522+
====
1523+
1524+
The Elasticsearch backend also allows configuring a backend-specific `knn` predicate option: the number of candidates.
1525+
This option specifies the number of approximate nearest neighbor candidates to be found on each shard;
1526+
then the results from each shard are merged and the top `k` are selected.
1527+
See the Elasticsearch documentation for more details.
1528+
To access this option, the Elasticsearch extension must be used:
1529+
1530+
.Setting the Elasticsearch-specific `knn` option: number of candidates
1531+
====
1532+
[source, JAVA, indent=0, subs="+callouts"]
1533+
----
1534+
include::{sourcedir}/org/hibernate/search/documentation/search/predicate/PredicateDslIT.java[tags=knn-candidates]
1535+
----
1536+
<1> Get an extended, Elasticsearch-specific, predicate factory.
1537+
<2> Build a `knn` predicate as usual.
1538+
<3> Provide an Elasticsearch-specific predicate option.
1539+
====
1540+
15071541
[[search-dsl-predicate-knn-other]]
15081542
=== Other options
15091543

documentation/src/test/java/org/hibernate/search/documentation/search/predicate/Book.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ public class Book {
4444
private String comment;
4545

4646
private float[] coverImageEmbeddings;
47+
private float[] alternativeCoverImageEmbeddings;
4748

4849
@ManyToMany
4950
@IndexedEmbedded(structure = ObjectStructure.NESTED)

documentation/src/test/java/org/hibernate/search/documentation/search/predicate/PredicateDslIT.java

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import jakarta.persistence.EntityManagerFactory;
2020

21+
import org.hibernate.search.backend.elasticsearch.ElasticsearchExtension;
2122
import org.hibernate.search.documentation.testsupport.BackendConfigurations;
2223
import org.hibernate.search.documentation.testsupport.DocumentationSetupHelper;
2324
import org.hibernate.search.engine.search.common.BooleanOperator;
@@ -32,6 +33,7 @@
3233
import org.hibernate.search.mapper.orm.mapping.HibernateOrmSearchMappingConfigurer;
3334
import org.hibernate.search.mapper.orm.scope.SearchScope;
3435
import org.hibernate.search.mapper.orm.session.SearchSession;
36+
import org.hibernate.search.mapper.pojo.mapping.definition.programmatic.TypeMappingStep;
3537
import org.hibernate.search.util.common.data.RangeBoundInclusion;
3638
import org.hibernate.search.util.impl.integrationtest.common.extension.BackendConfiguration;
3739

@@ -64,10 +66,14 @@ void setup() {
6466
if ( BackendConfiguration.isLucene() ) {
6567
setupContext.withProperty(
6668
HibernateOrmMapperSettings.MAPPING_CONFIGURER,
67-
(HibernateOrmSearchMappingConfigurer) context -> context.programmaticMapping()
68-
.type( Book.class )
69-
.property( "coverImageEmbeddings" )
70-
.vectorField( 128 )
69+
(HibernateOrmSearchMappingConfigurer) context -> {
70+
TypeMappingStep book = context.programmaticMapping()
71+
.type( Book.class );
72+
book.property( "coverImageEmbeddings" )
73+
.vectorField( 128 );
74+
book.property( "alternativeCoverImageEmbeddings" )
75+
.vectorField( 128 );
76+
}
7177
);
7278
}
7379
entityManagerFactory = setupContext.setup( Book.class, Author.class, EmbeddableGeoPoint.class );
@@ -1143,6 +1149,49 @@ void knn() {
11431149
.extracting( Book::getId )
11441150
.containsExactlyInAnyOrder( BOOK1_ID, BOOK2_ID, BOOK3_ID );
11451151
} );
1152+
1153+
withinSearchSession( searchSession -> {
1154+
// tag::knn-should[]
1155+
float[] coverImageEmbeddingsVector = /*...*/
1156+
// end::knn-should[]
1157+
new float[128];
1158+
// tag::knn-should[]
1159+
float[] alternativeCoverImageEmbeddingsVector = /*...*/
1160+
// end::knn-should[]
1161+
new float[128];
1162+
// tag::knn-should[]
1163+
List<Book> hits = searchSession.search( Book.class )
1164+
.where( f -> f.bool()
1165+
.should( f.knn( 10 ).field( "coverImageEmbeddings" ).matching( coverImageEmbeddingsVector ) )
1166+
.should( f.knn( 5 ).field( "alternativeCoverImageEmbeddings" )
1167+
.matching( alternativeCoverImageEmbeddingsVector ) )
1168+
)
1169+
.fetchHits( 20 );
1170+
// end::knn-should[]
1171+
assertThat( hits )
1172+
.extracting( Book::getId )
1173+
.containsExactlyInAnyOrder( BOOK1_ID, BOOK2_ID, BOOK3_ID, BOOK4_ID );
1174+
} );
1175+
1176+
1177+
if ( BackendConfiguration.isElasticsearch() ) {
1178+
withinSearchSession( searchSession -> {
1179+
// tag::knn-candidates[]
1180+
float[] coverImageEmbeddingsVector = /*...*/
1181+
// end::knn-candidates[]
1182+
new float[128];
1183+
// tag::knn-candidates[]
1184+
List<Book> hits = searchSession.search( Book.class )
1185+
.where( f -> f.extension( ElasticsearchExtension.get() ) // <1>
1186+
.knn( 5 ).field( "coverImageEmbeddings" ).matching( coverImageEmbeddingsVector ) // <2>
1187+
.numberOfCandidates( 15 ) )// <3>
1188+
.fetchHits( 20 );
1189+
// end::knn-candidates[]
1190+
assertThat( hits )
1191+
.extracting( Book::getId )
1192+
.containsExactlyInAnyOrder( BOOK1_ID, BOOK2_ID, BOOK3_ID, BOOK4_ID );
1193+
} );
1194+
}
11461195
}
11471196

11481197
private MySearchParameters getSearchParameters() {

0 commit comments

Comments
 (0)