Skip to content

Commit

Permalink
HSEARCH-4950 Add new knn option to the documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
marko-bekhta committed Jan 5, 2024
1 parent 51a87e9 commit 2181b8e
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,6 @@ which generally gives much more flexibility.
+
include::../components/_incubating-warning.adoc[]
+
WARNING: Vector fields are only supported by the <<backend-lucene, Lucene backend>> for now.
+
Specific field type for vector fields to be used in a <<search-dsl-predicate-knn,vector search>>.
+
Vector fields accept values of type `float[]` or `byte[]` and *require* that
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1504,6 +1504,40 @@ include::{sourcedir}/org/hibernate/search/documentation/search/predicate/Predica
----
====

[[search-dsl-predicate-knn-limitations]]
=== Backend specifics and limitations

With the Elasticsearch backend, a `knn` predicate can only be used as a top-level predicate,
i.e. a predicate passed directly to the `where` clause of a search query. If a disjunction of multiple `knn` predicates is required,
they can be supplied as `should` clauses of a top-level <<search-dsl-predicate-boolean,`boolean` predicate>>
(the same can be achieved by using an <<search-dsl-predicate-or,`or` predicate>>).
Any other usage of a `knn` predicate with this backend will lead to an exception being thrown.

.Multiple `knn` predicates added via `should` clauses
====
[source, JAVA, indent=0, subs="+callouts"]
----
include::{sourcedir}/org/hibernate/search/documentation/search/predicate/PredicateDslIT.java[tags=knn-should]
----
====

The Elasticsearch backend also allows configuring a backend-specific `knn` predicate option: the number of candidates.
This option specifies the number of approximate nearest neighbor candidates to be found on each shard;
the results from each shard are then merged and the top `k` are selected.
See the Elasticsearch documentation for more details.
To access this option, use the Elasticsearch extension:

.Setting the number of candidates, an Elasticsearch-specific `knn` option
====
[source, JAVA, indent=0, subs="+callouts"]
----
include::{sourcedir}/org/hibernate/search/documentation/search/predicate/PredicateDslIT.java[tags=knn-candidates]
----
<1> Get an extended, Elasticsearch-specific, predicate factory.
<2> Build a `knn` predicate as usual.
<3> Provide an Elasticsearch-specific predicate option.
====

[[search-dsl-predicate-knn-other]]
=== Other options

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public class Book {
private String comment;

private float[] coverImageEmbeddings;
private float[] alternativeCoverImageEmbeddings;

@ManyToMany
@IndexedEmbedded(structure = ObjectStructure.NESTED)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import jakarta.persistence.EntityManagerFactory;

import org.hibernate.search.backend.elasticsearch.ElasticsearchExtension;
import org.hibernate.search.documentation.testsupport.BackendConfigurations;
import org.hibernate.search.documentation.testsupport.DocumentationSetupHelper;
import org.hibernate.search.engine.search.common.BooleanOperator;
Expand All @@ -32,6 +33,7 @@
import org.hibernate.search.mapper.orm.mapping.HibernateOrmSearchMappingConfigurer;
import org.hibernate.search.mapper.orm.scope.SearchScope;
import org.hibernate.search.mapper.orm.session.SearchSession;
import org.hibernate.search.mapper.pojo.mapping.definition.programmatic.TypeMappingStep;
import org.hibernate.search.util.common.data.RangeBoundInclusion;
import org.hibernate.search.util.impl.integrationtest.common.extension.BackendConfiguration;

Expand Down Expand Up @@ -64,10 +66,14 @@ void setup() {
if ( BackendConfiguration.isLucene() ) {
setupContext.withProperty(
HibernateOrmMapperSettings.MAPPING_CONFIGURER,
(HibernateOrmSearchMappingConfigurer) context -> context.programmaticMapping()
.type( Book.class )
.property( "coverImageEmbeddings" )
.vectorField( 128 )
(HibernateOrmSearchMappingConfigurer) context -> {
TypeMappingStep book = context.programmaticMapping()
.type( Book.class );
book.property( "coverImageEmbeddings" )
.vectorField( 128 );
book.property( "alternativeCoverImageEmbeddings" )
.vectorField( 128 );
}
);
}
entityManagerFactory = setupContext.setup( Book.class, Author.class, EmbeddableGeoPoint.class );
Expand Down Expand Up @@ -1143,6 +1149,49 @@ void knn() {
.extracting( Book::getId )
.containsExactlyInAnyOrder( BOOK1_ID, BOOK2_ID, BOOK3_ID );
} );

withinSearchSession( searchSession -> {
// tag::knn-should[]
float[] coverImageEmbeddingsVector = /*...*/
// end::knn-should[]
new float[128];
// tag::knn-should[]
float[] alternativeCoverImageEmbeddingsVector = /*...*/
// end::knn-should[]
new float[128];
// tag::knn-should[]
List<Book> hits = searchSession.search( Book.class )
.where( f -> f.bool()
.should( f.knn( 10 ).field( "coverImageEmbeddings" ).matching( coverImageEmbeddingsVector ) )
.should( f.knn( 5 ).field( "alternativeCoverImageEmbeddings" )
.matching( alternativeCoverImageEmbeddingsVector ) )
)
.fetchHits( 20 );
// end::knn-should[]
assertThat( hits )
.extracting( Book::getId )
.containsExactlyInAnyOrder( BOOK1_ID, BOOK2_ID, BOOK3_ID, BOOK4_ID );
} );


if ( BackendConfiguration.isElasticsearch() ) {
withinSearchSession( searchSession -> {
// tag::knn-candidates[]
float[] coverImageEmbeddingsVector = /*...*/
// end::knn-candidates[]
new float[128];
// tag::knn-candidates[]
List<Book> hits = searchSession.search( Book.class )
.where( f -> f.extension( ElasticsearchExtension.get() ) // <1>
.knn( 5 ).field( "coverImageEmbeddings" ).matching( coverImageEmbeddingsVector ) // <2>
.numberOfCandidates( 15 ) )// <3>
.fetchHits( 20 );
// end::knn-candidates[]
assertThat( hits )
.extracting( Book::getId )
.containsExactlyInAnyOrder( BOOK1_ID, BOOK2_ID, BOOK3_ID, BOOK4_ID );
} );
}
}

private MySearchParameters getSearchParameters() {
Expand Down

0 comments on commit 2181b8e

Please sign in to comment.