Skip to content

Commit 9c74fb3

Browse files
authored
unique tokens in query (#287)
1 parent 1fe42d8 commit 9c74fb3

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

fastembed/sparse/bm25.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -250,8 +250,8 @@ def query_embed(self, query: Union[str, Iterable[str]], **kwargs) -> Iterable[Sp
 tokens = self.tokenizer.tokenize(text)
 stemmed_tokens = self._stem(tokens)
 token_ids = np.array(
-    [self.compute_token_id(token) for token in stemmed_tokens],
-    dtype=np.float32,
+    list(set(self.compute_token_id(token) for token in stemmed_tokens)),
+    dtype=np.int32,
 )
 values = np.ones_like(token_ids)
 yield SparseEmbedding(indices=token_ids, values=values)

0 commit comments

Comments
 (0)