Skip to content

Commit 5632fe1

Browse files
committed
new: replace `Union[...]` with `|` in `token_count` type annotations
1 parent 985d185 commit 5632fe1

16 files changed

Lines changed: 16 additions & 18 deletions

fastembed/late_interaction/colbert.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ def _tokenize_documents(self, documents: list[str]) -> list[Encoding]:
9898

9999
def token_count(
100100
self,
101-
texts: Union[str, Iterable[str]],
101+
texts: str | Iterable[str],
102102
batch_size: int = 1024,
103103
is_doc: bool = True,
104104
include_extension: bool = False,

fastembed/late_interaction/late_interaction_embedding_base.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ def embedding_size(self) -> int:
7272

7373
def token_count(
7474
self,
75-
texts: Union[str, Iterable[str]],
75+
texts: str | Iterable[str],
7676
batch_size: int = 1024,
7777
**kwargs: Any,
7878
) -> int:

fastembed/late_interaction/late_interaction_text_embedding.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -154,7 +154,7 @@ def query_embed(self, query: str | Iterable[str], **kwargs: Any) -> Iterable[Num
154154

155155
def token_count(
156156
self,
157-
texts: Union[str, Iterable[str]],
157+
texts: str | Iterable[str],
158158
batch_size: int = 1024,
159159
is_doc: bool = True,
160160
include_extension: bool = False,

fastembed/late_interaction_multimodal/colpali.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -174,7 +174,7 @@ def tokenize(self, documents: list[str], **kwargs: Any) -> list[Encoding]:
174174

175175
def token_count(
176176
self,
177-
texts: Union[str, Iterable[str]],
177+
texts: str | Iterable[str],
178178
batch_size: int = 1024,
179179
include_extension: bool = False,
180180
**kwargs: Any,

fastembed/late_interaction_multimodal/late_interaction_multimodal_embedding.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,7 @@ def embed_image(
165165

166166
def token_count(
167167
self,
168-
texts: Union[str, Iterable[str]],
168+
texts: str | Iterable[str],
169169
batch_size: int = 1024,
170170
include_extension: bool = False,
171171
**kwargs: Any,

fastembed/late_interaction_multimodal/late_interaction_multimodal_embedding_base.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,7 @@ def embedding_size(self) -> int:
7979

8080
def token_count(
8181
self,
82-
texts: Union[str, Iterable[str]],
82+
texts: str | Iterable[str],
8383
**kwargs: Any,
8484
) -> int:
8585
"""Returns the number of tokens in the texts."""

fastembed/sparse/bm25.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -268,7 +268,7 @@ def raw_embed(
268268
embeddings.append(SparseEmbedding.from_dict(token_id2value))
269269
return embeddings
270270

271-
def token_count(self, texts: Union[str, Iterable[str]], **kwargs: Any) -> int:
271+
def token_count(self, texts: str | Iterable[str], **kwargs: Any) -> int:
272272
token_num = 0
273273
texts = [texts] if isinstance(texts, str) else texts
274274
for text in texts:

fastembed/sparse/bm42.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -351,7 +351,7 @@ def _get_worker_class(cls) -> Type[TextEmbeddingWorker[SparseEmbedding]]:
351351
return Bm42TextEmbeddingWorker
352352

353353
def token_count(
354-
self, texts: Union[str, Iterable[str]], batch_size: int = 1024, **kwargs: Any
354+
self, texts: str | Iterable[str], batch_size: int = 1024, **kwargs: Any
355355
) -> int:
356356
if not hasattr(self, "model") or self.model is None:
357357
self.load_onnx_model() # loads the tokenizer as well

fastembed/sparse/minicoil.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -188,7 +188,7 @@ def load_onnx_model(self) -> None:
188188
)
189189

190190
def token_count(
191-
self, texts: Union[str, Iterable[str]], batch_size: int = 1024, **kwargs: Any
191+
self, texts: str | Iterable[str], batch_size: int = 1024, **kwargs: Any
192192
) -> int:
193193
return self._token_count(texts, batch_size=batch_size, **kwargs)
194194

fastembed/sparse/sparse_embedding_base.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,6 @@ def query_embed(self, query: str | Iterable[str], **kwargs: Any) -> Iterable[Spa
8585
else:
8686
yield from self.embed(query, **kwargs)
8787

88-
def token_count(self, texts: Union[str, Iterable[str]], **kwargs: Any) -> int:
88+
def token_count(self, texts: str | Iterable[str], **kwargs: Any) -> int:
8989
"""Returns the number of tokens in the texts."""
9090
raise NotImplementedError("Subclasses must implement this method")

0 commit comments

Comments
 (0)