refactor: replace remaining `typing.Union` and `typing.Optional` annotations with the `X | Y` union syntax

joein · joein · commit 9eb12b7e1dd0 · 2025-12-02T17:14:19.000+07:00
diff --git a/fastembed/common/utils.py b/fastembed/common/utils.py
@@ -5,7 +5,7 @@
 import unicodedata
 from pathlib import Path
 from itertools import islice
-from typing import Iterable, Optional, TypeVar
+from typing import Iterable, TypeVar
 
 import numpy as np
 from numpy.typing import NDArray
@@ -45,7 +45,7 @@ def iter_batch(iterable: Iterable[T], size: int) -> Iterable[list[T]]:
         yield b
 
 
-def define_cache_dir(cache_dir: Optional[str] = None) -> Path:
+def define_cache_dir(cache_dir: str | None = None) -> Path:
     """
     Define the cache directory for fastembed
     """
diff --git a/fastembed/late_interaction_multimodal/onnx_multimodal_model.py b/fastembed/late_interaction_multimodal/onnx_multimodal_model.py
@@ -2,7 +2,7 @@
 import os
 from multiprocessing import get_all_start_methods
 from pathlib import Path
-from typing import Any, Iterable, Optional, Sequence, Type, Union
+from typing import Any, Iterable, Sequence, Type
 
 import numpy as np
 from PIL import Image
@@ -18,12 +18,12 @@
 
 
 class OnnxMultimodalModel(OnnxModel[T]):
-    ONNX_OUTPUT_NAMES: Optional[list[str]] = None
+    ONNX_OUTPUT_NAMES: list[str] | None = None
 
     def __init__(self) -> None:
         super().__init__()
-        self.tokenizer: Optional[Tokenizer] = None
-        self.processor: Optional[Compose] = None
+        self.tokenizer: Tokenizer | None = None
+        self.processor: Compose | None = None
         self.special_token_to_id: dict[str, int] = {}
 
     def _preprocess_onnx_text_input(
@@ -60,11 +60,11 @@ def _load_onnx_model(
         self,
         model_dir: Path,
         model_file: str,
-        threads: Optional[int],
-        providers: Optional[Sequence[OnnxProvider]] = None,
+        threads: int | None,
+        providers: Sequence[OnnxProvider] | None = None,
         cuda: bool = False,
-        device_id: Optional[int] = None,
-        extra_session_options: Optional[dict[str, Any]] = None,
+        device_id: int | None = None,
+        extra_session_options: dict[str, Any] | None = None,
     ) -> None:
         super()._load_onnx_model(
             model_dir=model_dir,
@@ -116,15 +116,15 @@ def _embed_documents(
         self,
         model_name: str,
         cache_dir: str,
-        documents: Union[str, Iterable[str]],
+        documents: str | Iterable[str],
         batch_size: int = 256,
-        parallel: Optional[int] = None,
-        providers: Optional[Sequence[OnnxProvider]] = None,
+        parallel: int | None = None,
+        providers: Sequence[OnnxProvider] | None = None,
         cuda: bool = False,
-        device_ids: Optional[list[int]] = None,
+        device_ids: list[int] | None = None,
         local_files_only: bool = False,
-        specific_model_path: Optional[str] = None,
-        extra_session_options: Optional[dict[str, Any]] = None,
+        specific_model_path: str | None = None,
+        extra_session_options: dict[str, Any] | None = None,
         **kwargs: Any,
     ) -> Iterable[T]:
         is_small = False
@@ -187,15 +187,15 @@ def _embed_images(
         self,
         model_name: str,
         cache_dir: str,
-        images: Union[Iterable[ImageInput], ImageInput],
+        images: Iterable[ImageInput] | ImageInput,
         batch_size: int = 256,
-        parallel: Optional[int] = None,
-        providers: Optional[Sequence[OnnxProvider]] = None,
+        parallel: int | None = None,
+        providers: Sequence[OnnxProvider] | None = None,
         cuda: bool = False,
-        device_ids: Optional[list[int]] = None,
+        device_ids: list[int] | None = None,
         local_files_only: bool = False,
-        specific_model_path: Optional[str] = None,
-        extra_session_options: Optional[dict[str, Any]] = None,
+        specific_model_path: str | None = None,
+        extra_session_options: dict[str, Any] | None = None,
         **kwargs: Any,
     ) -> Iterable[T]:
         is_small = False
diff --git a/fastembed/text/custom_text_embedding.py b/fastembed/text/custom_text_embedding.py
@@ -1,4 +1,4 @@
-from typing import Sequence, Any, Iterable, Optional
+from typing import Sequence, Any, Iterable
 
 from dataclasses import dataclass
 
@@ -64,7 +64,7 @@ def _post_process_onnx_output(
         return self._normalize(self._pool(output.model_output, output.attention_mask))
 
     def _pool(
-        self, embeddings: NumpyArray, attention_mask: Optional[NDArray[np.int64]] = None
+        self, embeddings: NumpyArray, attention_mask: NDArray[np.int64] | None = None
     ) -> NumpyArray:
         if self._pooling == PoolingType.CLS:
             return embeddings[:, 0]
diff --git a/tests/test_multi_gpu.py b/tests/test_multi_gpu.py
@@ -1,5 +1,5 @@
 import pytest
-from typing import Optional
+
 from fastembed import (
     TextEmbedding,
     SparseTextEmbedding,
@@ -14,7 +14,7 @@
 
 @pytest.mark.skip(reason="Requires a multi-gpu server")
 @pytest.mark.parametrize("device_id", [None, 0, 1])
-def test_gpu_via_providers(device_id: Optional[int]) -> None:
+def test_gpu_via_providers(device_id: int | None) -> None:
     docs = ["hello world", "flag embedding"]
 
     device_id = device_id if device_id is not None else 0
@@ -86,7 +86,7 @@ def test_gpu_via_providers(device_id: Optional[int]) -> None:
 
 @pytest.mark.skip(reason="Requires a multi-gpu server")
 @pytest.mark.parametrize("device_ids", [None, [0], [1], [0, 1]])
-def test_gpu_cuda_device_ids(device_ids: Optional[list[int]]) -> None:
+def test_gpu_cuda_device_ids(device_ids: list[int] | None) -> None:
     docs = ["hello world", "flag embedding"]
     device_id = device_ids[0] if device_ids else 0
     embedding_model = TextEmbedding(
@@ -171,7 +171,7 @@ def test_gpu_cuda_device_ids(device_ids: Optional[list[int]]) -> None:
 @pytest.mark.parametrize(
     "device_ids,parallel", [(None, None), (None, 2), ([1], None), ([1], 1), ([1], 2), ([0, 1], 2)]
 )
-def test_multi_gpu_parallel_inference(device_ids: Optional[list[int]], parallel: int) -> None:
+def test_multi_gpu_parallel_inference(device_ids: list[int] | None, parallel: int) -> None:
     docs = ["hello world", "flag embedding"] * 100
     batch_size = 5