Commit ec8f069

new: update readme for CUDA 12.x, add warning for version conflicts (#239)
* new: update readme for CUDA 12.x, add warning about onnxruntime-gpu and cuda compatibility
* fix: change warning type
* new: update readme
1 parent cbe0010 commit ec8f069

File tree

2 files changed: +32 −5 lines changed


README.md (+20 −4)
````diff
@@ -50,17 +50,33 @@ len(embeddings_list[0]) # Vector of 384 dimensions
 
 ### ⚡️ FastEmbed on a GPU
 
-FastEmbed supports running on GPU devices. It requires installation of the `fastembed-gpu` package.
-Make sure not to have the `fastembed` package installed, as it might interfere with the `fastembed-gpu` package.
+FastEmbed supports running on GPU devices.
+It requires installation of the `fastembed-gpu` package.
 
 ```bash
 pip install fastembed-gpu
-```
+```
+
+*IMPORTANT*: Make sure not to have the `fastembed` package installed, as it interferes with the `fastembed-gpu` package.
+
+By default, `fastembed` is shipped with `onnxruntime-gpu` compiled for CUDA 11.8.
+
+CUDA 12.x requires `onnxruntime-gpu` to be installed with the following command:
+
+```bash
+pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
+```
+
+*Note*: It is better to install it before `fastembed-gpu`, otherwise it might be required to uninstall `onnxruntime-gpu` first.
 
 ```python
 from fastembed import TextEmbedding
 
-embedding_model = TextEmbedding(model_name="BAAI/bge-small-en-v1.5", providers=["CUDAExecutionProvider"])
+embedding_model = TextEmbedding(
+    model_name="BAAI/bge-small-en-v1.5",
+    providers=["CUDAExecutionProvider"]
+)
 print("The model BAAI/bge-small-en-v1.5 is ready to use on a GPU.")
 
 ```
````
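The README's point about CUDA 11.8 vs 12.x wheels boils down to whether `onnxruntime` actually exposes `CUDAExecutionProvider` at runtime. The selection logic can be sketched as a pure function; `pick_providers` is a hypothetical helper for illustration, not part of the fastembed API:

```python
from typing import List, Sequence

def pick_providers(requested: Sequence[str], available: Sequence[str]) -> List[str]:
    """Hypothetical helper: keep only the requested execution providers that the
    installed onnxruntime build actually exposes, falling back to CPU if none do."""
    usable = [p for p in requested if p in available]
    return usable or ["CPUExecutionProvider"]

# With only the CPU wheel of onnxruntime installed, CUDA is filtered out:
print(pick_providers(["CUDAExecutionProvider"], ["CPUExecutionProvider"]))
# -> ['CPUExecutionProvider']
```

In practice the `available` list comes from `onnxruntime.get_available_providers()`; if `CUDAExecutionProvider` is missing from it despite a GPU being present, that usually signals the onnxruntime/CUDA version mismatch this commit warns about.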

fastembed/common/onnx_model.py (+12 −1)
````diff
@@ -1,5 +1,6 @@
 from pathlib import Path
 from typing import Any, Dict, Generic, Iterable, Optional, Tuple, Type, TypeVar, Sequence
+import warnings
 
 import numpy as np
 import onnxruntime as ort
@@ -39,14 +40,15 @@ def load_onnx_model(
         providers: Optional[Sequence[OnnxProvider]] = None,
     ) -> None:
         model_path = model_dir / model_file
-
         # List of Execution Providers: https://onnxruntime.ai/docs/execution-providers
 
         onnx_providers = ["CPUExecutionProvider"] if providers is None else list(providers)
         available_providers = ort.get_available_providers()
+        requested_provider_names = []
         for provider in onnx_providers:
             # check providers available
             provider_name = provider if isinstance(provider, str) else provider[0]
+            requested_provider_names.append(provider_name)
             if provider_name not in available_providers:
                 raise ValueError(
                     f"Provider {provider_name} is not available. Available providers: {available_providers}"
@@ -62,6 +64,15 @@
         self.model = ort.InferenceSession(
             str(model_path), providers=onnx_providers, sess_options=so
         )
+        if "CUDAExecutionProvider" in requested_provider_names:
+            current_providers = self.model.get_providers()
+            if "CUDAExecutionProvider" not in current_providers:
+                warnings.warn(
+                    f"Attempt to set CUDAExecutionProvider failed. Current providers: {current_providers}. "
+                    "If you are using CUDA 12.x, install onnxruntime-gpu via "
+                    "`pip install onnxruntime-gpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/`",
+                    RuntimeWarning,
+                )
 
     def onnx_embed(self, *args, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
         raise NotImplementedError("Subclasses must implement this method")
````
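The added check works because `ort.InferenceSession` silently drops providers it cannot initialize, so the session's active providers can differ from the requested ones. The pattern can be demonstrated standalone; `warn_if_cuda_fallback` is a hypothetical extraction of the commit's logic, decoupled from onnxruntime:

```python
import warnings
from typing import Sequence

def warn_if_cuda_fallback(requested: Sequence[str], active: Sequence[str]) -> None:
    """Hypothetical standalone version of the commit's check: emit a
    RuntimeWarning when CUDAExecutionProvider was requested but the
    session silently fell back to another provider."""
    if "CUDAExecutionProvider" in requested and "CUDAExecutionProvider" not in active:
        warnings.warn(
            f"Attempt to set CUDAExecutionProvider failed. "
            f"Current providers: {list(active)}.",
            RuntimeWarning,
        )

# Simulated fallback: CUDA requested, but only CPU ended up active.
warn_if_cuda_fallback(["CUDAExecutionProvider"], ["CPUExecutionProvider"])
```

Using `RuntimeWarning` (the "fix: change warning type" part of the commit message) rather than raising keeps CPU fallback usable while still surfacing the likely CUDA 12.x/onnxruntime-gpu mismatch.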
