diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 9754a2888..8b7d08fbe 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -49,6 +49,7 @@ jobs:
           python3 -m pip install --upgrade pip
           python3 -m pip install -r ci/requirements_tests.txt
           python3 -m pip install -r ci/requirements_optional.txt
+          python3 -m pip install -r ci/requirements_nlp.txt
           python3 -m pip install -e .
       - name: Running all tests
         run: |
diff --git a/README.md b/README.md
index 8396559a5..755011cfa 100644
--- a/README.md
+++ b/README.md
@@ -338,6 +338,11 @@ for our paper
 pip install -U open-metric-learning
 ```
 
+If you need OML for NLP, install the extra requirements with:
+```shell
+pip install -U open-metric-learning[nlp]
+```
+
 DockerHub
 
 ```shell
diff --git a/ci/Dockerfile b/ci/Dockerfile
index 7557d4e50..b2c5bffb4 100644
--- a/ci/Dockerfile
+++ b/ci/Dockerfile
@@ -30,9 +30,11 @@ RUN pip install Cython==0.29.28
 COPY ci/requirements.txt requirements.txt
 COPY ci/requirements_tests.txt requirements_tests.txt
 COPY ci/requirements_optional.txt requirements_optional.txt
+COPY ci/requirements_nlp.txt requirements_nlp.txt
 RUN pip install --no-cache-dir -r requirements.txt ${EXTRA_INDEX}
-RUN pip install --no-cache-dir -r requirements_optional.txt
 RUN pip install --no-cache-dir -r requirements_tests.txt
+RUN pip install --no-cache-dir -r requirements_optional.txt
+RUN pip install --no-cache-dir -r requirements_nlp.txt
 
 WORKDIR /oml-lib
 COPY . /oml-lib
diff --git a/ci/requirements.txt b/ci/requirements.txt
index 095b4995c..ceca04411 100644
--- a/ci/requirements.txt
+++ b/ci/requirements.txt
@@ -1,4 +1,5 @@
 torch>=2.0,<=2.1.1
+numpy>=1.24.4,<2.0
 pytorch-lightning>=2.0,<=2.1.2
 torchvision<=0.16.1
 albumentations>=1.0,<=1.3.1
diff --git a/ci/requirements_nlp.txt b/ci/requirements_nlp.txt
new file mode 100644
index 000000000..c74c2f5dd
--- /dev/null
+++ b/ci/requirements_nlp.txt
@@ -0,0 +1 @@
+transformers>=4.0
diff --git a/ci/requirements_optional.txt b/ci/requirements_optional.txt
index b33f00cbf..303f58d8b 100644
--- a/ci/requirements_optional.txt
+++ b/ci/requirements_optional.txt
@@ -1,7 +1,5 @@
-grad-cam==1.4.6
-jupyter>=1.0.0
 neptune>=1.0.0, <1.10.1
 wandb>=0.15.4
 mlflow>=2.0.0
 clearml>=1.5.0
-transformers>=4.0
+grad-cam==1.4.6
diff --git a/docs/readme/installation.md b/docs/readme/installation.md
index 972141334..098b56ade 100644
--- a/docs/readme/installation.md
+++ b/docs/readme/installation.md
@@ -2,6 +2,11 @@
 pip install -U open-metric-learning
 ```
 
+If you need OML for NLP, install the extra requirements with:
+```shell
+pip install -U open-metric-learning[nlp]
+```
+
 DockerHub
 
 ```shell
diff --git a/oml/utils/misc_torch.py b/oml/utils/misc_torch.py
index 75009d3c3..c93b05e82 100644
--- a/oml/utils/misc_torch.py
+++ b/oml/utils/misc_torch.py
@@ -7,7 +7,7 @@
 import torch
 from torch import Tensor, cdist
 
-TSingleValues = Union[int, float, np.float_, np.int_, torch.Tensor]
+TSingleValues = Union[int, float, np.float64, np.int_, torch.Tensor]
 TSequenceValues = Union[List[float], Tuple[float, ...], np.ndarray, torch.Tensor]
 TOnlineValues = Union[TSingleValues, TSequenceValues]
 
@@ -101,7 +101,7 @@ def elementwise_dist(x1: Tensor, x2: Tensor, p: int = 2) -> Tensor:
     assert len(x1.shape) == len(x2.shape) == 2
     assert x1.shape == x2.shape
 
-    # we need an extra dim here to avoid pairwise behaviour of torch.cdist
+    # we need an extra dim here to avoid pairwise behavior of torch.cdist
     if len(x1.shape) == 2:
         x1 = x1.unsqueeze(1)
         x2 = x2.unsqueeze(1)
@@ -263,7 +263,7 @@ def calc_with_batch(self, val: TSequenceValues) -> None:
 
 class OnlineDict(MutableMapping):  # type: ignore
     """
-    We don't inherite from built-in 'dict' due to internal C optimization. We mimic to dict with MutableMapping
+    We don't inherit from built-in 'dict' due to internal C optimization. We mimic to dict with MutableMapping
     https://treyhunner.com/2019/04/why-you-shouldnt-inherit-from-list-and-dict-in-python/
 
     """
@@ -395,7 +395,7 @@ def __init__(self, embeddings: torch.Tensor):
 
     def _fit(self, embeddings: torch.Tensor) -> None:
         """
-        Perform the PCA. Evaluate ``components``, ``expoained_variance``, ``explained_variance_ratio``,
+        Perform the PCA. Evaluate ``components``, ``explained_variance``, ``explained_variance_ratio``,
         ``singular_values``, ``mean``.
 
         Args:
@@ -470,7 +470,7 @@ def calc_principal_axes_number(self, pcf_variance: Tuple[float, ...]) -> torch.T
        Let :math:`\\lambda_1, \\ldots, \\lambda_d\\in\\mathbb{R}` be a set of eigenvalues
        of the covariance matrix of :math:`X` sorted in descending order.
        Then for a given value of desired explained variance :math:`r`,
-       the number of principal components that explaines :math:`r\\cdot 100\\%%` variance is the largest integer
+       the number of principal components that explains :math:`r\\cdot 100\\%%` variance is the largest integer
        :math:`n` such that
 
        .. math::
diff --git a/setup.py b/setup.py
index e7ab6e9ac..a711d2073 100644
--- a/setup.py
+++ b/setup.py
@@ -18,12 +18,19 @@ def load_version() -> str:
         return re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', f.read(), re.M).group(1)
 
 
+NLP_REQUIRE = load_requirements("ci/requirements_nlp.txt")
+
+
 setup(
     # technical things
     version=load_version(),
     packages=find_packages(exclude=["ci", "docs", "pipelines", "tests*"]),
     python_requires=">=3.8,<4.0",
     install_requires=load_requirements("ci/requirements.txt"),
+    extras_require={
+        "nlp": NLP_REQUIRE,
+        "all": NLP_REQUIRE,  # later will be cv and audio
+    },
     include_package_data=True,
     long_description=Path("README.md").read_text(),
     long_description_content_type="text/markdown",
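
For context, the new `nlp` extra only pins `transformers>=4.0`; with the `extras_require` entry above it can be installed via `pip install -e ".[nlp]"` from a checkout, or `pip install "open-metric-learning[nlp]"` from PyPI. Below is a minimal, hypothetical sketch of what that dependency enables, i.e. producing text embeddings with the plain `transformers` API. The checkpoint name and the mean-pooling step are illustrative assumptions and are not OML's own NLP wrappers.

```python
# Hypothetical usage sketch for the new `nlp` extra (transformers>=4.0).
# The checkpoint and mean pooling are illustrative assumptions, not OML's own text wrappers.
import torch
from transformers import AutoModel, AutoTokenizer

model_name = "bert-base-uncased"  # example checkpoint, not mandated by this PR
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name).eval()

texts = ["red summer dress", "blue denim jeans"]
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")

with torch.no_grad():
    hidden = model(**inputs).last_hidden_state     # [batch, seq_len, dim]
    mask = inputs["attention_mask"].unsqueeze(-1)  # [batch, seq_len, 1]
    # mean-pool token states over non-padding positions -> [batch, dim]
    embeddings = (hidden * mask).sum(dim=1) / mask.sum(dim=1)

print(embeddings.shape)  # torch.Size([2, 768]) for a BERT-base checkpoint
```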