diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 9754a2888..8b7d08fbe 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -49,6 +49,7 @@ jobs:
           python3 -m pip install --upgrade pip
           python3 -m pip install -r ci/requirements_tests.txt
           python3 -m pip install -r ci/requirements_optional.txt
+          python3 -m pip install -r ci/requirements_nlp.txt
           python3 -m pip install -e .
       - name: Running all tests
         run: |
diff --git a/README.md b/README.md
index 8396559a5..755011cfa 100644
--- a/README.md
+++ b/README.md
@@ -338,6 +338,11 @@ for our paper
 pip install -U open-metric-learning
 ```
 
+If you need OML for NLP, install the extra requirements with:
+```shell
+pip install -U open-metric-learning[nlp]
+```
+
 DockerHub
 
 ```shell
diff --git a/ci/Dockerfile b/ci/Dockerfile
index 7557d4e50..b2c5bffb4 100644
--- a/ci/Dockerfile
+++ b/ci/Dockerfile
@@ -30,9 +30,11 @@ RUN pip install Cython==0.29.28
 COPY ci/requirements.txt requirements.txt
 COPY ci/requirements_tests.txt requirements_tests.txt
 COPY ci/requirements_optional.txt requirements_optional.txt
+COPY ci/requirements_nlp.txt requirements_nlp.txt
 RUN pip install --no-cache-dir -r requirements.txt ${EXTRA_INDEX}
-RUN pip install --no-cache-dir -r requirements_optional.txt
 RUN pip install --no-cache-dir -r requirements_tests.txt
+RUN pip install --no-cache-dir -r requirements_optional.txt
+RUN pip install --no-cache-dir -r requirements_nlp.txt
 
 WORKDIR /oml-lib
 COPY . /oml-lib
diff --git a/ci/requirements.txt b/ci/requirements.txt
index 095b4995c..ceca04411 100644
--- a/ci/requirements.txt
+++ b/ci/requirements.txt
@@ -1,4 +1,5 @@
 torch>=2.0,<=2.1.1
+numpy>=1.24.4,<2.0
 pytorch-lightning>=2.0,<=2.1.2
 torchvision<=0.16.1
 albumentations>=1.0,<=1.3.1
diff --git a/ci/requirements_nlp.txt b/ci/requirements_nlp.txt
new file mode 100644
index 000000000..c74c2f5dd
--- /dev/null
+++ b/ci/requirements_nlp.txt
@@ -0,0 +1 @@
+transformers>=4.0
diff --git a/ci/requirements_optional.txt b/ci/requirements_optional.txt
index b33f00cbf..303f58d8b 100644
--- a/ci/requirements_optional.txt
+++ b/ci/requirements_optional.txt
@@ -1,7 +1,5 @@
-grad-cam==1.4.6
-jupyter>=1.0.0
 neptune>=1.0.0, <1.10.1
 wandb>=0.15.4
 mlflow>=2.0.0
 clearml>=1.5.0
-transformers>=4.0
+grad-cam==1.4.6
diff --git a/docs/readme/installation.md b/docs/readme/installation.md
index 972141334..098b56ade 100644
--- a/docs/readme/installation.md
+++ b/docs/readme/installation.md
@@ -2,6 +2,11 @@
 pip install -U open-metric-learning
 ```
 
+If you need OML for NLP, install the extra requirements with:
+```shell
+pip install -U open-metric-learning[nlp]
+```
+
 DockerHub
 
 ```shell
diff --git a/oml/utils/misc_torch.py b/oml/utils/misc_torch.py
index 75009d3c3..c93b05e82 100644
--- a/oml/utils/misc_torch.py
+++ b/oml/utils/misc_torch.py
@@ -7,7 +7,7 @@
 import torch
 from torch import Tensor, cdist
 
-TSingleValues = Union[int, float, np.float_, np.int_, torch.Tensor]
+TSingleValues = Union[int, float, np.float64, np.int_, torch.Tensor]
 TSequenceValues = Union[List[float], Tuple[float, ...], np.ndarray, torch.Tensor]
 TOnlineValues = Union[TSingleValues, TSequenceValues]
 
@@ -101,7 +101,7 @@ def elementwise_dist(x1: Tensor, x2: Tensor, p: int = 2) -> Tensor:
     assert len(x1.shape) == len(x2.shape) == 2
     assert x1.shape == x2.shape
 
-    # we need an extra dim here to avoid pairwise behaviour of torch.cdist
+    # we need an extra dim here to avoid pairwise behavior of torch.cdist
     if len(x1.shape) == 2:
         x1 = x1.unsqueeze(1)
         x2 = x2.unsqueeze(1)
@@ -263,7 +263,7 @@ def calc_with_batch(self, val: TSequenceValues) -> None:
 
 class OnlineDict(MutableMapping):  # type: ignore
     """
-    We don't inherite from built-in 'dict' due to internal C optimization. We mimic to dict with MutableMapping
+    We don't inherit from built-in 'dict' due to internal C optimization. We mimic to dict with MutableMapping
     https://treyhunner.com/2019/04/why-you-shouldnt-inherit-from-list-and-dict-in-python/
 
     """
@@ -395,7 +395,7 @@ def __init__(self, embeddings: torch.Tensor):
 
     def _fit(self, embeddings: torch.Tensor) -> None:
         """
-        Perform the PCA. Evaluate ``components``, ``expoained_variance``, ``explained_variance_ratio``,
+        Perform the PCA. Evaluate ``components``, ``explained_variance``, ``explained_variance_ratio``,
         ``singular_values``, ``mean``.
 
         Args:
@@ -470,7 +470,7 @@ def calc_principal_axes_number(self, pcf_variance: Tuple[float, ...]) -> torch.T
        Let :math:`\\lambda_1, \\ldots, \\lambda_d\\in\\mathbb{R}` be a set of eigenvalues
        of the covariance matrix of :math:`X` sorted in descending order.
        Then for a given value of desired explained variance :math:`r`,
-       the number of principal components that explaines :math:`r\\cdot 100\\%%` variance is the largest integer
+       the number of principal components that explains :math:`r\\cdot 100\\%%` variance is the largest integer
        :math:`n` such that
 
        .. math::
diff --git a/setup.py b/setup.py
index e7ab6e9ac..a711d2073 100644
--- a/setup.py
+++ b/setup.py
@@ -18,12 +18,19 @@ def load_version() -> str:
         return re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', f.read(), re.M).group(1)
 
 
+NLP_REQUIRE = load_requirements("ci/requirements_nlp.txt")
+
+
 setup(
     # technical things
     version=load_version(),
     packages=find_packages(exclude=["ci", "docs", "pipelines", "tests*"]),
     python_requires=">=3.8,<4.0",
     install_requires=load_requirements("ci/requirements.txt"),
+    extras_require={
+        "nlp": NLP_REQUIRE,
+        "all": NLP_REQUIRE,  # later will be cv and audio
+    },
     include_package_data=True,
     long_description=Path("README.md").read_text(),
     long_description_content_type="text/markdown",
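
For context, the new `nlp` extra only pins `transformers>=4.0`; with the `extras_require` entry above it can be installed via `pip install -e ".[nlp]"` from a checkout, or `pip install "open-metric-learning[nlp]"` from PyPI. Below is a minimal, hypothetical sketch of what that dependency enables, i.e. producing text embeddings with the plain `transformers` API. The checkpoint name and the mean-pooling step are illustrative assumptions and are not OML's own NLP wrappers.

```python
# Hypothetical usage sketch for the new `nlp` extra (transformers>=4.0).
# The checkpoint and mean pooling are illustrative assumptions, not OML's own text wrappers.
import torch
from transformers import AutoModel, AutoTokenizer

model_name = "bert-base-uncased"  # example checkpoint, not mandated by this PR
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name).eval()

texts = ["red summer dress", "blue denim jeans"]
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")

with torch.no_grad():
    hidden = model(**inputs).last_hidden_state     # [batch, seq_len, dim]
    mask = inputs["attention_mask"].unsqueeze(-1)  # [batch, seq_len, 1]
    # mean-pool token states over non-padding positions -> [batch, dim]
    embeddings = (hidden * mask).sum(dim=1) / mask.sum(dim=1)

print(embeddings.shape)  # torch.Size([2, 768]) for a BERT-base checkpoint
```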