Skip to content

Commit 45c01c1

Browse files
committed
Remove heavyweight dependencies and compute text cosine similarity in pure Python
1 parent 4ab3082 commit 45c01c1

File tree

4 files changed

+28
-27
lines changed

4 files changed

+28
-27
lines changed

.github/workflows/lint.yml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ jobs:
2222
django-version:
2323
- '3.2' # LTS April 2024
2424
- '4.2' # LTS April 2026
25-
- '5.0' # April 2025
2625
- '5.1' # December 2025
2726
exclude:
2827
- python-version: '3.8'
@@ -33,10 +32,6 @@ jobs:
3332
django-version: '3.2'
3433
- python-version: '3.10'
3534
django-version: '3.2'
36-
- python-version: '3.8'
37-
django-version: '5.0'
38-
- python-version: '3.10'
39-
django-version: '5.0'
4035
- python-version: '3.8'
4136
django-version: '5.1'
4237
- python-version: '3.10'

.github/workflows/test.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,6 @@ jobs:
5757
with:
5858
virtualenvs-create: true
5959
virtualenvs-in-project: true
60-
- name: Install libopenblas-dev
61-
if: matrix.python-version == '3.13'
62-
run: sudo apt-get install libopenblas-dev
6360
- name: Install Release Dependencies
6461
run: |
6562
poetry config virtualenvs.in-project true

pyproject.toml

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -94,19 +94,7 @@ doc8 = ">=1.1.1"
9494
aiohttp = ">=3.9.1"
9595
readme-renderer = {extras = ["md"], version = ">=42"}
9696
sphinxcontrib-typer = {extras = ["html", "pdf", "png"], version = ">=0.5.0", markers="python_version >= '3.9'"}
97-
scikit-learn = [
98-
{ version = ">=1.5", markers = "python_version > '3.8'" },
99-
{ version = ">=1.0", markers = "python_version <= '3.8'" },
100-
]
10197
pytest-env = ">=1.0.0"
102-
numpy = [
103-
{ version = ">=1.26", markers = "python_version > '3.8'" },
104-
{ version = "<=1.24", markers = "python_version <= '3.8'" },
105-
]
106-
scipy = [
107-
{ version = ">=1.11", markers = "python_version > '3.8'" },
108-
{ version = "<=1.10", markers = "python_version <= '3.8'" },
109-
]
11098
django-stubs = ">=4.2.7"
11199
pexpect = ">=4.9.0"
112100
pyright = ">=1.1.357"

tests/utils.py

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@
55
import sys
66
from pathlib import Path
77
from typing import Tuple
8-
8+
import re
9+
import math
10+
import re
11+
from collections import Counter
912
import pexpect
1013
from django.core.management.color import no_style
11-
from sklearn.feature_extraction.text import TfidfVectorizer
12-
from sklearn.metrics.pairwise import cosine_similarity
1314

1415
try:
1516
import rich
@@ -22,6 +23,26 @@
2223
TESTS_DIR = Path(__file__).parent
2324
DJANGO_PARAMETER_LOG_FILE = TESTS_DIR / "dj_params.json"
2425
manage_py = TESTS_DIR.parent / "manage.py"
26+
# Matches maximal runs of word characters (``\w+``); text_to_vector uses it
# to split text into the word tokens that get counted.
WORD = re.compile(r"\w+")
27+
28+
29+
def get_cosine(vec1, vec2):
    """
    Compute the cosine similarity between two sparse term vectors.

    :param vec1: mapping of term -> numeric weight (e.g. a ``Counter``)
    :param vec2: mapping of term -> numeric weight (e.g. a ``Counter``)
    :return: the dot product of the two vectors divided by the product of
        their Euclidean norms, as a float. Returns 0.0 when either vector
        is empty or has zero magnitude.
    """
    # Only terms present in both vectors contribute to the dot product.
    shared_terms = vec1.keys() & vec2.keys()
    numerator = sum(vec1[term] * vec2[term] for term in shared_terms)

    sum1 = sum(weight * weight for weight in vec1.values())
    sum2 = sum(weight * weight for weight in vec2.values())

    # Take a single square root of the product rather than multiplying two
    # separately rounded roots: sqrt(3) * sqrt(3) is 2.9999999999999996, which
    # made identical vectors score slightly above 1.0.
    denominator = math.sqrt(sum1 * sum2)

    if not denominator:
        return 0.0
    return numerator / denominator
41+
42+
43+
def text_to_vector(text):
    """
    Turn text into a bag-of-words vector.

    :param text: the string to tokenize
    :return: a ``Counter`` mapping each token matched by the module-level
        ``WORD`` pattern to the number of times it occurs in ``text``
    """
    return Counter(WORD.findall(text))
2546

2647

2748
def similarity(text1, text2):
@@ -31,10 +52,10 @@ def similarity(text1, text2):
3152
3253
We use this to lazily evaluate the output of --help to our
3354
renderings.
34-
"""
35-
vectorizer = TfidfVectorizer()
36-
tfidf_matrix = vectorizer.fit_transform([text1, text2])
37-
return cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
55+
#"""
56+
vector1 = text_to_vector(text1)
57+
vector2 = text_to_vector(text2)
58+
return get_cosine(vector1, vector2)
3859

3960

4061
def get_named_arguments(function):

0 commit comments

Comments
 (0)