Skip to content

Commit 2b3cb1e

Browse files
add to readme, add some marqo integration tests
1 parent 1aef642 commit 2b3cb1e

File tree

8 files changed

+204
-20
lines changed

8 files changed

+204
-20
lines changed

.github/workflows/deploy_docs.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ jobs:
2525
run: |
2626
python -m pip install --upgrade pip
2727
pip install -r requirements.txt
28-
pip install -r requirements.docs.txt
2928
3029
- name: Build documentation
3130
run: |

.github/workflows/publish.yaml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
name: Publish Python Package
2+
3+
on:
4+
push:
5+
tags:
6+
- 'v*.*.*'
7+
8+
jobs:
9+
publish:
10+
runs-on: ubuntu-latest
11+
12+
steps:
13+
- name: Check out code
14+
uses: actions/checkout@v3
15+
16+
- name: Set up Python
17+
uses: actions/setup-python@v4
18+
with:
19+
python-version: '3.x'
20+
21+
- name: Install dependencies
22+
run: pip install build twine
23+
24+
- name: Build package
25+
run: python -m build
26+
27+
- name: Publish to PyPI
28+
env:
29+
TWINE_USERNAME: __token__
30+
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
31+
run: twine upload dist/*

README.md

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ To get started, you need to run the Marqo container:
1616
docker run --name marqo -it -p 8882:8882 marqoai/marqo:latest
1717
```
1818

19+
Install the required dependencies:
20+
```bash
21+
pip install -r requirements.txt
22+
```
23+
1924
Then, install the marqo-instantapi package:
2025
```bash
2126
pip install .
@@ -32,20 +37,35 @@ INSTANTAPI_KEY=your_instantapi_key
3237
Then, run the example script:
3338

3439
```bash
35-
pip install python-dotenv
3640
python example.py
3741
```
3842

3943
## Creating documentation
4044

41-
To create documentation for the package, install the required packages:
45+
Run the following command to build the documentation locally:
4246

4347
```bash
44-
pip install -r requirements.docs.txt
48+
sphinx-build -b html docs/source docs/build
4549
```
4650

47-
Then, run the following command:
51+
## Running tests
52+
53+
To run tests use pytest:
4854

4955
```bash
50-
sphinx-build -b html docs/source docs/build
56+
python -m pytest
57+
```
58+
59+
To run the integration tests as well, add the `--integration` flag:
60+
61+
```bash
62+
python -m pytest --integration
63+
```
64+
65+
## Formatting code
66+
67+
We use the `black` code formatter. To format the code run:
68+
69+
```bash
70+
black .
5171
```

requirements.docs.txt

Lines changed: 0 additions & 3 deletions
This file was deleted.

requirements.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
11
marqo==3.7.0
22
requests
3-
tldextract==5.1.2
3+
tldextract==5.1.2
4+
python-dotenv
5+
pytest
6+
sphinx
7+
sphinx-rtd-theme
8+
sphinx-autodoc-typehints
9+
black

src/marqo_instantapi/marqo_instantapi_adapter.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ def __init__(
3737
"filterStringMaxLength": 50,
3838
}
3939

40+
self.default_text_model = "hf/snowflake-arctic-embed-m-v1.5"
41+
self.default_image_model = "open_clip/ViT-B-16-SigLIP-512/webli"
42+
4043
def create_index(
4144
self,
4245
index_name: str,
@@ -63,9 +66,9 @@ def create_index(
6366

6467
if model is None:
6568
if multimodal:
66-
settings["model"] = "open_clip/ViT-B-32/laion2b_s34b_b79k"
69+
settings["model"] = self.default_image_model
6770
else:
68-
settings["model"] = "hf/e5-base-v2"
71+
settings["model"] = self.default_text_model
6972

7073
settings["treatUrlsAndPointersAsImages"] = multimodal
7174

@@ -81,12 +84,15 @@ def create_index(
8184
self.mq.index(index_name).search(q="")
8285
return response
8386

84-
def delete_index(self, index_name: str, confirm: bool = False) -> dict:
87+
def delete_index(
88+
self, index_name: str, confirm: bool = False, skip_if_not_exists: bool = False
89+
) -> dict:
8590
"""Delete a Marqo index.
8691
8792
Args:
8893
index_name (str): The name of the index to delete.
8994
confirm (bool, optional): Automatically confirms the deletion. Defaults to False.
95+
skip_if_not_exists (bool, optional): Skip deletion if the index does not exist. Defaults to False.
9096
9197
Returns:
9298
dict: The deletion response.
@@ -100,7 +106,14 @@ def delete_index(self, index_name: str, confirm: bool = False) -> dict:
100106
if choice == "n":
101107
return {"message": "Deletion cancelled."}
102108

103-
response = self.mq.delete_index(index_name)
109+
if not self._check_index_exists(index_name) and skip_if_not_exists:
110+
return {
111+
"acknowledged": True,
112+
"index": index_name,
113+
"message": "Index does not exist, skipping deletion.",
114+
}
115+
116+
response = self.mq.index(index_name).delete()
104117
return response
105118

106119
def _extract_page_data(

tests/test_integration.py

Lines changed: 0 additions & 6 deletions
This file was deleted.

tests/test_marqo_integration.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import pytest
2+
import marqo
3+
import hashlib
4+
from marqo_instantapi import InstantAPIMarqoAdapter
5+
6+
7+
@pytest.fixture
8+
def adapter():
9+
return InstantAPIMarqoAdapter()
10+
11+
12+
@pytest.fixture
13+
def mq():
14+
return marqo.Client()
15+
16+
17+
@pytest.mark.integration
18+
def test_create_index(adapter: InstantAPIMarqoAdapter):
19+
adapter.delete_index("example-index", confirm=True, skip_if_not_exists=True)
20+
response = adapter.create_index(
21+
"example-index", multimodal=True, skip_if_exists=True
22+
)
23+
# Expected: {"acknowledged": true, "index": "example-index"}
24+
25+
assert response["acknowledged"] is True
26+
assert response["index"] == "example-index"
27+
28+
29+
@pytest.mark.integration
30+
def test_error_if_exists(adapter: InstantAPIMarqoAdapter):
31+
adapter.create_index("example-index", multimodal=True, skip_if_exists=True)
32+
with pytest.raises(Exception):
33+
adapter.create_index("example-index", multimodal=True, skip_if_exists=False)
34+
35+
36+
@pytest.mark.integration
37+
def test_error_if_not_exists(adapter: InstantAPIMarqoAdapter):
38+
adapter.delete_index("example-index", confirm=True, skip_if_not_exists=True)
39+
with pytest.raises(Exception):
40+
response = adapter.delete_index(
41+
"example-index", confirm=True, skip_if_not_exists=False
42+
)
43+
print(response)
44+
45+
46+
@pytest.mark.integration
47+
def test_check_exists(adapter: InstantAPIMarqoAdapter):
48+
adapter.create_index("example-index", multimodal=True, skip_if_exists=True)
49+
assert adapter._check_index_exists("example-index") is True
50+
adapter.delete_index("example-index", confirm=True, skip_if_not_exists=True)
51+
assert adapter._check_index_exists("example-index") is False
52+
53+
54+
@pytest.mark.integration
55+
def test_check_modality(adapter: InstantAPIMarqoAdapter):
56+
adapter.delete_index("example-index", confirm=True, skip_if_not_exists=True)
57+
adapter.create_index("example-index", multimodal=True)
58+
assert adapter._check_index_can_use_images("example-index") is True
59+
adapter.delete_index("example-index", confirm=True)
60+
61+
adapter.create_index("example-index", multimodal=False)
62+
assert adapter._check_index_can_use_images("example-index") is False
63+
adapter.delete_index("example-index", confirm=True)
64+
65+
66+
@pytest.mark.integration
67+
def test_search_index(adapter: InstantAPIMarqoAdapter, mq: marqo.Client):
68+
adapter.delete_index("example-index", confirm=True, skip_if_not_exists=True)
69+
adapter.create_index("example-index", multimodal=False)
70+
71+
response = mq.index("example-index").add_documents(
72+
[
73+
{"title": "Hello, World!", "content": "This is a test document."},
74+
{"title": "Goodbye, World!", "content": "This is another test document."},
75+
],
76+
tensor_fields=["title"],
77+
)
78+
79+
assert not response["errors"]
80+
81+
search_results = adapter.search(
82+
q="hello",
83+
index_name="example-index",
84+
)
85+
86+
assert len(search_results["hits"]) == 2
87+
assert search_results["hits"][0]["title"] == "Hello, World!"
88+
assert search_results["hits"][1]["title"] == "Goodbye, World!"
89+
90+
adapter.delete_index("example-index", confirm=True)
91+
92+
93+
@pytest.mark.integration
def test_add_documents(adapter: InstantAPIMarqoAdapter):
    """Index a single stubbed page and check the returned document's URL hash.

    The adapter's ``_extract_page_data`` is replaced with a stub that returns
    a fixed title/content dict, so no real page extraction is performed.

    Note: this function was previously named ``text_add_documents``; with
    pytest's default ``test_*`` discovery pattern it was never collected.
    """
    import types

    def patched_extract_page_data(
        self, webpage_url, api_method_name, api_response_structure
    ):
        return {"title": "Hello, World!", "content": "This is a test document."}

    # A plain function stored on an instance is NOT bound automatically, so
    # the declared ``self`` would swallow the first real argument. Bind it
    # explicitly so the stub is called like the method it replaces.
    adapter._extract_page_data = types.MethodType(patched_extract_page_data, adapter)

    # Start from a clean, text-only index.
    adapter.delete_index("example-index", confirm=True, skip_if_not_exists=True)
    adapter.create_index("example-index", multimodal=False)

    schema = {
        "title": "the title of the page",
        "content": "text content summarising the page",
    }

    response = adapter.add_documents(
        webpage_urls=["https://example.com"],
        index_name="example-index",
        api_response_structure=schema,
        api_method_name="getPageSummary",
        text_fields_to_index=["title", "content"],
    )

    # The test expects each returned document to carry the MD5 of its URL
    # under the "url_md5" key.
    url_md5 = hashlib.md5("https://example.com".encode()).hexdigest()

    response_ids = [doc["url_md5"] for doc in response]

    assert len(response_ids) == 1
    assert response_ids[0] == url_md5

0 commit comments

Comments
 (0)