Skip to content

Commit ced4d90

Browse files
authored
Allow deleting vectors with prefix or filter (#43)
* Allow deleting vectors with prefix or filter: we have started supporting deleting vectors by id prefix or by metadata filter. * Fix type hints.
1 parent 0cb004b commit ced4d90

File tree

3 files changed

+179
-12
lines changed

3 files changed

+179
-12
lines changed

README.md

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -404,9 +404,8 @@ res = index.delete(
404404
ids=["id1", "id2"],
405405
)
406406

407-
print(
408-
res.deleted, # How many vectors are deleted out of the given ids.
409-
)
407+
# How many vectors are deleted out of the given ids.
408+
print(res.deleted)
410409
```
411410

412411
or, for singular deletion:
@@ -416,7 +415,23 @@ res = index.delete(
416415
"id1",
417416
)
418417

419-
print(res) # A boolean indicating whether the vector is deleted or not.
418+
# 1 if the vector is deleted, 0 otherwise.
419+
print(res.deleted)
420+
```
421+
422+
Apart from the vector ids, vectors can also be deleted with an id prefix
423+
or metadata filter.
424+
425+
```python
426+
# Delete all the vectors whose id starts with `id-0`
427+
index.delete(
428+
prefix="id-0",
429+
)
430+
431+
# Delete all the vectors whose metadata matches the filter
432+
index.delete(
433+
filter="salary < 3000",
434+
)
420435
```
421436

422437
Also, a namespace can be specified to delete from.

tests/core/test_delete.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,3 +137,125 @@ async def test_delete_async(async_index: AsyncIndex, ns: str):
137137
assert res[0] is None
138138
assert res[1] is None
139139
assert res[2] is not None
140+
141+
142+
@pytest.mark.parametrize("ns", NAMESPACES)
143+
def test_delete_prefix(index: Index, ns: str):
144+
index.upsert(
145+
vectors=[
146+
("id-00", [0.1, 0.2]),
147+
("id-01", [0.1, 0.3]),
148+
("id-10", [0.1, 0.4]),
149+
("id-11", [0.1, 0.5]),
150+
],
151+
namespace=ns,
152+
)
153+
154+
result = index.delete(
155+
prefix="id-0",
156+
namespace=ns,
157+
)
158+
159+
assert result.deleted == 2
160+
161+
vectors = index.fetch(
162+
ids=["id-00", "id-01", "id-10", "id-11"],
163+
namespace=ns,
164+
)
165+
assert len(vectors) == 4
166+
assert vectors[0] is None
167+
assert vectors[1] is None
168+
assert vectors[2] is not None
169+
assert vectors[3] is not None
170+
171+
172+
@pytest.mark.parametrize("ns", NAMESPACES)
173+
def test_delete_filter(index: Index, ns: str):
174+
index.upsert(
175+
vectors=[
176+
("id-00", [0.1, 0.2], {"meta": 0}),
177+
("id-01", [0.1, 0.3], {"meta": 1}),
178+
("id-10", [0.1, 0.4], {"meta": 2}),
179+
("id-11", [0.1, 0.5], {"meta": 3}),
180+
],
181+
namespace=ns,
182+
)
183+
184+
result = index.delete(
185+
filter="meta >= 2",
186+
namespace=ns,
187+
)
188+
189+
assert result.deleted == 2
190+
191+
vectors = index.fetch(
192+
ids=["id-00", "id-01", "id-10", "id-11"],
193+
namespace=ns,
194+
)
195+
assert len(vectors) == 4
196+
assert vectors[0] is not None
197+
assert vectors[1] is not None
198+
assert vectors[2] is None
199+
assert vectors[3] is None
200+
201+
202+
@pytest.mark.asyncio
203+
@pytest.mark.parametrize("ns", NAMESPACES)
204+
async def test_delete_prefix_async(async_index: AsyncIndex, ns: str):
205+
await async_index.upsert(
206+
vectors=[
207+
("id-00", [0.1, 0.2]),
208+
("id-01", [0.1, 0.3]),
209+
("id-10", [0.1, 0.4]),
210+
("id-11", [0.1, 0.5]),
211+
],
212+
namespace=ns,
213+
)
214+
215+
result = await async_index.delete(
216+
prefix="id-0",
217+
namespace=ns,
218+
)
219+
220+
assert result.deleted == 2
221+
222+
vectors = await async_index.fetch(
223+
ids=["id-00", "id-01", "id-10", "id-11"],
224+
namespace=ns,
225+
)
226+
assert len(vectors) == 4
227+
assert vectors[0] is None
228+
assert vectors[1] is None
229+
assert vectors[2] is not None
230+
assert vectors[3] is not None
231+
232+
233+
@pytest.mark.asyncio
234+
@pytest.mark.parametrize("ns", NAMESPACES)
235+
async def test_delete_filter_async(async_index: AsyncIndex, ns: str):
236+
await async_index.upsert(
237+
vectors=[
238+
("id-00", [0.1, 0.2], {"meta": 0}),
239+
("id-01", [0.1, 0.3], {"meta": 1}),
240+
("id-10", [0.1, 0.4], {"meta": 2}),
241+
("id-11", [0.1, 0.5], {"meta": 3}),
242+
],
243+
namespace=ns,
244+
)
245+
246+
result = await async_index.delete(
247+
filter="meta >= 2",
248+
namespace=ns,
249+
)
250+
251+
assert result.deleted == 2
252+
253+
vectors = await async_index.fetch(
254+
ids=["id-00", "id-01", "id-10", "id-11"],
255+
namespace=ns,
256+
)
257+
assert len(vectors) == 4
258+
assert vectors[0] is not None
259+
assert vectors[1] is not None
260+
assert vectors[2] is None
261+
assert vectors[3] is None

upstash_vector/core/index_operations.py

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -415,8 +415,10 @@ def resumable_query(
415415

416416
def delete(
417417
self,
418-
ids: Union[str, List[str]],
418+
ids: Optional[Union[str, List[str]]] = None,
419419
namespace: str = DEFAULT_NAMESPACE,
420+
prefix: Optional[str] = None,
421+
filter: Optional[str] = None,
420422
) -> DeleteResult:
421423
"""
422424
Deletes the given vector(s) with given ids.
@@ -425,6 +427,8 @@ def delete(
425427
426428
:param ids: Singular or list of ids of vector(s) to be deleted.
427429
:param namespace: The namespace to use. When not specified, the default namespace is used.
430+
:param prefix: Prefix of vector ids to delete.
431+
:param filter: Metadata filter for the vectors to delete.
428432
429433
Example usage:
430434
@@ -436,11 +440,23 @@ def delete(
436440
index.delete("0")
437441
```
438442
"""
439-
if not isinstance(ids, list):
440-
ids = [ids]
443+
payload: Dict[str, Any] = {}
444+
if ids is not None:
445+
if not isinstance(ids, list):
446+
ids = [ids]
447+
448+
payload["ids"] = ids
449+
450+
if prefix is not None:
451+
payload["prefix"] = prefix
452+
453+
if filter is not None:
454+
payload["filter"] = filter
441455

442456
return DeleteResult._from_json(
443-
self._execute_request(payload=ids, path=_path_for(namespace, DELETE_PATH))
457+
self._execute_request(
458+
payload=payload, path=_path_for(namespace, DELETE_PATH)
459+
)
444460
)
445461

446462
def reset(self, namespace: str = DEFAULT_NAMESPACE, all: bool = False) -> str:
@@ -1000,8 +1016,10 @@ async def resumable_query(
10001016

10011017
async def delete(
10021018
self,
1003-
ids: Union[str, List[str]],
1019+
ids: Optional[Union[str, List[str]]] = None,
10041020
namespace: str = DEFAULT_NAMESPACE,
1021+
prefix: Optional[str] = None,
1022+
filter: Optional[str] = None,
10051023
) -> DeleteResult:
10061024
"""
10071025
Deletes the given vector(s) with given ids asynchronously.
@@ -1010,6 +1028,8 @@ async def delete(
10101028
10111029
:param ids: Singular or list of ids of vector(s) to be deleted.
10121030
:param namespace: The namespace to use. When not specified, the default namespace is used.
1031+
:param prefix: Prefix of vector ids to delete.
1032+
:param filter: Metadata filter for the vectors to delete.
10131033
10141034
Example usage:
10151035
@@ -1021,12 +1041,22 @@ async def delete(
10211041
await index.delete("0")
10221042
```
10231043
"""
1024-
if not isinstance(ids, list):
1025-
ids = [ids]
1044+
payload: Dict[str, Any] = {}
1045+
if ids is not None:
1046+
if not isinstance(ids, list):
1047+
ids = [ids]
1048+
1049+
payload["ids"] = ids
1050+
1051+
if prefix is not None:
1052+
payload["prefix"] = prefix
1053+
1054+
if filter is not None:
1055+
payload["filter"] = filter
10261056

10271057
return DeleteResult._from_json(
10281058
await self._execute_request_async(
1029-
payload=ids, path=_path_for(namespace, DELETE_PATH)
1059+
payload=payload, path=_path_for(namespace, DELETE_PATH)
10301060
)
10311061
)
10321062

0 commit comments

Comments
 (0)