Skip to content

Commit c13576a

Browse files
chore: remove batch_size
1 parent 06f1829 commit c13576a

3 files changed

Lines changed: 9 additions & 21 deletions

File tree

fastembed/late_interaction_multimodal/colpali.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,6 @@ def embed_image(
302302
def get_image_mask(
303303
self,
304304
images: ImageInput | Iterable[ImageInput],
305-
batch_size: int = 16,
306305
**kwargs: Any,
307306
) -> list[NumpyArray]:
308307
"""
@@ -314,8 +313,7 @@ def get_image_mask(
314313
315314
Args:
316315
images: Single image or iterable of images
317-
batch_size: Batch size for processing
318-
**kwargs: Additional processing arguments
316+
**kwargs: Additional keyword arguments (reserved for future use)
319317
320318
Returns:
321319
List of binary masks (dtype=bool) where True = image token (ID 257152), False = other tokens.
@@ -326,17 +324,11 @@ def get_image_mask(
326324
is_single = isinstance(images, (str, bytes, Path)) or hasattr(images, "read")
327325
images_to_process: Iterable[ImageInput] = [images] if is_single else images # type: ignore[assignment, list-item]
328326

329-
# Process images in batches to get input_ids
327+
# Generate masks: every image gets the same mask because the tokenization pattern is fixed
330328
masks: list[NumpyArray] = []
331-
images_list = list(images_to_process)
332-
for batch_start in range(0, len(images_list), batch_size):
333-
batch = images_list[batch_start : batch_start + batch_size]
334-
335-
# For ColPali images, input_ids follow EMPTY_TEXT_PLACEHOLDER pattern
336-
# Generate mask: True for image tokens (ID 257152), False for others
337-
for _ in batch:
338-
mask: NumpyArray = self.EMPTY_TEXT_PLACEHOLDER == self.IMAGE_TOKEN_ID
339-
masks.append(mask)
329+
for _ in images_to_process:
330+
mask: NumpyArray = self.EMPTY_TEXT_PLACEHOLDER == self.IMAGE_TOKEN_ID
331+
masks.append(mask)
340332

341333
return masks
342334

fastembed/late_interaction_multimodal/late_interaction_multimodal_embedding.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,6 @@ def token_count(
187187
def get_image_mask(
188188
self,
189189
images: ImageInput | Iterable[ImageInput],
190-
batch_size: int = 16,
191190
**kwargs: Any,
192191
) -> list[NumpyArray]:
193192
"""
@@ -198,8 +197,7 @@ def get_image_mask(
198197
199198
Args:
200199
images: Single image or iterable of images (file paths, bytes, or PIL Image objects)
201-
batch_size: Number of images to process in each batch. Defaults to 16.
202-
**kwargs: Additional keyword arguments for image processing.
200+
**kwargs: Additional keyword arguments (reserved for future use)
203201
204202
Returns:
205203
List of binary masks (numpy arrays with dtype=bool), one per image. Each mask has shape (sequence_length,)
@@ -217,4 +215,4 @@ def get_image_mask(
217215
# First 1024 values are True (image tokens), last 6 are False (text tokens)
218216
```
219217
"""
220-
return self.model.get_image_mask(images, batch_size, **kwargs)
218+
return self.model.get_image_mask(images, **kwargs)

fastembed/late_interaction_multimodal/late_interaction_multimodal_embedding_base.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ def token_count(
8888
def get_image_mask(
8989
self,
9090
images: ImageInput | Iterable[ImageInput],
91-
batch_size: int = 16,
9291
**kwargs: Any,
9392
) -> list[NumpyArray]:
9493
"""
@@ -99,8 +98,7 @@ def get_image_mask(
9998
10099
Args:
101100
images: Single image or iterable of images (file paths, bytes, or PIL Image objects)
102-
batch_size: Number of images to process in each batch. Defaults to 16.
103-
**kwargs: Additional keyword arguments for image processing.
101+
**kwargs: Additional keyword arguments (reserved for future use)
104102
105103
Returns:
106104
List of binary masks (numpy arrays with dtype=bool), one per image. Each mask has shape (sequence_length,)
@@ -112,7 +110,7 @@ def get_image_mask(
112110
113111
Example:
114112
```python
115-
model = ColPali.load("Qdrant/colpali-v1.3-fp16")
113+
model = ColPali(model_name="Qdrant/colpali-v1.3-fp16")
116114
masks = model.get_image_mask(["image1.jpg", "image2.jpg"])
117115
# masks[0] is a numpy array of shape (1030,) with dtype=bool for ColPali
118116
# First 1024 values are True (image tokens), last 6 are False (text tokens)

0 commit comments

Comments
 (0)