chore: remove no-op batch_size parameter from get_image_mask

kacperlukawski · kacperlukawski · commit c13576a51a76 · 2025-12-15T13:06:24.000+01:00
diff --git a/fastembed/late_interaction_multimodal/colpali.py b/fastembed/late_interaction_multimodal/colpali.py
@@ -302,7 +302,6 @@ def embed_image(
     def get_image_mask(
         self,
         images: ImageInput | Iterable[ImageInput],
-        batch_size: int = 16,
         **kwargs: Any,
     ) -> list[NumpyArray]:
         """
@@ -314,8 +313,7 @@ def get_image_mask(
 
         Args:
             images: Single image or iterable of images
-            batch_size: Batch size for processing
-            **kwargs: Additional processing arguments
+            **kwargs: Additional processing arguments (reserved for future use)
 
         Returns:
             List of binary masks (dtype=bool) where True = image token (ID 257152), False = other tokens.
@@ -326,17 +324,11 @@ def get_image_mask(
         is_single = isinstance(images, (str, bytes, Path)) or hasattr(images, "read")
         images_to_process: Iterable[ImageInput] = [images] if is_single else images  # type: ignore[assignment, list-item]
 
-        # Process images in batches to get input_ids
+        # Masks are image-independent: True where EMPTY_TEXT_PLACEHOLDER == IMAGE_TOKEN_ID
         masks: list[NumpyArray] = []
-        images_list = list(images_to_process)
-        for batch_start in range(0, len(images_list), batch_size):
-            batch = images_list[batch_start : batch_start + batch_size]
-
-            # For ColPali images, input_ids follow EMPTY_TEXT_PLACEHOLDER pattern
-            # Generate mask: True for image tokens (ID 257152), False for others
-            for _ in batch:
-                mask: NumpyArray = self.EMPTY_TEXT_PLACEHOLDER == self.IMAGE_TOKEN_ID
-                masks.append(mask)
+        for _ in images_to_process:
+            mask: NumpyArray = self.EMPTY_TEXT_PLACEHOLDER == self.IMAGE_TOKEN_ID
+            masks.append(mask)
 
         return masks
 
diff --git a/fastembed/late_interaction_multimodal/late_interaction_multimodal_embedding.py b/fastembed/late_interaction_multimodal/late_interaction_multimodal_embedding.py
@@ -187,7 +187,6 @@ def token_count(
     def get_image_mask(
         self,
         images: ImageInput | Iterable[ImageInput],
-        batch_size: int = 16,
         **kwargs: Any,
     ) -> list[NumpyArray]:
         """
@@ -198,8 +197,7 @@ def get_image_mask(
 
         Args:
             images: Single image or iterable of images (file paths, bytes, or PIL Image objects)
-            batch_size: Number of images to process in each batch. Defaults to 16.
-            **kwargs: Additional keyword arguments for image processing.
+            **kwargs: Additional keyword arguments (reserved for future use)
 
         Returns:
             List of binary masks (numpy arrays with dtype=bool), one per image. Each mask has shape (sequence_length,)
@@ -217,4 +215,4 @@ def get_image_mask(
             # First 1024 values are True (image tokens), last 6 are False (text tokens)
             ```
         """
-        return self.model.get_image_mask(images, batch_size, **kwargs)
+        return self.model.get_image_mask(images, **kwargs)
diff --git a/fastembed/late_interaction_multimodal/late_interaction_multimodal_embedding_base.py b/fastembed/late_interaction_multimodal/late_interaction_multimodal_embedding_base.py
@@ -88,7 +88,6 @@ def token_count(
     def get_image_mask(
         self,
         images: ImageInput | Iterable[ImageInput],
-        batch_size: int = 16,
         **kwargs: Any,
     ) -> list[NumpyArray]:
         """
@@ -99,8 +98,7 @@ def get_image_mask(
 
         Args:
             images: Single image or iterable of images (file paths, bytes, or PIL Image objects)
-            batch_size: Number of images to process in each batch. Defaults to 16.
-            **kwargs: Additional keyword arguments for image processing.
+            **kwargs: Additional keyword arguments (reserved for future use)
 
         Returns:
             List of binary masks (numpy arrays with dtype=bool), one per image. Each mask has shape (sequence_length,)
@@ -112,7 +110,7 @@ def get_image_mask(
 
         Example:
             ```python
-            model = ColPali.load("Qdrant/colpali-v1.3-fp16")
+            model = ColPali(model_name="Qdrant/colpali-v1.3-fp16")
             masks = model.get_image_mask(["image1.jpg", "image2.jpg"])
             # masks[0] is a numpy array of shape (1030,) with dtype=bool for ColPali
             # First 1024 values are True (image tokens), last 6 are False (text tokens)