Skip to content

Commit 2440a5e

Browse files
authored
chore: PyPDFToDocument - deprecate converter init parameter (#8569)
* deprecate converter in pypdf * fix linting of MetaFieldGroupingRanker * linting
1 parent 3d95e06 commit 2440a5e

File tree

2 files changed

+20
-3
lines changed

2 files changed

+20
-3
lines changed

Diff for: haystack/components/converters/pypdf.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
import io
6+
import warnings
67
from pathlib import Path
78
from typing import Any, Dict, List, Optional, Protocol, Union
89

@@ -22,6 +23,9 @@
2223
class PyPDFConverter(Protocol):
2324
"""
2425
A protocol that defines a converter which takes a PdfReader object and converts it into a Document object.
26+
27+
This is deprecated and will be removed in Haystack 2.9.0.
28+
For in-depth customization of the conversion process, consider implementing a custom component.
2529
"""
2630

2731
def convert(self, reader: "PdfReader") -> Document: # noqa: D102
@@ -40,8 +44,7 @@ class PyPDFToDocument:
4044
"""
4145
Converts PDF files to documents your pipeline can query.
4246
43-
This component uses converters compatible with the PyPDF library.
44-
If no converter is provided, uses a default text extraction converter.
47+
This component uses the PyPDF library.
4548
You can attach metadata to the resulting documents.
4649
4750
### Usage example
@@ -62,10 +65,18 @@ def __init__(self, converter: Optional[PyPDFConverter] = None):
6265
Create a PyPDFToDocument component.
6366
6467
:param converter:
65-
An instance of a PyPDFConverter compatible class.
68+
An instance of a PyPDFConverter compatible class. This is deprecated and will be removed in Haystack 2.9.0.
69+
For in-depth customization of the conversion process, consider implementing a custom component.
6670
"""
6771
pypdf_import.check()
6872

73+
if converter is not None:
74+
msg = (
75+
"The `converter` parameter is deprecated and will be removed in Haystack 2.9.0. "
76+
"For in-depth customization of the conversion process, consider implementing a custom component."
77+
)
78+
warnings.warn(msg, DeprecationWarning)
79+
6980
self.converter = converter
7081

7182
def to_dict(self):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
deprecations:
3+
- |
4+
The `converter` parameter in the `PyPDFToDocument` component is deprecated and will be removed in Haystack 2.9.0.
5+
For in-depth customization of the conversion process, consider implementing a custom component.
6+
Additional high-level customization options will be added in the future.

0 commit comments

Comments
 (0)