3
3
# SPDX-License-Identifier: Apache-2.0
4
4
5
5
import io
6
+ import warnings
6
7
from pathlib import Path
7
8
from typing import Any , Dict , List , Optional , Protocol , Union
8
9
22
23
class PyPDFConverter (Protocol ):
23
24
"""
24
25
A protocol that defines a converter which takes a PdfReader object and converts it into a Document object.
26
+
27
+ This is deprecated and will be removed in Haystack 2.9.0.
28
+ For in-depth customization of the conversion process, consider implementing a custom component.
25
29
"""
26
30
27
31
def convert (self , reader : "PdfReader" ) -> Document : # noqa: D102
@@ -40,8 +44,7 @@ class PyPDFToDocument:
40
44
"""
41
45
Converts PDF files to documents your pipeline can query.
42
46
43
- This component uses converters compatible with the PyPDF library.
44
- If no converter is provided, uses a default text extraction converter.
47
+ This component uses the PyPDF library.
45
48
You can attach metadata to the resulting documents.
46
49
47
50
### Usage example
@@ -62,10 +65,18 @@ def __init__(self, converter: Optional[PyPDFConverter] = None):
62
65
Create a PyPDFToDocument component.
63
66
64
67
:param converter:
65
- An instance of a PyPDFConverter compatible class.
68
+ An instance of a PyPDFConverter compatible class. This is deprecated and will be removed in Haystack 2.9.0.
69
+ For in-depth customization of the conversion process, consider implementing a custom component.
66
70
"""
67
71
pypdf_import .check ()
68
72
73
+ if converter is not None :
74
+ msg = (
75
+ "The `converter` parameter is deprecated and will be removed in Haystack 2.9.0. "
76
+ "For in-depth customization of the conversion process, consider implementing a custom component."
77
+ )
78
+ warnings .warn (msg , DeprecationWarning )
79
+
69
80
self .converter = converter
70
81
71
82
def to_dict (self ):
0 commit comments