Skip to content

Commit e7bfd80

Browse files
authored
fix: (Temporarily) Re-add support for pre-2.6.0 YAMLs with PyPDFConverter (#8443)
1 parent 3b9a60b commit e7bfd80

File tree

3 files changed

+21
-2
lines changed

3 files changed

+21
-2
lines changed

haystack/components/converters/pypdf.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from haystack.dataclasses import ByteStream
1313
from haystack.lazy_imports import LazyImport
1414
from haystack.utils.base_serialization import deserialize_class_instance, serialize_class_instance
15+
from haystack.utils.type_serialization import deserialize_type
1516

1617
with LazyImport("Run 'pip install pypdf'") as pypdf_import:
1718
from pypdf import PdfReader
@@ -118,7 +119,12 @@ def from_dict(cls, data):
118119
"""
119120
custom_converter_data = data["init_parameters"]["converter"]
120121
if custom_converter_data is not None:
121-
data["init_parameters"]["converter"] = deserialize_class_instance(custom_converter_data)
122+
if "data" in custom_converter_data:
123+
data["init_parameters"]["converter"] = deserialize_class_instance(custom_converter_data)
124+
else:
125+
# TODO: Remove in 2.7.0
126+
converter_class = deserialize_type(custom_converter_data["type"])
127+
data["init_parameters"]["converter"] = converter_class.from_dict(custom_converter_data)
122128
return default_from_dict(cls, data)
123129

124130
def _default_convert(self, reader: "PdfReader") -> Document:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
fixes:
3+
- |
4+
Revert a change to PyPDFConverter that broke the deserialization of pre-2.6.0 YAMLs.

test/components/converters/test_pypdf_to_document.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import pytest
88

99
from haystack import Document, default_from_dict, default_to_dict
10-
from haystack.components.converters.pypdf import PyPDFToDocument
10+
from haystack.components.converters.pypdf import PyPDFToDocument, DefaultConverter
1111
from haystack.dataclasses import ByteStream
1212

1313

@@ -79,6 +79,15 @@ def test_from_dict_custom_converter(self):
7979
assert isinstance(instance, PyPDFToDocument)
8080
assert isinstance(instance.converter, CustomConverter)
8181

82+
def test_from_dict_pre_2_6_0(self):
83+
data = {
84+
"type": "haystack.components.converters.pypdf.PyPDFToDocument",
85+
"init_parameters": {"converter": {"type": "haystack.components.converters.pypdf.DefaultConverter"}},
86+
}
87+
instance = PyPDFToDocument.from_dict(data)
88+
assert isinstance(instance, PyPDFToDocument)
89+
assert isinstance(instance.converter, DefaultConverter)
90+
8291
@pytest.mark.integration
8392
def test_run(self, test_files_path, pypdf_converter):
8493
"""

0 commit comments

Comments
 (0)