Skip to content

Commit d234c75

Browse files
fix: make pypdf converter more robust (#8427)
* fix: make `from_dict` of `PyPDFToDocument` more robust
* chore: drop trailing space
* converting method to static and making the comment shorter
* reverting method to static

---------

Co-authored-by: David S. Batista <[email protected]>
1 parent 6512442 commit d234c75

File tree

3 files changed

+15
-2
lines changed

3 files changed

+15
-2
lines changed

Diff for: haystack/components/converters/pypdf.py

+4 -2
Original file line number | Diff line number | Diff line change
@@ -108,8 +108,10 @@ def from_dict(cls, data):
         :returns:
             Deserialized component.
         """
-        converter_class = deserialize_type(data["init_parameters"]["converter"]["type"])
-        data["init_parameters"]["converter"] = converter_class.from_dict(data["init_parameters"]["converter"])
+        # the converter default is `None`, check if it was defined before deserializing
+        if "converter" in data["init_parameters"]:
+            converter_class = deserialize_type(data["init_parameters"]["converter"]["type"])
+            data["init_parameters"]["converter"] = converter_class.from_dict(data["init_parameters"]["converter"])
         return default_from_dict(cls, data)
 
     @component.output_types(documents=List[Document])
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    Make the `from_dict` method of the `PyPDFToDocument` more robust to cases when the converter is
+    not provided in the dictionary.

Diff for: test/components/converters/test_pypdf_to_document.py

+6
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,12 @@ def test_from_dict(self):
         assert isinstance(instance, PyPDFToDocument)
         assert isinstance(instance.converter, DefaultConverter)
 
+    def test_from_dict_no_converter(self):
+        data = {"type": "haystack.components.converters.pypdf.PyPDFToDocument", "init_parameters": {}}
+        instance = PyPDFToDocument.from_dict(data)
+        assert isinstance(instance, PyPDFToDocument)
+        assert isinstance(instance.converter, DefaultConverter)
+
     @pytest.mark.integration
     def test_run(self, test_files_path, pypdf_converter):
         """

0 commit comments

Comments
 (0)