From 2f5a4267f89ab2c343e49e8ede0116a6ac9bed8a Mon Sep 17 00:00:00 2001 From: Tanvi Ranade Date: Tue, 30 Jul 2024 15:34:29 -0700 Subject: [PATCH] more updates to user None --- lib/sycamore/sycamore/data/document.py | 10 +++----- .../sycamore/query/execution/operations.py | 6 +++-- .../tests/unit/query/test_operations.py | 25 ------------------- 3 files changed, 7 insertions(+), 34 deletions(-) diff --git a/lib/sycamore/sycamore/data/document.py b/lib/sycamore/sycamore/data/document.py index 5efe773b5..a2a4fd6bc 100644 --- a/lib/sycamore/sycamore/data/document.py +++ b/lib/sycamore/sycamore/data/document.py @@ -224,15 +224,11 @@ def field_to_value(self, field: str) -> Any: Returns None if field does not exist in document. """ fields = field.split(".") - if hasattr(self, fields[0]): - value = getattr(self, fields[0]) - else: - return None + value = getattr(self, fields[0], None) if len(fields) > 1: - assert fields[0] == "properties" for f in fields[1:]: - if f in value: - value = value[f] + if isinstance(value, dict): + value = value.get(f, None) else: return None return value diff --git a/lib/sycamore/sycamore/query/execution/operations.py b/lib/sycamore/sycamore/query/execution/operations.py index ea709ee8f..cee81d393 100644 --- a/lib/sycamore/sycamore/query/execution/operations.py +++ b/lib/sycamore/sycamore/query/execution/operations.py @@ -363,7 +363,7 @@ def make_filter_fn_join(field: str, join_set: set) -> Callable[[Document], bool] """ def filter_fn_join(doc: Document) -> bool: - value = str(doc.field_to_value(field)) + value = doc.field_to_value(field) return value in join_set return filter_fn_join @@ -391,7 +391,7 @@ def join_operation(docset1: DocSet, docset2: DocSet, field1: str, field2: str) - doc = Document.from_row(row) if isinstance(doc, MetadataDocument): continue - value = str(doc.field_to_value(field1)) + value = doc.field_to_value(field1) unique_vals.add(value) # filters docset2 based on matches of field2 with unique values @@ -548,6 +548,8 @@ def ray_callable(input_dict: dict[str, Any]) -> dict[str, Any]: if unique_field is not None: val = str(doc.field_to_value(unique_field)) + if val is None: + return {"doc": None, "key": None, "unique": None} # updates row to include new col new_doc["unique"] = val diff --git a/lib/sycamore/sycamore/tests/unit/query/test_operations.py b/lib/sycamore/sycamore/tests/unit/query/test_operations.py index ca8da31b2..6ed701cb6 100644 --- a/lib/sycamore/sycamore/tests/unit/query/test_operations.py +++ b/lib/sycamore/sycamore/tests/unit/query/test_operations.py @@ -348,31 +348,6 @@ def test_semantic_cluster(self, number_docset): elif doc.text_representation == "3" or doc.text_representation == "three": assert doc.properties["_autogen_ClusterAssignment"] == "group3" - # Helpers - # def test_field_to_value(self): - # doc = Document( - # text_representation="hello", - # doc_id=1, - # properties={"letter": "A", "animal": "panda", "math": {"pi": 3.14, "e": 2.72, "tanx": "sinx/cosx"}}, - # ) - - # assert field_to_value(doc, "text_representation") == "hello" - # assert field_to_value(doc, "doc_id") == 1 - # assert field_to_value(doc, "properties.letter") == "A" - # assert field_to_value(doc, "properties.animal") == "panda" - # assert field_to_value(doc, "properties.math.pi") == 3.14 - # assert field_to_value(doc, "properties.math.e") == 2.72 - # assert field_to_value(doc, "properties.math.tanx") == "sinx/cosx" - - # with pytest.raises(KeyError): - # field_to_value(doc, "properties.math.log") - - # with pytest.raises(Exception): - # field_to_value(doc, "document_id") - - # with pytest.raises(AssertionError): - # field_to_value(doc, "text_representation.text") - def test_convert_string_to_date(self): date_string = "2024-07-21" expected_date = datetime(2024, 7, 21)