diff --git a/lib/sycamore/sycamore/llms/llms.py b/lib/sycamore/sycamore/llms/llms.py index 2146b07db..a3e1c4743 100644 --- a/lib/sycamore/sycamore/llms/llms.py +++ b/lib/sycamore/sycamore/llms/llms.py @@ -104,7 +104,7 @@ def get_metadata(self, kwargs, response_text, wall_latency, in_tokens, out_token "total_tokens": in_tokens + out_tokens, }, "wall_latency": wall_latency, - "prompt": kwargs.get("prompt"), + "prompt": kwargs.get("prompt") or kwargs.get("messages"), "output": response_text, } diff --git a/lib/sycamore/sycamore/tests/integration/transforms/test_data_extraction.py b/lib/sycamore/sycamore/tests/integration/transforms/test_data_extraction.py index cb6e226a6..e3f415417 100644 --- a/lib/sycamore/sycamore/tests/integration/transforms/test_data_extraction.py +++ b/lib/sycamore/sycamore/tests/integration/transforms/test_data_extraction.py @@ -43,12 +43,16 @@ def test_extract_properties_from_dict_schema(llm): docs = ctx.read.document(docs) docs = docs.extract_properties(property_extractor) - taken = docs.take_all() + taken = docs.take_all(include_metadata=True) assert taken[0].properties["entity"]["name"] == "Vinayak" assert taken[0].properties["entity"]["age"] == 74 assert "Honolulu" in taken[0].properties["entity"]["from_location"] + assert len(taken) == 3 + assert taken[2].metadata["usage"]["prompt_tokens"] > 0 + assert taken[2].metadata["usage"]["completion_tokens"] > 0 + @pytest.mark.parametrize("llm", llms) def test_extract_properties_from_schema(llm): @@ -61,6 +65,7 @@ def test_extract_properties_from_schema(llm): field_type="str", description="This is the name of an entity", examples=["Mark", "Ollie", "Winston"], + default="null", ), SchemaField(name="age", field_type="int", default=999), SchemaField(name="date", field_type="str", description="Any date in the doc in YYYY-MM-DD format"), @@ -80,14 +85,20 @@ def test_extract_properties_from_schema(llm): docs = ctx.read.document(docs) docs = docs.extract_properties(property_extractor) - taken = docs.take_all() + taken = docs.take_all(include_metadata=True) assert taken[0].properties["entity"]["name"] == "Vinayak" assert taken[0].properties["entity"]["age"] == 74 assert taken[0].properties["entity"]["from_location"] == "Honolulu, HI", "Invalid location extracted or formatted" assert taken[0].properties["entity"]["date"] == "1923-02-24" - assert taken[1].properties["entity"]["name"] is None, "Default None value not being used correctly" + assert taken[1].properties["entity"]["name"] == "None" # Anthropic isn't generating valid JSON with null values. assert taken[1].properties["entity"]["age"] == 999, "Default value not being used correctly" assert taken[1].properties["entity"]["from_location"] == "New Delhi" assert taken[1].properties["entity"]["date"] == "2014-01-11" + + assert len(taken) == 5 + assert taken[3].metadata["usage"]["prompt_tokens"] > 0 + assert taken[3].metadata["usage"]["completion_tokens"] > 0 + assert taken[4].metadata["usage"]["prompt_tokens"] > 0 + assert taken[4].metadata["usage"]["completion_tokens"] > 0