Skip to content

Commit 21dd03d

Browse files
vblagoje and anakin87
authored
feat: Add completion start time timestamp to relevant generators (#8728)
* OpenAIChatGenerator - add completion_start_time * HuggingFaceAPIChatGenerator - add completion_start_time * Add tests * Add reno note * Relax condition for cached responses * Add completion_start_time timestamping to non-chat generators * Update haystack/components/generators/chat/hugging_face_api.py Co-authored-by: Stefano Fiorucci <[email protected]> * PR feedback --------- Co-authored-by: Stefano Fiorucci <[email protected]>
1 parent 62ac27c commit 21dd03d

File tree

9 files changed

+71
-7
lines changed

9 files changed

+71
-7
lines changed

haystack/components/generators/chat/hugging_face_api.py

+6
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
from datetime import datetime
56
from typing import Any, Callable, Dict, Iterable, List, Optional, Union
67

78
from haystack import component, default_from_dict, default_to_dict, logging
@@ -259,6 +260,7 @@ def _run_streaming(self, messages: List[Dict[str, str]], generation_kwargs: Dict
259260
)
260261

261262
generated_text = ""
263+
first_chunk_time = None
262264

263265
for chunk in api_output:
264266
# n is unused, so the API always returns only one choice
@@ -276,6 +278,9 @@ def _run_streaming(self, messages: List[Dict[str, str]], generation_kwargs: Dict
276278
if finish_reason:
277279
meta["finish_reason"] = finish_reason
278280

281+
if first_chunk_time is None:
282+
first_chunk_time = datetime.now().isoformat()
283+
279284
stream_chunk = StreamingChunk(text, meta)
280285
self.streaming_callback(stream_chunk) # type: ignore # streaming_callback is not None (verified in the run method)
281286

@@ -285,6 +290,7 @@ def _run_streaming(self, messages: List[Dict[str, str]], generation_kwargs: Dict
285290
"finish_reason": finish_reason,
286291
"index": 0,
287292
"usage": {"prompt_tokens": 0, "completion_tokens": 0}, # not available in streaming
293+
"completion_start_time": first_chunk_time,
288294
}
289295
)
290296

haystack/components/generators/chat/openai.py

+3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import json
66
import os
7+
from datetime import datetime
78
from typing import Any, Callable, Dict, List, Optional, Union
89

910
from openai import OpenAI, Stream
@@ -381,6 +382,7 @@ def _convert_streaming_chunks_to_chat_message(self, chunk: Any, chunks: List[Str
381382
"model": chunk.model,
382383
"index": 0,
383384
"finish_reason": chunk.choices[0].finish_reason,
385+
"completion_start_time": chunks[0].meta.get("received_at"), # first chunk received
384386
"usage": {}, # we don't have usage data for streaming responses
385387
}
386388

@@ -444,6 +446,7 @@ def _convert_chat_completion_chunk_to_streaming_chunk(self, chunk: ChatCompletio
444446
"index": choice.index,
445447
"tool_calls": choice.delta.tool_calls,
446448
"finish_reason": choice.finish_reason,
449+
"received_at": datetime.now().isoformat(),
447450
}
448451
)
449452
return chunk_message

haystack/components/generators/hugging_face_api.py

+9
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
from dataclasses import asdict
6+
from datetime import datetime
67
from typing import Any, Callable, Dict, Iterable, List, Optional, Union
78

89
from haystack import component, default_from_dict, default_to_dict, logging
@@ -217,18 +218,26 @@ def _stream_and_build_response(
217218
self, hf_output: Iterable["TextGenerationStreamOutput"], streaming_callback: Callable[[StreamingChunk], None]
218219
):
219220
chunks: List[StreamingChunk] = []
221+
first_chunk_time = None
222+
220223
for chunk in hf_output:
221224
token: TextGenerationOutputToken = chunk.token
222225
if token.special:
223226
continue
227+
224228
chunk_metadata = {**asdict(token), **(asdict(chunk.details) if chunk.details else {})}
229+
if first_chunk_time is None:
230+
first_chunk_time = datetime.now().isoformat()
231+
225232
stream_chunk = StreamingChunk(token.text, chunk_metadata)
226233
chunks.append(stream_chunk)
227234
streaming_callback(stream_chunk)
235+
228236
metadata = {
229237
"finish_reason": chunks[-1].meta.get("finish_reason", None),
230238
"model": self._client.model,
231239
"usage": {"completion_tokens": chunks[-1].meta.get("generated_tokens", 0)},
240+
"completion_start_time": first_chunk_time,
232241
}
233242
return {"replies": ["".join([chunk.content for chunk in chunks])], "meta": [metadata]}
234243

haystack/components/generators/openai.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
import os
6+
from datetime import datetime
67
from typing import Any, Callable, Dict, List, Optional, Union
78

89
from openai import OpenAI, Stream
@@ -255,7 +256,7 @@ def _create_message_from_chunks(
255256
"model": completion_chunk.model,
256257
"index": 0,
257258
"finish_reason": finish_reason,
258-
# Usage is available when streaming only if the user explicitly requests it
259+
"completion_start_time": streamed_chunks[0].meta.get("received_at"), # first chunk received
259260
"usage": dict(completion_chunk.usage or {}),
260261
}
261262
)
@@ -296,12 +297,17 @@ def _build_chunk(chunk: Any) -> StreamingChunk:
296297
:returns:
297298
The StreamingChunk.
298299
"""
299-
# function or tools calls are not going to happen in non-chat generation
300-
# as users can not send ChatMessage with function or tools calls
301300
choice = chunk.choices[0]
302301
content = choice.delta.content or ""
303302
chunk_message = StreamingChunk(content)
304-
chunk_message.meta.update({"model": chunk.model, "index": choice.index, "finish_reason": choice.finish_reason})
303+
chunk_message.meta.update(
304+
{
305+
"model": chunk.model,
306+
"index": choice.index,
307+
"finish_reason": choice.finish_reason,
308+
"received_at": datetime.now().isoformat(),
309+
}
310+
)
305311
return chunk_message
306312

307313
@staticmethod
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
enhancements:
3+
- |
4+
Added completion_start_time metadata to track time-to-first-token (TTFT) in streaming responses from Hugging Face API and OpenAI (Azure).

test/components/generators/chat/test_hugging_face_api.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-FileCopyrightText: 2022-present deepset GmbH <[email protected]>
22
#
33
# SPDX-License-Identifier: Apache-2.0
4+
from datetime import datetime
45
import os
56
from unittest.mock import MagicMock, Mock, patch
67

@@ -503,9 +504,13 @@ def test_live_run_serverless_streaming(self):
503504
assert isinstance(response["replies"], list)
504505
assert len(response["replies"]) > 0
505506
assert [isinstance(reply, ChatMessage) for reply in response["replies"]]
506-
assert "usage" in response["replies"][0].meta
507-
assert "prompt_tokens" in response["replies"][0].meta["usage"]
508-
assert "completion_tokens" in response["replies"][0].meta["usage"]
507+
508+
response_meta = response["replies"][0].meta
509+
assert "completion_start_time" in response_meta
510+
assert datetime.fromisoformat(response_meta["completion_start_time"]) <= datetime.now()
511+
assert "usage" in response_meta
512+
assert "prompt_tokens" in response_meta["usage"]
513+
assert "completion_tokens" in response_meta["usage"]
509514

510515
@pytest.mark.integration
511516
@pytest.mark.skipif(

test/components/generators/chat/test_openai.py

+4
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,10 @@ def __call__(self, chunk: StreamingChunk) -> None:
546546
assert callback.counter > 1
547547
assert "Paris" in callback.responses
548548

549+
# check that the completion_start_time is set and valid ISO format
550+
assert "completion_start_time" in message.meta
551+
assert datetime.fromisoformat(message.meta["completion_start_time"]) < datetime.now()
552+
549553
@pytest.mark.skipif(
550554
not os.environ.get("OPENAI_API_KEY", None),
551555
reason="Export an env var called OPENAI_API_KEY containing the OpenAI API key to run this test.",

test/components/generators/test_hugging_face_api.py

+23
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44
import os
55
from unittest.mock import MagicMock, Mock, patch
6+
from datetime import datetime
67

78
import pytest
89
from huggingface_hub import (
@@ -312,3 +313,25 @@ def test_run_serverless(self):
312313
assert isinstance(response["meta"], list)
313314
assert len(response["meta"]) > 0
314315
assert [isinstance(meta, dict) for meta in response["meta"]]
316+
317+
@pytest.mark.integration
318+
@pytest.mark.skipif(
319+
not os.environ.get("HF_API_TOKEN", None),
320+
reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
321+
)
322+
def test_live_run_streaming_check_completion_start_time(self):
323+
generator = HuggingFaceAPIGenerator(
324+
api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
325+
api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
326+
streaming_callback=streaming_callback_handler,
327+
)
328+
329+
results = generator.run("What is the capital of France?")
330+
331+
assert len(results["replies"]) == 1
332+
assert "Paris" in results["replies"][0]
333+
334+
# Verify completion start time in final metadata
335+
assert "completion_start_time" in results["meta"][0]
336+
completion_start = datetime.fromisoformat(results["meta"][0]["completion_start_time"])
337+
assert completion_start <= datetime.now()

test/components/generators/test_openai.py

+4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-FileCopyrightText: 2022-present deepset GmbH <[email protected]>
22
#
33
# SPDX-License-Identifier: Apache-2.0
4+
from datetime import datetime
45
import logging
56
import os
67
from typing import List
@@ -286,6 +287,9 @@ def __call__(self, chunk: StreamingChunk) -> None:
286287
assert "gpt-4o-mini" in metadata["model"]
287288
assert metadata["finish_reason"] == "stop"
288289

290+
assert "completion_start_time" in metadata
291+
assert datetime.fromisoformat(metadata["completion_start_time"]) <= datetime.now()
292+
289293
# unfortunately, the usage is not available for streaming calls
290294
# we keep the key in the metadata for compatibility
291295
assert "usage" in metadata and len(metadata["usage"]) == 0

0 commit comments

Comments (0)