microsoft
diff --git a/python/samples/getting_started_with_agents/multi_agent_orchestration/step2b_sequential_streaming_agent_response_callback.py b/python/samples/getting_started_with_agents/multi_agent_orchestration/step2b_sequential_streaming_agent_response_callback.py
Lines changed: 159 additions & 0 deletions
diff --git a/python/semantic_kernel/agents/orchestration/agent_actor_base.py b/python/semantic_kernel/agents/orchestration/agent_actor_base.py
Lines changed: 77 additions & 3 deletions
diff --git a/python/semantic_kernel/agents/orchestration/concurrent.py b/python/semantic_kernel/agents/orchestration/concurrent.py
Lines changed: 9 additions & 7 deletions
@@ -0,0 +1,159 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from semantic_kernel.agents import Agent, ChatCompletionAgent, SequentialOrchestration
+from semantic_kernel.agents.runtime import InProcessRuntime
+from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
+from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
+
+"""
+The following sample demonstrates how to create a sequential orchestration for
+executing multiple agents in sequence, i.e. the output of one agent is the input
+to the next agent.
+
+This sample demonstrates the basic steps of creating and starting a runtime, creating a
+sequential orchestration with a streaming agent response callback that prints each agent's
+output chunk by chunk, invoking the orchestration, and finally waiting for the results.
+"""
+
+
+def get_agents() -> list[Agent]:
+    """Build the agents that take part in the sequential orchestration.
+
+    Feel free to add or remove agents.
+
+    Returns:
+        list[Agent]: A concept extractor, a copywriter, and an editor, in execution order.
+    """
+    # Stage 1: turn the raw product description into structured marketing facts
+    concept_prompt = (
+        "You are a marketing analyst. Given a product description, identify:\n"
+        "- Key features\n"
+        "- Target audience\n"
+        "- Unique selling points\n\n"
+    )
+
+    # Stage 2: draft the marketing copy from those facts
+    writer_prompt = (
+        "You are a marketing copywriter. Given a block of text describing features, audience, and USPs, "
+        "compose a compelling marketing copy (like a newsletter section) that highlights these points. "
+        "Output should be short (around 150 words), output just the copy as a single text block."
+    )
+
+    # Stage 3: polish the draft into the final copy
+    editor_prompt = (
+        "You are an editor. Given the draft copy, correct grammar, improve clarity, ensure consistent tone, "
+        "give format and make it polished. Output the final improved copy as a single text block."
+    )
+
+    # The order of the agents in the list will be the order in which they are executed
+    return [
+        ChatCompletionAgent(name="ConceptExtractorAgent", instructions=concept_prompt, service=AzureChatCompletion()),
+        ChatCompletionAgent(name="WriterAgent", instructions=writer_prompt, service=AzureChatCompletion()),
+        ChatCompletionAgent(name="FormatProofAgent", instructions=editor_prompt, service=AzureChatCompletion()),
+    ]
+
+
+# True while the next chunk to arrive starts a new agent message (so its header is still due)
+is_new_message = True
+
+
+def streaming_agent_response_callback(message: StreamingChatMessageContent, is_final: bool) -> None:
+    """Print streamed agent output, starting each new message with a "# <agent name>" header.
+
+    Args:
+        message (StreamingChatMessageContent): The chunk of streamed content from the agent.
+        is_final (bool): Whether this chunk is the last one of the current message.
+    """
+    global is_new_message
+    if is_new_message:
+        print(f"# {message.name}")
+    print(message.content, end="", flush=True)
+    if is_final:
+        print()
+    # The chunk after a final one opens a new message; any other chunk continues the current one.
+    is_new_message = is_final
+
+
+async def main():
+    """Run the sample: stream each agent's output as it is produced, then print the final result."""
+    # 1. Create a sequential orchestration with multiple agents and a streaming
+    #    agent response callback, so the output of each agent can be observed
+    #    chunk by chunk while it is being generated.
+    orchestration = SequentialOrchestration(
+        members=get_agents(),
+        streaming_agent_response_callback=streaming_agent_response_callback,
+    )
+
+    # 2. Create a runtime and start it
+    runtime = InProcessRuntime()
+    runtime.start()
+
+    # 3. Invoke the orchestration with a task and the runtime
+    task = "An eco-friendly stainless steel water bottle that keeps drinks cold for 24 hours"
+    pending_result = await orchestration.invoke(
+        task=task,
+        runtime=runtime,
+    )
+
+    # 4. Wait for the results
+    final_output = await pending_result.get(timeout=20)
+    print(f"***** Final Result *****\n{final_output}")
+
+    # 5. Stop the runtime when idle
+    await runtime.stop_when_idle()
+
+    """
+    Sample output:
+    # ConceptExtractorAgent
+    **Key Features:**
+    - Made from eco-friendly stainless steel
+    - Insulation technology that keeps drinks cold for up to 24 hours
+    - Reusable design, promoting sustainability
+    - Possible variations in sizes and colors
+
+    **Target Audience:**
+    - Environmentally conscious consumers
+    - Active individuals and outdoor enthusiasts
+    - Health-conscious individuals looking to stay hydrated
+    - Students and professionals looking for stylish and functional drinkware
+
+    **Unique Selling Points:**
+    - Combines eco-friendliness with high performance in temperature retention
+    - Durable and reusable, reducing reliance on single-use plastics
+    - Sleek design that appeals to modern aesthetics while being functional
+    - Supporting sustainability initiatives through responsible manufacturing practices
+    # WriterAgent
+    Sip sustainably with our eco-friendly stainless steel water bottles, designed for the conscious consumer who values
+    both performance and aesthetics. Our bottles feature advanced insulation technology that keeps your drinks icy cold
+    for up to 24 hours, making them perfect for outdoor adventures, gym sessions, or a busy day at the office. Choose
+    from various sizes and stunning colors to match your personal style while making a positive impact on the planet.
+    Each reusable bottle helps reduce single-use plastics, supporting a cleaner, greener world. Join the movement toward
+    sustainability without compromising on style or functionality. Stay hydrated, look great, and make a difference—get
+    your eco-friendly water bottle today!
+    # FormatProofAgent
+    Sip sustainably with our eco-friendly stainless steel water bottles, designed for the conscious consumer who values
+    both performance and aesthetics. Our bottles utilize advanced insulation technology to keep your beverages icy cold
+    for up to 24 hours, making them perfect for outdoor adventures, gym sessions, or a busy day at the office. 
+
+    Choose from a variety of sizes and stunning colors to match your personal style while positively impacting the
+    planet. Each reusable bottle helps reduce single-use plastics, supporting a cleaner, greener world. 
+
+    Join the movement towards sustainability without compromising on style or functionality. Stay hydrated, look great,
+    and make a difference—get your eco-friendly water bottle today!
+    ***** Final Result *****
+    Sip sustainably with our eco-friendly stainless steel water bottles, designed for the conscious consumer who values
+    both performance and aesthetics. Our bottles utilize advanced insulation technology to keep your beverages icy cold
+    for up to 24 hours, making them perfect for outdoor adventures, gym sessions, or a busy day at the office.
+
+    Choose from a variety of sizes and stunning colors to match your personal style while positively impacting the
+    planet. Each reusable bottle helps reduce single-use plastics, supporting a cleaner, greener world.
+
+    Join the movement towards sustainability without compromising on style or functionality. Stay hydrated, look great,
+    and make a difference—get your eco-friendly water bottle today!
+    """
+
+
+if __name__ == "__main__":
+    asyncio.run(main())  # Runs the async sample to completion on a new event loop
@@ -10,7 +10,7 @@
 from semantic_kernel.agents.orchestration.orchestration_base import DefaultTypeAlias
 from semantic_kernel.agents.runtime.core.message_context import MessageContext
 from semantic_kernel.agents.runtime.core.routed_agent import RoutedAgent
-from semantic_kernel.contents.chat_history import ChatHistory
+from semantic_kernel.contents import ChatHistory, ChatMessageContent, StreamingChatMessageContent
 from semantic_kernel.utils.feature_stage_decorator import experimental
 
 if sys.version_info >= (3, 12):
@@ -44,18 +44,23 @@ def __init__(
         agent: Agent,
         internal_topic_type: str,
         agent_response_callback: Callable[[DefaultTypeAlias], Awaitable[None] | None] | None = None,
+        streaming_agent_response_callback: Callable[[StreamingChatMessageContent, bool], Awaitable[None] | None]
+        | None = None,
     ) -> None:
         """Initialize the agent container.
 
         Args:
             agent (Agent): An agent to be run in the container.
             internal_topic_type (str): The topic type of the internal topic.
-            agent_response_callback (Callable | None): A function that is called when a response is produced
+            agent_response_callback (Callable | None): A function that is called when a full response is produced
                 by the agents.
+            streaming_agent_response_callback (Callable | None): A function that is called when a streaming response
+                is produced by the agents.
         """
         self._agent = agent
         self._internal_topic_type = internal_topic_type
         self._agent_response_callback = agent_response_callback
+        self._streaming_agent_response_callback = streaming_agent_response_callback
 
         self._agent_thread: AgentThread | None = None
         # Chat history to temporarily store messages before the agent thread is created
@@ -69,9 +74,78 @@ async def _call_agent_response_callback(self, message: DefaultTypeAlias) -> None
         Args:
             message (DefaultTypeAlias): The message to be sent to the agent_response_callback.
         """
-        # TODO(@taochen): Support streaming
         if self._agent_response_callback:
             if inspect.iscoroutinefunction(self._agent_response_callback):
                 await self._agent_response_callback(message)
             else:
                 self._agent_response_callback(message)
+
+    async def _call_streaming_agent_response_callback(
+        self,
+        message_chunk: StreamingChatMessageContent,
+        is_final: bool,
+    ) -> None:
+        """Pass one streamed chunk to the streaming callback, if set, awaiting its result when awaitable.
+
+        Args:
+            message_chunk (StreamingChatMessageContent): The message chunk.
+            is_final (bool): Whether this is the final chunk of the response.
+        """
+        if self._streaming_agent_response_callback:
+            # Check the returned value, not the callable, so awaitables from lambdas or async __call__ are awaited.
+            result = self._streaming_agent_response_callback(message_chunk, is_final)
+            if inspect.isawaitable(result):
+                await result
+
+    async def _invoke_agent(self, additional_messages: DefaultTypeAlias | None = None, **kwargs) -> ChatMessageContent:
+        """Invoke the agent in streaming mode, report chunks and the full response, and return the response.
+
+        Args:
+            additional_messages (DefaultTypeAlias | None): Additional messages to be sent to the agent.
+            **kwargs: Additional keyword arguments forwarded to the agent's invoke_stream method:
+                - kernel: The kernel to use for the agent invocation.
+
+        Returns:
+            ChatMessageContent: The full response, built by adding all streamed chunks together.
+
+        Raises:
+            RuntimeError: If the agent's stream ends without producing any chunk.
+        """
+        chunks: list[StreamingChatMessageContent] = []
+        outgoing = self._create_messages(additional_messages)
+        async for item in self._agent.invoke_stream(messages=outgoing, thread=self._agent_thread, **kwargs):  # type: ignore[arg-type]
+            # Emit with a one-chunk delay: a chunk is known not to be final only once its successor arrives.
+            chunks.append(item.message)
+            if len(chunks) > 1:
+                await self._call_streaming_agent_response_callback(chunks[-2], is_final=False)
+            if self._agent_thread is None:
+                self._agent_thread = item.thread
+
+        if not chunks:
+            raise RuntimeError(f'Agent "{self._agent.name}" did not return any response.')
+
+        # The stream is exhausted, so the chunk still held back is the final one.
+        await self._call_streaming_agent_response_callback(chunks[-1], is_final=True)
+
+        # Build the full response from the streaming messages
+        reply = sum(chunks[1:], chunks[0])
+        await self._call_agent_response_callback(reply)
+        return reply
+
+    def _create_messages(self, additional_messages: DefaultTypeAlias | None = None) -> list[ChatMessageContent]:
+        """Assemble the messages for the next agent invocation.
+
+        Args:
+            additional_messages (DefaultTypeAlias | None): A message or list of messages to append, if any.
+
+        Returns:
+            list[ChatMessageContent]: The cached chat history (only while no thread exists) plus the additions.
+        """
+        # The cached chat history is only included while no agent thread has been created yet.
+        pending = list(self._chat_history.messages) if self._agent_thread is None else []
+
+        if additional_messages is None:
+            return pending
+        if not isinstance(additional_messages, list):
+            additional_messages = [additional_messages]
+        return pending + additional_messages
@@ -20,6 +20,7 @@
 from semantic_kernel.agents.runtime.core.routed_agent import message_handler
 from semantic_kernel.agents.runtime.core.topic import TopicId
 from semantic_kernel.agents.runtime.in_process.type_subscription import TypeSubscription
+from semantic_kernel.contents.streaming_chat_message_content import StreamingChatMessageContent
 from semantic_kernel.kernel_pydantic import KernelBaseModel
 from semantic_kernel.utils.feature_stage_decorator import experimental
 
@@ -56,38 +57,38 @@ def __init__(
         internal_topic_type: str,
         collection_agent_type: str,
         agent_response_callback: Callable[[DefaultTypeAlias], Awaitable[None] | None] | None = None,
+        streaming_agent_response_callback: Callable[[StreamingChatMessageContent, bool], Awaitable[None] | None]
+        | None = None,
     ) -> None:
         """Initialize the agent actor.
 
         Args:
             agent: The agent to be executed.
             internal_topic_type: The internal topic type for the actor.
             collection_agent_type: The collection agent type for the actor.
-            agent_response_callback: A callback function to handle the response from the agent.
+            agent_response_callback: A callback function to handle the full response from the agent.
+            streaming_agent_response_callback: A callback function to handle streaming responses from the agent.
         """
         self._collection_agent_type = collection_agent_type
         super().__init__(
             agent=agent,
             internal_topic_type=internal_topic_type,
             agent_response_callback=agent_response_callback,
+            streaming_agent_response_callback=streaming_agent_response_callback,
         )
 
     @message_handler
     async def _handle_message(self, message: ConcurrentRequestMessage, ctx: MessageContext) -> None:
         """Handle a message."""
         logger.debug(f"Concurrent actor (Actor ID: {self.id}; Agent name: {self._agent.name}) started processing...")
 
-        response = await self._agent.get_response(
-            messages=message.body,  # type: ignore[arg-type]
-        )
+        response = await self._invoke_agent(additional_messages=message.body)
 
         logger.debug(f"Concurrent actor (Actor ID: {self.id}; Agent name: {self._agent.name}) finished processing.")
 
-        await self._call_agent_response_callback(response.message)
-
         target_actor_id = await self.runtime.get(self._collection_agent_type)
         await self.send_message(
-            ConcurrentResponseMessage(body=response.message),
+            ConcurrentResponseMessage(body=response),
             target_actor_id,
             cancellation_token=ctx.cancellation_token,
         )
@@ -181,6 +182,7 @@ async def _internal_helper(agent: Agent) -> None:
                     internal_topic_type,
                     collection_agent_type=self._get_collection_actor_type(internal_topic_type),
                     agent_response_callback=self._agent_response_callback,
+                    streaming_agent_response_callback=self._streaming_agent_response_callback,
                 ),
             )