refactor: aggregators with state

boczekbartek · boczekbartek · commit ff9959425501 · 2025-04-17T12:56:48.000+02:00
diff --git a/src/rai_core/rai/agents/postprocessors/base.py b/src/rai_core/rai/agents/postprocessors/base.py
diff --git a/src/rai_core/rai/agents/state_based_agent.py b/src/rai_core/rai/agents/state_based_agent.py
@@ -31,7 +31,7 @@
 from rai.communication.hri_connector import HRIConnector, HRIMessage
 from rai.communication.ros2.api.conversion import import_message_from_str
 from rai.communication.ros2.api.topic import TopicConfig
-from rai.communication.ros2.connectors import ROS2ARIConnector
+from rai.communication.ros2.connectors import ROS2Connector
 from rai.messages.multimodal import HumanMultimodalMessage
 
 from .react_agent import ReActAgent, ReActAgentState
@@ -81,7 +81,7 @@ def __init__(
         )
         self.config = config
 
-        self._ari_connector = ROS2ARIConnector()
+        self._ros2_connector = ROS2Connector()
         self._callback_group = ReentrantCallbackGroup()
         self._subscriptions: Dict[str, Subscription] = dict()
 
@@ -101,11 +101,11 @@ def _configure_state_sources(self):
                 continue
             # NOTE(boczekbartek): refactor to use confugired_callbacks once implemented
             # in the connector
-            qos_profile = self._ari_connector._topic_api._resolve_qos_profile(
+            qos_profile = self._ros2_connector._topic_api._resolve_qos_profile(
                 topic, config.auto_qos_matching, config.qos_profile, for_publisher=False
             )
             msg_type = import_message_from_str(config.msg_type)
-            self._subscriptions[topic] = self._ari_connector.node.create_subscription(
+            self._subscriptions[topic] = self._ros2_connector.node.create_subscription(
                 msg_type=msg_type,
                 topic=topic,
                 callback=partial(self._state_topic_callback, topic),
@@ -182,5 +182,5 @@ def stop(self):
             self._aggregation_thread = None
         self._stop_event.clear()
         for subscription in self._subscriptions.values():
-            self._ari_connector.node.destroy_subscription(subscription)
+            self._ros2_connector.node.destroy_subscription(subscription)
         self.logger.info("Agent stopped")
diff --git a/src/rai_core/rai/aggregators/__init__.py b/src/rai_core/rai/aggregators/__init__.py
@@ -12,6 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .base import BaseStatePostprocessor
+from .base import BaseAggregator
 
-__all__ = ["BaseStatePostprocessor"]
+__all__ = ["BaseAggregator"]
diff --git a/src/rai_core/rai/aggregators/base.py b/src/rai_core/rai/aggregators/base.py
@@ -0,0 +1,77 @@
+# Copyright (C) 2025 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+from collections import deque
+from typing import Deque, Generic, TypeVar, List
+
+from langchain_core.messages import BaseMessage
+
+
+T = TypeVar("T")
+
+
+class BaseAggregator(ABC, Generic[T]):
+    """
+    Interface for aggregators.
+
+    An aggregator buffers incoming messages of type `T` and builds a single
+    aggregated message from them on demand.
+
+    `__call__` method receives a message and appends it to the buffer.
+    `get` method returns the aggregated message.
+    """
+
+    def __init__(self, max_size: int | None = None) -> None:
+        """
+        `max_size` limits the number of buffered messages: once the limit is
+        reached the oldest messages are dropped first. `None` means no limit,
+        `0` means that nothing is buffered.
+
+        Raises `ValueError` if `max_size` is negative.
+        """
+        super().__init__()
+        if max_size is not None and max_size < 0:
+            raise ValueError(f"max_size must be non-negative, got {max_size}")
+        self._buffer: Deque[T] = deque()
+        self.max_size = max_size
+
+    def __call__(self, msg: T) -> None:
+        """Appends `msg` to the buffer, dropping the oldest messages if full"""
+        if self.max_size is not None:
+            # `max_size` is a public attribute and may be lowered after
+            # construction, so trim in a loop rather than popping once. The
+            # emptiness check keeps `max_size == 0` from popping an empty
+            # deque, which would raise IndexError.
+            while self._buffer and len(self._buffer) >= self.max_size:
+                self._buffer.popleft()
+            if self.max_size <= 0:
+                return
+        self._buffer.append(msg)
+
+    @abstractmethod
+    def get(self) -> BaseMessage | None:
+        """Returns the aggregated message"""
+        pass
+
+    def clear(self) -> None:
+        """Removes all messages from the buffer"""
+        self._buffer.clear()
+
+    def get_buffer(self) -> List[T]:
+        """Returns a copy of the buffered messages, oldest first"""
+        return list(self._buffer)
+
+    def __str__(self) -> str:
+        return self.__class__.__name__
diff --git a/src/rai_core/rai/aggregators/ros2/__init__.py b/src/rai_core/rai/aggregators/ros2/__init__.py
diff --git a/src/rai_core/rai/aggregators/ros2/aggregators.py b/src/rai_core/rai/aggregators/ros2/aggregators.py
@@ -19,18 +19,19 @@
 from rcl_interfaces.msg import Log
 from sensor_msgs.msg import CompressedImage, Image
 
-from rai.agents.postprocessors import BaseStatePostprocessor
+from rai.aggregators import BaseAggregator
 from rai.communication.ros2.api.conversion import encode_ros2_img_to_base64
 from rai.initialization.model_initialization import get_llm_model
 from rai.messages import HumanMultimodalMessage
 
 
-class ROS2LogsPostprocessor(BaseStatePostprocessor[Log]):
+class ROS2LogsPostprocessor(BaseAggregator[Log]):
     """Returns only unique messages while keeping their order"""
 
     levels = {10: "DEBUG", 20: "INFO", 30: "WARNING", 40: "ERROR", 50: "FATAL"}
 
-    def __call__(self, msgs: Sequence[Log]) -> HumanMessage:
+    def get(self) -> HumanMessage:
+        msgs = self.get_buffer()
         buffer = []
         prev_parsed = None
         counter = 0
@@ -50,12 +51,11 @@ def __call__(self, msgs: Sequence[Log]) -> HumanMessage:
         return HumanMessage(content=result)
 
 
-class ROS2GetLastImagePostprocessor(BaseStatePostprocessor[Image | CompressedImage]):
+class ROS2GetLastImagePostprocessor(BaseAggregator[Image | CompressedImage]):
     """Returns the last image from the buffer as base64 encoded string"""
 
-    def __call__(
-        self, msgs: Sequence[Image | CompressedImage]
-    ) -> HumanMultimodalMessage | None:
+    def get(self) -> HumanMultimodalMessage | None:
+        msgs = self.get_buffer()
         if len(msgs) == 0:
             return None
         ros2_img = msgs[-1]
@@ -64,21 +64,23 @@ def __call__(
 
 
 class ROS2ImgVLMDescriptionPostprocessor(
-    BaseStatePostprocessor[Image | CompressedImage]
+    BaseAggregator[Image | CompressedImage]
 ):
     """
     Returns the VLM analysis of the last image in the aggregation buffer
     """
 
-    def __init__(self) -> None:
-        super().__init__()
+    def __init__(self, max_size: int | None=None) -> None:
+        super().__init__(max_size)
         self.llm = get_llm_model(model_type="simple_model", streaming=True)
 
-    def __call__(self, msgs: Sequence[Image | CompressedImage]) -> HumanMessage | None:
+    def get(self) -> HumanMessage | None:
+        msgs: List[Image | CompressedImage] = self.get_buffer()
         if len(msgs) == 0:
             return None
 
         b64_images: List[str] = [encode_ros2_img_to_base64(msg) for msg in msgs]
+        self.clear()
 
         system_prompt = "You are an expert in image analysis and your speciality is the"
         "description of images"
@@ -102,18 +104,18 @@ class ROS2ImgDescription(BaseModel):
         )
 
 
-class ROS2ImgVLMDiffPostprocessor(BaseStatePostprocessor[Image | CompressedImage]):
+class ROS2ImgVLMDiffPostprocessor(BaseAggregator[Image | CompressedImage]):
     """
     Returns the LLM analysis of the differences between 3 images in the
     aggregation buffer: 1st, midden, last
     """
 
-    def __init__(self) -> None:
-        super().__init__()
+    def __init__(self, max_size: int | None=None) -> None:
+        super().__init__(max_size)
         self.llm = get_llm_model(model_type="simple_model", streaming=True)
 
     @staticmethod
-    def get_key_elements(elements: Sequence[Any]) -> List[Any]:
+    def get_key_elements(elements: List[Any]) -> List[Any]:
         """
         Returns 1st, last and middle elements of the list
         """
@@ -122,11 +124,14 @@ def get_key_elements(elements: Sequence[Any]) -> List[Any]:
         middle_index = len(elements) // 2
         return [elements[0], elements[middle_index], elements[-1]]
 
-    def __call__(self, msgs: Sequence[Any]) -> HumanMessage | None:
-        if len(msgs) == 0:
+    def get(self) -> HumanMessage | None:
+        if len(self.get_buffer()) == 0:
             return None
 
-        b64_images = [encode_ros2_img_to_base64(msg) for msg in msgs]
+        b64_images = [encode_ros2_img_to_base64(msg) for msg in self._buffer]
+        
+        self.clear()
+
         b64_images = self.get_key_elements(b64_images)
 
         system_prompt = "You are an expert in image analysis and your speciality is the comparison of 2 images"