RobotecAI
diff --git a/‎src/rai_bench/rai_bench/examples/images/image_1.jpg
64.6 KB b/‎src/rai_bench/rai_bench/examples/images/image_1.jpg
64.6 KB
diff --git a/‎src/rai_bench/rai_bench/examples/images/image_2.jpg
66.2 KB b/‎src/rai_bench/rai_bench/examples/images/image_2.jpg
66.2 KB
diff --git a/‎src/rai_bench/rai_bench/examples/images/image_3.jpg
77.8 KB b/‎src/rai_bench/rai_bench/examples/images/image_3.jpg
77.8 KB
diff --git a/‎src/rai_bench/rai_bench/examples/images/image_4.jpg
75.1 KB b/‎src/rai_bench/rai_bench/examples/images/image_4.jpg
75.1 KB
diff --git a/‎src/rai_bench/rai_bench/examples/images/image_5.jpg
95.6 KB b/‎src/rai_bench/rai_bench/examples/images/image_5.jpg
95.6 KB
diff --git a/‎src/rai_bench/rai_bench/examples/images/image_6.jpg
83.3 KB b/‎src/rai_bench/rai_bench/examples/images/image_6.jpg
83.3 KB
diff --git a/‎src/rai_bench/rai_bench/examples/images/image_7.jpg
69.5 KB b/‎src/rai_bench/rai_bench/examples/images/image_7.jpg
69.5 KB
diff --git a/‎src/rai_bench/rai_bench/examples/spatial_reasoning_tasks.py
Lines changed: 100 additions & 0 deletions b/‎src/rai_bench/rai_bench/examples/spatial_reasoning_tasks.py
Lines changed: 100 additions & 0 deletions
diff --git a/‎src/rai_bench/rai_bench/examples/spatial_tool_calling_agent_bench.py
Lines changed: 74 additions & 0 deletions b/‎src/rai_bench/rai_bench/examples/spatial_tool_calling_agent_bench.py
Lines changed: 74 additions & 0 deletions
diff --git a/‎src/rai_bench/rai_bench/tool_calling_agent_bench/agent_bench.py
Lines changed: 21 additions & 4 deletions b/‎src/rai_bench/rai_bench/tool_calling_agent_bench/agent_bench.py
Lines changed: 21 additions & 4 deletions
diff --git a/‎src/rai_bench/rai_bench/tool_calling_agent_bench/agent_tasks_interfaces.py
Lines changed: 21 additions & 0 deletions b/‎src/rai_bench/rai_bench/tool_calling_agent_bench/agent_tasks_interfaces.py
Lines changed: 21 additions & 0 deletions
diff --git a/‎src/rai_bench/rai_bench/tool_calling_agent_bench/spatial_reasoning_tasks.py
Lines changed: 104 additions & 0 deletions b/‎src/rai_bench/rai_bench/tool_calling_agent_bench/spatial_reasoning_tasks.py
Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,100 @@
+# Copyright (C) 2025 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, Sequence
+
+from rai_bench.tool_calling_agent_bench.agent_tasks_interfaces import (
+    SpatialReasoningAgentTask,
+)
+from rai_bench.tool_calling_agent_bench.spatial_reasoning_tasks import (
+    BoolImageTask,
+    BoolImageTaskInput,
+)
+
+# Benchmark inputs: each entry pairs a yes/no question about an example image
+# with the boolean answer the agent is expected to give.
+# NOTE(review): the image paths are relative (they start with "src/"), so the
+# benchmark presumably has to be launched from the repository root - confirm.
+inputs: List[BoolImageTaskInput] = [
+    BoolImageTaskInput(
+        question="Is the door on the left from the desk?",
+        images_paths=["src/rai_bench/rai_bench/examples/images/image_1.jpg"],
+        expected_response=True,
+    ),
+    BoolImageTaskInput(
+        question="Is the door open?",
+        images_paths=["src/rai_bench/rai_bench/examples/images/image_1.jpg"],
+        expected_response=False,
+    ),
+    BoolImageTaskInput(
+        question="Is someone in the room?",
+        images_paths=["src/rai_bench/rai_bench/examples/images/image_1.jpg"],
+        expected_response=False,
+    ),
+    BoolImageTaskInput(
+        question="Is the light on in the room?",
+        images_paths=["src/rai_bench/rai_bench/examples/images/image_2.jpg"],
+        expected_response=True,
+    ),
+    BoolImageTaskInput(
+        question="Do you see the plant?",
+        images_paths=["src/rai_bench/rai_bench/examples/images/image_2.jpg"],
+        expected_response=True,
+    ),
+    BoolImageTaskInput(
+        question="Do you see the plant?",
+        images_paths=["src/rai_bench/rai_bench/examples/images/image_3.jpg"],
+        expected_response=False,
+    ),
+    BoolImageTaskInput(
+        question="Are there any pictures on the wall?",
+        images_paths=["src/rai_bench/rai_bench/examples/images/image_3.jpg"],
+        expected_response=True,
+    ),
+    BoolImageTaskInput(
+        question="Are there 3 pictures on the wall?",
+        images_paths=["src/rai_bench/rai_bench/examples/images/image_4.jpg"],
+        expected_response=True,
+    ),
+    BoolImageTaskInput(
+        question="Are there 4 pictures on the wall?",
+        images_paths=["src/rai_bench/rai_bench/examples/images/image_4.jpg"],
+        expected_response=False,
+    ),
+    BoolImageTaskInput(
+        question="Is there a rack on the left from the sofa?",
+        images_paths=["src/rai_bench/rai_bench/examples/images/image_4.jpg"],
+        expected_response=False,
+    ),
+    BoolImageTaskInput(
+        question="Is there a plant behind the rack?",
+        images_paths=["src/rai_bench/rai_bench/examples/images/image_5.jpg"],
+        expected_response=True,
+    ),
+    BoolImageTaskInput(
+        question="Is there a plant on the right from the window?",
+        images_paths=["src/rai_bench/rai_bench/examples/images/image_6.jpg"],
+        expected_response=False,
+    ),
+    BoolImageTaskInput(
+        # Spelling fixed ("armchain" -> "armchair"): the question text is sent
+        # to the model verbatim, and the next entry already uses "armchair".
+        question="Is there a pillow on the armchair?",
+        images_paths=["src/rai_bench/rai_bench/examples/images/image_7.jpg"],
+        expected_response=True,
+    ),
+    BoolImageTaskInput(
+        question="Is there a red pillow on the armchair?",
+        images_paths=["src/rai_bench/rai_bench/examples/images/image_7.jpg"],
+        expected_response=False,
+    ),
+]
+
+# One BoolImageTask per input, in the same order as ``inputs``.
+tasks: Sequence[SpatialReasoningAgentTask] = [
+    BoolImageTask(task_input=input_item) for input_item in inputs
+]
@@ -0,0 +1,74 @@
+# Copyright (C) 2025 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+from datetime import datetime
+from pathlib import Path
+
+from rai.agents.conversational_agent import create_conversational_agent
+from rai.utils.model_initialization import (
+    get_llm_model,
+    get_llm_model_config_and_vendor,
+)
+
+from rai_bench.examples.spatial_reasoning_tasks import tasks
+from rai_bench.tool_calling_agent_bench.agent_bench import ToolCallingAgentBenchmark
+
+# Script entry point: runs every spatial reasoning task through the benchmark,
+# writing a log file and a results CSV into a fresh, timestamped directory.
+if __name__ == "__main__":
+    # The experiment directory is named after this script (file name without
+    # the extension).
+    current_test_name = Path(__file__).stem
+
+    now = datetime.now()
+    # Resulting layout:
+    #   src/rai_bench/rai_bench/experiments/<script name>/<YYYY-mm-dd_HH-MM-SS>/
+    # The base path is relative, so it resolves against the current working
+    # directory.
+    experiment_dir = (
+        Path("src/rai_bench/rai_bench/experiments")
+        / current_test_name
+        / now.strftime("%Y-%m-%d_%H-%M-%S")
+    )
+    # Must exist before the FileHandler below opens the log file inside it.
+    experiment_dir.mkdir(parents=True, exist_ok=True)
+    log_filename = experiment_dir / "benchmark.log"
+    results_filename = experiment_dir / "results.csv"
+
+    # A single file handler is shared by the benchmark and agent loggers, so
+    # both write to the same benchmark.log.
+    file_handler = logging.FileHandler(log_filename)
+    file_handler.setLevel(logging.DEBUG)
+    formatter = logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    )
+    file_handler.setFormatter(formatter)
+
+    # The handler accepts DEBUG, but both loggers are set to INFO, so records
+    # logged below INFO on these loggers never reach the handler.
+    bench_logger = logging.getLogger("Benchmark logger")
+    bench_logger.setLevel(logging.INFO)
+    bench_logger.addHandler(file_handler)
+
+    agent_logger = logging.getLogger("Agent logger")
+    agent_logger.setLevel(logging.INFO)
+    agent_logger.addHandler(file_handler)
+
+    # The tasks were built at import time; point them at the benchmark logger.
+    for task in tasks:
+        task.logger = bench_logger
+
+    benchmark = ToolCallingAgentBenchmark(
+        tasks=tasks, logger=bench_logger, results_filename=results_filename
+    )
+
+    # The model name passed to the benchmark is read from the attribute of the
+    # returned config (first element of the result) named after ``model_type``.
+    model_type = "simple_model"
+    model_config = get_llm_model_config_and_vendor(model_type=model_type)[0]
+    model_name = getattr(model_config, model_type)
+
+    # A new agent is created for every task, with that task's tools and system
+    # prompt.
+    # NOTE(review): run_next() receives no task argument, so it presumably
+    # walks the benchmark's own task list in the same order as this loop -
+    # confirm in ToolCallingAgentBenchmark before reordering or filtering.
+    for task in tasks:
+        agent = create_conversational_agent(
+            llm=get_llm_model(model_type=model_type),
+            tools=task.expected_tools,
+            system_prompt=task.get_system_prompt(),
+            logger=agent_logger,
+        )
+        benchmark.run_next(agent=agent, model_name=model_name)
@@ -28,6 +28,7 @@
 from rai.messages.multimodal import HumanMultimodalMessage
 
 from rai_bench.tool_calling_agent_bench.agent_tasks_interfaces import (
+    SpatialReasoningAgentTask,
     ToolCallingAgentTask,
 )
 from rai_bench.tool_calling_agent_bench.scores_tracing import ScoreTracingHandler
@@ -160,10 +161,26 @@ def run_next(self, agent: CompiledStateGraph, model_name: str) -> None:
 
             ts = time.perf_counter()
             try:
-                response = agent.invoke(
-                    {"messages": [HumanMultimodalMessage(content=task.get_prompt())]},
-                    config=config,
-                )
+                if isinstance(task, SpatialReasoningAgentTask):
+                    response = agent.invoke(
+                        {
+                            "messages": [
+                                HumanMultimodalMessage(
+                                    content=task.get_prompt(), images=task.get_images()
+                                )
+                            ]
+                        },
+                        config=config,
+                    )
+                else:
+                    response = agent.invoke(
+                        {
+                            "messages": [
+                                HumanMultimodalMessage(content=task.get_prompt())
+                            ]
+                        },
+                        config=config,
+                    )
                 task.verify_tool_calls(response=response)
             except GraphRecursionError as e:
                 task.log_error(msg=f"Graph Recursion Error: {e}")
 
@@ -274,3 +274,24 @@ def _is_ai_message_requesting_get_ros2_topics_and_types(
         ):
             return False
         return True
+
+
+class SpatialReasoningAgentTask(ToolCallingAgentTask):
+    """Abstract class for spatial reasoning tasks for tool calling agent.
+
+    On top of the base task interface, a concrete task has to implement
+    ``get_images``.
+    """
+
+    def __init__(self, logger: loggers_type | None = None) -> None:
+        super().__init__(logger)
+        # Annotation-only declarations: nothing is assigned here, so these
+        # attributes do not exist until a subclass sets them.
+        self.expected_tools: List[BaseTool]
+        self.question: str
+        self.images_paths: List[str]
+
+    @abstractmethod
+    def get_images(self) -> List[str]:
+        """Get the images related to the task.
+
+        Returns
+        -------
+        List[str]
+            One string per image, ready to be passed as the ``images``
+            argument of ``HumanMultimodalMessage``.
+            NOTE(review): these are not file paths - the visible
+            implementation returns the output of ``preprocess_image`` for
+            each path (presumably an encoded image; confirm).
+        """
+        pass
@@ -0,0 +1,104 @@
+# Copyright (C) 2025 Robotec.AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import logging
+from typing import Any, List, Optional, Sequence
+
+from langchain_core.messages import AIMessage
+from langchain_core.tools import BaseTool
+from pydantic import BaseModel, Field
+from rai.messages import preprocess_image
+
+from rai_bench.tool_calling_agent_bench.agent_tasks_interfaces import (
+    SpatialReasoningAgentTask,
+)
+
+
+class TaskParametrizationError(Exception):
+    """Signal that a task was configured with invalid parameters."""
+
+
+# System prompt returned by BoolImageTask.get_system_prompt(): it asks the model
+# to answer questions about the supplied images through the provided tools.
+SPATIAL_REASONING_SYSTEM_PROMPT = "You are a helpful and knowledgeable AI assistant that specializes in interpreting and analyzing visual content. Your task is to answer questions based on the images provided to you. Please response with the use of the provided tools."
+
+
+# Argument schema of ReturnBoolResponseTool: a single required boolean.
+# Documented with comments rather than a docstring so that the model's
+# generated schema stays unchanged.
+class ReturnBoolResponseToolInput(BaseModel):
+    response: bool = Field(..., description="The response to the question.")
+
+
+class ReturnBoolResponseTool(BaseTool):
+    """Tool that returns a boolean response."""
+
+    name: str = "return_bool_response"
+    description: str = "Return a bool response to the question."
+    # Annotated like the fields above: pydantic v2 refuses a base-class field
+    # that is overridden by a non-annotated attribute.
+    args_schema: type[BaseModel] = ReturnBoolResponseToolInput
+
+    def _run(self, response: bool) -> bool:
+        """Return ``response`` unchanged after checking that it is a bool.
+
+        Parameters
+        ----------
+        response : bool
+            The yes/no answer supplied in the tool call.
+
+        Returns
+        -------
+        bool
+            The same value that was passed in.
+
+        Raises
+        ------
+        ValueError
+            If ``response`` is not a ``bool``.
+        """
+        # bool cannot be subclassed, so isinstance() accepts exactly the same
+        # values as an exact type comparison would.
+        if not isinstance(response, bool):
+            raise ValueError("Invalid response type. Response must be a boolean.")
+        return response
+
+
+# Parameters of one BoolImageTask: the question, the images it refers to and
+# the expected yes/no answer. All three fields are required.
+# Documented with comments rather than a docstring so that the model's
+# generated schema stays unchanged.
+class BoolImageTaskInput(BaseModel):
+    question: str = Field(..., description="The question to be answered.")
+    images_paths: List[str] = Field(
+        ...,
+        description="List of image file paths to be used for answering the question.",
+    )
+    expected_response: bool = Field(
+        ..., description="The expected answer to the question."
+    )
+
+
+class BoolImageTask(SpatialReasoningAgentTask):
+    """Task that asks a yes/no question about one or more images.
+
+    The task passes when the first AI message of the response contains exactly
+    one ``return_bool_response`` tool call whose ``response`` argument equals
+    the expected answer.
+    """
+
+    complexity = "easy"
+
+    def __init__(
+        self,
+        task_input: BoolImageTaskInput,
+        logger: Optional[logging.Logger] = None,
+    ) -> None:
+        """Store the question, image paths and expected answer of the task.
+
+        Parameters
+        ----------
+        task_input : BoolImageTaskInput
+            Question, image paths and expected boolean answer.
+        logger : Optional[logging.Logger]
+            Logger forwarded to the base class.
+        """
+        super().__init__(logger)
+        self.expected_tools = [ReturnBoolResponseTool()]
+        self.question = task_input.question
+        self.images_paths = task_input.images_paths
+        self.expected_response = task_input.expected_response
+
+    def get_system_prompt(self) -> str:
+        """Return the system prompt shared by the spatial reasoning tasks."""
+        return SPATIAL_REASONING_SYSTEM_PROMPT
+
+    def get_prompt(self) -> str:
+        """Return the question that is sent to the agent."""
+        return self.question
+
+    def get_images(self) -> List[str]:
+        """Return every image of the task passed through ``preprocess_image``.
+
+        Returns
+        -------
+        List[str]
+            One preprocessed image per entry of ``images_paths``, in order.
+        """
+        return [preprocess_image(image_path) for image_path in self.images_paths]
+
+    def verify_tool_calls(self, response: dict[str, Any]) -> None:
+        """Check the agent response and record the outcome in ``self.result``.
+
+        Only the first AI message is inspected. ``self.result.success`` is set
+        to ``True`` when no error has been recorded.
+
+        Parameters
+        ----------
+        response : dict[str, Any]
+            Agent output; its ``"messages"`` entry holds the conversation.
+        """
+        ai_messages: Sequence[AIMessage] = [
+            message
+            for message in response["messages"]
+            if isinstance(message, AIMessage)
+        ]
+
+        if not ai_messages:
+            # Without this branch a response with no AI message recorded no
+            # error at all and the task was reported as successful.
+            # NOTE(review): assumes log_error() stores the message in
+            # self.result.errors - confirm in ToolCallingAgentTask.
+            self.log_error(msg="Expected at least one AI message, but got none.")
+        elif self._check_tool_calls_num_in_ai_message(ai_messages[0], expected_num=1):
+            self._check_tool_call(
+                tool_call=ai_messages[0].tool_calls[0],
+                expected_name="return_bool_response",
+                expected_args={"response": self.expected_response},
+            )
+
+        if not self.result.errors:
+            self.result.success = True