From 681b925c95b55a341bd1e469945148b8f2288a2d Mon Sep 17 00:00:00 2001
From: stantonius
Date: Mon, 6 Jan 2025 17:19:23 -0500
Subject: [PATCH 1/2] Added ReAct function/tool default args

Addresses TOPIC 02 in react.py about handling default arguments in the Tool
class

Not super elegant, but seems effective in my testing
---
 dspy/predict/react.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/dspy/predict/react.py b/dspy/predict/react.py
index 2690d066af..53d5195a59 100644
--- a/dspy/predict/react.py
+++ b/dspy/predict/react.py
@@ -11,7 +11,15 @@


 class Tool:
-    def __init__(self, func: Callable, name: str = None, desc: str = None, args: dict[str, Any] = None):
+
+    def __init__(
+        self,
+        func: Callable,
+        name: str = None,
+        desc: str = None,
+        args: dict[str, Any] = None,
+        defaults: dict[str, Any] = None,
+    ):
         annotations_func = func if inspect.isfunction(func) or inspect.ismethod(func) else func.__call__
         self.func = func
         self.name = name or getattr(func, "__name__", type(func).__name__)
@@ -23,6 +31,7 @@ def __init__(self, func: Callable, name: str = None, desc: str = None, args: dic
             for k, v in (args or get_type_hints(annotations_func)).items()
             if k != "return"
         }
+        self.defaults = defaults

     @with_callbacks
     def __call__(self, *args, **kwargs):
@@ -63,6 +72,8 @@ def __init__(self, signature, tools: list[Callable], max_iters=5):
             args = tool.args if hasattr(tool, "args") else str({tool.input_variable: str})
             desc = (f", whose description is {tool.desc}." if tool.desc else ".").replace("\n", " ")
             desc += f" It takes arguments {args} in JSON format."
+            if tool.defaults:
+                desc += f" Default arguments are {tool.defaults}."
             instr.append(f"({idx+1}) {tool.name}{desc}")

         react_signature = (

From 06cdd437204ef30620d7328e548c235f26adcaed Mon Sep 17 00:00:00 2001
From: stantonius
Date: Tue, 7 Jan 2025 09:24:55 -0500
Subject: [PATCH 2/2] sync with remote main update

---
 .github/workflows/run_tests.yml            | 29 +++----
 docs/docs/tutorials/observability/index.md |  2 +-
 dspy/evaluate/evaluate.py                  | 94 ++++++++++++++++------
 dspy/predict/react.py                      | 19 ++++-
 4 files changed, 99 insertions(+), 45 deletions(-)

diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
index 2ffc80f95d..1aaf9236c8 100644
--- a/.github/workflows/run_tests.yml
+++ b/.github/workflows/run_tests.yml
@@ -41,6 +41,9 @@ jobs:
         python-version: ["3.9"]
     steps:
       - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
       - name: Install Deno
         run: |
           curl -fsSL https://deno.land/install.sh | sh
@@ -59,13 +62,7 @@
           path: ~/.local
           key: poetry-${{ env.POETRY_VERSION }}-${{ hashFiles('**/poetry.lock') }}
       - name: Install Poetry
-        if: steps.cached-poetry.outputs.cache-hit != 'true'
-        uses: snok/install-poetry@v1
-      - name: Set up python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-          cache: "poetry"
+        run: python -m pip install --upgrade "poetry==${{ env.POETRY_VERSION }}"
       - name: Install dependencies
         run: poetry install --no-interaction
       - name: Run lint with tests
@@ -89,14 +86,11 @@
         with:
           path: ~/.local
           key: poetry-${{ env.POETRY_VERSION }}-${{ hashFiles('**/poetry.lock') }}
-      - name: Install Poetry
-        if: steps.cached-poetry.outputs.cache-hit != 'true'
-        uses: snok/install-poetry@v1
-      - name: Set up python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
+      - uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
-          cache: "poetry"
+      - name: Install Poetry
+        run: python -m pip install --upgrade "poetry==${{ env.POETRY_VERSION }}"
       - name: Build
         run: poetry build
       - name: Install built package
@@ -118,13 +112,10 @@
         with:
           path: ~/.local
           key: poetry-${{ env.POETRY_VERSION }}-${{ hashFiles('**/poetry.lock') }}
-      - name: Install Poetry
-        if: steps.cached-poetry.outputs.cache-hit != 'true'
-        uses: snok/install-poetry@v1
-      - name: Set up python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
+      - uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
-          cache: "poetry"
+      - name: Install Poetry
+        run: python -m pip install --upgrade "poetry==${{ env.POETRY_VERSION }}"
       - name: Run setup.py build
         run: python setup.py build
diff --git a/docs/docs/tutorials/observability/index.md b/docs/docs/tutorials/observability/index.md
index 0b540f042f..e7568107fc 100644
--- a/docs/docs/tutorials/observability/index.md
+++ b/docs/docs/tutorials/observability/index.md
@@ -1,6 +1,6 @@
 # Tutorial: Debugging and Observability in DSPy

-This guide demonstrates how to debug problems and improve observability in DSPy. Modern AI programs often involve multiple components, such as language models, retrievers, and tools. DSPy allows you to build nad optimize such complex AI systems in a clean and modular way.
+This guide demonstrates how to debug problems and improve observability in DSPy. Modern AI programs often involve multiple components, such as language models, retrievers, and tools. DSPy allows you to build and optimize such complex AI systems in a clean and modular way.

 However, as systems grow more sophisticated, the ability to **understand what your system is doing** becomes critical. Without transparency, the prediction process can easily become a black box, making failures or quality issues difficult to diagnose and production maintenance challenging.

diff --git a/dspy/evaluate/evaluate.py b/dspy/evaluate/evaluate.py
index 9ef0bf7334..89be568937 100644
--- a/dspy/evaluate/evaluate.py
+++ b/dspy/evaluate/evaluate.py
@@ -1,6 +1,6 @@
 import logging
 import types
-from typing import Any
+from typing import Any, Callable, List, Optional

 import pandas as pd
 import tqdm
@@ -38,25 +38,40 @@ def HTML(x: str) -> str:

 logger = logging.getLogger(__name__)

-logger = logging.getLogger(__name__)
-
-
 class Evaluate:
+    """DSPy Evaluate class.
+
+    This class is used to evaluate the performance of a DSPy program. Users need to provide an evaluation dataset and
+    a metric function in order to use this class. This class supports parallel evaluation on the provided dataset.
+    """
     def __init__(
         self,
         *,
-        devset,
-        metric=None,
-        num_threads=1,
-        display_progress=False,
-        display_table=False,
-        max_errors=5,
-        return_all_scores=False,
-        return_outputs=False,
-        provide_traceback=False,
-        failure_score=0.0,
-        **_kwargs,
+        devset: List["dspy.Example"],
+        metric: Optional[Callable] = None,
+        num_threads: int = 1,
+        display_progress: bool = False,
+        display_table: bool = False,
+        max_errors: int = 5,
+        return_all_scores: bool = False,
+        return_outputs: bool = False,
+        provide_traceback: bool = False,
+        failure_score: float = 0.0,
+        **kwargs,
     ):
+        """
+        Args:
+            devset (List[dspy.Example]): The evaluation dataset.
+            metric (Callable): The metric function to use for evaluation.
+            num_threads (int): The number of threads to use for parallel evaluation.
+            display_progress (bool): Whether to display progress during evaluation.
+            display_table (bool): Whether to display the evaluation results in a table.
+            max_errors (int): The maximum number of errors to allow before stopping evaluation.
+            return_all_scores (bool): Whether to return scores for every data record in `devset`.
+            return_outputs (bool): Whether to return the dspy program's outputs for every example in `devset`.
+            provide_traceback (bool): Whether to provide traceback information during evaluation.
+            failure_score (float): The default score to use if evaluation fails due to an exception.
+        """
         self.devset = devset
         self.metric = metric
         self.num_threads = num_threads
@@ -70,15 +85,48 @@ def __init__(

     def __call__(
         self,
-        program,
-        metric=None,
-        devset=None,
-        num_threads=None,
-        display_progress=None,
-        display_table=None,
-        return_all_scores=None,
-        return_outputs=None,
+        program: "dspy.Module",
+        metric: Optional[Callable] = None,
+        devset: Optional[List["dspy.Example"]] = None,
+        num_threads: Optional[int] = None,
+        display_progress: Optional[bool] = None,
+        display_table: Optional[bool] = None,
+        return_all_scores: Optional[bool] = None,
+        return_outputs: Optional[bool] = None,
     ):
+        """
+        Args:
+            program (dspy.Module): The DSPy program to evaluate.
+            metric (Callable): The metric function to use for evaluation. If not provided, use `self.metric`.
+            devset (List[dspy.Example]): The evaluation dataset. If not provided, use `self.devset`.
+            num_threads (int): The number of threads to use for parallel evaluation. If not provided, use
+                `self.num_threads`.
+            display_progress (bool): Whether to display progress during evaluation. If not provided, use
+                `self.display_progress`.
+            display_table (bool): Whether to display the evaluation results in a table. If not provided, use
+                `self.display_table`.
+            return_all_scores (bool): Whether to return scores for every data record in `devset`. If not provided,
+                use `self.return_all_scores`.
+            return_outputs (bool): Whether to return the dspy program's outputs for every example in `devset`. If not
+                provided, use `self.return_outputs`.
+
+        Returns:
+            The evaluation results are returned in different formats based on the flags:
+
+            - Base return: A float percentage score (e.g., 67.30) representing overall performance
+
+            - With `return_all_scores=True`:
+              Returns (overall_score, individual_scores) where individual_scores is a list of
+              float scores for each example in devset
+
+            - With `return_outputs=True`:
+              Returns (overall_score, result_triples) where result_triples is a list of
+              (example, prediction, score) tuples for each example in devset
+
+            - With both flags=True:
+              Returns (overall_score, result_triples, individual_scores)
+
+        """
         metric = metric if metric is not None else self.metric
         devset = devset if devset is not None else self.devset
         num_threads = num_threads if num_threads is not None else self.num_threads
diff --git a/dspy/predict/react.py b/dspy/predict/react.py
index 53d5195a59..e116544d68 100644
--- a/dspy/predict/react.py
+++ b/dspy/predict/react.py
@@ -9,7 +9,6 @@
 from dspy.signatures.signature import ensure_signature
 from dspy.utils.callback import with_callbacks

-
 class Tool:

     def __init__(
@@ -19,6 +18,7 @@ def __init__(
         desc: str = None,
         args: dict[str, Any] = None,
         defaults: dict[str, Any] = None,
+        private_defaults: dict[str, Any] = None,
     ):
         annotations_func = func if inspect.isfunction(func) or inspect.ismethod(func) else func.__call__
         self.func = func
@@ -32,6 +32,7 @@ def __init__(
             if k != "return"
         }
         self.defaults = defaults
+        self.private_defaults = private_defaults

     @with_callbacks
     def __call__(self, *args, **kwargs):
@@ -74,6 +75,8 @@ def __init__(self, signature, tools: list[Callable], max_iters=5):
             desc += f" It takes arguments {args} in JSON format."
             if tool.defaults:
                 desc += f" Default arguments are {tool.defaults}."
+            if tool.private_defaults:
+                desc += f" Assume the following function arguments will be provided at function execution time: {tool.private_defaults.keys()}. Therefore do not propose these arguments in the `next_tool_args`."
             instr.append(f"({idx+1}) {tool.name}{desc}")

         react_signature = (
@@ -102,13 +105,25 @@ def format(trajectory: dict[str, Any], last_iteration: bool):

         for idx in range(self.max_iters):
             pred = self.react(**input_args, trajectory=format(trajectory, last_iteration=(idx == self.max_iters - 1)))

+            # extract private defaults from the tool and supply them to the next tool call
+            # do not assign the private defaults to the next_tool_args as this will be captured in the trajectory logs, which is not what we want
+            private_defaults = (
+                self.tools[pred.next_tool_name].private_defaults
+                if pred.next_tool_name in self.tools
+                and self.tools[pred.next_tool_name].private_defaults
+                else {}
+            )
+
             trajectory[f"thought_{idx}"] = pred.next_thought
             trajectory[f"tool_name_{idx}"] = pred.next_tool_name
             trajectory[f"tool_args_{idx}"] = pred.next_tool_args

             try:
-                trajectory[f"observation_{idx}"] = self.tools[pred.next_tool_name](**pred.next_tool_args)
+                trajectory[f"observation_{idx}"] = self.tools[pred.next_tool_name](
+                    **pred.next_tool_args, **private_defaults
+                )
             except Exception as e:
+                # risk that the error log will capture the private defaults?
                 trajectory[f"observation_{idx}"] = f"Failed to execute: {e}"

             if pred.next_tool_name == "finish":
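
Usage sketch: assuming the `Tool` and `ReAct` signatures shown in this diff (and that `ReAct` accepts pre-built `Tool` instances, as the modified `dspy/predict/react.py` suggests), a tool with visible `defaults` and call-time-only `private_defaults` might be wired up as below. The `get_weather` function, its arguments, and the placeholder API key are hypothetical and used only for illustration.

import dspy
from dspy.predict.react import Tool


def get_weather(city: str, units: str = "metric", api_key: str = "") -> str:
    """Hypothetical tool function: return a short weather report for `city`."""
    return f"It is 3 degrees ({units}) in {city}."


# `defaults` are appended to the tool description so the LM knows about them;
# `private_defaults` are never shown to the LM or written to the trajectory,
# and are only injected as keyword arguments when the tool is executed.
weather = Tool(
    get_weather,
    desc="Looks up the current weather for a city.",
    defaults={"units": "metric"},
    private_defaults={"api_key": "<secret-key>"},
)

react = dspy.ReAct("question -> answer", tools=[weather], max_iters=5)
# After configuring an LM, e.g. dspy.configure(lm=...), the module can be called:
# prediction = react(question="What is the weather in Berlin?")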