
Commit

updated docs
vedpatwardhan committed Sep 4, 2024
1 parent 1e295ad commit fd72209
Showing 17 changed files with 62 additions and 242 deletions.
6 changes: 3 additions & 3 deletions api-reference/llm_queries/chat_completions.mdx
@@ -182,8 +182,8 @@ curl --request POST \
"response_format": "{ \"type\": \"json_mode\"}",
"seed": 11,
"stream_options": [
"include_usage",
true
true,
"include_usage"
],
"top_p": 0.5,
"tool_choice": "{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}",
@@ -203,7 +203,7 @@ url = "https://api.unify.ai/v0/chat/completions"

headers = {"Authorization": "Bearer <token>"}

json_input = {"messages": [{"content": "Tell me a joke", "role": "user"}], "model": "gpt-4o-mini@openai", "max_tokens": 1024, "stop": ["The End.", " is the answer."], "stream": False, "temperature": 0.9, "frequency_penalty": 1.5, "logit_bias": {"0": 10, "1": -75, "2": 90}, "logprobs": False, "top_logprobs": 15, "n": 15, "presence_penalty": -1.1, "response_format": "{ "type": "json_mode"}", "seed": 11, "stream_options": ["include_usage", True], "top_p": 0.5, "tool_choice": "{"type": "function", "function": {"name": "my_function"}}", "parallel_tool_calls": True, "user": "some_user", "signature": "python", "use_custom_keys": True, "tags": True, "drop_params": True}
json_input = {"messages": [{"content": "Tell me a joke", "role": "user"}], "model": "gpt-4o-mini@openai", "max_tokens": 1024, "stop": ["The End.", " is the answer."], "stream": False, "temperature": 0.9, "frequency_penalty": 1.5, "logit_bias": {"0": 10, "1": -75, "2": 90}, "logprobs": False, "top_logprobs": 15, "n": 15, "presence_penalty": -1.1, "response_format": "{ "type": "json_mode"}", "seed": 11, "stream_options": [True, "include_usage"], "top_p": 0.5, "tool_choice": "{"type": "function", "function": {"name": "my_function"}}", "parallel_tool_calls": True, "user": "some_user", "signature": "python", "use_custom_keys": True, "tags": True, "drop_params": True}

response = requests.request("POST", url, json=json_input, headers=headers)

4 changes: 2 additions & 2 deletions api-reference/openapi.json
@@ -3506,8 +3506,8 @@
"title": "Stream Options",
"description": "Options for streaming response. Only set this when you set `stream: true`.",
"example": [
"include_usage",
true
true,
"include_usage"
]
},
"top_p": {
1 change: 1 addition & 0 deletions mint.json
@@ -100,6 +100,7 @@
"pages": [
"python/dataset",
"python/evaluation",
"python/evaluator",
"python/exceptions",
{
"group": "queries",
230 changes: 4 additions & 226 deletions python/evaluation.mdx
@@ -17,10 +17,9 @@ class Evaluation()
### \_\_init\_\_

```python
def __init__(agent: Union[Agent, str],
dataset: Union[Dataset, str],
evaluator: Union[Evaluator, str],
auto_sync: bool = False,
def __init__(agent: Union[str, Client, Agent],
dataset: Union[str, Dataset],
evaluator: Union[str, Evaluator],
api_key: Optional[str] = None)
```

@@ -35,47 +34,6 @@ Initialize a local evaluation for a dataset of LLM queries.

- `evaluator` - The evaluator used to judge the quality of the responses.

- `auto_sync` - Whether to automatically keep this dataset fully synchronized
with the upstream variant at all times.

- `api_key` - API key for accessing the Unify API. If None, it attempts to
retrieve the API key from the environment variable UNIFY_KEY. Defaults to
None.


**Raises**:

- `UnifyError` - If the API key is missing.
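
For illustration, a minimal usage sketch (assuming `Evaluation` is exposed from the
top-level `unify` package; the endpoint, dataset and evaluator names below are
placeholders):

```python
import unify

# Sketch only: the endpoint, dataset and evaluator names are placeholders.
evaluation = unify.Evaluation(
    agent="gpt-4o-mini@openai",    # endpoint string, or a local Agent / Client instance
    dataset="my_prompts",          # uploaded dataset name, or a local Dataset instance
    evaluator="my_quality_judge",  # evaluator name, or a local Evaluator instance
    api_key=None,                  # falls back to the UNIFY_KEY environment variable
)
```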

<a id="evaluation.Evaluation.from_upstream"></a>

---

### from\_upstream

```python
@staticmethod
def from_upstream(agent: str,
dataset: str,
evaluator: str,
auto_sync: bool = False,
api_key: Optional[str] = None)
```

Initialize a local evaluation for a dataset of LLM queries.

**Arguments**:

- `agent` - The agent that is being evaluated, either a local LLM agent or a
string for an endpoint available in the platform.

- `dataset` - The dataset that the evaluation has been performed on.

- `evaluator` - The evaluator used to judge the quality of the responses.

- `auto_sync` - Whether to automatically keep this dataset fully synchronized
with the upstream variant at all times.

- `api_key` - API key for accessing the Unify API. If None, it attempts to
retrieve the API key from the environment variable UNIFY_KEY. Defaults to
None.
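
As a rough sketch of this upstream variant (all names are placeholders, and the
agent endpoint, dataset and evaluator are assumed to already exist in the account):

```python
import unify

# Sketch only: assumes the named resources already exist upstream.
evaluation = unify.Evaluation.from_upstream(
    agent="gpt-4o-mini@openai",
    dataset="my_prompts",
    evaluator="my_quality_judge",
)
```
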
@@ -85,184 +43,4 @@ Initialize a local evaluation for a dataset of LLM queries.

- `UnifyError` - If the API key is missing.

<a id="evaluation.Evaluation.from_file"></a>

---

### from\_file

```python
@staticmethod
def from_file(filepath: str,
agent: Union[Agent, str],
evaluator: Union[Evaluator, str],
auto_sync: bool = False,
api_key: Optional[str] = None)
```

Loads the evaluation from a local .jsonl filepath.

**Arguments**:

- `filepath` - Filepath (.jsonl) to load the dataset from.

- `agent` - The agent that is being evaluated, either a local LLM agent or a
string for an endpoint available in the platform.

- `evaluator` - The evaluator used to judge the quality of the responses.

- `auto_sync` - Whether to automatically keep this dataset fully synchronized
with the upstream variant at all times.

- `api_key` - API key for accessing the Unify API. If None, it attempts to
retrieve the API key from the environment variable UNIFY_KEY. Defaults to
None.
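
A similar hedged sketch for loading from disk (the file path, endpoint and
evaluator name are placeholders):

```python
import unify

# Sketch only: placeholder .jsonl path, endpoint and evaluator name.
evaluation = unify.Evaluation.from_file(
    filepath="evals/joke_quality.jsonl",
    agent="gpt-4o-mini@openai",
    evaluator="my_quality_judge",
)
```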

<a id="evaluation.Evaluation.upload"></a>

---

### upload

```python
def upload(overwrite=False)
```

Uploads all unique local data in the evaluation's dataset to the user account
upstream. This function will not download any unique values from upstream.
Use `sync` to synchronize and superset the datasets in both directions.
Set `overwrite=True` to disregard any pre-existing upstream data.

**Arguments**:

- `overwrite` - Whether to overwrite the upstream dataset if it already exists.

<a id="evaluation.Evaluation.download"></a>

---

### download

```python
def download(overwrite=False)
```

Downloads all unique upstream data from the user account to the local dataset.
This function will not upload any unique values stored locally.
Use `sync` to synchronize and superset the datasets in both directions.
Set `overwrite=True` to disregard any pre-existing data stored in this class.

**Arguments**:

- `overwrite` - Whether to overwrite the local data, if any already exists.

<a id="evaluation.Evaluation.sync"></a>

---

### sync

```python
def sync()
```

Synchronize the dataset in both directions, downloading any values missing
locally, and uploading any values missing from upstream in the account.
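
Taken together, a brief sketch of how `upload`, `download` and `sync` relate
(assuming an `evaluation` instance constructed as above):

```python
# Sketch only: `evaluation` is assumed to be an existing Evaluation instance.
evaluation.upload(overwrite=False)    # push unique local entries upstream only
evaluation.download(overwrite=False)  # pull unique upstream entries locally only
evaluation.sync()                     # both directions, forming a superset on each side
```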

<a id="evaluation.Evaluation.upstream_diff"></a>

---

### upstream\_diff

```python
def upstream_diff()
```

Prints the difference between the local dataset and the upstream dataset.

<a id="evaluation.Evaluation.save_to_file"></a>

---

### save\_to\_file

```python
def save_to_file(filepath: str)
```

Saves the dataset to a local .jsonl filepath.

**Arguments**:

- `filepath` - Filepath (.jsonl) to save the dataset to.

<a id="evaluation.Evaluation.add"></a>

---

### add

```python
def add(other: __class__)
```

Adds another dataset to this one, returning a new Dataset instance, with this
new dataset receiving all unique queries from the other added dataset.

**Arguments**:

- `other` - The other dataset being added to this one.

<a id="evaluation.Evaluation.sub"></a>

---

### sub

```python
def sub(other: __class__)
```

Subtracts another dataset from this one, returning a new Dataset instance, with
this new dataset losing all queries from the other subtracted dataset.

**Arguments**:

- `other` - The other dataset being subtracted from this one.

<a id="evaluation.Evaluation.__iadd__"></a>

---

### \_\_iadd\_\_

```python
def __iadd__(other)
```

Adds another dataset to this one, with this dataset receiving all unique queries
from the other added dataset.

**Arguments**:

- `other` - The other dataset being added to this one.

<a id="evaluation.Evaluation.__isub__"></a>

---

### \_\_isub\_\_

```python
def __isub__(other)
```

Subtracts another dataset from this one, with this dataset losing all queries
from the other subtracted dataset.

**Arguments**:

- `other` - The other dataset being subtracted from this one.
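
A brief sketch of this dataset-style arithmetic (both operands are assumed to be
existing instances):

```python
# Sketch only: `eval_a` and `eval_b` are assumed to be existing instances.
combined = eval_a.add(eval_b)  # new instance combining the unique queries of both
reduced = eval_a.sub(eval_b)   # new instance without the queries found in eval_b
eval_a += eval_b               # in-place union, via __iadd__
eval_a -= eval_b               # in-place difference, via __isub__
```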

<a id="evaluator"></a>
<a id="__init__"></a>
41 changes: 41 additions & 0 deletions python/evaluator.mdx
@@ -0,0 +1,41 @@
---
title: 'evaluator'
---

<a id="evaluator.Evaluator"></a>

## Evaluator

```python
class Evaluator(abc.ABC)
```

<a id="evaluator.Evaluator.evaluate"></a>

---

### evaluate

```python
@abstractmethod
def evaluate(agent: Union[str, Client, Agent],
dataset: Union[str, Dataset],
default_query: Query = None)
```

Evaluate the agent on the given dataset, based on this evaluator.

**Arguments**:

- `agent` - Name of the endpoint or handle to the local Agent (possibly involving
multi-step LLM calls) to evaluate.

- `dataset` - Name of the uploaded dataset or handle to the local Dataset
instance to evaluate.

- `default_query` - The default query for evaluation, which each unique query in
the dataset will inherit from, overwriting the extra fields. This query can
therefore include temperature, system prompt, tools etc. which are not
present in each query in the dataset.
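
As a hedged illustration, a minimal subclass sketch (the import path, the explicit
`self` parameter, the scoring rule and the return convention are all assumptions,
not the library's actual contract):

```python
from unify import Evaluator  # assumed import path

class KeywordEvaluator(Evaluator):
    """Judges a response as good if it mentions a required keyword (illustrative only)."""

    def __init__(self, keyword: str):
        self.keyword = keyword

    def evaluate(self, agent, dataset, default_query=None):
        # A real implementation would run `agent` over each query in `dataset`
        # (each merged with `default_query`) and score the responses, e.g. by
        # checking whether self.keyword appears in each generated answer.
        ...
```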

<a id="utils"></a>
2 changes: 1 addition & 1 deletion python/exceptions.mdx
@@ -92,4 +92,4 @@ class InternalServerError(UnifyError)

Exception raised for HTTP 500 Internal Server Error responses.

<a id="_caching"></a>
<a id="evaluator"></a>
1 change: 0 additions & 1 deletion python/queries/chat.mdx
@@ -207,4 +207,3 @@ Starts the chat interaction loop.
- `show_credits` - Whether to show credit consumption. Defaults to False.
- `show_provider` - Whether to show the provider used. Defaults to False.

<a id="queries.multi_llm"></a>
2 changes: 1 addition & 1 deletion python/queries/clients.mdx
@@ -1250,4 +1250,4 @@ class AsyncUnify(Client)
Class for interacting with the Unify chat completions endpoint in an asynchronous
manner.

<a id="queries.chat"></a>
<a id="queries.multi_llm"></a>
1 change: 1 addition & 0 deletions python/queries/multi_llm.mdx
@@ -195,3 +195,4 @@ Generate content using the Unify API.

- `UnifyError` - If an error occurs during content generation.

<a id="queries.chat"></a>
2 changes: 1 addition & 1 deletion python/utils/credits.mdx
@@ -28,4 +28,4 @@ Returns the credits remaining in the user account, in USD.

- `ValueError` - If there was an HTTP error.

<a id="utils.router_training"></a>
<a id="utils.efficiency_benchmarks"></a>
2 changes: 1 addition & 1 deletion python/utils/custom_api_keys.mdx
@@ -143,4 +143,4 @@ Get a list of custom API keys associated with the user's account.
A list of dictionaries containing custom API key information.
Each dictionary has 'name' and 'value' keys.

<a id="utils.helpers"></a>
<a id="utils.evaluators"></a>
2 changes: 1 addition & 1 deletion python/utils/custom_endpoints.mdx
@@ -129,4 +129,4 @@ Get a list of custom endpoints for the authenticated user.

- `requests.exceptions.RequestException` - If the API request fails.

<a id="utils.evaluators"></a>
<a id="utils.supported_endpoints"></a>
2 changes: 1 addition & 1 deletion python/utils/datasets.mdx
@@ -147,4 +147,4 @@ Fetches a list of all uploaded datasets.

- `ValueError` - If there was an HTTP error.

<a id="utils.supported_endpoints"></a>
<a id="utils.logging"></a>
2 changes: 1 addition & 1 deletion python/utils/evaluations.mdx
@@ -161,4 +161,4 @@ Get the evaluation status for a specific dataset, endpoint, and evaluator.

A dictionary containing the evaluation status information.

<a id="utils.custom_api_keys"></a>
<a id="utils.helpers"></a>
2 changes: 1 addition & 1 deletion python/utils/evaluators.mdx
@@ -136,4 +136,4 @@ Get a list of available evaluators.

A list of available evaluator names if successful, otherwise an empty list.

<a id="queries"></a>
<a id="utils.router_training"></a>
2 changes: 1 addition & 1 deletion python/utils/logging.mdx
@@ -86,4 +86,4 @@ Get query metrics for specified parameters.

A dictionary containing the query metrics.

<a id="utils.router_deployment"></a>
<a id="utils.custom_api_keys"></a>
2 changes: 1 addition & 1 deletion python/utils/supported_endpoints.mdx
@@ -94,4 +94,4 @@ provider.
- `BadRequestError` - If there was an HTTP error.
- `ValueError` - If there was an error parsing the JSON response.

<a id="utils.efficiency_benchmarks"></a>
<a id="utils.router_deployment"></a>
