Skip to content

Commit

Permalink
Add agent example use case to generate query, positive and negative e…
Browse files Browse the repository at this point in the history
…xamples (#451)
  • Loading branch information
zechengz authored Jun 20, 2024
1 parent 6df0552 commit b481c72
Show file tree
Hide file tree
Showing 8 changed files with 322 additions and 0 deletions.
14 changes: 14 additions & 0 deletions camel/configs/openai_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,19 @@ class ChatGPTConfig(BaseConfig):
(default: :obj:`1.0`)
n (int, optional): How many chat completion choices to generate for
each input message. (default: :obj:`1`)
response_format (object, optional): An object specifying the format
that the model must output. Compatible with GPT-4 Turbo and all
GPT-3.5 Turbo models newer than gpt-3.5-turbo-1106. Setting to
{"type": "json_object"} enables JSON mode, which guarantees the
message the model generates is valid JSON. Important: when using
JSON mode, you must also instruct the model to produce JSON
yourself via a system or user message. Without this, the model
may generate an unending stream of whitespace until the generation
reaches the token limit, resulting in a long-running and seemingly
"stuck" request. Also note that the message content may be
partially cut off if finish_reason="length", which indicates the
generation exceeded max_tokens or the conversation exceeded the
max context length.
stream (bool, optional): If True, partial message deltas will be sent
as data-only server-sent events as they become available.
(default: :obj:`False`)
Expand Down Expand Up @@ -95,6 +108,7 @@ class ChatGPTConfig(BaseConfig):
stop: str | Sequence[str] | NotGiven = NOT_GIVEN
max_tokens: int | NotGiven = NOT_GIVEN
presence_penalty: float = 0.0
response_format: dict | NotGiven = NOT_GIVEN
frequency_penalty: float = 0.0
logit_bias: dict = field(default_factory=dict)
user: str = ""
Expand Down
4 changes: 4 additions & 0 deletions camel/prompts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
from .code import CodePromptTemplateDict
from .descripte_video_prompt import DescriptionVideoPromptTemplateDict
from .evaluation import EvaluationPromptTemplateDict
from .generate_text_embedding_data import (
GenerateTextEmbeddingDataPromptTemplateDict,
)
from .misalignment import MisalignmentPromptTemplateDict
from .object_recognition import ObjectRecognitionPromptTemplateDict
from .prompt_templates import PromptTemplateGenerator
Expand All @@ -37,6 +40,7 @@
'TaskPromptTemplateDict',
'PromptTemplateGenerator',
'SolutionExtractionPromptTemplateDict',
'GenerateTextEmbeddingDataPromptTemplateDict',
'ObjectRecognitionPromptTemplateDict',
'DescriptionVideoPromptTemplateDict',
]
79 changes: 79 additions & 0 deletions camel/prompts/generate_text_embedding_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the “License”);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an “AS IS” BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import Any

from camel.prompts import TextPrompt, TextPromptDict
from camel.types import RoleType


# flake8: noqa :E501
class GenerateTextEmbeddingDataPromptTemplateDict(TextPromptDict):
    r"""Prompt templates for synthesizing text-embedding training data.

    The templates follow the `"Improving Text Embeddings with Large
    Language Models" <https://arxiv.org/abs/2401.00368>`_ paper: one prompt
    brainstorms retrieval tasks, the other produces a query together with
    a positive and a hard negative document for a given task.

    Attributes:
        GENERATE_TASKS (TextPrompt): A prompt asking for a list of
            :obj:`num_tasks` synthetic text retrieval tasks.
        ASSISTANT_PROMPT (TextPrompt): A system prompt for the AI assistant
            to write one JSON example containing :obj:`user_query`,
            :obj:`positive_document` and :obj:`hard_negative_document`
            for a specific :obj:`task`. It is parameterized by
            :obj:`query_type`, :obj:`query_length`, :obj:`clarity`,
            :obj:`num_words`, :obj:`language` and :obj:`difficulty`.
    """

    # NOTE: the prompt bodies below are runtime text; their line breaks and
    # column-0 continuation lines are part of the prompt sent to the model.
    GENERATE_TASKS = TextPrompt(
        """You are an expert to brainstorm a list of {num_tasks} potentially useful text retrieval tasks
Here are a few examples for your reference:
- Provided a scientific claim as query, retrieve documents that help verify or refute the claim.
- Search for documents that answers a FAQ-style query on children's nutrition.
Please adhere to the following guidelines:
- Specify what the query is, and what the desired documents are.
- Each retrieval task should cover a wide range of queries, and should not be too specific.
Your output should always be a python list of strings starting with `1.`, `2.` etc.
And each element corresponds to a distinct retrieval task in one sentence.
Do not explain yourself or output anything else.
Be creative!"""
    )

    ASSISTANT_PROMPT = TextPrompt(
        """You have been assigned a retrieval task: {task}
Your mission is to write one text retrieval example for this task in JSON format. The JSON object must
contain the following keys:
- "user_query": a string, a random user search query specified by the retrieval task.
- "positive_document": a string, a relevant document for the user query.
- "hard_negative_document": a string, a hard negative document that only appears relevant to the query.
Please adhere to the following guidelines:
- The "user_query" should be {query_type}, {query_length}, {clarity}, and diverse in topic.
- All documents must be created independent of the query. Avoid copying the query verbatim.
It's acceptable if some parts of the "positive_document" are not topically related to the query.
- All documents should be at least {num_words} words long.
- The "hard_negative_document" contains some useful information, but it should be less useful or comprehensive compared to the "positive_document".
- Both the query and documents should be in {language}.
- Do not provide any explanation in any document on why it is relevant or not relevant to the query.
- Both the query and documents require {difficulty} level education to understand.
Your output must always be a JSON object only (starting and ending with curly brackets), do not explain yourself or output anything else. Be creative!"""
    )

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Initialize the dictionary and register both prompt templates.

        Args:
            *args (Any): Positional arguments forwarded unchanged to the
                parent initializer.
            **kwargs (Any): Keyword arguments forwarded unchanged to the
                parent initializer.
        """
        super().__init__(*args, **kwargs)
        # The task-generation prompt is stored under a plain string key,
        # while the example-generation prompt is keyed by the assistant role.
        prompt_table = {
            "generate_tasks": self.GENERATE_TASKS,
            RoleType.ASSISTANT: self.ASSISTANT_PROMPT,
        }
        self.update(prompt_table)
4 changes: 4 additions & 0 deletions camel/prompts/task_prompt_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
from camel.prompts.evaluation import (
EvaluationPromptTemplateDict,
)
from camel.prompts.generate_text_embedding_data import (
GenerateTextEmbeddingDataPromptTemplateDict,
)
from camel.prompts.misalignment import MisalignmentPromptTemplateDict
from camel.prompts.object_recognition import (
ObjectRecognitionPromptTemplateDict,
Expand Down Expand Up @@ -60,6 +63,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
TaskType.SOLUTION_EXTRACTION: SolutionExtractionPromptTemplateDict(), # noqa: E501
TaskType.ROLE_DESCRIPTION: RoleDescriptionPromptTemplateDict(),
TaskType.OBJECT_RECOGNITION: ObjectRecognitionPromptTemplateDict(), # noqa: E501
TaskType.GENERATE_TEXT_EMBEDDING_DATA: GenerateTextEmbeddingDataPromptTemplateDict(), # noqa: E501
TaskType.VIDEO_DESCRIPTION: DescriptionVideoPromptTemplateDict(), # noqa: E501
}
)
1 change: 1 addition & 0 deletions camel/types/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ class TaskType(Enum):
EVALUATION = "evaluation"
SOLUTION_EXTRACTION = "solution_extraction"
ROLE_DESCRIPTION = "role_description"
GENERATE_TEXT_EMBEDDING_DATA = "generate_text_embedding_data"
OBJECT_RECOGNITION = "object_recognition"
DEFAULT = "default"
VIDEO_DESCRIPTION = "video_description"
Expand Down
Loading

0 comments on commit b481c72

Please sign in to comment.