Skip to content

Commit b8d4c42

Browse files
chore: add Obot Website Knowledge tool
1 parent d03e7d0 commit b8d4c42

File tree

3 files changed

+61
-85
lines changed

3 files changed

+61
-85
lines changed

search/tavily/main.py

Lines changed: 46 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -7,33 +7,40 @@
77
from tavily import TavilyClient
88
from urllib3.util import parse_url
99

10+
def resolve_tool_name(argv: list[str]) -> str:
    """Return the display name of the tool used in generated context text.

    The tool files invoke this script as ``main.py <command> [tool-name]``,
    so the optional name is the second positional argument; ``argv[1]`` is
    the command that ``main()`` dispatches on and must not be mistaken for
    the name.

    Args:
        argv: The process argument vector, normally ``sys.argv``.

    Returns:
        ``argv[2]`` when present, otherwise the TAVILY_TOOL_NAME environment
        variable, otherwise ``"Tavily"``.
    """
    if len(argv) > 2:
        return argv[2]
    return os.getenv("TAVILY_TOOL_NAME", "Tavily")


tool_name = resolve_tool_name(sys.argv)
13+
1014

1115
def main():
1216
if len(sys.argv) < 2:
1317
print("Usage: python main.py [search | extract]")
1418
sys.exit(1)
1519

1620
command = sys.argv[1]
17-
client = TavilyClient() # env TAVILY_API_KEY required
1821

1922
match command:
20-
case "search" | "safe-search":
23+
case "site-search-context":
24+
site_search_context()
25+
sys.exit(0)
26+
case "search" | "site-search":
27+
client = TavilyClient() # env TAVILY_API_KEY required
2128
query = os.getenv("QUERY", "").strip()
2229
if not query:
2330
print("No search query provided")
2431
sys.exit(1)
2532

26-
domains_str = os.getenv("INCLUDE_DOMAINS", "")
27-
include_domains = [
28-
domain.strip() for domain in domains_str.split(",") if domain.strip()
29-
]
30-
31-
# safe-search is a special case where we only allow certain domains
33+
# site-search is a special case where we only allow certain domains
3234
# this is a different command so that we can use the same code for different tool implementations
33-
if command == "safe-search":
34-
include_domains = check_allowed_include_domains(include_domains)
35-
36-
max_results = 10 # broader search if general,
35+
if command == "site-search":
36+
include_domains = get_allowed_domains_or_fail()
37+
else:
38+
domains_str = os.getenv("INCLUDE_DOMAINS", "")
39+
include_domains = [
40+
domain.strip() for domain in domains_str.split(",") if domain.strip()
41+
]
42+
43+
max_results = 5 # broader search if general,
3744
if len(include_domains) > 0:
3845
max_results = 3 * len(
3946
include_domains
@@ -51,6 +58,7 @@ def main():
5158
include_domains=include_domains,
5259
)
5360
case "extract":
61+
client = TavilyClient() # env TAVILY_API_KEY required
5462
url = parse_url(os.getenv("URL").strip())
5563

5664
# default to https:// if no scheme is provided
@@ -76,43 +84,33 @@ def main():
7684
# print the response as a valid json object
7785
print(json.dumps(response))
7886

79-
80-
def check_allowed_include_domains(include_domains: List[str]) -> List[str]:
81-
# TAVILY_ALLOWED_DOMAINS has the TAVILY_ prefix as it will be set by Obot directly in the env,
82-
# while e.g. INCLUDE_DOMAINS is a tool parameter
83-
allowed_domains_str = os.getenv("TAVILY_ALLOWED_DOMAINS", "")
84-
allowed_domains = [
85-
domain.strip() for domain in allowed_domains_str.split(",") if domain.strip()
86-
]
87-
88-
if len(allowed_domains) == 0:
89-
print("No allowed domains provided")
87+
def site_search_context():
    """Print the website-knowledge context block for the configured sites.

    Reads the JSON document stored in the OBOT_WEBSITE_KNOWLEDGE environment
    variable and prints, for every entry under ``"sites"`` that has a
    non-empty ``"site"``, its domain and (when present) its description.
    The output is framed by ``WEBSITE KNOWLEDGE:`` / ``END WEBSITE KNOWLEDGE``
    marker lines.

    Raises:
        json.JSONDecodeError: If OBOT_WEBSITE_KNOWLEDGE is set to text that
            is not valid JSON.
    """
    # The first sentence used to end with a stray '"' that was printed
    # verbatim; it has been removed.
    print(f"""WEBSITE KNOWLEDGE:
Use the {tool_name} website knowledge tool to search the following
configured domains:
""")
    # An unset or empty variable, or a null "sites" value, means "no sites"
    # rather than an error.
    config = json.loads(os.getenv("OBOT_WEBSITE_KNOWLEDGE") or "{}")
    for site_def in config.get("sites") or []:
        site = site_def.get("site", "")
        description = site_def.get("description", "")
        if site:
            print(f"DOMAIN: {site}\n")
            # A description is only meaningful next to its domain, so an
            # entry without a domain is skipped entirely.
            if description:
                print(f"DESCRIPTION: {description}\n")
    print("END WEBSITE KNOWLEDGE\n")
102+
103+
def get_allowed_domains_or_fail() -> List[str]:
104+
result = []
105+
config = json.loads(os.getenv("OBOT_WEBSITE_KNOWLEDGE", "{}"))
106+
for site_def in config.get("sites", []):
107+
site = site_def.get("site", "")
108+
if site:
109+
result.append(site)
110+
if len(result) == 0:
111+
logging.error("No allowed domains found in OBOT_WEBSITE_KNOWLEDGE")
90112
sys.exit(1)
91-
92-
# allow not setting INCLUDE_DOMAINS - fallback to all allowed domains
93-
if len(include_domains) == 0:
94-
return allowed_domains
95-
96-
allowed_include_domains = []
97-
disallowed_include_domains = []
98-
99-
for domain in include_domains:
100-
if domain in allowed_domains:
101-
allowed_include_domains.append(domain)
102-
else:
103-
disallowed_include_domains.append(domain)
104-
105-
if len(disallowed_include_domains) > 0:
106-
if os.getenv("TAVILY_ALLOWED_DOMAINS_STRICT", "").lower() == "true":
107-
print(
108-
f"Tried to access domains {disallowed_include_domains} which are not listed in allowed domains {allowed_domains}"
109-
)
110-
sys.exit(1)
111-
logging.warning(
112-
f"Filtered out {disallowed_include_domains} as they are not listed in allowed domains {allowed_domains}. Continuing with {allowed_include_domains}"
113-
)
114-
include_domains = allowed_include_domains
115-
return include_domains
113+
return result
116114

117115

118116
if __name__ == "__main__":

search/tavily/safesearch.gpt

Lines changed: 0 additions & 37 deletions
This file was deleted.

search/tavily/websiteknowledge.gpt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
Name: Obot Website Knowledge
2+
Description: Search an explicit list of configured domains.
3+
Credential: ./credential
4+
Share Context: tavily-safe-search-context
5+
Metadata: icon: /admin/assets/obot_search_icon.png
6+
Param: query: The search query
7+
8+
#!/usr/bin/env python3 ${GPTSCRIPT_TOOL_DIR}/main.py site-search Obot
9+
10+
---
11+
Name: tavily-safe-search-context
12+
Type: context
13+
Share Context: ../../time
14+
15+
#!/usr/bin/env python3 ${GPTSCRIPT_TOOL_DIR}/main.py site-search-context Obot

0 commit comments

Comments
 (0)