
Commit b03e716

fix: T2S integration (#143)
* fix: refactor code
* fix: retry for the try/except block
* chores: added param
* chores: added param in the fetch
* fix: update write logic
* fix: update zotero read logic
* chores: updated
* chores: update system prompt
* chores: update tool config
* feat: added embedding model key
* chores: fix formating
* chores: fix formating
* fix: update packages
* fix: update utils
* chores: update app
* fix: formating
* fix: formating
* chores: Updated module level doc string
* chores: updated s2 prompt
* fix: updated tests
* fix: updated integartion and routing logic
* fix: no response test
* chores: added docstings for text embedding
* chores: added docstings for text embedding
* fix: pylinting
* chores: updated filter param
* chores: updated filter param
* chores: updated prompt
* fix: updated the trigger
* chores: update

---------

Co-authored-by: gurdeep330 <[email protected]>
1 parent 6bbd875 commit b03e716

38 files changed: +788 / -307 lines changed

.github/workflows/ci.yml

Lines changed: 0 additions & 3 deletions
@@ -11,9 +11,6 @@ on:
   push:
     branches:
       - main
-  pull_request:
-    branches:
-      - main
   workflow_dispatch:
 
 jobs:

aiagents4pharma/talk2scholars/agents/main_agent.py

Lines changed: 18 additions & 10 deletions
@@ -1,11 +1,14 @@
 #!/usr/bin/env python3
 
 """
-Main agent for the talk2scholars app using ReAct pattern.
+Main agent module for initializing and running the Talk2Scholars application.
 
-This module implements a hierarchical agent system where a supervisor agent
-routes queries to specialized sub-agents. It follows the LangGraph patterns
-for multi-agent systems and implements proper state management.
+This module sets up the hierarchical agent system using LangGraph and integrates
+various sub-agents for handling different tasks such as semantic scholar, zotero,
+PDF processing, and paper downloading.
+
+Functions:
+- get_app: Initializes and returns the LangGraph-based hierarchical agent system.
 """
 
 import logging
@@ -16,6 +19,8 @@
 from langgraph.checkpoint.memory import MemorySaver
 from ..agents.s2_agent import get_app as get_app_s2
 from ..agents.zotero_agent import get_app as get_app_zotero
+from ..agents.pdf_agent import get_app as get_app_pdf
+from ..agents.paper_download_agent import get_app as get_app_paper_download
 from ..state.state_talk2scholars import Talk2Scholars
 
 # Initialize logger
@@ -43,12 +48,13 @@ def get_app(uniq_id, llm_model: BaseChatModel):
         >>> app = get_app("thread_123")
         >>> result = app.invoke(initial_state)
     """
-    if llm_model.model_name == "gpt-4o-mini":
-        llm_model = ChatOpenAI(
-            model="gpt-4o-mini",
-            temperature=0,
-            model_kwargs={"parallel_tool_calls": False},
-        )
+    if hasattr(llm_model, "model_name"):
+        if llm_model.model_name == "gpt-4o-mini":
+            llm_model = ChatOpenAI(
+                model="gpt-4o-mini",
+                temperature=0,
+                model_kwargs={"parallel_tool_calls": False},
+            )
     # Load hydra configuration
     logger.log(logging.INFO, "Launching Talk2Scholars with thread_id %s", uniq_id)
     with hydra.initialize(version_base=None, config_path="../configs/"):
@@ -62,6 +68,8 @@ def get_app(uniq_id, llm_model: BaseChatModel):
         [
             get_app_s2(uniq_id, llm_model), # semantic scholar
             get_app_zotero(uniq_id, llm_model), # zotero
+            get_app_pdf(uniq_id, llm_model), # pdf
+            get_app_paper_download(uniq_id, llm_model), # paper download
         ],
         model=llm_model,
         state_schema=Talk2Scholars,
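The new `hasattr` guard matters because not every `BaseChatModel` backend exposes a `model_name` attribute (the frontend now also offers an NVIDIA-hosted model, see the config change below). A minimal sketch of the pattern; the helper name `_normalize_llm` is illustrative, not part of the module:

```python
from langchain_core.language_models import BaseChatModel
from langchain_openai import ChatOpenAI


def _normalize_llm(llm_model: BaseChatModel) -> BaseChatModel:
    """Re-instantiate gpt-4o-mini with parallel tool calls disabled; pass
    other backends (which may lack `model_name`) through untouched."""
    if hasattr(llm_model, "model_name") and llm_model.model_name == "gpt-4o-mini":
        return ChatOpenAI(
            model="gpt-4o-mini",
            temperature=0,
            model_kwargs={"parallel_tool_calls": False},
        )
    return llm_model
```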

aiagents4pharma/talk2scholars/agents/paper_download_agent.py

Lines changed: 5 additions & 6 deletions
@@ -20,6 +20,7 @@
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+
 def get_app(uniq_id, llm_model: BaseChatModel):
     """
     Initializes and returns the LangGraph application for the Talk2Scholars paper download agent.
@@ -39,22 +40,20 @@ def get_app(uniq_id, llm_model: BaseChatModel):
     with hydra.initialize(version_base=None, config_path="../configs"):
         cfg = hydra.compose(
             config_name="config",
-            overrides=["agents/talk2scholars/paper_download_agent=default"]
+            overrides=["agents/talk2scholars/paper_download_agent=default"],
         )
         cfg = cfg.agents.talk2scholars.paper_download_agent
 
     # Define tools properly
-    tools = ToolNode(
-        [download_arxiv_paper, query_results]
-    )
+    tools = ToolNode([download_arxiv_paper, query_results])
 
     # Define the model
     logger.info("Using OpenAI model %s", llm_model)
     model = create_react_agent(
         llm_model,
         tools=tools,
         state_schema=Talk2Scholars,
-        prompt=cfg.prompt,
+        prompt=cfg.paper_download_agent,
         checkpointer=MemorySaver(),
     )
 
@@ -79,7 +78,7 @@ def paper_download_agent_node(state: Talk2Scholars) -> Dict[str, Any]:
     checkpointer = MemorySaver()
 
     # Compile the graph
-    app = workflow.compile(checkpointer=checkpointer)
+    app = workflow.compile(checkpointer=checkpointer, name="agent_paper_download")
 
     # Logging the information and returning the app
     logger.info("Compiled the graph")

aiagents4pharma/talk2scholars/agents/pdf_agent.py

Lines changed: 4 additions & 10 deletions
@@ -26,10 +26,7 @@
 logger = logging.getLogger(__name__)
 
 
-def get_app(
-    uniq_id,
-    llm_model: BaseChatModel
-):
+def get_app(uniq_id, llm_model: BaseChatModel):
     """
     Initializes and returns the LangGraph application for the PDF agent.
 
@@ -40,7 +37,7 @@ def get_app(
 
     Args:
         uniq_id (str): A unique identifier for the current conversation session or thread.
-        llm_model (BaseChatModel, optional): The language model instance to be used.
+        llm_model (BaseChatModel, optional): The language model instance to be used.
             Defaults to ChatOpenAI(model="gpt-4o-mini", temperature=0).
 
     Returns:
@@ -71,10 +68,7 @@ def agent_pdf_node(state: Talk2Scholars):
             Any: The response generated by the language model after processing the state.
         """
         logger.info("Creating Agent_PDF node with thread_id %s", uniq_id)
-        response = model.invoke(
-            state,
-            {"configurable": {"thread_id": uniq_id}}
-        )
+        response = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
         return response
 
     # Define the tool node that includes the PDF QnA tool.
@@ -100,7 +94,7 @@ def agent_pdf_node(state: Talk2Scholars):
     checkpointer = MemorySaver()
 
     # Compile the graph into a runnable app.
-    app = workflow.compile(checkpointer=checkpointer)
+    app = workflow.compile(checkpointer=checkpointer, name="agent_pdf")
     logger.info("Compiled the PDF agent graph.")
 
     return app
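Both sub-agents are now compiled with an explicit graph `name` ("agent_pdf", "agent_paper_download"), which gives the supervisor built in `main_agent.py` a stable identifier for each sub-graph. A minimal sketch of the pattern; the node wiring here is a simplified stand-in, not the module's actual graph:

```python
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, StateGraph

# `Talk2Scholars` and `agent_pdf_node` are the state schema and node defined
# in the module above; only the compile call with `name=` is the point here.
workflow = StateGraph(Talk2Scholars)
workflow.add_node("agent_pdf", agent_pdf_node)
workflow.add_edge(START, "agent_pdf")

app = workflow.compile(checkpointer=MemorySaver(), name="agent_pdf")
```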
Lines changed: 18 additions & 9 deletions
@@ -1,13 +1,22 @@
 _target_: agents.main_agent.get_app
 temperature: 0
 system_prompt: >
-  You are the Talk2Scholars agent coordinating academic paper discovery and analysis.
+  You are Talk2Scholars agent coordinating academic paper discovery
+  and analysis with help of the following agents:
+  1. Agent S2: This agent can be used to search and recommend papers
+  from Semantic Scholar. Use this agent when the user asks for
+  general paper/article searches and recommendations, or to retrieve information
+  from the last displayed results table or query abstract of last
+  displayed results.
+  2. Agent Zotero: This agent can be used to retrieve, display, and query
+  papers/articles from the Zotero library. Use this agent only when the user
+  explicitly asks for papers from Zotero. This tool can also be used to
+  save papers in the zotero library.
+  3. Agent PaperFetch: This agent can be used to download papers/articles
+  from ArXiv.
+  4. Agent PDFQuery: This agent can be used to query contents of an
+  uploaded or downloaded PDF/paper/article.
 
-  You have access to the following agents:
-  1. S2_agent: This agent can be used to search and recommend papers
-  from Semantic Scholar. Use this agent when the user asks for
-  general paper searches and recommendations.
-  2. Zotero_agent: This agent can be used to retrieve, display, and query
-  papers from the Zotero library. Use this agent only when the user
-  explicitly asks for papers from Zotero. This tool can also be used to
-  save papers in under collections in the zotero library
+  Your final response should be a one sentence summary of the information
+  retrieved from the agents above. Do not repeat the information already
+  displayed to the user in the response of the agents.

aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml

Lines changed: 0 additions & 1 deletion
@@ -36,4 +36,3 @@ paper_download_agent: >
   Do not attempt to analyze or summarize papers beyond what is retrieved from the API.
   Ensure responses are structured clearly and concisely, making
   it easy for the user to understand the retrieved information.
-
aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml

Lines changed: 2 additions & 2 deletions
@@ -15,5 +15,5 @@ s2_agent: >
   1. When user requests papers, use search/recommendation tools to find papers
   2. Use `display_results` tool to display the response from the search/recommendation tools
   3. Use `query_results` tool to query over the selected paper only when the user asks to
-  4. When the user wants recommendations, you can get the "paper_id" using `query_results` tool in the "last_displayed_results" key, then
-  pass the "paper_id" to `search`, `single_paper_rec` or `multi_paper_rec` tools depending on the user's query. Do not use "arxiv_id"
+  4. When the user wants recommendations, you can get the "semantic_scholar_paper_id" using `query_results` tool in the "last_displayed_results" key, then
+  pass the "semantic_scholar_paper_id" to `search`, `single_paper_rec` or `multi_paper_rec` tools depending on the user's query. Do not use "arxiv_id"

aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@ page:
 llms:
   available_models:
     - "OpenAI/gpt-4o-mini"
+    - "NVIDIA/llama-3.3-70b-instruct"
 # # Chat UI configuration
 # chat:
 #   assistant_avatar: "🤖"
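A hedged sketch of how a frontend selection such as "NVIDIA/llama-3.3-70b-instruct" could be turned into a LangChain chat model. The provider/model split, the `ChatNVIDIA` wiring, and the NIM model id are assumptions for illustration, not the app's actual code:

```python
from langchain_openai import ChatOpenAI
from langchain_nvidia_ai_endpoints import ChatNVIDIA  # assumed optional dependency


def make_llm(selection: str):
    """Map a 'Provider/model' string from the frontend to a chat model instance."""
    provider, model = selection.split("/", 1)
    if provider == "OpenAI":
        return ChatOpenAI(model=model, temperature=0)
    if provider == "NVIDIA":
        # NVIDIA-hosted endpoints typically use the vendor-prefixed model id;
        # the exact id string here is an assumption.
        return ChatNVIDIA(model=f"meta/{model}", temperature=0)
    raise ValueError(f"Unsupported provider: {provider}")
```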
Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 api_url: "http://export.arxiv.org/api/query"
 request_timeout: 10
 chunk_size: 1024
-pdf_base_url: "https://arxiv.org/pdf"
+pdf_base_url: "https://arxiv.org/pdf"

aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml

Lines changed: 6 additions & 1 deletion
@@ -6,10 +6,15 @@ api_fields:
   - "title"
   - "abstract"
   - "year"
-  - "authors"
+  - "authors.name"
+  - "authors.authorId"
   - "citationCount"
   - "url"
   - "externalIds"
+  - "venue"
+  - "publicationVenue" # Full object, instead of specific subfields
+  - "journal" # Full object, instead of specific subfields
+  - "publicationDate"
 # Commented fields that could be added later if needed
 
 # Default headers and params
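The same field-list change recurs in the `search` and `single_paper_recommendation` configs below. A hedged sketch of how such `api_fields` are typically consumed: Semantic Scholar's REST API takes a comma-separated `fields` query parameter, and dotted names like "authors.name" select sub-fields of nested objects. The endpoint call below is illustrative, not lifted from the tool code:

```python
import requests

api_fields = [
    "title", "abstract", "year", "authors.name", "authors.authorId",
    "citationCount", "url", "externalIds", "venue", "publicationVenue",
    "journal", "publicationDate",
]

response = requests.get(
    "https://api.semanticscholar.org/graph/v1/paper/search",
    params={
        "query": "large language models",
        "fields": ",".join(api_fields),  # comma-separated field selector
        "limit": 5,
    },
    timeout=10,
)
papers = response.json().get("data", [])
```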
Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,6 @@
 # Default configuration for the PDF question_and_answer Tool
 openai_api_key: ${oc.env:OPENAI_API_KEY}
-chunk_size: 1000 # Number of characters per text chunk
-chunk_overlap: 200 # Overlap between adjacent chunks
-num_retrievals: 3 # Number of document chunks to retrieve for the QA chain
-qa_chain_type: "stuff" # The type of QA chain to use
+chunk_size: 1000 # Number of characters per text chunk
+chunk_overlap: 200 # Overlap between adjacent chunks
+num_retrievals: 3 # Number of document chunks to retrieve for the QA chain
+qa_chain_type: "stuff" # The type of QA chain to use
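A hedged sketch of how these four settings typically map onto a LangChain PDF QA pipeline (splitter -> vector store -> retriever -> "stuff" chain). The concrete loader, store, and chain classes below are assumptions, not the tool's actual implementation:

```python
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain

cfg = {"chunk_size": 1000, "chunk_overlap": 200, "num_retrievals": 3, "qa_chain_type": "stuff"}

pdf_text = "..."  # text extracted from the uploaded PDF (assumed available)
question = "What methods does the paper use?"

# Split the PDF text into overlapping character chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=cfg["chunk_size"], chunk_overlap=cfg["chunk_overlap"]
)
chunks = splitter.create_documents([pdf_text])

# Embed the chunks and retrieve the top-k most similar ones for the question.
store = FAISS.from_documents(chunks, OpenAIEmbeddings())
docs = store.similarity_search(question, k=cfg["num_retrievals"])

# "stuff" simply concatenates the retrieved chunks into one prompt.
chain = load_qa_chain(ChatOpenAI(model="gpt-4o-mini"), chain_type=cfg["qa_chain_type"])
answer = chain.invoke({"input_documents": docs, "question": question})
```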

aiagents4pharma/talk2scholars/configs/tools/search/default.yaml

Lines changed: 7 additions & 1 deletion
@@ -6,10 +6,16 @@ api_fields:
   - "title"
   - "abstract"
   - "year"
-  - "authors"
+  - "authors.name"
+  - "authors.authorId"
   - "citationCount"
   - "url"
   - "externalIds"
+  - "venue"
+  - "publicationVenue" # Full object, instead of specific subfields
+  - "journal" # Full object, instead of specific subfields
+  - "publicationDate"
+
 # Commented fields that could be added later if needed
 # - "publicationTypes"
 # - "openAccessPdf"

aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml

Lines changed: 6 additions & 1 deletion
@@ -6,10 +6,15 @@ api_fields:
   - "title"
   - "abstract"
   - "year"
-  - "authors"
+  - "authors.name"
+  - "authors.authorId"
   - "citationCount"
   - "url"
   - "externalIds"
+  - "venue"
+  - "publicationVenue" # Full object, instead of specific subfields
+  - "journal" # Full object, instead of specific subfields
+  - "publicationDate"
 # Commented fields that could be added later if needed
 # - "publicationTypes"
 # - "openAccessPdf"

aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml

Lines changed: 1 addition & 1 deletion
@@ -53,4 +53,4 @@ zotero:
     "Web Page",
   ]
 
-  filter_excluded_types: ["attachment", "note", "annotation"]
+  # filter_excluded_types: ["attachment", "note", "annotation"]

aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml

Lines changed: 41 additions & 2 deletions
@@ -12,5 +12,44 @@ search_params:
 # Item Types and Limit
 zotero:
   max_limit: 100
-  filter_item_types: ["journalArticle", "conferencePaper", "preprint"]
-  filter_excluded_types: ["attachment", "note", "annotation"]
+  filter_item_types:
+    [
+      "Artwork",
+      "Audio Recording",
+      "Bill",
+      "Blog Post",
+      "Book",
+      "Book Section",
+      "Case",
+      "Conference Paper",
+      "Dataset",
+      "Dictionary Entry",
+      "Document",
+      "E-mail",
+      "Encyclopedia Article",
+      "Film",
+      "Forum Post",
+      "Hearing",
+      "Instant Message",
+      "Interview",
+      "Journal Article",
+      "Letter",
+      "Magazine Article",
+      "Manuscript",
+      "Map",
+      "Newspaper Article",
+      "Patent",
+      "Podcast",
+      "Preprint",
+      "Presentation",
+      "Radio Broadcast",
+      "Report",
+      "Software",
+      "Standard",
+      "Statute",
+      "Thesis",
+      "TV Broadcast",
+      "Video Recording",
+      "Web Page",
+    ]
+  # filter_excluded_types: ["attachment", "note", "annotation"]
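A hedged sketch of how such a `filter_item_types` list could be applied when reading a library with pyzotero. Note the config lists display names ("Journal Article") while the Zotero API returns camelCase `itemType` codes ("journalArticle"), so the mapping, credentials, and filtering helper below are illustrative assumptions:

```python
from pyzotero import zotero

# Display names from the config mapped to the API's itemType codes
# (only a few shown; the mapping itself is an assumption for illustration).
ALLOWED_ITEM_TYPES = {"journalArticle", "conferencePaper", "preprint", "bookSection"}

zot = zotero.Zotero("1234567", "user", "YOUR_ZOTERO_API_KEY")  # placeholder credentials
items = zot.items(limit=100)

# Keep only items whose type is in the allowed set.
papers = [it for it in items if it["data"].get("itemType") in ALLOWED_ITEM_TYPES]
```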

aiagents4pharma/talk2scholars/state/state_talk2scholars.py

Lines changed: 4 additions & 1 deletion
@@ -9,6 +9,7 @@
 import logging
 from typing import Annotated, Any, Dict
 from langchain_core.language_models import BaseChatModel
+from langchain_core.embeddings import Embeddings
 from langgraph.prebuilt.chat_agent_executor import AgentState
 
 # Configure logging
@@ -54,6 +55,8 @@ class Talk2Scholars(AgentState):
         multi_papers (Dict[str, Any]): Stores multiple recommended papers from various sources.
         zotero_read (Dict[str, Any]): Stores the papers retrieved from Zotero.
         llm_model (BaseChatModel): The language model instance used for generating responses.
+        text_embedding_model (Embeddings): The text embedding model used for
+            similarity calculations.
     """
 
     # Agent state fields
@@ -63,4 +66,4 @@ class Talk2Scholars(AgentState):
     pdf_data: Annotated[Dict[str, Any], replace_dict]
     zotero_read: Annotated[Dict[str, Any], replace_dict]
     llm_model: BaseChatModel
-    pdf_data: Annotated[Dict[str, Any], replace_dict]
+    text_embedding_model: Embeddings
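With the duplicate `pdf_data` field removed and `text_embedding_model` added, callers supply an embeddings instance alongside the chat model in the initial state. A usage sketch; the embeddings class, model name, and exact set of initial-state keys are assumptions for illustration, and `app` stands for the graph returned by `main_agent.get_app`:

```python
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

initial_state = {
    "messages": [("user", "Find recent papers on CRISPR delivery")],
    "llm_model": ChatOpenAI(model="gpt-4o-mini", temperature=0),
    "text_embedding_model": OpenAIEmbeddings(model="text-embedding-3-small"),
}

# `app` is the compiled hierarchical graph from main_agent.get_app(uniq_id, llm_model).
result = app.invoke(
    initial_state,
    config={"configurable": {"thread_id": "thread_123"}},
)
```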
