diff --git a/open_deep_researcher/researcher/researcher.py b/open_deep_researcher/researcher/researcher.py index 4d4dac5..9b630c9 100644 --- a/open_deep_researcher/researcher/researcher.py +++ b/open_deep_researcher/researcher/researcher.py @@ -6,40 +6,53 @@ from typing import List, Dict, Optional, Tuple import google.generativeai as genai import time +import os # --------------------------- # Configuration Constants # --------------------------- -GEMINI_API_KEY = "ADD_YOUR_GEMINI_API_KEY" -SERPAPI_API_KEY = "ADD_YOUR_SERPAPI_API_KEY" -JINA_API_KEY = "ADD_YOUR_JINA_API_KEY" + + +GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") +SERPAPI_API_KEY = os.environ.get("SERPAPI_API_KEY") +JINA_API_KEY = os.environ.get("JINA_API_KEY") SERPAPI_URL = "https://serpapi.com/search" JINA_BASE_URL = "https://r.jina.ai/" genai.configure(api_key=GEMINI_API_KEY) + async def call_google_gemini(messages: List[Dict]) -> Optional[str]: """Call Google Gemini asynchronously.""" try: prompt = "\n".join([msg["content"] for msg in messages]) - model = genai.GenerativeModel('gemini-1.5-flash') + model = genai.GenerativeModel("gemini-1.5-flash") loop = asyncio.get_event_loop() - response = await loop.run_in_executor(None, lambda: model.generate_content(prompt)) + response = await loop.run_in_executor( + None, lambda: model.generate_content(prompt) + ) return response.text except Exception as e: return None -async def generate_search_queries_async(session: aiohttp.ClientSession, user_query: str) -> List[str]: + +async def generate_search_queries_async( + session: aiohttp.ClientSession, user_query: str +) -> List[str]: """Generate search queries based on user query.""" prompt = ( "You are an expert research assistant. Given the user's query, generate up to four distinct, " "precise search queries that would help gather complete information on the topic. " "Return only a valid list of plain strings. Do not include markdown, code blocks, backticks, or explanations. " - "Just return the list itself, for example: ['query1', 'query2', 'query3'].") + "Just return the list itself, for example: ['query1', 'query2', 'query3']." + ) messages = [ - {"role": "system", "content": "You are a helpful and precise research assistant."}, - {"role": "user", "content": f"User Query: {user_query}\n\n{prompt}"} + { + "role": "system", + "content": "You are a helpful and precise research assistant.", + }, + {"role": "user", "content": f"User Query: {user_query}\n\n{prompt}"}, ] response = await call_google_gemini(messages) if response: @@ -50,22 +63,24 @@ async def generate_search_queries_async(session: aiohttp.ClientSession, user_que return [] return [] + async def perform_search_async(session: aiohttp.ClientSession, query: str) -> List[str]: """Perform search using SERPAPI.""" - params = { - "q": query, - "api_key": SERPAPI_API_KEY, - "engine": "google" - } + params = {"q": query, "api_key": SERPAPI_API_KEY, "engine": "google"} try: async with session.get(SERPAPI_URL, params=params) as resp: if resp.status == 200: results = await resp.json() - return [item.get("link") for item in results.get("organic_results", []) if "link" in item] + return [ + item.get("link") + for item in results.get("organic_results", []) + if "link" in item + ] return [] except Exception: return [] + async def fetch_webpage_text_async(session: aiohttp.ClientSession, url: str) -> str: """Fetch webpage text using Jina API.""" full_url = f"{JINA_BASE_URL}{url}" @@ -76,7 +91,10 @@ async def fetch_webpage_text_async(session: aiohttp.ClientSession, url: str) -> except Exception: return "" -async def is_page_useful_async(session: aiohttp.ClientSession, user_query: str, page_text: str) -> bool: + +async def is_page_useful_async( + session: aiohttp.ClientSession, user_query: str, page_text: str +) -> bool: """Determine if the page content is useful for the query.""" prompt = ( "You are a critical research evaluator. Given the user's query and the content of a webpage, " @@ -84,17 +102,21 @@ async def is_page_useful_async(session: aiohttp.ClientSession, user_query: str, "Respond with exactly one word: 'Yes' if the page is useful, or 'No' if it is not." ) messages = [ - {"role": "system", "content": "You are a strict and concise evaluator of research relevance."}, - {"role": "user", "content": f"User Query: {user_query}\n\nWebpage Content:\n{page_text[:20000]}\n\n{prompt}"} + { + "role": "system", + "content": "You are a strict and concise evaluator of research relevance.", + }, + { + "role": "user", + "content": f"User Query: {user_query}\n\nWebpage Content:\n{page_text[:20000]}\n\n{prompt}", + }, ] response = await call_google_gemini(messages) return response and response.strip().lower() == "yes" + async def extract_relevant_context_async( - session: aiohttp.ClientSession, - user_query: str, - search_query: str, - page_text: str + session: aiohttp.ClientSession, user_query: str, search_query: str, page_text: str ) -> str: """Extract relevant information from page content.""" prompt = ( @@ -102,17 +124,24 @@ async def extract_relevant_context_async( "Return only the relevant context as plain text." ) messages = [ - {"role": "system", "content": "You are an expert in extracting relevant information."}, - {"role": "user", "content": f"Query: {user_query}\nSearch Query: {search_query}\n\nContent:\n{page_text[:20000]}\n\n{prompt}"} + { + "role": "system", + "content": "You are an expert in extracting relevant information.", + }, + { + "role": "user", + "content": f"Query: {user_query}\nSearch Query: {search_query}\n\nContent:\n{page_text[:20000]}\n\n{prompt}", + }, ] response = await call_google_gemini(messages) return response.strip() if response else "" + async def get_new_search_queries_async( session: aiohttp.ClientSession, user_query: str, previous_queries: List[str], - contexts: List[str] + contexts: List[str], ) -> List[str]: """Generate new search queries based on current findings.""" prompt = ( @@ -121,7 +150,10 @@ async def get_new_search_queries_async( ) messages = [ {"role": "system", "content": "You are a systematic research planner."}, - {"role": "user", "content": f"Query: {user_query}\nPrevious: {previous_queries}\nContexts:\n{''.join(contexts)}\n\n{prompt}"} + { + "role": "user", + "content": f"Query: {user_query}\nPrevious: {previous_queries}\nContexts:\n{''.join(contexts)}\n\n{prompt}", + }, ] response = await call_google_gemini(messages) if response: @@ -132,10 +164,9 @@ async def get_new_search_queries_async( return [] return [] + async def generate_final_report_async( - session: aiohttp.ClientSession, - user_query: str, - contexts: List[str] + session: aiohttp.ClientSession, user_query: str, contexts: List[str] ) -> str: """Generate final research report.""" prompt = ( @@ -144,18 +175,36 @@ async def generate_final_report_async( ) messages = [ {"role": "system", "content": "You are a skilled report writer."}, - {"role": "user", "content": f"Query: {user_query}\nContexts:\n{''.join(contexts)}\n\n{prompt}"} + { + "role": "user", + "content": f"Query: {user_query}\nContexts:\n{''.join(contexts)}\n\n{prompt}", + }, ] response = await call_google_gemini(messages) return response if response else "Unable to generate report." + class ResearchState(rx.State): """State management for the research assistant.""" + user_query: str = "" iteration_limit: int = 2 final_report: str = "" process_logs: list is_processing: bool = False + copy_success: bool = False + + def clear_form(self): + """Clear the form and reset state.""" + self.user_query = "" + self.final_report = "" + self.process_logs = "" + self.is_processing = False + self.copy_success = False + + def new_research(self): + """Start a new research session.""" + self.clear_form() def update_logs(self, message: str): """Update process logs with timestamp.""" @@ -165,15 +214,18 @@ def update_logs(self, message: str): else: self.process_logs = f"[{timestamp}] {message}" - - async def process_link(self, session: aiohttp.ClientSession, link: str, search_query: str) -> Optional[str]: + async def process_link( + self, session: aiohttp.ClientSession, link: str, search_query: str + ) -> Optional[str]: """Process a single link and extract relevant information.""" page_text = await fetch_webpage_text_async(session, link) if not page_text: return None if await is_page_useful_async(session, self.user_query, page_text): - context = await extract_relevant_context_async(session, self.user_query, search_query, page_text) + context = await extract_relevant_context_async( + session, self.user_query, search_query, page_text + ) return context return None @@ -190,19 +242,21 @@ async def handle_submit(self): async with aiohttp.ClientSession() as session: self.update_logs("Generating initial search queries...") yield - + queries = await generate_search_queries_async(session, self.user_query) if not queries: self.update_logs("No initial queries could be generated") yield return - self.update_logs(f"Generated {len(queries)} initial queries: {', '.join(queries)}") + self.update_logs( + f"Generated {len(queries)} initial queries: {', '.join(queries)}" + ) yield contexts = [] iteration = 0 - + while iteration < self.iteration_limit: self.update_logs(f"Starting research iteration {iteration + 1}") yield @@ -220,7 +274,7 @@ async def handle_submit(self): if len(all_links) >= 10: break all_links.extend(links) - + self.update_logs(f"Found {len(all_links)} links to process") yield @@ -228,34 +282,44 @@ async def handle_submit(self): for link in all_links: self.update_logs(f"Processing link: {link}") yield # Update UI after log entry - + context = await self.process_link(session, link, query) if context: - self.update_logs("Successfully extracted relevant information") + self.update_logs( + "Successfully extracted relevant information" + ) iteration_contexts.append(context) yield # Update UI after successful extraction else: self.update_logs("No useful information found in link") yield - - self.update_logs(f"Extracted information from {len(iteration_contexts)} sources") + + self.update_logs( + f"Extracted information from {len(iteration_contexts)} sources" + ) yield - + contexts.extend(iteration_contexts) - queries = await get_new_search_queries_async(session, self.user_query, queries, contexts) - + queries = await get_new_search_queries_async( + session, self.user_query, queries, contexts + ) + if not queries: self.update_logs("No more queries needed, research complete") yield break - - self.update_logs(f"Generated {len(queries)} new queries for next iteration") + + self.update_logs( + f"Generated {len(queries)} new queries for next iteration" + ) yield iteration += 1 self.update_logs("Generating final research report...") yield - self.final_report = await generate_final_report_async(session, self.user_query, contexts) + self.final_report = await generate_final_report_async( + session, self.user_query, contexts + ) self.update_logs("Research process completed successfully") except Exception as e: @@ -263,80 +327,273 @@ async def handle_submit(self): finally: self.is_processing = False yield - + + async def copy_to_clipboard(self): + """Copy report to clipboard and show success state.""" + # Set clipboard content using JavaScript + yield rx.set_clipboard(self.final_report) + + # Show success state + self.copy_success = True + yield + await asyncio.sleep(2) + self.copy_success = False + yield + + def export_to_pdf(self): + """Trigger PDF export using browser's print functionality.""" + return rx.call_script( + """ + const content = document.querySelector('#research-report').innerHTML; + const printWindow = window.open('', '_blank'); + printWindow.document.write(` + +
+