+ "answer": "\"\"\"\nModule for creating the smart scraper\n\"\"\"\n\nfrom typing import Optional\nfrom pydantic import BaseModel\n\nfrom .base_graph import BaseGraph\nfrom .abstract_graph import AbstractGraph\n\nfrom ..nodes import (\n FetchNode,\n RAGNode,\n GenerateAnswerCSVNode\n)\n\n\nclass CSVScraperGraph(AbstractGraph):\n \"\"\"\n SmartScraper is a comprehensive web scraping tool that automates the process of extracting\n information from web pages using a natural language model to interpret and answer prompts.\n \"\"\"\n\n def __init__(self, prompt: str, source: str, config: dict, schema: Optional[BaseModel] = None):\n \"\"\"\n Initializes the CSVScraperGraph with a prompt, source, and configuration.\n \"\"\"\n super().__init__(prompt, config, source, schema)\n\n self.input_key = \"csv\" if source.endswith(\"csv\") else \"csv_dir\"\n\n def _create_graph(self):\n \"\"\"\n Creates the graph of nodes representing the workflow for web scraping.\n \"\"\"\n fetch_node = FetchNode(\n input=\"csv | csv_dir\",\n output=[\"doc\"],\n )\n rag_node = RAGNode(\n input=\"user_prompt & doc\",\n output=[\"relevant_chunks\"],\n node_config={\n \"llm_model\": self.llm_model,\n \"embedder_model\": self.embedder_model,\n }\n )\n generate_answer_node = GenerateAnswerCSVNode(\n input=\"user_prompt & (relevant_chunks | doc)\",\n output=[\"answer\"],\n node_config={\n \"llm_model\": self.llm_model,\n \"schema\": self.schema,\n }\n )\n\n return BaseGraph(\n nodes=[\n fetch_node,\n rag_node,\n generate_answer_node,\n ],\n edges=[\n (fetch_node, rag_node),\n (rag_node, generate_answer_node)\n ],\n entry_point=fetch_node,\n graph_name=self.__class__.__name__\n )\n\n def run(self) -> str:\n \"\"\"\n Executes the web scraping process and returns the answer to the prompt.\n \"\"\"\n inputs = {\"user_prompt\": self.prompt, self.input_key: self.source}\n self.final_state, self.execution_info = self.graph.execute(inputs)\n\n return self.final_state.get(\"answer\", \"No answer found.\")\n"
0 commit comments