Skip to content

Commit d38e342

Browse files
committed
add new files for configuration
1 parent b6a060c commit d38e342

24 files changed

+29
-23
lines changed

Examples/openai_config.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[
22
{
33
"prompt": "How to create a script in Scrapegraphai for scraping a specific website given a prompt using openai as a provider?",
4-
"answer": "\"\"\" \nBasic example of scraping pipeline using SmartScraper\n\"\"\"\n\nimport os\nfrom dotenv import load_dotenv\nfrom scrapegraphai.graphs import SmartScraperGraph\nfrom scrapegraphai.utils import prettify_exec_info\n\nload_dotenv()\n\n\n# ************************************************\n# Define the configuration for the graph\n# ************************************************\n\nopenai_key = os.getenv(\"OPENAI_APIKEY\")\n\ngraph_config = {\n \"llm\": {\n \"api_key\": openai_key,\n \"model\": \"gpt-3.5-turbo\",\n },\n \"verbose\": True,\n \"headless\": False,\n}\n\n# ************************************************\n# Create the SmartScraperGraph instance and run it\n# ************************************************\n\nsmart_scraper_graph = SmartScraperGraph(\n prompt=\"List me all the projects with their description\",\n # also accepts a string with the already downloaded HTML code\n source=\"https://perinim.github.io/projects/\",\n config=graph_config,\n)\n\nresult = smart_scraper_graph.run()\nprint(result)\n\n# ************************************************\n# Get graph execution info\n# ************************************************\n\ngraph_exec_info = smart_scraper_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))\n"
4+
"answer": "\"\"\" \nBasic example of scraping pipeline using SmartScraper\n\"\"\"\n\nimport os, json\nfrom dotenv import load_dotenv\nfrom scrapegraphai.graphs import SmartScraperGraph\nfrom scrapegraphai.utils import prettify_exec_info\n\nload_dotenv()\n\n\n# ************************************************\n# Define the configuration for the graph\n# ************************************************\n\nopenai_key = os.getenv(\"OPENAI_APIKEY\")\n\ngraph_config = {\n \"llm\": {\n \"api_key\": openai_key,\n \"model\": \"gpt-3.5-turbo\",\n },\n \"verbose\": True,\n \"headless\": False,\n}\n\n# ************************************************\n# Create the SmartScraperGraph instance and run it\n# ************************************************\n\nsmart_scraper_graph = SmartScraperGraph(\n prompt=\"List me all the projects with their description\",\n # also accepts a string with the already downloaded HTML code\n source=\"https://perinim.github.io/projects/\",\n config=graph_config,\n)\n\nresult = smart_scraper_graph.run()\nprint(json.dumps(result, indent=4))\n\n# ************************************************\n# Get graph execution info\n# ************************************************\n\ngraph_exec_info = smart_scraper_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))\n"
55
},
66
{
77
"prompt": "How to create a script in Scrapegraphai for scraping a specific website given a prompt and a schema using openai as a provider?",

implementation/graphs_abstract_graph.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

implementation/graphs_base_graph.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[
22
{
33
"prompt": "how is csv_scraper_graph implemented in Scrapegraphai?",
4-
"answer": "\"\"\"\nModule for creating the smart scraper\n\"\"\"\n\nfrom typing import Optional\nfrom pydantic import BaseModel\n\nfrom .base_graph import BaseGraph\nfrom .abstract_graph import AbstractGraph\n\nfrom ..nodes import (\n FetchNode,\n RAGNode,\n GenerateAnswerCSVNode\n)\n\n\nclass CSVScraperGraph(AbstractGraph):\n \"\"\"\n SmartScraper is a comprehensive web scraping tool that automates the process of extracting\n information from web pages using a natural language model to interpret and answer prompts.\n \"\"\"\n\n def __init__(self, prompt: str, source: str, config: dict, schema: Optional[BaseModel] = None):\n \"\"\"\n Initializes the CSVScraperGraph with a prompt, source, and configuration.\n \"\"\"\n super().__init__(prompt, config, source, schema)\n\n self.input_key = \"csv\" if source.endswith(\"csv\") else \"csv_dir\"\n\n def _create_graph(self):\n \"\"\"\n Creates the graph of nodes representing the workflow for web scraping.\n \"\"\"\n fetch_node = FetchNode(\n input=\"csv | csv_dir\",\n output=[\"doc\"],\n )\n rag_node = RAGNode(\n input=\"user_prompt & doc\",\n output=[\"relevant_chunks\"],\n node_config={\n \"llm_model\": self.llm_model,\n \"embedder_model\": self.embedder_model,\n }\n )\n generate_answer_node = GenerateAnswerCSVNode(\n input=\"user_prompt & (relevant_chunks | doc)\",\n output=[\"answer\"],\n node_config={\n \"llm_model\": self.llm_model,\n \"schema\": self.schema,\n }\n )\n\n return BaseGraph(\n nodes=[\n fetch_node,\n rag_node,\n generate_answer_node,\n ],\n edges=[\n (fetch_node, rag_node),\n (rag_node, generate_answer_node)\n ],\n entry_point=fetch_node\n )\n\n def run(self) -> str:\n \"\"\"\n Executes the web scraping process and returns the answer to the prompt.\n \"\"\"\n inputs = {\"user_prompt\": self.prompt, self.input_key: self.source}\n self.final_state, self.execution_info = self.graph.execute(inputs)\n\n return self.final_state.get(\"answer\", \"No answer found.\")\n"
4+
"answer": "\"\"\"\nModule for creating the smart scraper\n\"\"\"\n\nfrom typing import Optional\nfrom pydantic import BaseModel\n\nfrom .base_graph import BaseGraph\nfrom .abstract_graph import AbstractGraph\n\nfrom ..nodes import (\n FetchNode,\n RAGNode,\n GenerateAnswerCSVNode\n)\n\n\nclass CSVScraperGraph(AbstractGraph):\n \"\"\"\n SmartScraper is a comprehensive web scraping tool that automates the process of extracting\n information from web pages using a natural language model to interpret and answer prompts.\n \"\"\"\n\n def __init__(self, prompt: str, source: str, config: dict, schema: Optional[BaseModel] = None):\n \"\"\"\n Initializes the CSVScraperGraph with a prompt, source, and configuration.\n \"\"\"\n super().__init__(prompt, config, source, schema)\n\n self.input_key = \"csv\" if source.endswith(\"csv\") else \"csv_dir\"\n\n def _create_graph(self):\n \"\"\"\n Creates the graph of nodes representing the workflow for web scraping.\n \"\"\"\n fetch_node = FetchNode(\n input=\"csv | csv_dir\",\n output=[\"doc\"],\n )\n rag_node = RAGNode(\n input=\"user_prompt & doc\",\n output=[\"relevant_chunks\"],\n node_config={\n \"llm_model\": self.llm_model,\n \"embedder_model\": self.embedder_model,\n }\n )\n generate_answer_node = GenerateAnswerCSVNode(\n input=\"user_prompt & (relevant_chunks | doc)\",\n output=[\"answer\"],\n node_config={\n \"llm_model\": self.llm_model,\n \"schema\": self.schema,\n }\n )\n\n return BaseGraph(\n nodes=[\n fetch_node,\n rag_node,\n generate_answer_node,\n ],\n edges=[\n (fetch_node, rag_node),\n (rag_node, generate_answer_node)\n ],\n entry_point=fetch_node,\n graph_name=self.__class__.__name__\n )\n\n def run(self) -> str:\n \"\"\"\n Executes the web scraping process and returns the answer to the prompt.\n \"\"\"\n inputs = {\"user_prompt\": self.prompt, self.input_key: self.source}\n self.final_state, self.execution_info = self.graph.execute(inputs)\n\n return self.final_state.get(\"answer\", \"No answer found.\")\n"
55
}
66
]
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[
22
{
33
"prompt": "how is csv_scraper_multi_graph implemented in Scrapegraphai?",
4-
"answer": "\"\"\" \nCSVScraperMultiGraph Module\n\"\"\"\n\nfrom copy import copy, deepcopy\nfrom typing import List, Optional\n\nfrom .base_graph import BaseGraph\nfrom .abstract_graph import AbstractGraph\nfrom .csv_scraper_graph import CSVScraperGraph\n\nfrom ..nodes import (\n GraphIteratorNode,\n MergeAnswersNode\n)\n\n\nclass CSVScraperMultiGraph(AbstractGraph):\n \"\"\" \n CSVScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and generates answers to a given prompt.\n It only requires a user prompt and a list of URLs.\n\n Attributes:\n prompt (str): The user prompt to search the internet.\n llm_model (dict): The configuration for the language model.\n embedder_model (dict): The configuration for the embedder model.\n headless (bool): A flag to run the browser in headless mode.\n verbose (bool): A flag to display the execution information.\n model_token (int): The token limit for the language model.\n\n Args:\n prompt (str): The user prompt to search the internet.\n source (List[str]): The source of the graph.\n config (dict): Configuration parameters for the graph.\n schema (Optional[str]): The schema for the graph output.\n\n Example:\n >>> search_graph = MultipleSearchGraph(\n ... \"What is Chioggia famous for?\",\n ... {\"llm\": {\"model\": \"gpt-3.5-turbo\"}}\n ... )\n >>> result = search_graph.run()\n \"\"\"\n\n def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):\n\n self.max_results = config.get(\"max_results\", 3)\n\n if all(isinstance(value, str) for value in config.values()):\n self.copy_config = copy(config)\n else:\n self.copy_config = deepcopy(config)\n\n super().__init__(prompt, config, source, schema)\n\n def _create_graph(self) -> BaseGraph:\n \"\"\"\n Creates the graph of nodes representing the workflow for web scraping and searching.\n\n Returns:\n BaseGraph: A graph instance representing the web scraping and searching workflow.\n \"\"\"\n\n # ************************************************\n # Create a SmartScraperGraph instance\n # ************************************************\n\n smart_scraper_instance = CSVScraperGraph(\n prompt=\"\",\n source=\"\",\n config=self.copy_config,\n )\n\n # ************************************************\n # Define the graph nodes\n # ************************************************\n\n graph_iterator_node = GraphIteratorNode(\n input=\"user_prompt & jsons\",\n output=[\"results\"],\n node_config={\n \"graph_instance\": smart_scraper_instance,\n }\n )\n\n merge_answers_node = MergeAnswersNode(\n input=\"user_prompt & results\",\n output=[\"answer\"],\n node_config={\n \"llm_model\": self.llm_model,\n \"schema\": self.schema\n }\n )\n\n return BaseGraph(\n nodes=[\n graph_iterator_node,\n merge_answers_node,\n ],\n edges=[\n (graph_iterator_node, merge_answers_node),\n ],\n entry_point=graph_iterator_node\n )\n\n def run(self) -> str:\n \"\"\"\n Executes the web scraping and searching process.\n\n Returns:\n str: The answer to the prompt.\n \"\"\"\n inputs = {\"user_prompt\": self.prompt, \"jsons\": self.source}\n self.final_state, self.execution_info = self.graph.execute(inputs)\n\n return self.final_state.get(\"answer\", \"No answer found.\")\n"
4+
"answer": "\"\"\" \nCSVScraperMultiGraph Module\n\"\"\"\n\nfrom copy import copy, deepcopy\nfrom typing import List, Optional\n\nfrom .base_graph import BaseGraph\nfrom .abstract_graph import AbstractGraph\nfrom .csv_scraper_graph import CSVScraperGraph\n\nfrom ..nodes import (\n GraphIteratorNode,\n MergeAnswersNode\n)\n\n\nclass CSVScraperMultiGraph(AbstractGraph):\n \"\"\" \n CSVScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and generates answers to a given prompt.\n It only requires a user prompt and a list of URLs.\n\n Attributes:\n prompt (str): The user prompt to search the internet.\n llm_model (dict): The configuration for the language model.\n embedder_model (dict): The configuration for the embedder model.\n headless (bool): A flag to run the browser in headless mode.\n verbose (bool): A flag to display the execution information.\n model_token (int): The token limit for the language model.\n\n Args:\n prompt (str): The user prompt to search the internet.\n source (List[str]): The source of the graph.\n config (dict): Configuration parameters for the graph.\n schema (Optional[str]): The schema for the graph output.\n\n Example:\n >>> search_graph = MultipleSearchGraph(\n ... \"What is Chioggia famous for?\",\n ... {\"llm\": {\"model\": \"gpt-3.5-turbo\"}}\n ... )\n >>> result = search_graph.run()\n \"\"\"\n\n def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):\n\n self.max_results = config.get(\"max_results\", 3)\n\n if all(isinstance(value, str) for value in config.values()):\n self.copy_config = copy(config)\n else:\n self.copy_config = deepcopy(config)\n\n super().__init__(prompt, config, source, schema)\n\n def _create_graph(self) -> BaseGraph:\n \"\"\"\n Creates the graph of nodes representing the workflow for web scraping and searching.\n\n Returns:\n BaseGraph: A graph instance representing the web scraping and searching workflow.\n \"\"\"\n\n # ************************************************\n # Create a SmartScraperGraph instance\n # ************************************************\n\n smart_scraper_instance = CSVScraperGraph(\n prompt=\"\",\n source=\"\",\n config=self.copy_config,\n )\n\n # ************************************************\n # Define the graph nodes\n # ************************************************\n\n graph_iterator_node = GraphIteratorNode(\n input=\"user_prompt & jsons\",\n output=[\"results\"],\n node_config={\n \"graph_instance\": smart_scraper_instance,\n }\n )\n\n merge_answers_node = MergeAnswersNode(\n input=\"user_prompt & results\",\n output=[\"answer\"],\n node_config={\n \"llm_model\": self.llm_model,\n \"schema\": self.schema\n }\n )\n\n return BaseGraph(\n nodes=[\n graph_iterator_node,\n merge_answers_node,\n ],\n edges=[\n (graph_iterator_node, merge_answers_node),\n ],\n entry_point=graph_iterator_node,\n graph_name=self.__class__.__name__\n )\n\n def run(self) -> str:\n \"\"\"\n Executes the web scraping and searching process.\n\n Returns:\n str: The answer to the prompt.\n \"\"\"\n inputs = {\"user_prompt\": self.prompt, \"jsons\": self.source}\n self.final_state, self.execution_info = self.graph.execute(inputs)\n\n return self.final_state.get(\"answer\", \"No answer found.\")\n"
55
}
66
]

0 commit comments

Comments
 (0)