ScrapeGraphAI
diff --git a/‎Examples/antropic.json‎
Lines changed: 6 additions & 9 deletions b/‎Examples/antropic.json‎
Lines changed: 6 additions & 9 deletions
diff --git a/‎Examples/azure.json‎
Lines changed: 7 additions & 10 deletions b/‎Examples/azure.json‎
Lines changed: 7 additions & 10 deletions
@@ -44,18 +44,15 @@
         "answer": "Basic example of scraping pipeline using ScriptCreatorGraph\n\nimport os\nfrom dotenv import load_dotenv\nfrom scrapegraphai.graphs import ScriptCreatorGraph\nfrom scrapegraphai.utils import prettify_exec_info\n\nload_dotenv()\n\n# ************************************************\n# Define the configuration for the graph\n# ************************************************\n\ngraph_config = {\n    \"llm\": {\n        \"api_key\": os.getenv(\"ANTHROPIC_API_KEY\"),\n        \"model\": \"claude-3-haiku-20240307\",\n        \"max_tokens\": 4000\n    }\n}\n\n# ************************************************\n# Create the ScriptCreatorGraph instance and run it\n# ************************************************\n\nscript_creator_graph = ScriptCreatorGraph(\n    prompt=\"List me all the projects with their description.\",\n    # also accepts a string with the already downloaded HTML code\n    source=\"https://perinim.github.io/projects\",\n    config=graph_config\n)\n\nresult = script_creator_graph.run()\nprint(result)\n\n# ************************************************\n# Get graph execution info\n# ************************************************\n\ngraph_exec_info = script_creator_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))"
     },
     {
-        "prompt": "How to create a script in Scrapegraphai for scraping multiple XMLS in haiku (anthopic)? ",
-        "answer": ""
-
+        "prompt": "How to create a script in Scrapegraphai for scraping multiple XMLs in haiku (anthropic)?",
+        "answer": "Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents\n\n```python\nimport os\nfrom dotenv import load_dotenv\nfrom scrapegraphai.graphs import XMLScraperMultiGraph\nfrom scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info\nload_dotenv()\n\n# ************************************************\n# Read the XML file\n# ************************************************\n\nFILE_NAME = \"inputs/books.xml\"\ncurr_dir = os.path.dirname(os.path.realpath(__file__))\nfile_path = os.path.join(curr_dir, FILE_NAME)\n\nwith open(file_path, 'r', encoding=\"utf-8\") as file:\n    text = file.read()\n\n# ************************************************\n# Define the configuration for the graph\n# ************************************************\n\ngraph_config = {\n    \"llm\": {\n        \"api_key\": os.getenv(\"ANTHROPIC_API_KEY\"),\n        \"model\": \"claude-3-haiku-20240307\",\n        \"max_tokens\": 4000},\n}\n\n# ************************************************\n# Create the XMLScraperMultiGraph instance and run it\n# ************************************************\n\nxml_scraper_graph = XMLScraperMultiGraph(\n    prompt=\"List me all the authors, title and genres of the books\",\n    source=[text, text],  # Pass the content of the file, not the file object\n    config=graph_config\n)\n\nresult = xml_scraper_graph.run()\nprint(result)\n\n# ************************************************\n# Get graph execution info\n# ************************************************\n\ngraph_exec_info = xml_scraper_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))\n\n# Save to json or csv\nconvert_to_csv(result, \"result\")\nconvert_to_json(result, \"result\")\n```"
     },
     {
-        "prompt": "How to create a script in Scrapegraphai for scraping multiple CSVs in haiku (anthopic)? ",
-        "answer": ""
-
+        "prompt": "How to create a script in Scrapegraphai for scraping multiple CSVs in haiku (anthropic)?",
+        "answer": "Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents\n\n```python\nimport os\nfrom dotenv import load_dotenv\nimport pandas as pd\nfrom scrapegraphai.graphs import CSVScraperMultiGraph\nfrom scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info\n\nload_dotenv()\n# ************************************************\n# Read the CSV file\n# ************************************************\n\nFILE_NAME = \"inputs/username.csv\"\ncurr_dir = os.path.dirname(os.path.realpath(__file__))\nfile_path = os.path.join(curr_dir, FILE_NAME)\n\ntext = pd.read_csv(file_path)\n\n# ************************************************\n# Define the configuration for the graph\n# ************************************************\n\ngraph_config = {\n    \"llm\": {\n        \"api_key\": os.getenv(\"ANTHROPIC_API_KEY\"),\n        \"model\": \"claude-3-haiku-20240307\",\n        \"max_tokens\": 4000},\n}\n\n# ************************************************\n# Create the CSVScraperMultiGraph instance and run it\n# ************************************************\n\ncsv_scraper_graph = CSVScraperMultiGraph(\n    prompt=\"List me all the last names\",\n    source=[str(text), str(text)],\n    config=graph_config\n)\n\nresult = csv_scraper_graph.run()\nprint(result)\n\n# ************************************************\n# Get graph execution info\n# ************************************************\n\ngraph_exec_info = csv_scraper_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))\n\n# Save to json or csv\nconvert_to_csv(result, \"result\")\nconvert_to_json(result, \"result\")\n```"
     },
     {
-        "prompt": "How to create a script in Scrapegraphai for scraping multiple JSONs in haiku (anthopic)? ",
-        "answer": ""
-
+        "prompt": "How to create a script in Scrapegraphai for scraping multiple JSONs in haiku (anthropic)?",
+        "answer": "Module for showing how JSONScraperMultiGraph multi works\n\n```python\nimport os\nimport json\nfrom dotenv import load_dotenv\nfrom scrapegraphai.graphs import JSONScraperMultiGraph\n\nload_dotenv()\n\ngraph_config = {\n    \"llm\": {\n        \"api_key\": os.getenv(\"ANTHROPIC_API_KEY\"),\n        \"model\": \"claude-3-haiku-20240307\",\n        \"max_tokens\": 4000\n        },\n}\n\nFILE_NAME = \"inputs/example.json\"\ncurr_dir = os.path.dirname(os.path.realpath(__file__))\nfile_path = os.path.join(curr_dir, FILE_NAME)\n\nwith open(file_path, 'r', encoding=\"utf-8\") as file:\n    text = file.read()\n\nsources = [text, text]\n\nmultiple_search_graph = JSONScraperMultiGraph(\n    prompt= \"List me all the authors, title and genres of the books\",\n    source= sources,\n    schema=None,\n    config=graph_config\n)\n\nresult = multiple_search_graph.run()\nprint(json.dumps(result, indent=4))\n```"
     }
 ]
@@ -43,19 +43,16 @@
         "prompt": "How to create a script in Scrapegraphai for creating script in azure in beautifolusoup?",
         "answer": "Basic example of scraping pipeline using ScriptCreatorGraph import os from dotenv import load_dotenv from scrapegraphai.graphs import ScriptCreatorGraph from langchain_openai import AzureChatOpenAI from langchain_openai import AzureOpenAIEmbeddings from scrapegraphai.utils import prettify_exec_info load_dotenv() # ************************************************ # Define the configuration for the graph # ************************************************ llm_model_instance = AzureChatOpenAI( openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"], azure_deployment=os.environ[\"AZURE_OPENAI_CHAT_DEPLOYMENT_NAME\"] ) embedder_model_instance = AzureOpenAIEmbeddings( azure_deployment=os.environ[\"AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME\"], openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"] ) graph_config = { \"llm\": {\"model_instance\": llm_model_instance}, \"embeddings\": {\"model_instance\": embedder_model_instance} } # ************************************************ # Create the ScriptCreatorGraph instance and run it # ************************************************ script_creator_graph = ScriptCreatorGraph( prompt=\"List me all the projects with their description.\", # also accepts a string with the already downloaded HTML code source=\"https://perinim.github.io/projects\", config=graph_config ) result = script_creator_graph.run() print(result) # ************************************************ # Get graph execution info # ************************************************ graph_exec_info = script_creator_graph.get_execution_info() print(prettify_exec_info(graph_exec_info))"
     },
-    {
-        "prompt": "How to create a script in Scrapegraphai for scraping multiple XMLS in haiku (anthopic)? ",
-        "answer": ""
-
+      {
+        "prompt": "How to create a script in Scrapegraphai for scraping multiple XMLS in azure ?",
+        "answer": "Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents\n\n```python\nimport os\nfrom dotenv import load_dotenv\nfrom scrapegraphai.graphs import XMLScraperMultiGraph\nfrom langchain_openai import AzureChatOpenAI\nfrom langchain_openai import AzureOpenAIEmbeddings\nfrom scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info\nload_dotenv()\n\n# ************************************************\n# Read the XML file\n# ************************************************\n\nFILE_NAME = \"inputs/books.xml\"\ncurr_dir = os.path.dirname(os.path.realpath(__file__))\nfile_path = os.path.join(curr_dir, FILE_NAME)\n\nwith open(file_path, 'r', encoding=\"utf-8\") as file:\n    text = file.read()\n\n# ************************************************\n# Define the configuration for the graph\n# ************************************************\n\nllm_model_instance = AzureChatOpenAI(\n    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n    azure_deployment=os.environ[\"AZURE_OPENAI_CHAT_DEPLOYMENT_NAME\"]\n)\n\nembedder_model_instance = AzureOpenAIEmbeddings(\n    azure_deployment=os.environ[\"AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME\"],\n    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\n)\ngraph_config = {\n    \"llm\": {\"model_instance\": llm_model_instance},\n    \"embeddings\": {\"model_instance\": embedder_model_instance}\n}\n\n# ************************************************\n# Create the XMLScraperMultiGraph instance and run it\n# ************************************************\n\nxml_scraper_graph = XMLScraperMultiGraph(\n    prompt=\"List me all the authors, title and genres of the books\",\n    source=[text, text],  # Pass the content of the file, not the file object\n    config=graph_config\n)\n\nresult = xml_scraper_graph.run()\nprint(result)\n\n# ************************************************\n# Get graph execution info\n# ************************************************\n\ngraph_exec_info = xml_scraper_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))\n\n# Save to json or csv\nconvert_to_csv(result, \"result\")\nconvert_to_json(result, \"result\")\n```"
     },
     {
-        "prompt": "How to create a script in Scrapegraphai for scraping multiple CSVs in haiku (anthopic)? ",
-        "answer": ""
-
+        "prompt": "How to create a script in Scrapegraphai for scraping multiple CSVs in azure ?",
+        "answer": "Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents\n\n```python\nimport os\nfrom dotenv import load_dotenv\nimport pandas as pd\nfrom scrapegraphai.graphs import CSVScraperMultiGraph\nfrom langchain_openai import AzureChatOpenAI\nfrom langchain_openai import AzureOpenAIEmbeddings\nfrom scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info\n\nload_dotenv()\n# ************************************************\n# Read the CSV file\n# ************************************************\n\nFILE_NAME = \"inputs/username.csv\"\ncurr_dir = os.path.dirname(os.path.realpath(__file__))\nfile_path = os.path.join(curr_dir, FILE_NAME)\n\ntext = pd.read_csv(file_path)\n\n# ************************************************\n# Define the configuration for the graph\n# ************************************************\n\nllm_model_instance = AzureChatOpenAI(\n    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n    azure_deployment=os.environ[\"AZURE_OPENAI_CHAT_DEPLOYMENT_NAME\"]\n)\n\nembedder_model_instance = AzureOpenAIEmbeddings(\n    azure_deployment=os.environ[\"AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME\"],\n    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\n)\ngraph_config = {\n    \"llm\": {\"model_instance\": llm_model_instance},\n    \"embeddings\": {\"model_instance\": embedder_model_instance}\n}\n# ************************************************\n# Create the CSVScraperMultiGraph instance and run it\n# ************************************************\n\ncsv_scraper_graph = CSVScraperMultiGraph(\n    prompt=\"List me all the last names\",\n    source=[str(text), str(text)],\n    config=graph_config\n)\n\nresult = csv_scraper_graph.run()\nprint(result)\n\n# ************************************************\n# Get graph execution info\n# ************************************************\n\ngraph_exec_info = csv_scraper_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))\n\n# Save to json or csv\nconvert_to_csv(result, \"result\")\nconvert_to_json(result, \"result\")\n```"
     },
     {
-        "prompt": "How to create a script in Scrapegraphai for scraping multiple JSONs in haiku (anthopic)? ",
-        "answer": ""
-
+        "prompt": "How to create a script in Scrapegraphai for scraping multiple JSONs in azure ?",
+        "answer": "Module for showing how JSONScraperMultiGraph multi works\n\n```python\nimport os\nimport json\nfrom langchain_openai import AzureChatOpenAI\nfrom langchain_openai import AzureOpenAIEmbeddings\nfrom scrapegraphai.graphs import JSONScraperMultiGraph\n\nllm_model_instance = AzureChatOpenAI(\n    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n    azure_deployment=os.environ[\"AZURE_OPENAI_CHAT_DEPLOYMENT_NAME\"]\n)\n\nembedder_model_instance = AzureOpenAIEmbeddings(\n    azure_deployment=os.environ[\"AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME\"],\n    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"]\n)\ngraph_config = {\n    \"llm\": {\"model_instance\": llm_model_instance},\n    \"embeddings\": {\"model_instance\": embedder_model_instance}\n}\nFILE_NAME = \"inputs/example.json\"\ncurr_dir = os.path.dirname(os.path.realpath(__file__))\nfile_path = os.path.join(curr_dir, FILE_NAME)\n\nwith open(file_path, 'r', encoding=\"utf-8\") as file:\n    text = file.read()\n\nsources = [text, text]\n\nmultiple_search_graph = JSONScraperMultiGraph(\n    prompt= \"List me all the authors, title and genres of the books\",\n    source= sources,\n    schema=None,\n    config=graph_config\n)\n\nresult = multiple_search_graph.run()\nprint(json.dumps(result, indent=4))\n```"
     }
 ]
Original file line number	Diff line number	Diff line change
`@@ -44,18 +44,15 @@`
`44`	`44`	"answer": "Basic example of scraping pipeline using ScriptCreatorGraph\n\nimport os\nfrom dotenv import load_dotenv\nfrom scrapegraphai.graphs import ScriptCreatorGraph\nfrom scrapegraphai.utils import prettify_exec_info\n\nload_dotenv()\n\n# **********************************************\n# Define the configuration for the graph\n# ********************************************\n\ngraph_config = {\n \"llm\": {\n \"api_key\": os.getenv(\"ANTHROPIC_API_KEY\"),\n \"model\": \"claude-3-haiku-20240307\",\n \"max_tokens\": 4000\n }\n}\n\n# ********************************************\n# Create the ScriptCreatorGraph instance and run it\n# ********************************************\n\nscript_creator_graph = ScriptCreatorGraph(\n prompt=\"List me all the projects with their description.\",\n # also accepts a string with the already downloaded HTML code\n source=\"https://perinim.github.io/projects\",\n config=graph_config\n)\n\nresult = script_creator_graph.run()\nprint(result)\n\n# ********************************************\n# Get graph execution info\n# **********************************************\n\ngraph_exec_info = script_creator_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))"
`45`	`45`	`},`
`46`	`46`	`{`
`47`		`- "prompt": "How to create a script in Scrapegraphai for scraping multiple XMLS in haiku (anthopic)? ",`
`48`		`- "answer": ""`
`49`		`-`
	`47`	`+ "prompt": "How to create a script in Scrapegraphai for scraping multiple XMLs in haiku (anthropic)?",`
	`48`	+ "answer": "Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents\n\n```python\nimport os\nfrom dotenv import load_dotenv\nfrom scrapegraphai.graphs import XMLScraperMultiGraph\nfrom scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info\nload_dotenv()\n\n# **********************************************\n# Read the XML file\n# ********************************************\n\nFILE_NAME = \"inputs/books.xml\"\ncurr_dir = os.path.dirname(os.path.realpath(__file__))\nfile_path = os.path.join(curr_dir, FILE_NAME)\n\nwith open(file_path, 'r', encoding=\"utf-8\") as file:\n text = file.read()\n\n# ********************************************\n# Define the configuration for the graph\n# ********************************************\n\ngraph_config = {\n \"llm\": {\n \"api_key\": os.getenv(\"ANTHROPIC_API_KEY\"),\n \"model\": \"claude-3-haiku-20240307\",\n \"max_tokens\": 4000},\n}\n\n# ********************************************\n# Create the XMLScraperMultiGraph instance and run it\n# ********************************************\n\nxml_scraper_graph = XMLScraperMultiGraph(\n prompt=\"List me all the authors, title and genres of the books\",\n source=[text, text], # Pass the content of the file, not the file object\n config=graph_config\n)\n\nresult = xml_scraper_graph.run()\nprint(result)\n\n# ********************************************\n# Get graph execution info\n# **********************************************\n\ngraph_exec_info = xml_scraper_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))\n\n# Save to json or csv\nconvert_to_csv(result, \"result\")\nconvert_to_json(result, \"result\")\n```"
`50`	`49`	`},`
`51`	`50`	`{`
`52`		`- "prompt": "How to create a script in Scrapegraphai for scraping multiple CSVs in haiku (anthopic)? ",`
`53`		`- "answer": ""`
`54`		`-`
	`51`	`+ "prompt": "How to create a script in Scrapegraphai for scraping multiple CSVs in haiku (anthropic)?",`
	`52`	+ "answer": "Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents\n\n```python\nimport os\nfrom dotenv import load_dotenv\nimport pandas as pd\nfrom scrapegraphai.graphs import CSVScraperMultiGraph\nfrom scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info\n\nload_dotenv()\n# **********************************************\n# Read the CSV file\n# ********************************************\n\nFILE_NAME = \"inputs/username.csv\"\ncurr_dir = os.path.dirname(os.path.realpath(__file__))\nfile_path = os.path.join(curr_dir, FILE_NAME)\n\ntext = pd.read_csv(file_path)\n\n# ********************************************\n# Define the configuration for the graph\n# ********************************************\n\ngraph_config = {\n \"llm\": {\n \"api_key\": os.getenv(\"ANTHROPIC_API_KEY\"),\n \"model\": \"claude-3-haiku-20240307\",\n \"max_tokens\": 4000},\n}\n\n# ********************************************\n# Create the CSVScraperMultiGraph instance and run it\n# ********************************************\n\ncsv_scraper_graph = CSVScraperMultiGraph(\n prompt=\"List me all the last names\",\n source=[str(text), str(text)],\n config=graph_config\n)\n\nresult = csv_scraper_graph.run()\nprint(result)\n\n# ********************************************\n# Get graph execution info\n# **********************************************\n\ngraph_exec_info = csv_scraper_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))\n\n# Save to json or csv\nconvert_to_csv(result, \"result\")\nconvert_to_json(result, \"result\")\n```"
`55`	`53`	`},`
`56`	`54`	`{`
`57`		`- "prompt": "How to create a script in Scrapegraphai for scraping multiple JSONs in haiku (anthopic)? ",`
`58`		`- "answer": ""`
`59`		`-`
	`55`	`+ "prompt": "How to create a script in Scrapegraphai for scraping multiple JSONs in haiku (anthropic)?",`
	`56`	+ "answer": "Module for showing how JSONScraperMultiGraph multi works\n\n```python\nimport os\nimport json\nfrom dotenv import load_dotenv\nfrom scrapegraphai.graphs import JSONScraperMultiGraph\n\nload_dotenv()\n\ngraph_config = {\n \"llm\": {\n \"api_key\": os.getenv(\"ANTHROPIC_API_KEY\"),\n \"model\": \"claude-3-haiku-20240307\",\n \"max_tokens\": 4000\n },\n}\n\nFILE_NAME = \"inputs/example.json\"\ncurr_dir = os.path.dirname(os.path.realpath(__file__))\nfile_path = os.path.join(curr_dir, FILE_NAME)\n\nwith open(file_path, 'r', encoding=\"utf-8\") as file:\n text = file.read()\n\nsources = [text, text]\n\nmultiple_search_graph = JSONScraperMultiGraph(\n prompt= \"List me all the authors, title and genres of the books\",\n source= sources,\n schema=None,\n config=graph_config\n)\n\nresult = multiple_search_graph.run()\nprint(json.dumps(result, indent=4))\n```"
`60`	`57`	`}`
`61`	`58`	`]`