ScrapeGraphAI
diff --git a/‎Configurations/fireworks_config.json
+62 b/‎Configurations/fireworks_config.json
+62
diff --git a/‎Examples/anthropic_config.json
+3 b/‎Examples/anthropic_config.json
+3
diff --git a/‎Examples/azure_config.json
+6 b/‎Examples/azure_config.json
+6
@@ -0,0 +1,62 @@
+[
+    {
+        "prompt": "How to create a script in Scrapegraphai for scraping a specific website given a prompt using fireworks as a provider?",
+        "filename":"smart_scraper_fireworks.py"
+    },
+    {
+        "prompt": "How to create a script in Scrapegraphai for scraping a specific website given a prompt and a schema using fireworks as a provider?",
+        "filename":"smart_scraper_schema_fireworks.py"
+    },
+    {
+        "prompt": "How to create a script in Scrapegraphai for scraping a specific website given a prompt and searching on the internet using fireworks as a provider?",
+        "filename":"search_graph_fireworks.py"
+    },
+    {
+        "prompt": "How to create a script in Scrapegraphai for scraping a specific website given a prompt and searching on the internet using fireworks as a provider and given a schema?",
+        "filename":"search_graph_schema_fireworks.py"
+    },
+    {
+        "prompt": "How to create a script in Scrapegraphai for scraping an XML given a prompt using fireworks as a provider?",
+        "filename":"xml_scraper_fireworks.py"
+    },
+    {
+        "prompt": "How to create a script in Scrapegraphai for scraping a CSV given a prompt using fireworks as a provider?",
+        "filename":"csv_scraper_fireworks.py"
+    },
+    {
+        "prompt": "How to create a script in Scrapegraphai for scraping plain text given a prompt using fireworks as a provider?",
+        "filename":"scrape_plain_text_fireworks.py"
+    },
+    {
+        "prompt": "How to create a script in Scrapegraphai for scraping a PDF given a prompt using fireworks as a provider?",
+        "filename":"pdf_scraper_graph_fireworks.py"
+    },
+    {
+        "prompt": "How to create a script in Scrapegraphai for creating a custom graph using fireworks as a provider?",
+        "filename":"custom_graph_fireworks.py"
+    },
+    {
+        "prompt": "How to create a script in Scrapegraphai for creating a script in fireworks using beautifulsoup?",
+        "filename":"script_generator_fireworks.py"
+    },
+    {
+        "prompt": "How to create a script in Scrapegraphai for creating multiple scripts in fireworks using beautifulsoup?",
+        "filename": "script_generator_multi_fireworks.py"
+    },
+    {
+        "prompt": "How to create a script in Scrapegraphai for scraping multiple XMLs in fireworks?",
+        "filename":"xml_scraper_graph_multi_fireworks.py"
+    },
+    {
+        "prompt": "How to create a script in Scrapegraphai for scraping multiple CSVs in fireworks?",
+        "filename":"csv_scraper_graph_multi_fireworks.py"
+    },
+    {
+        "prompt": "How to create a script in Scrapegraphai for scraping a single JSON in fireworks?",
+        "filename":"json_scraper_fireworks.py"
+    },
+    {
+        "prompt": "How to create a script in Scrapegraphai for scraping multiple JSONs in fireworks?",
+        "filename":"json_scraper_multi_fireworks.py"
+    }
+]
@@ -39,6 +39,9 @@
         "prompt": "How to create a script in Scrapegraphai for creating a script in haiku (anthropic) using beautifulsoup?",
         "answer": "\"\"\" \nBasic example of scraping pipeline using ScriptCreatorGraph\n\"\"\"\n\nimport os\nfrom dotenv import load_dotenv\nfrom scrapegraphai.graphs import ScriptCreatorGraph\nfrom scrapegraphai.utils import prettify_exec_info\n\nload_dotenv()\n\n# ************************************************\n# Define the configuration for the graph\n# ************************************************\n\ngraph_config = {\n    \"llm\": {\n        \"api_key\": os.getenv(\"ANTHROPIC_API_KEY\"),\n        \"model\": \"claude-3-haiku-20240307\",\n        \"max_tokens\": 4000\n        },\n}\n\n# ************************************************\n# Create the ScriptCreatorGraph instance and run it\n# ************************************************\n\nscript_creator_graph = ScriptCreatorGraph(\n    prompt=\"List me all the projects with their description.\",\n    # also accepts a string with the already downloaded HTML code\n    source=\"https://perinim.github.io/projects\",\n    config=graph_config\n)\n\nresult = script_creator_graph.run()\nprint(result)\n\n# ************************************************\n# Get graph execution info\n# ************************************************\n\ngraph_exec_info = script_creator_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))\n\n"
     },
+    {
+        "prompt": "How to create a script in Scrapegraphai for creating multiple scripts in haiku (anthropic) using beautifulsoup?"
+    },
     {
         "prompt": "How to create a script in Scrapegraphai for scraping multiple XMLs in haiku (anthropic)?",
         "answer": "\"\"\"\nBasic example of scraping pipeline using XMLScraperMultiGraph from XML documents\n\"\"\"\n\nimport os\nfrom dotenv import load_dotenv\nfrom scrapegraphai.graphs import XMLScraperMultiGraph\nfrom scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info\nload_dotenv()\n\n# ************************************************\n# Read the XML file\n# ************************************************\n\nFILE_NAME = \"inputs/books.xml\"\ncurr_dir = os.path.dirname(os.path.realpath(__file__))\nfile_path = os.path.join(curr_dir, FILE_NAME)\n\nwith open(file_path, 'r', encoding=\"utf-8\") as file:\n    text = file.read()\n\n# ************************************************\n# Define the configuration for the graph\n# ************************************************\n\ngraph_config = {\n    \"llm\": {\n        \"api_key\": os.getenv(\"ANTHROPIC_API_KEY\"),\n        \"model\": \"claude-3-haiku-20240307\",\n        \"max_tokens\": 4000},\n}\n\n# ************************************************\n# Create the XMLScraperMultiGraph instance and run it\n# ************************************************\n\nxml_scraper_graph = XMLScraperMultiGraph(\n    prompt=\"List me all the authors, title and genres of the books\",\n    source=[text, text],  # Pass the content of the file, not the file object\n    config=graph_config\n)\n\nresult = xml_scraper_graph.run()\nprint(result)\n\n# ************************************************\n# Get graph execution info\n# ************************************************\n\ngraph_exec_info = xml_scraper_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))\n\n# Save to json or csv\nconvert_to_csv(result, \"result\")\nconvert_to_json(result, \"result\")\n"
 
@@ -27,6 +27,9 @@
         "prompt": "How to create a script in Scrapegraphai for scraping plain text given a prompt using azure as a provider?",
         "answer": "\"\"\" \nBasic example of scraping pipeline using SmartScraper from text\n\"\"\"\n\nimport os\nfrom dotenv import load_dotenv\nfrom scrapegraphai.graphs import SmartScraperGraph\nfrom langchain_openai import AzureChatOpenAI\nfrom langchain_openai import AzureOpenAIEmbeddings\nfrom scrapegraphai.utils import prettify_exec_info\n\nload_dotenv()\n\n# ************************************************\n# Read the text file\n# ************************************************\n\nFILE_NAME = \"inputs/plain_html_example.txt\"\ncurr_dir = os.path.dirname(os.path.realpath(__file__))\nfile_path = os.path.join(curr_dir, FILE_NAME)\n\n# It could be also a http request using the request model\nwith open(file_path, 'r', encoding=\"utf-8\") as file:\n    text = file.read()\n\n# ************************************************\n# Define the configuration for the graph\n# ************************************************\n\nllm_model_instance = AzureChatOpenAI(\n    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n    azure_deployment=os.environ[\"AZURE_OPENAI_CHAT_DEPLOYMENT_NAME\"]\n)\n\nembedder_model_instance = AzureOpenAIEmbeddings(\n    azure_deployment=os.environ[\"AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME\"],\n    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n)\n\n# ************************************************\n# Create the JSONScraperGraph instance and run it\n# ************************************************\n\ngraph_config = {\n    \"llm\": {\"model_instance\": llm_model_instance},\n    \"embeddings\": {\"model_instance\": embedder_model_instance}\n}\n\n# ************************************************\n# Create the SmartScraperGraph instance and run it\n# ************************************************\n\nsmart_scraper_graph = SmartScraperGraph(\n    prompt=\"List me all the projects with their description.\",\n    source=text,\n    config=graph_config\n)\n\nresult = smart_scraper_graph.run()\nprint(result)\n\n# 
************************************************\n# Get graph execution info\n# ************************************************\n\ngraph_exec_info = smart_scraper_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))\n"
     },
+    {
+        "prompt": "How to create a script in Scrapegraphai for scraping a PDF given a prompt using azure as a provider?"
+    },
     {
         "prompt": "How to create a script in Scrapegraphai for creating a custom graph using azure as a provider?",
         "answer": "\"\"\"\nExample of custom graph using existing nodes\n\"\"\"\n\nimport os\nfrom dotenv import load_dotenv\nfrom langchain_openai import OpenAIEmbeddings\nfrom langchain_openai import AzureChatOpenAI\nfrom langchain_openai import AzureOpenAIEmbeddings\nfrom scrapegraphai.graphs import BaseGraph\nfrom scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode\nload_dotenv()\n\n# ************************************************\n# Define the configuration for the graph\n# ************************************************\n\n# ************************************************\n# Define the configuration for the graph\n# ************************************************\n\nllm_model_instance = AzureChatOpenAI(\n    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n    azure_deployment=os.environ[\"AZURE_OPENAI_CHAT_DEPLOYMENT_NAME\"]\n)\n\nembedder_model_instance = AzureOpenAIEmbeddings(\n    azure_deployment=os.environ[\"AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME\"],\n    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n)\n\n# ************************************************\n# Create the JSONScraperGraph instance and run it\n# ************************************************\n\ngraph_config = {\n    \"llm\": {\"model_instance\": llm_model_instance},\n    \"embeddings\": {\"model_instance\": embedder_model_instance}\n}\n# define the nodes for the graph\nrobot_node = RobotsNode(\n    input=\"url\",\n    output=[\"is_scrapable\"],\n    node_config={\n        \"llm_model\": llm_model_instance,\n        \"force_scraping\": True,\n        \"verbose\": True,\n        }\n)\n\nfetch_node = FetchNode(\n    input=\"url | local_dir\",\n    output=[\"doc\", \"link_urls\", \"img_urls\"],\n    node_config={\n        \"verbose\": True,\n        \"headless\": True,\n    }\n)\nparse_node = ParseNode(\n    input=\"doc\",\n    output=[\"parsed_doc\"],\n    node_config={\n        \"chunk_size\": 4096,\n        
\"verbose\": True,\n    }\n)\nrag_node = RAGNode(\n    input=\"user_prompt & (parsed_doc | doc)\",\n    output=[\"relevant_chunks\"],\n    node_config={\n        \"llm_model\": llm_model_instance,\n        \"embedder_model\": embedder_model_instance,\n        \"verbose\": True,\n    }\n)\ngenerate_answer_node = GenerateAnswerNode(\n    input=\"user_prompt & (relevant_chunks | parsed_doc | doc)\",\n    output=[\"answer\"],\n    node_config={\n        \"llm_model\": llm_model_instance,\n        \"verbose\": True,\n    }\n)\n\n# ************************************************\n# Create the graph by defining the connections\n# ************************************************\n\ngraph = BaseGraph(\n    nodes=[\n        robot_node,\n        fetch_node,\n        parse_node,\n        rag_node,\n        generate_answer_node,\n    ],\n    edges=[\n        (robot_node, fetch_node),\n        (fetch_node, parse_node),\n        (parse_node, rag_node),\n        (rag_node, generate_answer_node)\n    ],\n    entry_point=robot_node\n)\n\n# ************************************************\n# Execute the graph\n# ************************************************\n\nresult, execution_info = graph.execute({\n    \"user_prompt\": \"Describe the content\",\n    \"url\": \"https://example.com/\"\n})\n\n# get the answer from the result\nresult = result.get(\"answer\", \"No answer found.\")\nprint(result)\n"
@@ -35,6 +38,9 @@
         "prompt": "How to create a script in Scrapegraphai for creating a script in azure using beautifulsoup?",
         "answer": "\"\"\" \nBasic example of scraping pipeline using ScriptCreatorGraph\n\"\"\"\n\nimport os\nfrom dotenv import load_dotenv\nfrom scrapegraphai.graphs import ScriptCreatorGraph\nfrom langchain_openai import AzureChatOpenAI\nfrom langchain_openai import AzureOpenAIEmbeddings\nfrom scrapegraphai.utils import prettify_exec_info\n\nload_dotenv()\n\n# ************************************************\n# Define the configuration for the graph\n# ************************************************\nllm_model_instance = AzureChatOpenAI(\n    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n    azure_deployment=os.environ[\"AZURE_OPENAI_CHAT_DEPLOYMENT_NAME\"]\n)\n\nembedder_model_instance = AzureOpenAIEmbeddings(\n    azure_deployment=os.environ[\"AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME\"],\n    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n)\ngraph_config = {\n    \"llm\": {\"model_instance\": llm_model_instance},\n    \"embeddings\": {\"model_instance\": embedder_model_instance},\n    \"library\": \"beautifulsoup\"\n}\n\n# ************************************************\n# Create the ScriptCreatorGraph instance and run it\n# ************************************************\n\nscript_creator_graph = ScriptCreatorGraph(\n    prompt=\"List me all the projects with their description.\",\n    # also accepts a string with the already downloaded HTML code\n    source=\"https://perinim.github.io/projects\",\n    config=graph_config\n)\n\nresult = script_creator_graph.run()\nprint(result)\n\n# ************************************************\n# Get graph execution info\n# ************************************************\n\ngraph_exec_info = script_creator_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))\n\n"
     },
+    {
+        "prompt": "How to create a script in Scrapegraphai for creating multiple scripts in azure using beautifulsoup?"
+    },
     {
         "prompt": "How to create a script in Scrapegraphai for scraping multiple XMLs in azure?",
         "answer": "\"\"\"\nBasic example of scraping pipeline using XMLScraperMultiGraph from XML documents\n\"\"\"\n\nimport os\nfrom dotenv import load_dotenv\nfrom scrapegraphai.graphs import XMLScraperMultiGraph\nfrom langchain_openai import AzureChatOpenAI\nfrom langchain_openai import AzureOpenAIEmbeddings\nfrom scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info\nload_dotenv()\n\n# ************************************************\n# Read the XML file\n# ************************************************\n\nFILE_NAME = \"inputs/books.xml\"\ncurr_dir = os.path.dirname(os.path.realpath(__file__))\nfile_path = os.path.join(curr_dir, FILE_NAME)\n\nwith open(file_path, 'r', encoding=\"utf-8\") as file:\n    text = file.read()\n\n# ************************************************\n# Define the configuration for the graph\n# ************************************************\n\nllm_model_instance = AzureChatOpenAI(\n    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n    azure_deployment=os.environ[\"AZURE_OPENAI_CHAT_DEPLOYMENT_NAME\"]\n)\n\nembedder_model_instance = AzureOpenAIEmbeddings(\n    azure_deployment=os.environ[\"AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME\"],\n    openai_api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n)\ngraph_config = {\n    \"llm\": {\"model_instance\": llm_model_instance},\n    \"embeddings\": {\"model_instance\": embedder_model_instance}\n}\n\n# ************************************************\n# Create the XMLScraperMultiGraph instance and run it\n# ************************************************\n\nxml_scraper_graph = XMLScraperMultiGraph(\n    prompt=\"List me all the authors, title and genres of the books\",\n    source=[text, text],  # Pass the content of the file, not the file object\n    config=graph_config\n)\n\nresult = xml_scraper_graph.run()\nprint(result)\n\n# ************************************************\n# Get graph execution info\n# 
************************************************\n\ngraph_exec_info = xml_scraper_graph.get_execution_info()\nprint(prettify_exec_info(graph_exec_info))\n\n# Save to json or csv\nconvert_to_csv(result, \"result\")\nconvert_to_json(result, \"result\")\n"