|
| 1 | +[ |
| 2 | + { |
| 3 | + "prompt": "What is the base node in ScrapeGraphAI?", |
| 4 | + "answer": "The ScrapeGraphAI `BaseNode` is an abstract base class for nodes in a graph-based workflow, designed to perform specific actions when executed." |
| 5 | + }, |
| 6 | + { |
| 7 | + "prompt": "What is the purpose of the BaseNode class in ScrapeGraphAI?", |
| 8 | + "answer": "The BaseNode class is an abstract base class for nodes in a graph-based workflow. It is designed to perform specific actions when executed." |
| 9 | + }, |
| 10 | + { |
| 11 | + "prompt": "What are the attributes of the BaseNode class in ScrapeGraphAI?", |
| 12 | + "answer": "The attributes of the BaseNode class are `node_name`, `input`, `output`, `min_input_len`, `node_config`, and `logger`." |
| 13 | + }, |
| 14 | + { |
| 15 | + "prompt": "What is the role of the `node_name` attribute in the `BaseNode` class of ScrapeGraphAI?", |
| 16 | + "answer": "The `node_name` attribute is a unique identifier name for the node in the `BaseNode` class." |
| 17 | + }, |
| 18 | + { |
| 19 | + "prompt": "What is the role of the input attribute in the BaseNode class in ScrapeGraphAI?", |
| 20 | + "answer": "The `input` attribute is a boolean expression defining the input keys needed from the state in the `BaseNode` class." |
| 21 | + }, |
| 22 | + { |
| 23 | + "prompt": "What is the role of the output attribute in the BaseNode class in ScrapeGraphAI?", |
| 24 | + "answer": "The `output` attribute is a list of output keys to be updated in the state in the `BaseNode` class." |
| 25 | + }, |
| 26 | + { |
| 27 | + "prompt": "What is the role of the min_input_len attribute in the BaseNode class in ScrapeGraphAI?", |
| 28 | + "answer": "The `min_input_len` attribute is the minimum required number of input keys in the `BaseNode` class." |
| 29 | + }, |
| 30 | + { |
| 31 | + "prompt": "What is the role of the node_config attribute in the BaseNode class in ScrapeGraphAI?", |
| 32 | + "answer": "The `node_config` attribute is an optional additional configuration for the node in the `BaseNode` class." |
| 33 | + }, |
| 34 | + { |
| 35 | + "prompt": "What is the role of the logger attribute in the BaseNode class in ScrapeGraphAI?", |
| 36 | + "answer": "The `logger` attribute is the centralized root logger in the `BaseNode` class." |
| 37 | + }, |
| 38 | + { |
| 39 | + "prompt": "What are the methods of the BaseNode class in ScrapeGraphAI?", |
| 40 | + "answer": "The methods of the `BaseNode` class are `__init__`, `execute`, `update_config`, `get_input_keys`, `_validate_input_keys`, and `_parse_input_keys`." |
| 41 | + }, |
| 42 | + { |
| 43 | + "prompt": "What is the purpose of the __init__ method in the BaseNode class in ScrapeGraphAI?", |
| 44 | + "answer": "The `__init__` method is the constructor method for initializing the `BaseNode` class with the required attributes." |
| 45 | + }, |
| 46 | + { |
| 47 | + "prompt": "What is the purpose of the execute method in the BaseNode class in ScrapeGraphAI?", |
| 48 | + "answer": "The `execute` method is an abstract method that is responsible for executing the node's logic based on the current state and updating it accordingly." |
| 49 | + }, |
| 50 | + { |
| 51 | + "prompt": "What is the purpose of the update_config method in the BaseNode class in ScrapeGraphAI?", |
| 52 | + "answer": "The `update_config` method is responsible for updating the `node_config` dictionary as well as attributes with the same key in the `BaseNode` class." |
| 53 | + }, |
| 54 | + { |
| 55 | + "prompt": "What is the purpose of the get_input_keys method in the BaseNode class in ScrapeGraphAI?", |
| 56 | + "answer": "The `get_input_keys` method is responsible for determining the necessary state keys based on the input specification in the `BaseNode` class." |
| 57 | + }, |
| 58 | + { |
| 59 | + "prompt": "What is the purpose of the _validate_input_keys method in the BaseNode class in ScrapeGraphAI?", |
| 60 | + "answer": "The `_validate_input_keys` method is responsible for validating if the provided input keys meet the minimum length requirement in the `BaseNode` class." |
| 61 | + }, |
| 62 | + { |
| 63 | + "prompt": "What is the purpose of the _parse_input_keys method in the BaseNode class in ScrapeGraphAI?", |
| 64 | + "answer": "The `_parse_input_keys` method is responsible for parsing the input keys expression to extract relevant keys from the state based on logical conditions in the `BaseNode` class." |
| 65 | + }, |
| 66 | + { |
| 67 | + "prompt": "Show me the implementation of the BaseNode in ScrapeGraphAI.", |
| 68 | + "answer": "Sure, here's the implementation of the `BaseNode` in ScrapeGraphAI:\n\n```python\n\"\"\" \nBaseNode Module\n\"\"\"\n\nimport re\nfrom abc import ABC, abstractmethod\nfrom typing import List, Optional\n\nfrom ..utils import get_logger\n\n\nclass BaseNode(ABC):\n \"\"\"\n An abstract base class for nodes in a graph-based workflow, designed to perform specific actions when executed.\n\n Attributes:\n node_name (str): The unique identifier name for the node.\n input (str): Boolean expression defining the input keys needed from the state.\n output (List[str]): List of\n min_input_len (int): Minimum required number of input keys.\n node_config (Optional[dict]): Additional configuration for the node.\n logger (logging.Logger): The centralized root logger\n\n Args:\n node_name (str): Name for identifying the node.\n node_type (str): Type of the node; must be 'node' or 'conditional_node'.\n input (str): Expression defining the input keys needed from the state.\n output (List[str]): List of output keys to be updated in the state.\n min_input_len (int, optional): Minimum required number of input keys; defaults to 1.\n node_config (Optional[dict], optional): Additional configuration for the node; defaults to None.\n\n Raises:\n ValueError: If `node_type` is not one of the allowed types.\n\n Example:\n >>> class MyNode(BaseNode):\n ... def execute(self, state):\n ... # Implementation of node logic here\n ... 
return state\n ...\n >>> my_node = MyNode(\"ExampleNode\", \"node\", \"input_spec\", [\"output_spec\"])\n >>> updated_state = my_node.execute({'key': 'value'})\n {'key': 'value'}\n \"\"\"\n\n def __init__(\n self,\n node_name: str,\n node_type: str,\n input: str,\n output: List[str],\n min_input_len: int = 1,\n node_config: Optional[dict] = None,\n ):\n\n self.node_name = node_name\n self.input = input\n self.output = output\n self.min_input_len = min_input_len\n self.node_config = node_config\n self.logger = get_logger()\n\n if node_type not in [\"node\", \"conditional_node\"]:\n raise ValueError(\n f\"node_type must be 'node' or 'conditional_node', got '{node_type}'\"\n )\n self.node_type = node_type\n\n @abstractmethod\n def execute(self, state: dict) -> dict:\n \"\"\"\n Execute the node's logic based on the current state and update it accordingly.\n\n Args:\n state (dict): The current state of the graph.\n\n Returns:\n dict: The updated state after executing the node's logic.\n \"\"\"\n\n pass\n\n def update_config(self, params: dict, overwrite: bool = False):\n \"\"\"\n Updates the node_config dictionary as well as attributes with same key.\n\n Args:\n param (dict): The dictionary to update node_config with.\n overwrite (bool): Flag indicating if the values of node_config should be overwritten if their value is not None.\n \"\"\"\n if self.node_config is None:\n self.node_config = {}\n for key, val in params.items():\n if hasattr(self, key) and (key not in self.node_config or overwrite):\n self.node_config[key] = val\n setattr(self, key, val)\n\n def get_input_keys(self, state: dict) -> List[str]:\n \"\"\"\n Determines the necessary state keys based on the input specification.\n\n Args:\n state (dict): The current state of the graph used to parse input keys.\n\n Returns:\n List[str]: A list of input keys required for node operation.\n\n Raises:\n ValueError: If error occurs in parsing input keys.\n \"\"\"\n\n try:\n input_keys = self._parse_input_keys(state, 
self.input)\n self._validate_input_keys(input_keys)\n return input_keys\n except ValueError as e:\n raise ValueError(f\"Error parsing input keys for {self.node_name}: {str(e)}\")\n\n def _validate_input_keys(self, input_keys):\n \"\"\"\n Validates if the provided input keys meet the minimum length requirement.\n\n Args:\n input_keys (List[str]): The list of input keys to validate.\n\n Raises:\n ValueError: If the number of input keys is less than the minimum required.\n \"\"\"\n\n if len(input_keys) < self.min_input_len:\n raise ValueError(\n f\"\"\"{self.node_name} requires at least {self.min_input_len} input keys,\n got {len(input_keys)}.\"\"\"\n )\n\n def _parse_input_keys(self, state: dict, expression: str) -> List[str]:\n \"\"\"\n Parses the input keys expression to extract relevant keys from the state based on logical conditions.\n The expression can contain AND (&), OR (|), and parentheses to group conditions.\n\n Args:\n state (dict): The current state of the graph.\n expression (str): The input keys expression to parse.\n\n Returns:\n List[str]: A list of key names that match the input keys expression logic.\n\n Raises:\n ValueError: If the expression is invalid or if no state keys match the expression.\n \"\"\"\n\n # Check for empty expression\n if not expression:\n raise ValueError(\"Empty expression.\")\n\n # Check for adjacent state keys without an operator between them\n pattern = (\n r\"\\b(\"\n + \"|\".join(re.escape(key) for key in state.keys())\n + r\")(\\b\\s*\\b)(\"\n + \"|\".join(re.escape(key) for key in state.keys())\n + r\")\\b\"\n )\n if re.search(pattern, expression):\n raise ValueError(\n \"Adjacent state keys found without an operator between them.\"\n )\n\n # Remove spaces\n expression = expression.replace(\" \", \"\")\n\n # Check for operators with empty adjacent tokens or at the start/end\n if (\n expression[0] in \"&|\"\n or expression[-1] in \"&|\"\n or \"&&\" in expression\n or \"||\" in expression\n or \"&|\" in expression\n or 
\"|&\" in expression\n ):\n raise ValueError(\"Invalid operator usage.\")\n\n # Check for balanced parentheses and valid operator placement\n open_parentheses = close_parentheses = 0\n for i, char in enumerate(expression):\n if char == \"(\":\n open_parentheses += 1\n elif char == \")\":\n close_parentheses += 1\n # Check for invalid operator sequences\n if char in \"&|\" and i + 1 < len(expression) and expression[i + 1] in \"&|\":\n raise ValueError(\n \"Invalid operator placement: operators cannot be adjacent.\"\n )\n\n # Check for missing or balanced parentheses\n if open_parentheses != close_parentheses:\n raise ValueError(\"Missing or unbalanced parentheses in expression.\")\n\n # Helper function to evaluate an expression without parentheses\n def evaluate_simple_expression(exp: str) -> List[str]:\n \"\"\"Evaluate an expression without parentheses.\"\"\"\n\n # Split the expression by the OR operator and process each segment\n for or_segment in exp.split(\"|\"):\n\n # Check if all elements in an AND segment are in state\n and_segment = or_segment.split(\"&\")\n if all(elem.strip() in state for elem in and_segment):\n return [\n elem.strip() for elem in and_segment if elem.strip() in state\n ]\n return []\n\n # Helper function to evaluate expressions with parentheses\n def evaluate_expression(expression: str) -> List[str]:\n \"\"\"Evaluate an expression with parentheses.\"\"\"\n\n while \"(\" in expression:\n start = expression.rfind(\"(\")\n end = expression.find(\")\", start)\n sub_exp = expression[start + 1 : end]\n\n # Replace the evaluated part with a placeholder and then evaluate it\n sub_result = evaluate_simple_expression(sub_exp)\n\n # For simplicity in handling, join sub-results with OR to reprocess them later\n expression = (\n expression[:start] + \"|\".join(sub_result) + expression[end + 1 :]\n )\n return evaluate_simple_expression(expression)\n\n result = evaluate_expression(expression)\n\n if not result:\n raise ValueError(\"No state keys matched 
the expression.\")\n\n # Remove redundant state keys from the result, without changing their order\n final_result = []\n for key in result:\n if key not in final_result:\n final_result.append(key)\n\n return final_result\n```" |
| 69 | + } |
| 70 | +] |
0 commit comments