Extract environment setup and exception checking boilerplate logic

DaveLak · DaveLak · commit 2e9c23995b70 · 2024-08-08T19:38:06.000-04:00
Changes:

   - Simplify exception handling in test harnesses via `handle_exception(e)`
     in the `except Exception as e:` block.

   - `setup_git_environment` is a step towards centralizing environment
     variable and logging configuration setup so that it is consistent
     across the different fuzzing scripts. **Applying it to only a single
     test for now is an intentional choice, in case it fails to work in
     the ClusterFuzz environment!** If it proves successful, a follow-up
     change set will be welcome.
diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py
@@ -1,67 +1,17 @@
-# ruff: noqa: E402
 import atheris
 import sys
 import os
-import traceback
 import tempfile
 from configparser import ParsingError
-from utils import get_max_filename_length
-import re
-
-bundle_dir = os.path.dirname(os.path.abspath(__file__))
-
-if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):  # pragma: no cover
-    bundled_git_binary_path = os.path.join(bundle_dir, "git")
-    os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path
-
 from git import Repo, GitCommandError, InvalidGitRepositoryError
+from utils import (
+    setup_git_environment,
+    handle_exception,
+    get_max_filename_length,
+)
 
-
-def load_exception_list(file_path):
-    """Load and parse the exception list from a file."""
-    try:
-        with open(file_path, "r") as file:
-            lines = file.readlines()
-        exception_list = set()
-        for line in lines:
-            match = re.match(r"(.+):(\d+):", line)
-            if match:
-                file_path = match.group(1).strip()
-                line_number = int(match.group(2).strip())
-                exception_list.add((file_path, line_number))
-        return exception_list
-    except FileNotFoundError:
-        print(f"File not found: {file_path}")
-        return set()
-    except Exception as e:
-        print(f"Error loading exception list: {e}")
-        return set()
-
-
-def match_exception_with_traceback(exception_list, exc_traceback):
-    """Match exception traceback with the entries in the exception list."""
-    for filename, lineno, _, _ in traceback.extract_tb(exc_traceback):
-        for file_pattern, line_pattern in exception_list:
-            if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)):
-                return True
-    return False
-
-
-def check_exception_against_list(exception_list, exc_traceback):
-    """Check if the exception traceback matches any entry in the exception list."""
-    return match_exception_with_traceback(exception_list, exc_traceback)
-
-
-if not sys.warnoptions:  # pragma: no cover
-    # The warnings filter below can be overridden by passing the -W option
-    # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
-    import warnings
-    import logging
-
-    # Fuzzing data causes some modules to generate a large number of warnings
-    # which are not usually interesting and make the test output hard to read, so we ignore them.
-    warnings.simplefilter("ignore")
-    logging.getLogger().setLevel(logging.ERROR)
+# Set up the Git environment (bundled Git executable path, warning and logging filters)
+setup_git_environment()
 
 
 def TestOneInput(data):
@@ -131,12 +81,10 @@ def TestOneInput(data):
         ):
             return -1
         except Exception as e:
-            exc_traceback = e.__traceback__
-            exception_list = load_exception_list(os.path.join(bundle_dir, "explicit-exceptions-list.txt"))
-            if check_exception_against_list(exception_list, exc_traceback):
+            if isinstance(e, ValueError) and "embedded null byte" in str(e):
                 return -1
             else:
-                raise e
+                return handle_exception(e)
 
 
 def main():
diff --git a/fuzzing/fuzz-targets/utils.py b/fuzzing/fuzz-targets/utils.py
@@ -1,6 +1,9 @@
 import atheris  # pragma: no cover
 import os  # pragma: no cover
-from typing import List  # pragma: no cover
+import re  # pragma: no cover
+import traceback  # pragma: no cover
+import sys  # pragma: no cover
+from typing import Set, Tuple, List  # pragma: no cover
 
 
 @atheris.instrument_func
@@ -35,3 +38,85 @@ def get_max_filename_length(path: str) -> int:  # pragma: no cover
         int: The maximum filename length.
     """
     return os.pathconf(path, "PC_NAME_MAX")
+
+
+@atheris.instrument_func
+def read_lines_from_file(file_path: str) -> list:
+    """Read lines from a file and return them as a list."""
+    try:
+        with open(file_path, "r") as f:
+            return [line.strip() for line in f if line.strip()]
+    except FileNotFoundError:
+        print(f"File not found: {file_path}")
+        return []
+    except IOError as e:
+        print(f"Error reading file {file_path}: {e}")
+        return []
+
+
+@atheris.instrument_func
+def load_exception_list(file_path: str = "explicit-exceptions-list.txt") -> Set[Tuple[str, str]]:
+    """Load and parse the exception list from a default or specified file."""
+    try:
+        bundle_dir = os.path.dirname(os.path.abspath(__file__))
+        full_path = os.path.join(bundle_dir, file_path)
+        lines = read_lines_from_file(full_path)
+        exception_list: Set[Tuple[str, str]] = set()
+        for line in lines:
+            match = re.match(r"(.+):(\d+):", line)
+            if match:
+                file_path: str = match.group(1).strip()
+                line_number: str = str(match.group(2).strip())
+                exception_list.add((file_path, line_number))
+        return exception_list
+    except Exception as e:
+        print(f"Error loading exception list: {e}")
+        return set()
+
+
+@atheris.instrument_func
+def match_exception_with_traceback(exception_list: Set[Tuple[str, str]], exc_traceback) -> bool:
+    """Match exception traceback with the entries in the exception list."""
+    for filename, lineno, _, _ in traceback.extract_tb(exc_traceback):
+        for file_pattern, line_pattern in exception_list:
+            # Convert the line number to a string so it can be matched against the regex pattern
+            if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)):
+                return True
+    return False
+
+
+@atheris.instrument_func
+def check_exception_against_list(exc_traceback, exception_file: str = "explicit-exceptions-list.txt") -> bool:
+    """Check if the exception traceback matches any entry in the exception list."""
+    exception_list = load_exception_list(exception_file)
+    return match_exception_with_traceback(exception_list, exc_traceback)
+
+
+@atheris.instrument_func
+def handle_exception(e: Exception) -> int:
+    """Encapsulate exception handling logic for reusability."""
+    exc_traceback = e.__traceback__
+    if check_exception_against_list(exc_traceback):
+        return -1
+    else:
+        raise e
+
+
+@atheris.instrument_func
+def setup_git_environment() -> None:
+    """Set up the environment variables for Git."""
+    bundle_dir = os.path.dirname(os.path.abspath(__file__))
+    if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):  # pragma: no cover
+        bundled_git_binary_path = os.path.join(bundle_dir, "git")
+        os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path
+
+    if not sys.warnoptions:  # pragma: no cover
+        # The warnings filter below can be overridden by passing the -W option
+        # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
+        import warnings
+        import logging
+
+        # Fuzzing data causes some modules to generate a large number of warnings
+        # which are not usually interesting and make the test output hard to read, so we ignore them.
+        warnings.simplefilter("ignore")
+        logging.getLogger().setLevel(logging.ERROR)