Merge pull request #239 from vprivat-ads/http_headers

jonhealy1 · web-flow · commit 7983a8177289 · 2025-01-09T12:16:31.000+08:00
Allow providing HTTP headers
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,7 @@ The format is (loosely) based on [Keep a Changelog](http://keepachangelog.com/)
 
 - Added publish.yml to automatically publish new releases to PyPI [#236](https://github.com/stac-utils/stac-validator/pull/236)
 - Configure whether to open URLs when validating assets [#238](https://github.com/stac-utils/stac-validator/pull/238)
+- Allow providing HTTP headers via the `--header` option [#239](https://github.com/stac-utils/stac-validator/pull/239)
 
 ## [v3.4.0] - 2024-10-08
 
diff --git a/README.md b/README.md
@@ -108,6 +108,8 @@ Options:
                            with --pages. Defaults to one page.
   --no-assets-urls         Disables the opening of href links when validating
                            assets (enabled by default).
+  --header KEY VALUE       HTTP header to include in the requests. Can be used
+                           multiple times.
   -p, --pages INTEGER      Maximum number of pages to validate via --item-
                            collection. Defaults to one page.
   -v, --verbose            Enables verbose output for recursive mode.
@@ -332,3 +334,9 @@ stac-validator https://spot-canada-ortho.s3.amazonaws.com/catalog.json --recursi
 ```bash
 stac-validator https://earth-search.aws.element84.com/v0/collections/sentinel-s2-l2a/items --item-collection --pages 2
 ```
+
+**--header**
+
+```bash
+stac-validator https://stac-catalog.eu/collections/sentinel-s2-l2a/items --header x-api-key $MY_API_KEY --header foo bar
+```
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -2,4 +2,5 @@ black
 pytest
 pytest-mypy
 pre-commit
+requests-mock
 types-jsonschema
diff --git a/setup.py b/setup.py
@@ -33,6 +33,7 @@
     extras_require={
         "dev": [
             "pytest",
+            "requests-mock",
             "types-setuptools",
         ],
     },
@@ -41,5 +42,5 @@
         "console_scripts": ["stac-validator = stac_validator.stac_validator:main"]
     },
     python_requires=">=3.8",
-    tests_require=["pytest"],
+    tests_require=["pytest", "requests-mock"],
 )
diff --git a/stac_validator/stac_validator.py b/stac_validator/stac_validator.py
@@ -114,6 +114,12 @@ def collections_summary(message: List[Dict[str, Any]]) -> None:
     is_flag=True,
     help="Disables the opening of href links when validating assets (enabled by default).",
 )
+@click.option(
+    "--header",
+    type=(str, str),
+    multiple=True,
+    help="HTTP header to include in the requests. Can be used multiple times.",
+)
 @click.option(
     "--pages",
     "-p",
@@ -134,6 +140,7 @@ def main(
     collections: bool,
     item_collection: bool,
     no_assets_urls: bool,
+    header: list,
     pages: int,
     recursive: bool,
     max_depth: int,
@@ -154,6 +161,7 @@ def main(
         collections (bool): Validate response from /collections endpoint.
         item_collection (bool): Whether to validate item collection responses.
         no_assets_urls (bool): Whether to open href links when validating assets (enabled by default).
+        header (list): HTTP headers to include in the requests, given as (key, value) pairs.
         pages (int): Maximum number of pages to validate via `item_collection`.
         recursive (bool): Whether to recursively validate all related STAC objects.
         max_depth (int): Maximum depth to traverse when recursing.
@@ -185,6 +193,7 @@ def main(
         links=links,
         assets=assets,
         assets_open_urls=not no_assets_urls,
+        headers=dict(header),
         extensions=extensions,
         custom=custom,
         verbose=verbose,
diff --git a/stac_validator/utilities.py b/stac_validator/utilities.py
@@ -3,7 +3,7 @@
 import ssl
 from typing import Dict
 from urllib.parse import urlparse
-from urllib.request import urlopen
+from urllib.request import Request, urlopen
 
 import requests  # type: ignore
 
@@ -77,7 +77,7 @@ def get_stac_type(stac_content: Dict) -> str:
         return str(e)
 
 
-def fetch_and_parse_file(input_path: str) -> Dict:
+def fetch_and_parse_file(input_path: str, headers: Dict = {}) -> Dict:
     """Fetches and parses a JSON file from a URL or local file.
 
     Given a URL or local file path to a JSON file, this function fetches the file,
@@ -87,6 +87,7 @@ def fetch_and_parse_file(input_path: str) -> Dict:
 
     Args:
         input_path: A string representing the URL or local file path to the JSON file.
+        headers: HTTP headers to include in the request (used when input_path is a URL).
 
     Returns:
         A dictionary containing the parsed contents of the JSON file.
@@ -97,7 +98,7 @@ def fetch_and_parse_file(input_path: str) -> Dict:
     """
     try:
         if is_url(input_path):
-            resp = requests.get(input_path)
+            resp = requests.get(input_path, headers=headers)
             resp.raise_for_status()
             data = resp.json()
         else:
@@ -150,9 +151,7 @@ def set_schema_addr(version: str, stac_type: str) -> str:
 
 
 def link_request(
-    link: Dict,
-    initial_message: Dict,
-    open_urls: bool = True,
+    link: Dict, initial_message: Dict, open_urls: bool = True, headers: Dict = {}
 ) -> None:
     """Makes a request to a URL and appends it to the relevant field of the initial message.
 
@@ -161,6 +160,7 @@ def link_request(
         initial_message: A dictionary containing lists for "request_valid", "request_invalid",
         "format_valid", and "format_invalid" URLs.
         open_urls: Whether to open link href URL
+        headers: HTTP headers to include in the request
 
     Returns:
         None
@@ -169,11 +169,12 @@ def link_request(
     if is_url(link["href"]):
         try:
             if open_urls:
+                request = Request(link["href"], headers=headers)
                 if "s3" in link["href"]:
                     context = ssl._create_unverified_context()
-                    response = urlopen(link["href"], context=context)
+                    response = urlopen(request, context=context)
                 else:
-                    response = urlopen(link["href"])
+                    response = urlopen(request)
                 status_code = response.getcode()
                 if status_code == 200:
                     initial_message["request_valid"].append(link["href"])
diff --git a/stac_validator/validate.py b/stac_validator/validate.py
@@ -34,6 +34,7 @@ class StacValidate:
         links (bool): Whether to additionally validate links (only works in default mode).
         assets (bool): Whether to additionally validate assets (only works in default mode).
         assets_open_urls (bool): Whether to open assets URLs when validating assets.
+        headers (dict): HTTP headers to include in the requests.
         extensions (bool): Whether to only validate STAC object extensions.
         custom (str): The local filepath or remote URL of a custom JSON schema to validate the STAC object.
         verbose (bool): Whether to enable verbose output in recursive mode.
@@ -56,6 +57,7 @@ def __init__(
         links: bool = False,
         assets: bool = False,
         assets_open_urls: bool = True,
+        headers: dict = {},
         extensions: bool = False,
         custom: str = "",
         verbose: bool = False,
@@ -70,6 +72,7 @@ def __init__(
         self.links = links
         self.assets = assets
         self.assets_open_urls = assets_open_urls
+        self.headers: Dict = headers
         self.recursive = recursive
         self.max_depth = max_depth
         self.extensions = extensions
@@ -125,7 +128,9 @@ def assets_validator(self) -> Dict:
         assets = self.stac_content.get("assets")
         if assets:
             for asset in assets.values():
-                link_request(asset, initial_message, self.assets_open_urls)
+                link_request(
+                    asset, initial_message, self.assets_open_urls, self.headers
+                )
         return initial_message
 
     def links_validator(self) -> Dict:
@@ -145,7 +150,7 @@ def links_validator(self) -> Dict:
         for link in self.stac_content["links"]:
             if not is_valid_url(link["href"]):
                 link["href"] = root_url + link["href"][1:]
-            link_request(link, initial_message)
+            link_request(link, initial_message, True, self.headers)
 
         return initial_message
 
@@ -345,7 +350,9 @@ def recursive_validator(self, stac_type: str) -> bool:
                         self.stac_file = st + "/" + address
                     else:
                         self.stac_file = address
-                    self.stac_content = fetch_and_parse_file(str(self.stac_file))
+                    self.stac_content = fetch_and_parse_file(
+                        str(self.stac_file), self.headers
+                    )
                     self.stac_content["stac_version"] = self.version
                     stac_type = get_stac_type(self.stac_content).lower()
 
@@ -414,7 +421,7 @@ def validate_collections(self) -> None:
         Returns:
             None
         """
-        collections = fetch_and_parse_file(str(self.stac_file))
+        collections = fetch_and_parse_file(str(self.stac_file), self.headers)
         for collection in collections["collections"]:
             self.schema = ""
             self.validate_dict(collection)
@@ -437,7 +444,7 @@ def validate_item_collection(self) -> None:
         """
         page = 1
         print(f"processing page {page}")
-        item_collection = fetch_and_parse_file(str(self.stac_file))
+        item_collection = fetch_and_parse_file(str(self.stac_file), self.headers)
         self.validate_item_collection_dict(item_collection)
         try:
             if self.pages is not None:
@@ -450,7 +457,7 @@ def validate_item_collection(self) -> None:
                                 next_link = link["href"]
                                 self.stac_file = next_link
                                 item_collection = fetch_and_parse_file(
-                                    str(self.stac_file)
+                                    str(self.stac_file), self.headers
                                 )
                                 self.validate_item_collection_dict(item_collection)
                                 break
@@ -489,7 +496,7 @@ def run(self) -> bool:
                 and not self.item_collection
                 and not self.collections
             ):
-                self.stac_content = fetch_and_parse_file(self.stac_file)
+                self.stac_content = fetch_and_parse_file(self.stac_file, self.headers)
 
             stac_type = get_stac_type(self.stac_content).upper()
             self.version = self.stac_content["stac_version"]
diff --git a/tests/test_header.py b/tests/test_header.py
@@ -0,0 +1,50 @@
+"""
+Description: Test --header option
+
+"""
+
+import json
+
+import requests_mock
+
+from stac_validator import stac_validator
+
+
+def test_header():
+    stac_file = "tests/test_data/v110/simple-item.json"
+    url = "https://localhost/" + stac_file
+
+    no_headers = {}
+    valid_headers = {"x-api-key": "a-valid-api-key"}
+
+    with requests_mock.Mocker(real_http=True) as mock, open(stac_file) as json_data:
+        mock.get(url, request_headers=no_headers, status_code=403)
+        mock.get(url, request_headers=valid_headers, json=json.load(json_data))
+
+        stac = stac_validator.StacValidate(url, core=True, headers=valid_headers)
+        stac.run()
+        assert stac.message == [
+            {
+                "version": "1.1.0",
+                "path": "https://localhost/tests/test_data/v110/simple-item.json",
+                "schema": [
+                    "https://schemas.stacspec.org/v1.1.0/item-spec/json-schema/item.json"
+                ],
+                "valid_stac": True,
+                "asset_type": "ITEM",
+                "validation_method": "core",
+            }
+        ]
+
+        stac = stac_validator.StacValidate(url, core=True, headers=no_headers)
+        stac.run()
+        assert stac.message == [
+            {
+                "version": "",
+                "path": "https://localhost/tests/test_data/v110/simple-item.json",
+                "schema": [""],
+                "valid_stac": False,
+                "error_type": "HTTPError",
+                "error_message": "403 Client Error: None for url: https://localhost/tests/test_data/v110/simple-item.json",
+            }
+        ]
diff --git a/tox.ini b/tox.ini
@@ -2,5 +2,7 @@
 envlist = py38,py39,py310,py311,py312,py313
 
 [testenv]
-deps = pytest
+deps = 
+    pytest
+    requests-mock
 commands = pytest
diff --git a/tox/Dockerfile-tox b/tox/Dockerfile-tox
@@ -4,5 +4,5 @@ COPY . /code/
 RUN export LC_ALL=C.UTF-8 && \
     export LANG=C.UTF-8 && \
     pip3 install . && \
-    pip3 install tox==4.0.11 && \
+    pip3 install tox==4.23.2 && \
     tox