|
1 | 1 | import functools
|
2 | 2 | import json
|
3 | 3 | import ssl
|
4 |
| -from typing import Dict |
| 4 | +from typing import Dict, Optional |
5 | 5 | from urllib.parse import urlparse
|
6 | 6 | from urllib.request import Request, urlopen
|
7 | 7 |
|
| 8 | +import jsonschema |
8 | 9 | import requests # type: ignore
|
| 10 | +from jsonschema import Draft202012Validator |
| 11 | +from referencing import Registry, Resource |
| 12 | +from referencing.jsonschema import DRAFT202012 |
| 13 | +from referencing.retrieval import to_cached_resource |
| 14 | +from referencing.typing import URI |
9 | 15 |
|
10 | 16 | NEW_VERSIONS = [
|
11 | 17 | "1.0.0-beta.2",
|
@@ -77,7 +83,7 @@ def get_stac_type(stac_content: Dict) -> str:
|
77 | 83 | return str(e)
|
78 | 84 |
|
79 | 85 |
|
80 |
| -def fetch_and_parse_file(input_path: str, headers: Dict = {}) -> Dict: |
| 86 | +def fetch_and_parse_file(input_path: str, headers: Optional[Dict] = None) -> Dict: |
81 | 87 | """Fetches and parses a JSON file from a URL or local file.
|
82 | 88 |
|
83 | 89 | Given a URL or local file path to a JSON file, this function fetches the file,
|
@@ -184,3 +190,90 @@ def link_request(
|
184 | 190 | else:
|
185 | 191 | initial_message["request_invalid"].append(link["href"])
|
186 | 192 | initial_message["format_invalid"].append(link["href"])
|
| 193 | + |
| 194 | + |
def fetch_remote_schema(uri: str, timeout: float = 10) -> dict:
    """
    Fetch a remote schema from a URI.

    Args:
        uri (str): The URI of the schema to fetch.
        timeout (float): Seconds to wait for the server before giving up.
            Defaults to 10, the same limit used by ``cached_retrieve``.

    Returns:
        dict: The fetched schema content as a dictionary.

    Raises:
        requests.RequestException: If the request to fetch the schema fails,
            times out, or returns an HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    # Without an explicit timeout, requests waits forever on a stalled server.
    response = requests.get(uri, timeout=timeout)
    # Turn 4xx/5xx responses into exceptions instead of parsing an error page.
    response.raise_for_status()
    return response.json()
| 211 | + |
| 212 | + |
@to_cached_resource()  # type: ignore
def cached_retrieve(uri: URI) -> str:
    """
    Download the schema document at ``uri`` and return its body as text.

    The function is wrapped by ``referencing.retrieval.to_cached_resource``,
    which receives the returned text (presumably parsing and caching it per
    URI -- confirm against the ``referencing`` documentation).

    Args:
        uri (str): The URI of the schema.

    Returns:
        str: The raw JSON string of the schema.

    Raises:
        requests.RequestException: If the request fails, times out, or
            returns an HTTP error status; the original error is chained.
        Exception: For any other unexpected error; the original is chained.
    """
    try:
        # A 10 second limit keeps a stalled server from blocking validation.
        reply = requests.get(uri, timeout=10)
        # HTTP status codes >= 400 become exceptions here.
        reply.raise_for_status()
        body = reply.text
        return body
    except requests.exceptions.RequestException as err:
        message = f"Failed to fetch schema from {uri}: {str(err)}"
        raise requests.RequestException(message) from err
    except Exception as err:
        message = f"Unexpected error while retrieving schema from {uri}: {str(err)}"
        raise Exception(message) from err
| 240 | + |
| 241 | + |
def validate_with_ref_resolver(schema_path: str, content: dict) -> None:
    """
    Validate a JSON document against a JSON Schema with dynamic reference resolution.

    The schema is loaded over HTTP(S) when ``schema_path`` is an ``http://`` or
    ``https://`` URL, otherwise from the local file system. It is always
    registered and validated as a Draft 2020-12 schema, and remote references
    encountered during validation are resolved through ``cached_retrieve``.

    Args:
        schema_path (str): Path or URI of the JSON Schema.
        content (dict): JSON content to validate.

    Raises:
        jsonschema.exceptions.ValidationError: If validation fails.
        requests.RequestException: If fetching a remote schema fails.
        FileNotFoundError: If a local schema file is not found.
        Exception: If any other error occurs during validation.
    """
    # Load the schema. Match the full URL scheme prefix: a bare "http" prefix
    # check would send local paths such as "http_schemas/item.json" to the
    # network fetcher.
    if schema_path.startswith(("http://", "https://")):
        schema = fetch_remote_schema(schema_path)
    else:
        try:
            # JSON documents are UTF-8; do not depend on the platform default.
            with open(schema_path, "r", encoding="utf-8") as f:
                schema = json.load(f)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"Schema file not found: {schema_path}") from e

    # Set up the resource and registry for schema resolution. The root schema
    # is registered under its own path/URI; anything not in the registry is
    # fetched through cached_retrieve.
    resource: Resource = Resource(contents=schema, specification=DRAFT202012)  # type: ignore
    registry: Registry = Registry(retrieve=cached_retrieve).with_resource(  # type: ignore
        uri=schema_path, resource=resource
    )  # type: ignore

    # Validate the content against the schema
    try:
        validator = Draft202012Validator(schema, registry=registry)
        validator.validate(content)
    except jsonschema.exceptions.ValidationError as e:
        # NOTE(review): the new error carries only the message; the structured
        # context of the original (path, instance, ...) is reachable only via
        # __cause__. Kept as is because callers may depend on this shape.
        raise jsonschema.exceptions.ValidationError(f"{e.message}") from e
    except Exception as e:
        raise Exception(f"Unexpected error during validation: {str(e)}") from e
0 commit comments