Skip to content

Commit e84e40f

Browse files
feat: improved sbom filename extension handling
1 parent 82ccffd commit e84e40f

File tree

5 files changed

+293
-72
lines changed

5 files changed

+293
-72
lines changed

cve_bin_tool/output_engine/util.py

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -273,8 +273,8 @@ def intermediate_output(
273273

274274
def add_extension_if_not(filename: str, output_type: str) -> str:
    """
    Ensure that ``filename`` ends with an extension valid for ``output_type``.

    The extension is the text after the last "." in ``filename``. The result is:

    - the filename unchanged, if its extension is valid for ``output_type``;
    - the filename with its extension replaced by the first valid extension
      for ``output_type``, if the extension belongs to another known type;
    - the filename with the first valid extension appended, if the extension
      is not a known one or the filename contains no "." at all;
    - the filename unchanged, if ``output_type`` is not a known output type.

    Args:
        filename (str): filename from OutputEngine
        output_type (str): key of the output format (one of "json",
            "cyclonedx", "csv", "html", "pdf", "txt")

    Returns:
        str: Filename with extension according to output_type
    """
    # Map all output types to their valid extensions; the first entry of each
    # list is the one used when an extension has to be added or replaced.
    extensions = {
        "json": ["json"],
        "cyclonedx": ["json", "xml"],
        "csv": ["csv"],
        "html": ["html"],
        "pdf": ["pdf"],
        "txt": ["txt"],
    }

    valid_ext = extensions.get(output_type)
    if not valid_ext:
        # Unknown output type: there is no extension to enforce. Returning
        # early keeps every branch below safe from indexing an empty list.
        return filename
    default_ext = valid_ext[0]

    # Every extension that belongs to some known output type.
    all_valid_extensions = {ext for exts in extensions.values() for ext in exts}

    name, dot, ext = filename.rpartition(".")
    if not dot:
        # No extension at all - append the default one.
        return f"{filename}.{default_ext}"
    if ext in valid_ext:
        # Already valid for the requested type - keep as is.
        return filename
    if ext in all_valid_extensions:
        # Extension of a different known type - replace it.
        return f"{name}.{default_ext}"
    # Unrecognised extension - keep it and append the default one.
    return f"{filename}.{default_ext}"
298320

299321

300322
def group_cve_by_remark(
@@ -308,7 +330,7 @@ def group_cve_by_remark(
308330
{
309331
"cve_number": "CVE-XXX-XXX",
310332
"severity": "High",
311-
"decription: "Lorem Ipsm",
333+
"description": "Lorem Ipsum",
312334
},
313335
{...}
314336
],

cve_bin_tool/sbom_manager/generate.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from lib4sbom.sbom import SBOM
1313

1414
from cve_bin_tool.log import LOGGER
15+
from cve_bin_tool.output_engine.util import add_extension_if_not
1516
from cve_bin_tool.version import VERSION
1617

1718

@@ -46,6 +47,10 @@ def __init__(
4647

4748
def generate_sbom(self) -> None:
4849
"""Create SBOM package and generate SBOM file."""
50+
# Force .json extension for CycloneDX only if not already specified
51+
if self.sbom_type == "cyclonedx":
52+
self.filename = add_extension_if_not(self.filename, "json")
53+
4954
# Create SBOM
5055
sbom_relationships = []
5156
my_package = SBOMPackage()

cve_bin_tool/sbom_manager/parse.py

Lines changed: 43 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from __future__ import annotations
55

6+
import json
67
import re
78
import sys
89
from collections import defaultdict
@@ -16,13 +17,7 @@
1617
from cve_bin_tool.cvedb import CVEDB
1718
from cve_bin_tool.input_engine import TriageData
1819
from cve_bin_tool.log import LOGGER
19-
from cve_bin_tool.util import (
20-
ProductInfo,
21-
Remarks,
22-
decode_cpe22,
23-
decode_cpe23,
24-
validate_serialNumber,
25-
)
20+
from cve_bin_tool.util import ProductInfo, Remarks, decode_cpe22, decode_cpe23
2621
from cve_bin_tool.validator import validate_cyclonedx, validate_spdx, validate_swid
2722

2823

@@ -77,12 +72,22 @@ def parse_sbom(self) -> dict[ProductInfo, TriageData]:
7772
modules = []
7873
try:
7974
if Path(self.filename).exists():
75+
# Validate CycloneDX JSON or XML extension
76+
if self.type == "cyclonedx" and not (
77+
self.filename.lower().endswith(".json")
78+
or self.filename.lower().endswith(".xml")
79+
):
80+
self.logger.error(
81+
"CycloneDX SBOMs require .json or .xml extension."
82+
)
83+
return {}
84+
8085
if self.type == "swid":
8186
modules = self.parse_swid(self.filename)
8287
else:
8388
modules = self.parse_cyclonedx_spdx()
8489
except (KeyError, FileNotFoundError, ET.ParseError) as e:
85-
LOGGER.debug(e, exc_info=True)
90+
self.logger.debug(e, exc_info=True)
8691

8792
LOGGER.debug(
8893
f"The number of modules identified in SBOM - {len(modules)}\n{modules}"
@@ -147,7 +152,7 @@ def common_prefix_split(self, product, version) -> list[ProductInfo]:
147152
if not found_common_prefix:
148153
# if vendor not found after removing common prefix try splitting it
149154
LOGGER.debug(
150-
f"No Vendor found for {product}, trying splitted product. "
155+
f"No Vendor found for {product}, trying split product. "
151156
"Some results may be inaccurate due to vendor identification limitations."
152157
)
153158
splitted_product = product.split("-")
@@ -217,31 +222,45 @@ def parse_cyclonedx_spdx(self) -> [(str, str, str)]:
217222
218223
Returns:
219224
- List[(str, str, str)]: A list of tuples, each containing vendor, product, and version information for a module.
220-
221225
"""
226+
# Validate CycloneDX JSON or XML extension
227+
if self.type == "cyclonedx" and not (
228+
self.filename.lower().endswith(".json")
229+
or self.filename.lower().endswith(".xml")
230+
):
231+
self.logger.error(
232+
f"CycloneDX SBOMs require .json or .xml extension. Invalid file: {self.filename}"
233+
)
234+
return []
235+
236+
# Validate JSON content for CycloneDX JSON files
237+
if self.type == "cyclonedx" and self.filename.lower().endswith(".json"):
238+
try:
239+
with open(self.filename, encoding="utf-8") as f:
240+
json.load(f) # Basic JSON validation
241+
except json.JSONDecodeError as e:
242+
self.logger.error(f"Invalid JSON in CycloneDX SBOM: {str(e)}")
243+
return []
222244

223245
# Set up SBOM parser
224246
sbom_parser = SBOMParser(sbom_type=self.type)
225247
# Load SBOM
226248
sbom_parser.parse_file(self.filename)
227249
doc = sbom_parser.get_document()
228-
uuid = doc.get("uuid", "")
229250
if self.type == "cyclonedx":
230-
parts = uuid.split(":")
231-
if len(parts) == 3 and parts[0] == "urn" and parts[1] == "uuid":
232-
serialNumber = parts[2]
233-
if validate_serialNumber(serialNumber):
251+
# Extract serialNumber (optional in CycloneDX spec)
252+
serialNumber = doc.get("serialNumber", "").lower()
253+
if serialNumber: # Only validate if present
254+
if re.match(
255+
r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
256+
serialNumber,
257+
):
234258
self.serialNumber = serialNumber
235259
else:
236-
LOGGER.error(
260+
LOGGER.warning( # Downgrade to warning
237261
f"The SBOM file '{self.filename}' has an invalid serial number."
238262
)
239-
return []
240-
else:
241-
LOGGER.error(
242-
f"The SBOM file '{self.filename}' has an invalid serial number."
243-
)
244-
return []
263+
# Do NOT return early; continue parsing components
245264

246265
modules = []
247266
if self.validate and self.filename.endswith(".xml"):
@@ -281,7 +300,7 @@ def parse_cyclonedx_spdx(self) -> [(str, str, str)]:
281300
# Found at least package and version, save the results
282301
modules.append([vendor, package_name, version])
283302

284-
LOGGER.debug(f"Parsed SBOM {self.filename} {modules}")
303+
LOGGER.debug(f"SBOM Data {self.sbom_data}")
285304
return modules
286305

287306
def parse_swid(self, sbom_file: str) -> list[list[str]]:
@@ -372,7 +391,7 @@ def decode_purl(self, purl) -> (str | None, str | None, str | None):
372391
- purl (str): Package URL (purl) string.
373392
374393
Returns:
375-
- Tuple[str | None, str | None, str | None]: A tuple containing the vendor (which is always None for purl),
394+
- Tuple[str | None, str | None, str | None]: A tuple containing the vendor (which is always None for purl),
376395
product, and version information extracted from the purl string, or None if the purl is invalid or incomplete.
377396
378397
"""

cve_bin_tool/sbom_manager/sbom_detection.py

Lines changed: 123 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,51 +2,146 @@
22
# SPDX-License-Identifier: GPL-3.0-or-later
33

44
import json
5+
from typing import Optional
56

67
import defusedxml.ElementTree as ET
78

8-
from cve_bin_tool.validator import validate_cyclonedx, validate_swid
9+
from cve_bin_tool.log import LOGGER
10+
from cve_bin_tool.validator import validate_cyclonedx
911

1012

11-
def sbom_detection(file_path: str) -> Optional[str]:
    """
    Identify the SBOM type of a file from its extension and content.

    The checks run in this order (all extension tests are case-insensitive):

    1. ``.json`` files are loaded as JSON. A top-level object with
       ``bomFormat == "CycloneDX"`` and a ``components`` key is reported as
       CycloneDX. As a fallback, an object whose ``bom`` member is itself an
       object holding both ``specVersion`` and ``version`` is also reported
       as CycloneDX, with a warning.
    2. ``.xml`` files are parsed and classified by the namespace of the root
       element: CycloneDX when the namespace contains ``cyclonedx.org`` and
       ``validate_cyclonedx`` accepts the file, SWID when the root tag ends
       with ``SoftwareIdentity`` and the namespace contains ``iso/19770``.
       An XML parse error ends detection with ``None``.
    3. Any path containing ``.spdx`` is reported as SPDX.

    Args:
        file_path (str): Path to the SBOM file.

    Returns:
        Optional[str]: "cyclonedx", "swid", "spdx", or None if the type
        cannot be determined or an unexpected error occurs.
    """
    lowered_path = file_path.lower()
    try:
        # Check for CycloneDX JSON format
        if lowered_path.endswith(".json"):
            try:
                with open(file_path, encoding="utf-8") as f:
                    data = json.load(f)
                if isinstance(data, dict):
                    if data.get("bomFormat") == "CycloneDX" and "components" in data:
                        return "cyclonedx"
                    # Fallback for a non-standard wrapper object. Only a
                    # mapping is inspected: a string "bom" would otherwise
                    # be substring-matched and a null one would raise.
                    bom = data.get("bom")
                    if isinstance(bom, dict) and "specVersion" in bom and "version" in bom:
                        LOGGER.warning(
                            f"Possible CycloneDX SBOM with non-standard structure: {file_path}"
                        )
                        return "cyclonedx"
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                # Not usable JSON; continue with the remaining checks.
                LOGGER.debug(f"JSON parsing error for {file_path}: {str(e)}")

        # Check XML-based formats by the namespace of the root element
        if lowered_path.endswith(".xml"):
            try:
                root = ET.parse(file_path).getroot()
                # Tags look like "{namespace}local"; no "}" means no namespace.
                namespace = (
                    root.tag.split("}", 1)[0].strip("{") if "}" in root.tag else ""
                )
                if "cyclonedx.org" in namespace and validate_cyclonedx(file_path):
                    return "cyclonedx"
                if root.tag.endswith("SoftwareIdentity") and "iso/19770" in namespace:
                    return "swid"
            except ET.ParseError as e:
                LOGGER.debug(f"XML parsing error for {file_path}: {str(e)}")
                return None

        # SPDX detection: ".spdx" anywhere in the path also covers
        # ".spdx.json", ".spdx.xml", ".spdx.yml" and ".spdx.yaml".
        if ".spdx" in lowered_path:
            return "spdx"

    except Exception as e:
        # Best-effort detection: report the failure and fall through to None.
        LOGGER.error(f"SBOM detection failed for {file_path}: {str(e)}")
    return None
78+
79+
80+
def detect_sbom_type_from_content(file_path: str) -> Optional[str]:
    """
    Detect the SBOM type from file content, ignoring the file extension.

    Only the first 1 KiB is inspected to decide whether the file looks like
    JSON or XML; a UTF-8 byte order mark and leading whitespace are skipped
    before that decision. The whole file is then parsed accordingly:

    - JSON object with ``bomFormat == "CycloneDX"`` -> "cyclonedx"
    - JSON object with an ``SPDXID`` or ``spdxVersion`` key -> "spdx"
    - XML whose root namespace contains ``cyclonedx.org`` -> "cyclonedx",
      ``iso/19770`` -> "swid", ``spdx.org`` -> "spdx"

    Args:
        file_path (str): Path to the SBOM file.

    Returns:
        Optional[str]: The detected SBOM type or None if detection fails.
    """
    utf8_bom = b"\xef\xbb\xbf"
    try:
        with open(file_path, "rb") as f:
            head = f.read(1024)  # Read first 1KB for analysis

        # A BOM or leading whitespace is legal before the first significant
        # character and must not defeat the sniffing below.
        if head.startswith(utf8_bom):
            head = head[len(utf8_bom):]
        head = head.lstrip()

        # Check for JSON content
        if head.startswith(b"{"):
            try:
                # "utf-8-sig" reads plain UTF-8 and also strips a BOM, which
                # json.load would otherwise reject.
                with open(file_path, encoding="utf-8-sig") as f:
                    data = json.load(f)
                if isinstance(data, dict):
                    if data.get("bomFormat") == "CycloneDX":
                        return "cyclonedx"
                    if "SPDXID" in data or "spdxVersion" in data:
                        return "spdx"
            except json.JSONDecodeError:
                pass  # Looked like JSON but is not; nothing more to try here.

        # Check for XML content. The "<?xml" declaration is optional, so any
        # document starting with "<" is handed to the parser.
        if head.startswith(b"<"):
            try:
                root = ET.parse(file_path).getroot()
                # Tags look like "{namespace}local"; no "}" means no namespace.
                namespace = (
                    root.tag.split("}", 1)[0].strip("{") if "}" in root.tag else ""
                )
                if "cyclonedx.org" in namespace:
                    return "cyclonedx"
                if "iso/19770" in namespace:
                    return "swid"
                if "spdx.org" in namespace:
                    return "spdx"
            except ET.ParseError:
                pass  # Not well-formed XML.

    except Exception as e:
        # Best-effort fallback: log and report "unknown" rather than raise.
        LOGGER.debug(f"Content-based SBOM detection failed for {file_path}: {str(e)}")
    return None
128+
129+
130+
def detect_sbom(file_path: str) -> Optional[str]:
    """
    Detect the SBOM type of a file, main entry point for SBOM detection.

    Runs ``sbom_detection`` first and returns its result when it is truthy;
    otherwise returns whatever ``detect_sbom_type_from_content`` reports.

    Args:
        file_path (str): Path to the SBOM file.

    Returns:
        Optional[str]: The detected SBOM type or None if detection fails.
    """
    # "or" only evaluates the content-based fallback when the primary
    # detection produced no (truthy) result.
    return sbom_detection(file_path) or detect_sbom_type_from_content(file_path)

0 commit comments

Comments
 (0)