Skip to content

Commit

Permalink
Adds a new purl prefix column to the index to assist with purl based …
Browse files Browse the repository at this point in the history
…searches

Signed-off-by: Prabhu Subramanian <[email protected]>
  • Loading branch information
prabhu committed Mar 19, 2024
1 parent aeeb38c commit 3b040e7
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 37 deletions.
20 changes: 18 additions & 2 deletions vdb/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,24 @@
import orjson

# Known application package types
# See https://github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst
KNOWN_PKG_TYPES = [
"alpm",
"bitbucket",
"bitnami",
"cargo",
"composer",
"cocoapods",
"conda",
"cpan",
"cran",
"docker",
"generic",
"github",
"huggingface",
"mlflow",
"qpkg",
"oci",
"maven",
"npm",
"nuget",
Expand Down Expand Up @@ -46,7 +62,7 @@
"gem": ["ruby"],
"rubygems": ["ruby", "gem"],
"golang": ["go"],
"crates": ["rust", "crates.io", "cargo"],
"cargo": ["rust", "crates.io", "cargo"],
"pub": ["dart"],
"hex": ["elixir"],
"github": ["actions"],
Expand All @@ -62,7 +78,7 @@
"opensuse",
"fedora",
],
"alpm": ["arch"],
"alpm": ["arch", "archlinux"],
}

# CPE Regex
Expand Down
16 changes: 12 additions & 4 deletions vdb/lib/cve.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
config,
CPE_FULL_REGEX,
KNOWN_PKG_TYPES,
PKG_TYPES_MAP,
Vulnerability,
VulnerabilityDetail,
VulnerabilitySource,
Expand Down Expand Up @@ -199,9 +200,10 @@ def to_cve_affected(avuln: Vulnerability) -> Affected | None:
product = vendor
# See if we can substitute vers scheme
if config.VENDOR_TO_VERS_SCHEME.get(vendor):
if vendor == product:
product = None
vendor = config.VENDOR_TO_VERS_SCHEME.get(vendor)
# This prevents cargo:cargo or nuget:nuget
if product == vendor and vendor in KNOWN_PKG_TYPES:
product = None
p = Product(
vendor=vendor,
product=product,
Expand Down Expand Up @@ -382,6 +384,11 @@ def store5(self, data: list[CVE]):
if d.containers.cna and d.containers.cna.affected:
for affected in d.containers.cna.affected.root:
vers = to_purl_vers(affected.vendor, affected.versions)
purl_type = affected.vendor if PKG_TYPES_MAP.get(affected.vendor) else "generic"
purl_prefix = f"""pkg:{purl_type}/"""
if affected.product:
purl_prefix = f"{purl_prefix}{affected.product}/"
purl_prefix = f"{purl_prefix}{affected.packageName}"
pkg_key = f"{cve_id}|{affected.vendor}|{affected.product}|{affected.packageName}|{source_hash}"
index_pkg_key = f"{cve_id}|{affected.vendor}|{affected.product}|{affected.packageName}|{vers}"
# Filter obvious duplicates
Expand All @@ -399,12 +406,13 @@ def store5(self, data: list[CVE]):
source_completed_keys[pkg_key] = True
if not index_completed_keys.get(index_pkg_key):
indexc.execute(
"INSERT INTO cve_index values(?, ?, ?, ?, ?);", (
"INSERT INTO cve_index values(?, ?, ?, ?, ?, ?);", (
cve_id,
affected.vendor,
affected.product,
affected.packageName,
vers
vers,
purl_prefix
)
)
index_completed_keys[index_pkg_key] = True
4 changes: 3 additions & 1 deletion vdb/lib/db6.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def ensure_schemas(db_conn_obj: apsw.Connection, index_conn_obj: apsw.Connection
db_conn_obj.pragma("synchronous", "OFF")
db_conn_obj.pragma("journal_mode", "MEMORY")
index_conn_obj.execute(
"CREATE TABLE if not exists cve_index(cve_id TEXT NOT NULL, type TEXT NOT NULL, namespace TEXT, name TEXT NOT NULL, vers TEXT NOT NULL);")
"CREATE TABLE if not exists cve_index(cve_id TEXT NOT NULL, type TEXT NOT NULL, namespace TEXT, name TEXT NOT NULL, vers TEXT NOT NULL, purl_prefix TEXT NOT NULL);")
index_conn_obj.pragma("synchronous", "OFF")
index_conn_obj.pragma("journal_mode", "MEMORY")

Expand Down Expand Up @@ -78,5 +78,7 @@ def optimize_and_close_all():
"CREATE INDEX if not exists cidx3 on cve_index(type, name);")
index_conn.execute(
"CREATE INDEX if not exists cidx4 on cve_index(namespace, name);")
index_conn.execute(
"CREATE INDEX if not exists cidx5 on cve_index(purl_prefix);")
index_conn.execute("VACUUM;")
index_conn.close()
7 changes: 4 additions & 3 deletions vdb/lib/osv.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@
"apk": "alpine",
"deb": "debian",
"go": "golang",
"crates.io": "crates",
"crates.io": "cargo",
"swifturl": "swift",
"github actions": "github"
}


Expand Down Expand Up @@ -210,7 +211,7 @@ def to_vuln(cve_data):
pkg_name = f'{purl["namespace"]}/{purl["name"]}'
elif purl.get("name"):
pkg_name = purl["name"]
if ":" in pkg_name and vendor not in ("swift", "swifturl"):
if ":" in pkg_name and vendor.lower() not in ("swift", "swifturl", "github", "github actions"):
tmp_a = pkg_name.split(":")
if len(tmp_a) == 2:
vendor = tmp_a[0]
Expand All @@ -221,7 +222,7 @@ def to_vuln(cve_data):
if pkg_name:
pkg_name_list.append(pkg_name)
# Since swift allows both URL-based and local lookups, we store both variations
if vendor in ("swift", "swifturl") and pkg_name.startswith("github.com"):
if vendor in ("swift", "swifturl", "github", "github actions") and pkg_name.startswith("github.com"):
pkg_name_list.append(pkg_name.removeprefix("github.com/"))
# For OS packages such as alpine, OSV appends the OS version to the vendor
# Let's remove it and add it to package name
Expand Down
8 changes: 6 additions & 2 deletions vdb/lib/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,11 @@ def search_by_purl(purl: str, with_data=False) -> list | None:
namespace = purl_obj.get("namespace")
name = purl_obj.get("name")
version = purl_obj.get("version", "*")
args = [ptype]
purl_prefix = f"pkg:{ptype}/"
if namespace:
purl_prefix = f"{purl_prefix}{namespace}/"
purl_prefix = f"{purl_prefix}{name}"
args = [purl_prefix, ptype]
# We enforce a strict search for None
if namespace is not None:
args.append(namespace)
Expand All @@ -95,7 +99,7 @@ def search_by_purl(purl: str, with_data=False) -> list | None:
extra_filter = f"{extra_filter} AND name = ?"
args.append(name)
raw_hits = exec_query(index_conn,
f"SELECT cve_id, type, namespace, name, vers FROM cve_index where type = ?{extra_filter};",
f"SELECT cve_id, type, namespace, name, vers FROM cve_index where purl_prefix = ? OR (type = ?{extra_filter});",
args)
filtered_list = _filter_hits(raw_hits, version)
if with_data:
Expand Down
51 changes: 26 additions & 25 deletions vdb/lib/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,9 +448,9 @@ def trim_epoch(
def vers_compare(compare_ver: str | int | float, vers: str) -> bool:
"""Purl vers based version comparison"""
min_version, max_version, min_excluding, max_excluding = None, None, None, None
if vers == "*":
if vers == "*" or compare_ver is None:
return True
elif vers.startswith("vers:"):
if vers.startswith("vers:"):
vers_parts = vers.split("/")[-1].split("|")
if len(vers_parts) == 1:
single_version = vers_parts[0].strip().replace(" ", "")
Expand Down Expand Up @@ -984,29 +984,30 @@ def convert_md_references(md_text):
def parse_purl(purl_str: str) -> dict:
"""Method to parse a package url string safely"""
purl_obj = None
try:
purl_obj = PackageURL.from_string(purl_str).to_dict() if purl_str else None
# For golang, move everything to name since there is no concept of namespace
if purl_obj and purl_obj.get("type") == "golang" and purl_obj.get("namespace"):
purl_obj["name"] = f'{purl_obj["namespace"]}/{purl_obj["name"]}'
purl_obj["namespace"] = ""
except ValueError:
# Ignore errors
pass
if not purl_obj and purl_str:
tmp_a = purl_str.split("@")[0]
purl_obj = {}
if tmp_a:
tmp_b = tmp_a.split("/")
if tmp_b:
if len(tmp_b) < 2:
purl_obj["name"] = tmp_b[-1].lower()
purl_obj["namespace"] = tmp_b[0].split(":")[-1]
if len(tmp_b) > 2:
namespace = tmp_b[-2]
if tmp_b[-2].startswith("pkg:"):
namespace = tmp_b[-2].split(":")[-1]
purl_obj["namespace"] = namespace
if purl_str and purl_str.startswith("pkg:"):
try:
purl_obj = PackageURL.from_string(purl_str).to_dict() if purl_str else None
# For golang, move everything to name since there is no concept of namespace
if purl_obj and purl_obj.get("type") == "golang" and purl_obj.get("namespace"):
purl_obj["name"] = f'{purl_obj["namespace"]}/{purl_obj["name"]}'
purl_obj["namespace"] = ""
except ValueError:
# Ignore errors
pass
if not purl_obj and purl_str:
tmp_a = purl_str.split("@")[0]
purl_obj = {}
if tmp_a:
tmp_b = tmp_a.split("/")
if tmp_b:
if len(tmp_b) < 2:
purl_obj["name"] = tmp_b[-1].lower()
purl_obj["namespace"] = tmp_b[0].split(":")[-1]
if len(tmp_b) > 2:
namespace = tmp_b[-2]
if tmp_b[-2].startswith("pkg:"):
namespace = tmp_b[-2].split(":")[-1]
purl_obj["namespace"] = namespace
return purl_obj


Expand Down

0 comments on commit 3b040e7

Please sign in to comment.