Skip to content

Commit

Permalink
Improve purl prefix for os packages
Browse files Browse the repository at this point in the history
Signed-off-by: Prabhu Subramanian <[email protected]>
  • Loading branch information
prabhu committed Mar 20, 2024
1 parent 92b33f2 commit 547ca69
Show file tree
Hide file tree
Showing 9 changed files with 148 additions and 57 deletions.
7 changes: 2 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
# Introduction

This repo is a vulnerability database and package search for sources such as Aqua Security vuln-list, OSV, NVD, GitHub, and NPM. Vulnerability data are downloaded from the sources and stored in a sqlite based storage with indexes to allow offline access and quick searches.
This repo is a vulnerability database and package search for sources such as AppThreat vuln-list, OSV, NVD, and GitHub. Vulnerability data is downloaded from these sources and stored in SQLite-based storage with indexes to allow offline access and quick searches.

## Vulnerability Data sources

- Linux [vuln-list](https://github.com/appthreat/vuln-list) (Forked from AquaSecurity)
- OSV (1)
- NVD (2)
- NVD
- GitHub
- NPM

1 - We exclude Linux and oss-fuzz feeds by default. Set the environment variable `OSV_INCLUDE_FUZZ` to include them.

2 - We exclude hardware (h) by default. Set the environment variable `NVD_EXCLUDE_TYPES` to exclude additional types such as OS (o) or application (a). An empty value means include all categories. Comma-separated values are allowed. Eg: `o,h`

## Linux distros

- AlmaLinux
Expand Down
63 changes: 63 additions & 0 deletions contrib/cpe_research.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from typing import Any

import apsw
import orjson

from vdb.lib import KNOWN_PKG_TYPES, db6
from vdb.lib.cve_model import CVE, CVE1


def get_cve_data(db_conn, index_hits: list[tuple[Any, ...]]):
    """Yield the stored CVE records that match a batch of index hits.

    Args:
        db_conn: Open connection to the CVE data database. When falsy, a
            read-only connection is obtained via ``db6.get``.
        index_hits: Rows from one of the index search methods. Each row is
            read positionally: item 0 is the CVE id, item 1 is the package
            type and the last item is the purl prefix.

    Yields:
        dict: One mapping per matching ``cve_data`` row with the keys
        ``cve_id``, ``type``, ``namespace``, ``name``, ``purl_prefix`` and
        ``source_data``. ``source_data`` is a ``CVE`` pydantic model built
        from the stored source JSON, or ``None`` when no source was stored.
    """
    if not db_conn:
        db_conn, _ = db6.get(read_only=True)
    for ahit in index_hits:
        results: apsw.Cursor = db_conn.execute(
            "SELECT cve_id, type, namespace, name, json_object('source', source_data) FROM cve_data WHERE cve_id = ? AND type = ? ORDER BY cve_id DESC;",
            (ahit[0], ahit[1]),
        )
        for res in results:
            # json_object() wraps the stored document as {"source": ...}.
            # A NULL source_data column therefore arrives as
            # '{"source":null}', which is truthy, so the decoded value has
            # to be checked as well before validating it.
            raw_source = orjson.loads(res[4])["source"] if res[4] else None
            source_data = (
                CVE(root=CVE1.model_validate(raw_source, strict=False))
                if raw_source is not None
                else None
            )
            yield {
                "cve_id": res[0],
                "type": res[1],
                "namespace": res[2],
                "name": res[3],
                # The purl prefix only exists on the index row, not in cve_data.
                "purl_prefix": ahit[-1],
                "source_data": source_data,
            }


def get_unmapped_namespaces() -> list:
    """Report index entries whose type is not a known package type.

    Selects the distinct ``cve_index`` rows whose ``type`` is not listed in
    ``KNOWN_PKG_TYPES``, loads the matching CVE records through
    ``get_cve_data`` and prints one line per record with its CPEs and
    reference URLs, to help work out a precise purl prefix.

    Returns:
        list: One tuple per printed record, in print order:
        ``(cve_id, type, namespace, name, purl_prefix, cpes, ref_urls)``.
    """
    db_conn, index_conn = db6.get(read_only=True)
    # Bind the known types as parameters rather than splicing quoted values
    # into the SQL text.
    placeholders = ", ".join("?" for _ in KNOWN_PKG_TYPES)
    raw_hits = index_conn.execute(
        "select distinct cve_id, type, namespace, name, purl_prefix "
        f"from cve_index where type not in ({placeholders})",
        tuple(KNOWN_PKG_TYPES),
    )
    unmapped = []
    for ahit in raw_hits:
        for data in get_cve_data(db_conn, [ahit]):
            cve = data["source_data"]
            # get_cve_data yields None when no source document is stored.
            if cve is None:
                continue
            source_data: CVE1 = cve.root
            affected = source_data.containers.cna.affected.root
            # NOTE(review): cpes is presumably optional on an affected
            # entry - confirm against cve_model; a missing value is treated
            # as empty.
            cpes = [", ".join([b.root for b in (a.cpes or [])]) for a in affected]
            references = source_data.containers.cna.references.root
            ref_urls = [str(a.url.root) for a in references]
            row = (
                data["cve_id"],
                data["type"],
                data["namespace"],
                data["name"],
                data["purl_prefix"],
                cpes,
                ref_urls,
            )
            print(*row)
            unmapped.append(row)
    return unmapped


# Script entry point: print the unmapped namespaces when run directly.
if __name__ == "__main__":
    get_unmapped_namespaces()
10 changes: 6 additions & 4 deletions vdb/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

# Known application package types
# See https://github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst
# chainguard and wolfi have been added for suppression purposes since the data quality is poor
KNOWN_PKG_TYPES = [
"alpm",
"bitbucket",
Expand All @@ -33,7 +34,6 @@
"gem",
"rubygems",
"golang",
"crates",
"clojars",
"conan",
"pub",
Expand All @@ -50,20 +50,21 @@
"linux",
"swid",
"oss-fuzz",
"ebuild"
"ebuild",
"swift",
]

# Maps variations of string to package types
PKG_TYPES_MAP = {
"composer": ["php", "laravel", "wordpress", "joomla"],
"maven": ["jenkins", "java", "kotlin", "groovy", "clojars", "hackage"],
"npm": ["javascript", "node.js", "nodejs"],
"npm": ["javascript", "node.js", "nodejs", "npmjs"],
"nuget": [".net_framework", "csharp", ".net_core", "asp.net"],
"pypi": ["python"],
"gem": ["ruby"],
"rubygems": ["ruby", "gem"],
"golang": ["go"],
"cargo": ["rust", "crates.io", "cargo"],
"cargo": ["rust", "crates.io", "crates"],
"pub": ["dart"],
"hex": ["elixir"],
"github": ["actions"],
Expand All @@ -78,6 +79,7 @@
"suse",
"opensuse",
"fedora",
"fedoraproject"
],
"alpm": ["arch", "archlinux"],
"ebuild": ["gentoo", "portage"]
Expand Down
44 changes: 22 additions & 22 deletions vdb/lib/aqua.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,8 @@ def alsa_to_vuln(cve_data):
references=references,
description="",
vectorString=vector_string,
vendor=vendor,
product=pkg_name,
vendor="rpm",
product=f"{vendor}/{pkg_name}",
version="*",
edition="*",
version_start_including=version_start_including,
Expand Down Expand Up @@ -222,9 +222,9 @@ def alas_rlsa_to_vuln(cve_data, vendor):
"""Amazon Linux"""
ret_data = []
packages = cve_data.get("packages", [])
if not packages or not len(packages) > 0:
return ret_data
cve_id = cve_data.get("id")
if not packages or cve_id in ("CVE-PENDING",) or not len(packages) > 0:
return ret_data
cwe_id = ""
cve_references = cve_data.get("references", [])
references = []
Expand Down Expand Up @@ -272,8 +272,8 @@ def alas_rlsa_to_vuln(cve_data, vendor):
references=references,
description="",
vectorString=vector_string,
vendor=vendor,
product=pkg_name,
vendor="rpm",
product=f"{vendor}/{pkg_name}",
version="*",
edition="*",
version_start_including=version_start_including,
Expand Down Expand Up @@ -372,8 +372,8 @@ def ubuntu_to_vuln(cve_data):
references=references,
description="",
vectorString=vector_string,
vendor=vendor,
product=full_pkg_name,
vendor="deb",
product=f"{vendor}/{full_pkg_name}",
version="*",
edition=distro_name,
version_start_including=version_start_including,
Expand Down Expand Up @@ -483,8 +483,8 @@ def redhat_to_vuln(cve_data):
references=references,
description="",
vectorString=vector_string,
vendor="redhat",
product=pkg_name,
vendor="rpm",
product=f"redhat/{pkg_name}",
version="*",
edition=edition,
version_start_including=version_start_including,
Expand Down Expand Up @@ -549,8 +549,8 @@ def arch_to_vuln(cve_data):
references=references,
description="",
vectorString=vector_string,
vendor="arch",
product=pkg_name,
vendor="alpm",
product=f"arch/{pkg_name}",
version="*",
edition="*",
version_start_including=version_start_including,
Expand Down Expand Up @@ -645,8 +645,8 @@ def suse_to_vuln(self, cve_data):
references=references,
description="",
vectorString=vector_string,
vendor="suse",
product=pkg_name,
vendor="rpm",
product=f"suse/{pkg_name}",
version="*",
edition="*",
version_start_including=version_start_including,
Expand Down Expand Up @@ -717,8 +717,8 @@ def photon_to_vuln(cve_data):
references=references,
description="",
vectorString=vector_string,
vendor="photon",
product=pkg_name,
vendor="rpm",
product=f"photon/{pkg_name}",
version="*",
edition=distro_name,
version_start_including=version_start_including,
Expand Down Expand Up @@ -845,8 +845,8 @@ def debian_to_vuln(cve_data):
references=references,
description="",
vectorString=vector_string,
vendor=vendor,
product=pkg_name,
vendor="deb",
product=f"{vendor}/{pkg_name}",
version="*",
edition=distro_name if distro_name else "*",
version_start_including=version_start_including,
Expand Down Expand Up @@ -891,9 +891,9 @@ def wolfi_to_vuln(cve_data):
for fix_version_start_including, cve_list in cve_data.get("secfixes").items():
for cve_id in cve_list:
version_start_including = ""
version_end_including = ""
version_end_including = "*" if fix_version_start_including == "0" else ""
version_start_excluding = ""
version_end_excluding = fix_version_start_including
version_end_excluding = fix_version_start_including if fix_version_start_including != "0" else ""
fix_version_end_including = ""
fix_version_start_excluding = ""
fix_version_end_excluding = ""
Expand All @@ -904,8 +904,8 @@ def wolfi_to_vuln(cve_data):
references=references,
description="",
vectorString=vector_string,
vendor=assigner,
product=pkg_name,
vendor="apk",
product=f"{assigner}/{pkg_name}",
version="*",
edition="*",
version_start_including=version_start_including,
Expand Down
28 changes: 22 additions & 6 deletions vdb/lib/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,6 @@

VULN_LIST_URL = "https://github.com/appthreat/vuln-list/archive/refs/heads/main.zip"

# CVE types to exclude - hardware
nvd_exclude_types = ["h"]
if os.getenv("NVD_EXCLUDE_TYPES") is not None:
nvd_exclude_types = os.getenv("NVD_EXCLUDE_TYPES", "").split(",")

# Placeholder fix version to use to indicate max versions
PLACEHOLDER_FIX_VERSION = "99.99.9"

Expand Down Expand Up @@ -117,11 +112,32 @@

VENDOR_TO_VERS_SCHEME = {
"almalinux": "rpm",
"rocky": "rpm",
"photon": "rpm",
"ubuntu": "deb",
"debian": "deb",
"suse": "rpm",
"redhat": "rpm",
"opensuse": "rpm",
"alpine": "apk",
"gentoo": "ebuild"
"gentoo": "ebuild",
"amazon": "rpm",
"wolfi": "apk",
"chainguard": "apk"
}

OS_PKG_TYPES = (
"deb",
"apk",
"rpm",
"swid",
"alpm",
"docker",
"oci",
"container",
"generic",
"qpkg",
"buildroot",
"coreos",
"ebuild",
)
16 changes: 14 additions & 2 deletions vdb/lib/cve.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,12 @@ def to_cve_affected(avuln: Vulnerability) -> Affected | None:
package_name = parts.group("package")
if "/" in product:
tmp_a = product.split("/")
if len(tmp_a) != 2:
# ubuntu/upstream/virtualbox should become
# product=ubuntu and package_name=upstream/virtualbox
if vendor in config.OS_PKG_TYPES:
product = tmp_a[0]
package_name = "/".join(tmp_a[1:])
elif len(tmp_a) != 2:
if len(tmp_a) > 2 and vendor in ("generic", "swift"):
product = os.path.dirname(product)
package_name = os.path.basename(package_name)
Expand All @@ -202,8 +207,15 @@ def to_cve_affected(avuln: Vulnerability) -> Affected | None:
if config.VENDOR_TO_VERS_SCHEME.get(vendor):
vendor = config.VENDOR_TO_VERS_SCHEME.get(vendor)
# This prevents cargo:cargo or nuget:nuget
if product == vendor and vendor in KNOWN_PKG_TYPES:
# or openssl:openssl:openssl
if product == vendor and (package_name == product or vendor in KNOWN_PKG_TYPES):
product = None
# Deal with NVD mess such as npmjs or crates
if vendor not in KNOWN_PKG_TYPES:
for k, v in PKG_TYPES_MAP.items():
if vendor.lower() in v:
vendor = k
break
p = Product(
vendor=vendor,
product=product,
Expand Down
Loading

0 comments on commit 547ca69

Please sign in to comment.