Skip to content

Commit

Permalink
Tweaks
Browse files Browse the repository at this point in the history
Signed-off-by: Prabhu Subramanian <[email protected]>

Improve git url detection for generic purls

Signed-off-by: Prabhu Subramanian <[email protected]>
  • Loading branch information
prabhu committed Mar 21, 2024
1 parent d40b81e commit 2082165
Show file tree
Hide file tree
Showing 8 changed files with 300 additions and 90 deletions.
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Introduction

This repo is a vulnerability database and package search for sources such as AppThreat vuln-list, OSV, NVD, and GitHub. Vulnerability data are downloaded from the sources and stored in a sqlite based storage with indexes to allow offline access and quick searches.
This repo is a vulnerability database and package search for sources such as AppThreat vuln-list, OSV, NVD, and GitHub. Vulnerability data are downloaded from the sources and stored in SQLite-based storage with indexes to allow offline access and efficient searches.

## Vulnerability Data sources

Expand Down Expand Up @@ -86,13 +86,16 @@ It is possible to customize the cache behavior by increasing the historic data p
- NVD_START_YEAR - Default: 2018. Supports up to 2002
- GITHUB_PAGE_COUNT - Default: 2. Supports up to 20

### Basic search
## CLI search

It is possible to perform a simple search using the cli.
It is possible to perform a range of searches using the CLI.

```shell
vdb --search pkg:pypi/[email protected]

# Search based on a purl prefix
vdb --search pkg:pypi/xml2dict

# Full url and short form for swift
vdb --search "pkg:swift/github.com/vapor/[email protected]"

Expand Down
106 changes: 84 additions & 22 deletions contrib/cpe_research.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,16 @@

import apsw
import orjson
from rich.console import Console
from rich.live import Live
from rich.table import Table

from vdb.lib import KNOWN_PKG_TYPES, db6
from vdb.lib import KNOWN_PKG_TYPES, db6, CPE_FULL_REGEX
from vdb.lib.cve_model import CVE, CVE1

console = Console(markup=False, highlight=False, emoji=False)

purl_proposal_cache = {}

def get_cve_data(db_conn, index_hits: list[dict, Any]):
"""Get CVE data for the index results
Expand All @@ -22,42 +28,98 @@ def get_cve_data(db_conn, index_hits: list[dict, Any]):
db_conn, _ = db6.get(read_only=True)
for ahit in index_hits:
results: apsw.Cursor = db_conn.execute(
"SELECT cve_id, type, namespace, name, json_object('source', source_data) FROM cve_data WHERE cve_id = ? AND type = ? ORDER BY cve_id DESC;",
"SELECT distinct json_object('source', source_data) FROM cve_data WHERE cve_id = ? AND type = ? ORDER BY cve_id DESC;",
(ahit[0], ahit[1]),
)
for res in results:
yield {
"cve_id": res[0],
"type": res[1],
"namespace": res[2],
"name": res[3],
"purl_prefix": ahit[-1],
"source_data": (
CVE(
root=CVE1.model_validate(
orjson.loads(res[4])["source"], strict=False
orjson.loads(res[0])["source"], strict=False
)
)
if res[4]
if res[0]
else None
)
),
}


# Vendor "types" excluded from the research query because they cause noise
# and slow-downs.
_NOISY_VENDOR_TYPES = (
    "oracle",
    "microsoft",
    "adobe",
    "f5",
    "dell",
    "cisco",
    "symantec",
    "gigabyte",
    "mozilla",
    "wireshark",
    "schneider-electric",
    "ibm",
    "fujitsu",
    "apple",
    "netapp",
    "synology",
    "citrix",
)


def _purl_from_generic_cpe(cpe: str) -> str | None:
    """Build a ``pkg:generic`` purl from one CPE string.

    Returns ``None`` when ``CPE_FULL_REGEX`` does not match, so a malformed
    CPE is skipped instead of raising ``AttributeError``. The version is only
    appended when it is present and is not the ``*`` wildcard.
    """
    all_parts = CPE_FULL_REGEX.match(cpe)
    if all_parts is None:
        return None
    proposed_purl = f"pkg:generic/{all_parts.group('package')}"
    version = all_parts.group("version")
    if version and version != "*":
        proposed_purl = f"{proposed_purl}@{version}"
    return proposed_purl


def propose_pseudo_purls() -> None:
    """Show index entries without a precise purl prefix and propose pseudo purls.

    Queries ``cve_index`` for distinct rows whose ``type`` is neither in
    ``KNOWN_PKG_TYPES`` nor in the noisy vendor list, loads the matching CVE
    records through ``get_cve_data`` and keeps only records that have at least
    one reference URL containing "git". For those records a
    ``pkg:generic/<package>[@<version>]`` purl is proposed for every CPE that
    starts with ``cpe:2.3:a:generic``. Proposals are remembered per purl
    prefix in ``purl_proposal_cache`` so that later records with the same
    prefix but no generic CPE reuse them. Results are rendered as a live
    table; nothing is returned.
    """
    db_conn, index_conn = db6.get(read_only=True)
    # Build a new list: extending KNOWN_PKG_TYPES in place would leak the
    # vendor names into the shared module-level constant.
    ptypes = [*KNOWN_PKG_TYPES, *_NOISY_VENDOR_TYPES]
    excluded_types = ", ".join(f"'{p}'" for p in ptypes)
    raw_hits = index_conn.execute(
        f"""select distinct cve_id, type, namespace, name, purl_prefix from cve_index where type not in ({excluded_types})"""
    )
    table = Table(title="Results", highlight=False, show_lines=True)
    table.add_column("PURL prefix")
    table.add_column("CPEs")
    table.add_column("References")
    with Live(
        table, console=console, refresh_per_second=4, vertical_overflow="visible"
    ):
        for ahit in raw_hits:
            for data_list in get_cve_data(db_conn, [ahit]):
                cve = data_list["source_data"]
                # get_cve_data yields None when the stored source data is empty.
                if cve is None:
                    continue
                source_data: CVE1 = cve.root
                cna = source_data.containers.cna
                if not cna.references:
                    continue
                lowered_urls = (str(ref.url.root).lower() for ref in cna.references.root)
                ref_urls = [url for url in lowered_urls if "git" in url]
                if not ref_urls:
                    continue
                purl_prefix = data_list["purl_prefix"]
                # One list of CPE strings per affected entry; an entry without
                # CPEs contributes an empty list.
                cpes_per_entry = [
                    [b.root for b in (a.cpes or [])] for a in cna.affected.root
                ]
                # Match each CPE on its own (not the newline-joined cell text)
                # so generic CPEs after the first one in an entry are seen too.
                proposed_purls = []
                for entry_cpes in cpes_per_entry:
                    for acpe in entry_cpes:
                        if not acpe.startswith("cpe:2.3:a:generic"):
                            continue
                        proposed_purl = _purl_from_generic_cpe(acpe)
                        if proposed_purl and proposed_purl not in proposed_purls:
                            proposed_purls.append(proposed_purl)
                if proposed_purls:
                    purl_proposal_cache[purl_prefix] = proposed_purls
                else:
                    proposed_purls = purl_proposal_cache.get(purl_prefix, [])
                # The CPE column shows the first affected entry only.
                first_entry_cpes = "\n".join(cpes_per_entry[0]) if cpes_per_entry else ""
                table.add_row(
                    purl_prefix + "\n" + "\n".join(proposed_purls),
                    first_entry_cpes,
                    "\n".join(ref_urls),
                )


if __name__ == "__main__":
    propose_pseudo_purls()
2 changes: 1 addition & 1 deletion vdb/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
for _ in ("httpx",):
logging.getLogger(_).disabled = True

AT_LOGO = """
AT_LOGO = r"""
___
/\ ._ ._ | |_ ._ _ _. _|_
/--\ |_) |_) | | | | (/_ (_| |_
Expand Down
22 changes: 11 additions & 11 deletions vdb/lib/aqua.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,28 +67,28 @@ def fetch(self, url):
return []

def convert(self, cve_data):
    """Route one advisory record to the converter matching its shape.

    Rules are checked in order and the first one whose marker is present
    decides which ``*_to_vuln`` method handles the record. The ``vulnStatus``
    check comes first. A record that matches no rule converts to an empty
    list.
    """
    # Predicates are evaluated lazily, one at a time, so a later rule's
    # lookup never runs once an earlier rule has matched.
    rules = (
        (lambda d: d.get("vulnStatus"), self.nvd_api_to_vuln),
        (lambda d: d.get("updateinfo_id"), self.alsa_to_vuln),
        (
            lambda d: d.get("id", "").startswith("ALAS"),
            lambda d: self.alas_rlsa_to_vuln(d, "amazon"),
        ),
        (
            lambda d: d.get("id", "").startswith("RLSA"),
            lambda d: self.alas_rlsa_to_vuln(d, "rocky"),
        ),
        (lambda d: d.get("Candidate"), self.ubuntu_to_vuln),
        (lambda d: d.get("affected_release"), self.redhat_to_vuln),
        (lambda d: d.get("name", "").startswith("AVG"), self.arch_to_vuln),
        (lambda d: d.get("Tracking"), self.suse_to_vuln),
        (lambda d: d.get("os_version"), self.photon_to_vuln),
        (
            lambda d: d.get("Annotations") and d.get("Header"),
            self.debian_to_vuln,
        ),
        (lambda d: d.get("secfixes"), self.wolfi_to_vuln),
    )
    for is_match, to_vuln in rules:
        if is_match(cve_data):
            return to_vuln(cve_data)
    return []

@staticmethod
Expand Down
1 change: 0 additions & 1 deletion vdb/lib/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@
"docker",
"oci",
"container",
"generic",
"qpkg",
"buildroot",
"coreos",
Expand Down
2 changes: 1 addition & 1 deletion vdb/lib/cve.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def to_cve_affected(avuln: Vulnerability) -> Affected | None:
# Similar to purl type
vendor = parts.group("vendor")
# Similar to purl namespace
product = parts.group("package")
product = parts.group("package").removesuffix("\\").removesuffix("!")
# Similar to purl name
package_name = parts.group("package")
if "/" in product:
Expand Down
Loading

0 comments on commit 2082165

Please sign in to comment.