Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve git url detection for generic purls #109

Merged
merged 1 commit into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Introduction

This repo is a vulnerability database and package search for sources such as AppThreat vuln-list, OSV, NVD, and GitHub. Vulnerability data are downloaded from the sources and stored in a sqlite based storage with indexes to allow offline access and quick searches.
This repo is a vulnerability database and package search for sources such as AppThreat vuln-list, OSV, NVD, and GitHub. Vulnerability data are downloaded from the sources and stored in SQLite-based storage with indexes to allow offline access and efficient searches.

## Vulnerability Data sources

Expand Down Expand Up @@ -86,13 +86,16 @@ It is possible to customize the cache behavior by increasing the historic data p
- NVD_START_YEAR - Default: 2018. Supports up to 2002
- GITHUB_PAGE_COUNT - Default: 2. Supports up to 20

### Basic search
## CLI search

It is possible to perform a simple search using the cli.
It is possible to perform a range of searches using the CLI.

```shell
vdb --search pkg:pypi/xml2dict@<version>

# Search based on a purl prefix
vdb --search pkg:pypi/xml2dict

# Full url and short form for swift
vdb --search "pkg:swift/github.com/vapor/vapor@<version>"

Expand Down
106 changes: 84 additions & 22 deletions contrib/cpe_research.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,16 @@

import apsw
import orjson
from rich.console import Console
from rich.live import Live
from rich.table import Table

from vdb.lib import KNOWN_PKG_TYPES, db6
from vdb.lib import KNOWN_PKG_TYPES, db6, CPE_FULL_REGEX
from vdb.lib.cve_model import CVE, CVE1

console = Console(markup=False, highlight=False, emoji=False)

purl_proposal_cache = {}

def get_cve_data(db_conn, index_hits: list[dict, Any]):
"""Get CVE data for the index results
Expand All @@ -22,42 +28,98 @@ def get_cve_data(db_conn, index_hits: list[dict, Any]):
db_conn, _ = db6.get(read_only=True)
for ahit in index_hits:
results: apsw.Cursor = db_conn.execute(
"SELECT cve_id, type, namespace, name, json_object('source', source_data) FROM cve_data WHERE cve_id = ? AND type = ? ORDER BY cve_id DESC;",
"SELECT distinct json_object('source', source_data) FROM cve_data WHERE cve_id = ? AND type = ? ORDER BY cve_id DESC;",
(ahit[0], ahit[1]),
)
for res in results:
yield {
"cve_id": res[0],
"type": res[1],
"namespace": res[2],
"name": res[3],
"purl_prefix": ahit[-1],
"source_data": (
CVE(
root=CVE1.model_validate(
orjson.loads(res[4])["source"], strict=False
orjson.loads(res[0])["source"], strict=False
)
)
if res[4]
if res[0]
else None
)
),
}


def get_unmapped_namespaces() -> list:
"""Get a list of namespaces without a precise purl prefix"""
def propose_pseudo_purls() -> list:
"""Get a list of namespaces without a precise purl prefix and propose a pseudo purls"""
db_conn, index_conn = db6.get(read_only=True)
raw_hits = index_conn.execute(f"""select distinct cve_id, type, namespace, name, purl_prefix from cve_index where type not in ({', '.join([f"'{p}'" for p in KNOWN_PKG_TYPES])})""")
for ahit in raw_hits:
data_list_gen = get_cve_data(db_conn, [ahit])
for data_list in data_list_gen:
source_data: CVE1 = data_list["source_data"].root
affected = source_data.containers.cna.affected.root
cpes = [", ".join([b.root for b in a.cpes]) for a in affected]
references = source_data.containers.cna.references.root
ref_urls = [str(a.url.root) for a in references]
print(data_list["cve_id"], data_list["type"], data_list["namespace"], data_list["name"], data_list["purl_prefix"], cpes, ref_urls)
ptypes = KNOWN_PKG_TYPES
# These vendors are causing noise and slow-downs
ptypes.extend(
[
"oracle",
"microsoft",
"adobe",
"f5",
"dell",
"cisco",
"symantec",
"gigabyte",
"mozilla",
"wireshark",
"schneider-electric",
"ibm",
"fujitsu",
"apple",
"netapp",
"synology",
"citrix",
]
)
raw_hits = index_conn.execute(
f"""select distinct cve_id, type, namespace, name, purl_prefix from cve_index where type not in ({', '.join([f"'{p}'" for p in ptypes])})"""
)
table = Table(title="Results", highlight=False, show_lines=True)
table.add_column("PURL prefix")
table.add_column("CPEs")
table.add_column("References")
with Live(
table, console=console, refresh_per_second=4, vertical_overflow="visible"
):
for ahit in raw_hits:
data_list_gen = get_cve_data(db_conn, [ahit])
for data_list in data_list_gen:
source_data: CVE1 = data_list["source_data"].root
if not source_data.containers.cna.references:
continue
references = source_data.containers.cna.references.root
ref_urls = [
str(a.url.root).lower()
for a in references
if "git" in str(a.url.root).lower()
]
if not ref_urls:
continue
purl_prefix = data_list["purl_prefix"]
affected = source_data.containers.cna.affected.root
cpes = ["\n".join([b.root for b in a.cpes]) for a in affected]
generic_cpes = [
acpe for acpe in cpes if acpe.startswith("cpe:2.3:a:generic")
]
proposed_purls = []
for generic_cpe in generic_cpes:
all_parts = CPE_FULL_REGEX.match(generic_cpe)
proposed_purl = f"pkg:generic/{all_parts.group('package')}"
version = all_parts.group("version")
if version and version != "*":
proposed_purl = f"{proposed_purl}@{version}"
proposed_purls.append(proposed_purl)
if proposed_purls:
purl_proposal_cache[purl_prefix] = proposed_purls
elif purl_proposal_cache.get(purl_prefix):
proposed_purls = purl_proposal_cache[purl_prefix]
table.add_row(
purl_prefix + "\n" + "\n".join(proposed_purls),
cpes[0],
"\n".join(ref_urls),
)


if __name__ == "__main__":
get_unmapped_namespaces()
propose_pseudo_purls()
2 changes: 1 addition & 1 deletion vdb/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
for _ in ("httpx",):
logging.getLogger(_).disabled = True

AT_LOGO = """
AT_LOGO = r"""
___
/\ ._ ._ | |_ ._ _ _. _|_
/--\ |_) |_) | | | | (/_ (_| |_
Expand Down
22 changes: 11 additions & 11 deletions vdb/lib/aqua.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,28 +67,28 @@ def fetch(self, url):
return []

def convert(self, cve_data):
if cve_data.get("vulnStatus"):
return self.nvd_api_to_vuln(cve_data)
if cve_data.get("updateinfo_id"):
return self.alsa_to_vuln(cve_data)
elif cve_data.get("id", "").startswith("ALAS"):
if cve_data.get("id", "").startswith("ALAS"):
return self.alas_rlsa_to_vuln(cve_data, "amazon")
elif cve_data.get("id", "").startswith("RLSA"):
if cve_data.get("id", "").startswith("RLSA"):
return self.alas_rlsa_to_vuln(cve_data, "rocky")
elif cve_data.get("Candidate"):
if cve_data.get("Candidate"):
return self.ubuntu_to_vuln(cve_data)
elif cve_data.get("affected_release"):
if cve_data.get("affected_release"):
return self.redhat_to_vuln(cve_data)
elif cve_data.get("name", "").startswith("AVG"):
if cve_data.get("name", "").startswith("AVG"):
return self.arch_to_vuln(cve_data)
elif cve_data.get("Tracking"):
if cve_data.get("Tracking"):
return self.suse_to_vuln(cve_data)
elif cve_data.get("os_version"):
if cve_data.get("os_version"):
return self.photon_to_vuln(cve_data)
elif cve_data.get("Annotations") and cve_data.get("Header"):
if cve_data.get("Annotations") and cve_data.get("Header"):
return self.debian_to_vuln(cve_data)
elif cve_data.get("secfixes"):
if cve_data.get("secfixes"):
return self.wolfi_to_vuln(cve_data)
elif cve_data.get("vulnStatus"):
return self.nvd_api_to_vuln(cve_data)
return []

@staticmethod
Expand Down
1 change: 0 additions & 1 deletion vdb/lib/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,6 @@
"docker",
"oci",
"container",
"generic",
"qpkg",
"buildroot",
"coreos",
Expand Down
2 changes: 1 addition & 1 deletion vdb/lib/cve.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def to_cve_affected(avuln: Vulnerability) -> Affected | None:
# Similar to purl type
vendor = parts.group("vendor")
# Similar to purl namespace
product = parts.group("package")
product = parts.group("package").removesuffix("\\").removesuffix("!")
# Similar to purl name
package_name = parts.group("package")
if "/" in product:
Expand Down
Loading
Loading