diff --git a/README.md b/README.md index 413c13b..445b294 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Introduction -This repo is a vulnerability database and package search for sources such as AppThreat vuln-list, OSV, NVD, and GitHub. Vulnerability data are downloaded from the sources and stored in a sqlite based storage with indexes to allow offline access and quick searches. +This repo is a vulnerability database and package search for sources such as AppThreat vuln-list, OSV, NVD, and GitHub. Vulnerability data are downloaded from the sources and stored in a SQLite-based storage with indexes to allow offline access and efficient searches. ## Vulnerability Data sources @@ -86,13 +86,16 @@ It is possible to customize the cache behavior by increasing the historic data p - NVD_START_YEAR - Default: 2018. Supports up to 2002 - GITHUB_PAGE_COUNT - Default: 2. Supports up to 20 -### Basic search +## CLI search -It is possible to perform a simple search using the cli. +It is possible to perform a range of searches using the CLI.
```shell vdb --search pkg:pypi/xml2dict@0.2.2 +# Search based on a purl prefix +vdb --search pkg:pypi/xml2dict + # Full url and short form for swift vdb --search "pkg:swift/github.com/vapor/vapor@4.39.0" diff --git a/vdb/lib/nvd.py b/vdb/lib/nvd.py index 08058f8..3b01197 100644 --- a/vdb/lib/nvd.py +++ b/vdb/lib/nvd.py @@ -29,7 +29,7 @@ # Size of the stream to read and write to the file DOWNLOAD_CHUNK_SIZE = 128 -purl_proposal_cache = defaultdict(set) +purl_proposal_cache = defaultdict(list) def get_version(inc_version: str, exc_version: str) -> str: @@ -90,7 +90,7 @@ def filterable_git_url(url: str, hostname: str) -> bool: "/blog", "/news", "/support/", - "/bug_report" + "/bug_report", ): if part in url.lower(): return True @@ -105,7 +105,11 @@ def get_alt_cpes(cpe_uri, git_urls): for agit_url in git_urls: url_obj = urlparse(agit_url) # Ignore obvious filterable urls - if filterable_git_url(agit_url, url_obj.hostname) and not url_obj.path and not url_obj.query: + if ( + filterable_git_url(agit_url, url_obj.hostname) + and not url_obj.path + and not url_obj.query + ): continue git_repo_name = url_obj.hostname if url_obj.path: @@ -115,8 +119,8 @@ def get_alt_cpes(cpe_uri, git_urls): if p and p not in ("/", "pub", "scm", "cgi-bin", "cgit", "gitweb") ] if paths: - max_path = 3 if len(paths) >= 2 else 2 - git_repo_name = f"""{git_repo_name}{'/'.join(paths[:max_path])}""" + max_path = 2 if len(paths) >= 2 else 1 + git_repo_name = f"""{git_repo_name}/{'/'.join(paths[:max_path])}""" if url_obj.query: query_obj = parse_qs(url_obj.query) # Eg: https://git.eyrie.org/?p=kerberos/remctl.git%3Ba=commit%3Bh=86c7e4 @@ -140,14 +144,14 @@ def get_alt_cpes(cpe_uri, git_urls): continue parsed_git_repo_names[git_repo_name] = True # We only need 2 new aliases - if len(purl_proposal_cache.get(cpe_url, [])) > 2: + if len(purl_proposal_cache.get(cpe_uri, [])) > 2: purl_proposal_cache[cpe_uri].pop(0) - purl_proposal_cache[cpe_uri].add( + purl_proposal_cache[cpe_uri].append( 
f"cpe:2.3:a:generic:{git_repo_name}:*:*:*:*:*:*:*:*" ) # See if there is something useful in the cache if not alt_cpes: - alt_cpes = list(purl_proposal_cache.get(cpe_uri, [])) + alt_cpes = purl_proposal_cache.get(cpe_uri, []) return alt_cpes