Skip to content

Commit

Permalink
add inverted index for faster cpe search #92
Browse files Browse the repository at this point in the history
  • Loading branch information
fqrious committed Jan 23, 2025
1 parent 5f79f39 commit 02b1c91
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 22 deletions.
34 changes: 17 additions & 17 deletions vulmatch/server/arango_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@

CPE_RELATIONSHIP_TYPES = {"vulnerable-to": "exploits", "in-pattern": "relies-on"}
CPE_REL_SORT_FIELDS = ["modified_descending", "modified_ascending", "created_descending", "created_ascending"]
CPE_SORT_FIELDS = ['part_descending', 'part_ascending', 'vendor_descending', 'vendor_ascending', 'product_ascending', 'product_descending', 'version_ascending', 'version_descending']
CVE_BUNDLE_TYPES = set([
"vulnerability",
"indicator",
Expand Down Expand Up @@ -176,7 +177,10 @@ def query_as_bool(self, key, default=True):
return default
return query_str.lower() == 'true'


@classmethod
def like_string(cls, string: str):
    """Return ``string`` wrapped in a leading and trailing ``%``.

    The result is meant to be bound as the right-hand side of a ``LIKE``
    filter, so the surrounding ``%`` turn it into a "contains" pattern.

    NOTE(review): ``string`` is inserted verbatim -- any ``%`` or ``_``
    already present in it is not escaped and will presumably be treated as
    a wildcard by the database; confirm that this is intended.
    """
    wildcard = '%'
    # join() keeps the str-only contract of the original concatenation:
    # a non-str argument still raises TypeError.
    return ''.join((wildcard, string, wildcard))

@classmethod
def get_page_params(cls, request):
kwargs = request.GET.copy()
Expand Down Expand Up @@ -524,29 +528,22 @@ def get_softwares(self):
"FILTER doc.id in @ids"
)

if value := self.query_as_array('cpe_match_string'):
bind_vars['cpe_match_string'] = value
if value := self.query.get('cpe_match_string'):
bind_vars['cpe_match_string'] = self.like_string(value)
filters.append(
"FILTER @cpe_match_string[? ANY FILTER CONTAINS(doc.cpe, CURRENT)]"
"FILTER doc.cpe LIKE @cpe_match_string"
)

struct_match = {}
if value := self.query.get('product_type'):
struct_match['part'] = value[0]
filters.append('FILTER doc.x_cpe_struct.part == @struct_match.part')

if value := self.query.get('product'):
struct_match['product'] = value.lower()
filters.append('FILTER CONTAINS(doc.x_cpe_struct.product, @struct_match.product)')

if value := self.query.get('vendor'):
struct_match['vendor'] = value
filters.append('FILTER CONTAINS(doc.x_cpe_struct.vendor, @struct_match.vendor)')

for k in ['version', 'update', 'edition', 'language', 'sw_edition', 'target_sw', 'target_hw', 'other']:
for k in ['product', 'vendor', 'version', 'update', 'edition', 'language', 'sw_edition', 'target_sw', 'target_hw', 'other']:
if v := self.query.get(k):
struct_match[k] = v
filters.append(f'FILTER CONTAINS(doc.x_cpe_struct.{k}, @struct_match.{k})')
struct_match[k] = self.like_string(v)
filters.append(f'FILTER doc.x_cpe_struct.{k} LIKE @struct_match.{k}')

if struct_match:
bind_vars['struct_match'] = struct_match
Expand All @@ -568,14 +565,17 @@ def get_softwares(self):
filters.append('FILTER CONTAINS(doc.name, @name)')

query = """
FOR doc in @@collection
FILTER doc.type == 'software' AND doc._is_latest
FOR doc in @@collection OPTIONS {indexHint: "cpe_search_inv", forceIndexHint: true}
FILTER doc.type == 'software' AND doc._is_latest == TRUE
LET cve_matches = (FOR d in nvd_cve_edge_collection FILTER d._to == doc._id AND d.relationship_type IN ['exploits', 'relies-on'] RETURN [d.relationship_type, d.external_references[0].external_id])
@filters
@sort_stmt
LIMIT @offset, @count
RETURN KEEP(doc, KEYS(doc, true))
""".replace('@filters', '\n'.join(filters))
""".replace('@filters', '\n'.join(filters))\
.replace('@sort_stmt', self.get_sort_stmt(CPE_SORT_FIELDS, doc_name='doc.x_cpe_struct'))
# return HttpResponse(f"""{query}\n// {json.dumps(bind_vars)}""")
return self.execute_query(query, bind_vars=bind_vars)

def get_relationships(self, docs_query, binds):
Expand Down
10 changes: 8 additions & 2 deletions vulmatch/server/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from django.shortcuts import render
from rest_framework import viewsets, filters, status, decorators

from vulmatch.server.arango_helpers import ATLAS_TYPES, CPE_REL_SORT_FIELDS, CPE_RELATIONSHIP_TYPES, CVE_BUNDLE_TYPES, CVE_SORT_FIELDS, LOCATION_TYPES, TLP_TYPES, ArangoDBHelper, ATTACK_TYPES, CWE_TYPES, SOFTWARE_TYPES, CAPEC_TYPES
from vulmatch.server.arango_helpers import ATLAS_TYPES, CPE_REL_SORT_FIELDS, CPE_RELATIONSHIP_TYPES, CPE_SORT_FIELDS, CVE_BUNDLE_TYPES, CVE_SORT_FIELDS, LOCATION_TYPES, TLP_TYPES, ArangoDBHelper, ATTACK_TYPES, CWE_TYPES, SOFTWARE_TYPES, CAPEC_TYPES
from vulmatch.server.autoschema import DEFAULT_400_ERROR
from vulmatch.server.utils import Pagination, Response, Ordering, split_mitre_version
from vulmatch.worker.tasks import new_task
Expand Down Expand Up @@ -394,7 +394,13 @@ class filterset_class(FilterSet):
target_sw = CharFilter(help_text='Characterises the software computing environment within which the product operates (this is the 10th value in the CPE URI).')
target_hw = CharFilter(help_text='Characterises the instruction set architecture (e.g., x86) on which the product being described or identified operates (this is the 11th value in the CPE URI).')
other = CharFilter(help_text='Capture any other general descriptive or identifying information which is vendor- or product-specific and which does not logically fit in any other attribute value (this is the 12th value in the CPE URI).')



@extend_schema(
parameters=[
OpenApiParameter('sort', enum=CPE_SORT_FIELDS, description="Sort results by"),
]
)
@decorators.action(methods=['GET'], url_path="objects", detail=False)
def list_objects(self, request, *args, **kwargs):
return ArangoDBHelper('', request).get_softwares()
Expand Down
15 changes: 12 additions & 3 deletions vulmatch/worker/populate_dbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,18 @@ def create_indexes(db: StandardDatabase):
time = int(datetime.now().timestamp())
for sorter in "created modified name cpe".split():
vertex_collection.add_index(dict(type='persistent', fields=["type", "_is_latest", sorter], inBackground=True, name=f"vulmatch_cve_sort_{sorter}_{time}"))
vertex_collection.add_index(dict(type='persistent', fields=["cpe"], storedValues=["id"], inBackground=True, name=f"vulmatch_cpe"))
vertex_collection.add_index(dict(type='persistent', fields=["name"], inBackground=True, name=f"vulmatch_name"))

vertex_collection.add_index(dict(type='persistent', fields=["cpe"], storedValues=["id"], inBackground=True, name=f"vulmatch_cpe", sparse=True))
vertex_collection.add_index(dict(type='persistent', fields=["type", "cpe"], storedValues=["id"], inBackground=True, name=f"vulmatch_type_cpe"))
vertex_collection.add_index(dict(type='persistent', fields=["name"], inBackground=True, name=f"vulmatch_name"))
db.create_analyzer('norm_en', analyzer_type='norm', properties={ "locale": "en", "accent": False, "case": "lower" })
vertex_collection.add_index(dict(type='inverted', name='cpe_search_inv', fields=[
dict(name='cpe', analyzer='norm_en'),
"id",
"type",
*[dict(name=f'x_cpe_struct.{name}', analyzer='norm_en') for name in ['product', 'vendor', 'version', 'update', 'edition', 'language', 'sw_edition', 'target_sw', 'target_hw', 'other']],
"x_cpe_struct.part"
], inBackground=True))


def create_collections():
#create db/collections
Expand Down

0 comments on commit 02b1c91

Please sign in to comment.