Commit

go back to json which is surprisingly more efficient than blob
Signed-off-by: Prabhu Subramanian <[email protected]>
prabhu committed Mar 19, 2024
1 parent 54a4234 commit 6dc099c
Showing 3 changed files with 44 additions and 44 deletions.
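For context on the commit message: SQLite's json() function stores and returns a value as ordinary JSON text, while jsonb() converts it into SQLite's binary JSONB encoding (available from SQLite 3.45). The sketch below only illustrates the two insert styles this commit switches between; the in-memory database, demo table and payload are made up and not part of the repository.

import apsw    # the SQLite driver this project uses
import orjson

con = apsw.Connection(":memory:")
con.execute("CREATE TABLE demo(doc JSON NOT NULL);")

payload = orjson.dumps({"cveMetadata": {"cveId": "CVE-2024-0001"}}).decode("utf-8")

# Plain JSON text storage (what this commit goes back to).
con.execute("INSERT INTO demo values(json(?));", (payload,))

# Binary JSONB storage (what it moves away from); needs SQLite 3.45 or newer.
con.execute("INSERT INTO demo values(jsonb(?));", (payload,))

# Compare how each row was actually stored and how large it is.
for kind, size in con.execute("SELECT typeof(doc), length(doc) FROM demo;"):
    print(kind, size)

Which encoding ends up smaller or faster depends on the documents and the access pattern, which is presumably what the measurement behind this commit found.
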
78 changes: 40 additions & 38 deletions vdb/lib/cve.py
@@ -368,41 +368,43 @@ def store(self, data: list[Vulnerability]):

    def store5(self, data: list[CVE]):
        """Store the CVE and index data into the SQLite database"""
-        for d in data:
-            cve_id = d.cveMetadata.cveId
-            cve_id = cve_id.model_dump(mode="python")
-            source_data = d.model_dump(mode="json",
-                                       exclude_defaults=True,
-                                       exclude_unset=True,
-                                       exclude_none=True)
-            source_data_str = orjson.dumps(source_data).decode("utf-8", "ignore")
-            source_hash = calculate_hash(source_data_str)
-            if d.containers.cna and d.containers.cna.affected:
-                for affected in d.containers.cna.affected.root:
-                    vers = to_purl_vers(affected.vendor, affected.versions)
-                    pkg_key = f"{cve_id}|{affected.vendor}|{affected.product}|{affected.packageName}|{source_hash}"
-                    index_pkg_key = f"{cve_id}|{affected.vendor}|{affected.product}|{affected.packageName}|{vers}"
-                    # Filter obvious duplicates
-                    if not source_completed_keys.get(pkg_key):
-                        self.db_conn.execute(
-                            "INSERT INTO cve_data values(?, ?, ?, ?, jsonb(?), ?, ?);", (
-                                cve_id,
-                                affected.vendor,
-                                affected.product,
-                                affected.packageName,
-                                source_data_str,
-                                None,
-                                source_hash
-                            ))
-                        source_completed_keys[pkg_key] = True
-                    if not index_completed_keys.get(index_pkg_key):
-                        self.index_conn.execute(
-                            "INSERT INTO cve_index values(?, ?, ?, ?, ?);", (
-                                cve_id,
-                                affected.vendor,
-                                affected.product,
-                                affected.packageName,
-                                vers
-                            )
-                        )
-                        index_completed_keys[index_pkg_key] = True
+        with self.db_conn as dbc:
+            with self.index_conn as indexc:
+                for d in data:
+                    cve_id = d.cveMetadata.cveId
+                    cve_id = cve_id.model_dump(mode="python")
+                    source_data = d.model_dump(mode="json",
+                                               exclude_defaults=True,
+                                               exclude_unset=True,
+                                               exclude_none=True)
+                    source_data_str = orjson.dumps(source_data).decode("utf-8", "ignore")
+                    source_hash = calculate_hash(source_data_str)
+                    if d.containers.cna and d.containers.cna.affected:
+                        for affected in d.containers.cna.affected.root:
+                            vers = to_purl_vers(affected.vendor, affected.versions)
+                            pkg_key = f"{cve_id}|{affected.vendor}|{affected.product}|{affected.packageName}|{source_hash}"
+                            index_pkg_key = f"{cve_id}|{affected.vendor}|{affected.product}|{affected.packageName}|{vers}"
+                            # Filter obvious duplicates
+                            if not source_completed_keys.get(pkg_key):
+                                dbc.execute(
+                                    "INSERT INTO cve_data values(?, ?, ?, ?, json(?), ?, ?);", (
+                                        cve_id,
+                                        affected.vendor,
+                                        affected.product,
+                                        affected.packageName,
+                                        source_data_str,
+                                        None,
+                                        source_hash
+                                    ))
+                                source_completed_keys[pkg_key] = True
+                            if not index_completed_keys.get(index_pkg_key):
+                                indexc.execute(
+                                    "INSERT INTO cve_index values(?, ?, ?, ?, ?);", (
+                                        cve_id,
+                                        affected.vendor,
+                                        affected.product,
+                                        affected.packageName,
+                                        vers
+                                    )
+                                )
+                                index_completed_keys[index_pkg_key] = True
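Besides switching jsonb(?) to json(?), the rewritten store5 wraps both connections in with blocks. In apsw, a Connection used as a context manager begins a transaction/savepoint that is committed when the block exits normally and rolled back on an exception, so the per-row INSERTs run inside one transaction instead of each auto-committing. A minimal sketch of that pattern (throwaway table and rows, not project code):

import apsw

con = apsw.Connection(":memory:")
con.execute("CREATE TABLE kv(k TEXT, v TEXT);")

rows = [("a", "1"), ("b", "2"), ("c", "3")]  # stand-in data

# Entering the connection begins a savepoint; leaving the block cleanly
# releases (commits) it, while an exception rolls everything back.
with con:
    for k, v in rows:
        con.execute("INSERT INTO kv values(?, ?);", (k, v))

print(con.execute("SELECT count(*) FROM kv;").fetchone())  # -> (3,)
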
4 changes: 1 addition & 3 deletions vdb/lib/db6.py
@@ -14,15 +14,13 @@
def ensure_schemas(db_conn_obj: apsw.Connection, index_conn_obj: apsw.Connection):
    """Create the sqlite tables and indexes in case they don't exist"""
    db_conn_obj.execute(
-        "CREATE TABLE if not exists cve_data(cve_id TEXT NOT NULL, type TEXT NOT NULL, namespace TEXT, name TEXT NOT NULL, source_data BLOB NOT NULL, override_data BLOB, source_data_hash TEXT NOT NULL);")
+        "CREATE TABLE if not exists cve_data(cve_id TEXT NOT NULL, type TEXT NOT NULL, namespace TEXT, name TEXT NOT NULL, source_data JSON NOT NULL, override_data JSON, source_data_hash TEXT NOT NULL);")
    db_conn_obj.pragma("synchronous", "OFF")
    db_conn_obj.pragma("journal_mode", "MEMORY")
    db_conn_obj.pragma("auto_vacuum", "FULL")
    index_conn_obj.execute(
        "CREATE TABLE if not exists cve_index(cve_id TEXT NOT NULL, type TEXT NOT NULL, namespace TEXT, name TEXT NOT NULL, vers TEXT NOT NULL);")
    index_conn_obj.pragma("synchronous", "OFF")
    index_conn_obj.pragma("journal_mode", "MEMORY")
    index_conn_obj.pragma("auto_vacuum", "FULL")


def get(db_file: str = config.VDB_BIN_FILE, index_file: str = config.VDB_BIN_INDEX, read_only=False) -> (
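With source_data and override_data declared as JSON (stored as text) rather than BLOB, the stored documents can be inspected directly with SQLite's JSON functions, and the raw column value is already the string that orjson expects. A rough standalone sketch against the cve_data schema above; the inserted document and values are invented for illustration:

import apsw
import orjson

con = apsw.Connection(":memory:")
con.execute(
    "CREATE TABLE if not exists cve_data(cve_id TEXT NOT NULL, type TEXT NOT NULL, namespace TEXT, name TEXT NOT NULL, source_data JSON NOT NULL, override_data JSON, source_data_hash TEXT NOT NULL);")

doc = {"cveMetadata": {"cveId": "CVE-2024-0001"}}  # made-up document
con.execute(
    "INSERT INTO cve_data values(?, ?, ?, ?, json(?), ?, ?);",
    ("CVE-2024-0001", "npm", "", "left-pad", orjson.dumps(doc).decode("utf-8"), None, "deadbeef"))

# The JSON column can be filtered and projected in SQL without any decoding step.
print(con.execute(
    "SELECT json_extract(source_data, '$.cveMetadata.cveId') FROM cve_data;").fetchone())
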
6 changes: 3 additions & 3 deletions vdb/lib/search.py
@@ -38,7 +38,7 @@ def get_cve_data(db_conn, index_hits: list[dict, Any], search_str: str) -> list[
    data_list = []
    for ahit in index_hits:
        results = exec_query(db_conn,
-                            "SELECT cve_id, type, namespace, name, json_object('source', source_data), json_object('override', override_data) FROM cve_data WHERE cve_id = ? AND type = ? ORDER BY cve_id DESC;",
+                            "SELECT cve_id, type, namespace, name, source_data, override_data FROM cve_data WHERE cve_id = ? AND type = ? ORDER BY cve_id DESC;",
                             (ahit["cve_id"], ahit["type"]))
        for res in results:
            data_list.append({
@@ -48,8 +48,8 @@ def get_cve_data(db_conn, index_hits: list[dict, Any], search_str: str) -> list[
"name": res[3],
"matching_vers": ahit["vers"],
"matched_by": search_str,
"source_data": CVE.model_validate(orjson.loads(res[4])["source"], strict=False) if res[4] else None,
"override_data": orjson.loads(res[5])["override"] if res[5] else None
"source_data": CVE.model_validate(orjson.loads(res[4]), strict=False) if res[4] else None,
"override_data": orjson.loads(res[5]) if res[5] else None
})
return data_list

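The search change drops the json_object() wrappers: with source_data stored as plain JSON text, selecting the column returns exactly the string that orjson.loads needs, whereas the json_object('source', ...) wrapper added a level that the old read path had to peel off with an extra ["source"] lookup (and, when the column holds plain text as in this demo, it also re-escapes the document). A small illustration with a throwaway table; the CVE pydantic model is left out to keep it self-contained:

import apsw
import orjson

con = apsw.Connection(":memory:")
con.execute("CREATE TABLE t(source_data JSON);")
con.execute("INSERT INTO t values(json(?));",
            (orjson.dumps({"cveMetadata": {"cveId": "CVE-2024-0001"}}).decode("utf-8"),))

# New style: the column value is the document itself.
direct = con.execute("SELECT source_data FROM t;").fetchone()[0]
print(orjson.loads(direct)["cveMetadata"]["cveId"])

# Wrapped style: json_object() treats the text column as a plain string, so the
# document comes back escaped under the 'source' key and needs a second parse.
wrapped = con.execute("SELECT json_object('source', source_data) FROM t;").fetchone()[0]
print(orjson.loads(orjson.loads(wrapped)["source"])["cveMetadata"]["cveId"])
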

