Skip to content

Commit 61ef39f

Browse files
committed
Improved zip summary performance.
1 parent d43b5a4 commit 61ef39f

File tree

3 files changed

+42
-59
lines changed

3 files changed

+42
-59
lines changed

src/downloader/jobs/open_zip_contents_worker.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,4 +103,4 @@ def operate_on(self, job: OpenZipContentsJob) -> WorkerResult: # type: ignore[o
103103

104104
logger.bench('OpenZipContentsWorker launching recovery process index...', job.db.db_id, job.zip_id)
105105

106-
return create_fetch_jobs(self._ctx, job.db.db_id, invalid_files, job.zip_description.get('base_files_url', job.db.base_files_url))
106+
return create_fetch_jobs(self._ctx, job.db.db_id, invalid_files, [], job.zip_description.get('base_files_url', job.db.base_files_url)), None

src/downloader/jobs/process_db_main_worker.py

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -60,9 +60,15 @@ def operate_on(self, job: ProcessDbMainJob) -> WorkerResult: # type: ignore[ove
6060
zip_jobs = []
6161
zip_job_tags = []
6262

63+
logger.bench('zip_summaries start: ', job.db.db_id)
64+
zip_summaries = read_only_store.zip_summaries()
65+
logger.bench('zip_summaries end: ', job.db.db_id)
66+
6367
for zip_id, zip_description in job.db.zips.items():
6468
#if zip_id != 'cheats_folder_psx': continue
65-
zip_job, err = _make_zip_job(ZipJobContext(zip_id=zip_id, zip_description=zip_description, config=config, job=job), self._ctx.logger)
69+
logger.bench('make zip job start: ', job.db.db_id, zip_id)
70+
zip_job, err = _make_zip_job(zip_summaries.get(zip_id, None), ZipJobContext(zip_id=zip_id, zip_description=zip_description, config=config, job=job))
71+
logger.bench('make zip job end: ', job.db.db_id, zip_id)
6672
if err is not None:
6773
self._ctx.swallow_error(err)
6874
job.ignored_zips.append(zip_id)
@@ -98,16 +104,12 @@ def operate_on(self, job: ProcessDbMainJob) -> WorkerResult: # type: ignore[ove
98104
return next_jobs, None
99105

100106

101-
def _make_zip_job(z: ZipJobContext, logger: Logger) -> Tuple[Job, Optional[Exception]]:
102-
logger.bench('make zip job start: ', z.job.db.db_id, z.zip_id)
107+
def _make_zip_job(index: Optional, z: ZipJobContext) -> Tuple[Job, Optional[Exception]]:
103108
try:
104109
check_zip(z.zip_description, z.job.db.db_id, z.zip_id)
105110
except Exception as e:
106111
return NilJob(), e
107112
if 'summary_file' in z.zip_description:
108-
logger.bench('zip_summary start: ', z.job.db.db_id, z.zip_id)
109-
index = z.job.store.read_only().zip_summary(z.zip_id)
110-
logger.bench('zip_summary end: ', z.job.db.db_id, z.zip_id)
111113
process_zip_job = None if index is None else _make_process_zip_job_from_ctx(z, zip_summary=index, has_new_zip_summary=False)
112114

113115
# if there is a recent enough index in the store, use it
@@ -119,7 +121,6 @@ def _make_zip_job(z: ZipJobContext, logger: Logger) -> Tuple[Job, Optional[Excep
119121
else:
120122
job = _make_process_zip_job_from_ctx(z, zip_summary=z.zip_description['internal_summary'], has_new_zip_summary=True)
121123

122-
logger.bench('make zip job end: ', z.job.db.db_id, z.zip_id)
123124
return job, None
124125

125126

src/downloader/local_store_wrapper.py

Lines changed: 33 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -546,58 +546,40 @@ def list_missing_files(self, db_files):
546546
files.update(external['files'])
547547
return {f: d for f, d in files.items() if f not in db_files}
548548

549-
def zip_summary(self, zip_id) -> Optional[Dict[str, Any]]:
550-
files = {}
551-
folders = {}
552-
for file_path, file_description in self._aggregated_summary['files'].items():
553-
if 'zip_id' in file_description and file_description['zip_id'] == zip_id:
554-
# @TODO: This if should not be necessary if we store all the zip information on the store
555-
# Explicit asking for games and cheats is a hack, as this should only be declared in
556-
# the database information. Remove ASAP
557-
if file_path.startswith('games') or file_path.startswith('cheats'):
558-
files['|' + file_path] = file_description
559-
else:
560-
files[file_path] = file_description
561-
562-
for folder_path, folder_description in self._aggregated_summary['folders'].items():
563-
if 'zip_id' in folder_description and folder_description['zip_id'] == zip_id:
564-
# @TODO: This if should not be necessary if we store all the zip information on the store
565-
# Explicit asking for games and cheats is a hack, as this should only be declared in
566-
# the database information. Remove ASAP
567-
if folder_path.startswith('games') or folder_path.startswith('cheats'):
568-
folders['|' + folder_path] = folder_description
569-
else:
570-
folders[folder_path] = folder_description
571-
572-
for zip_id, zip_description in self._store.get('filtered_zip_data', {}).items():
573-
if zip_id != zip_id:
574-
continue
549+
def zip_summaries(self) -> dict[str, Any]:
550+
grouped = defaultdict(lambda: {'files': {}, 'folders': {}})
551+
552+
# @TODO: This if startswith('games') should be removed when we store all the zip information on the store
553+
# Explicit asking for games is a hack, as this should only be declared in the database information. Remove ASAP
554+
555+
for fp, fd in self._store.get('files', {}).items():
556+
if 'zip_id' not in fd: continue
557+
grouped[fd['zip_id']]['files'][f'|{fp}' if fp.startswith('games') else fp] = fd
558+
559+
for dp, dd in self._store.get('folders', {}).items():
560+
if 'zip_id' not in dd: continue
561+
grouped[dd['zip_id']]['folders'][f'|{dp}' if dp.startswith('games') else dp] = dd
562+
563+
for summary in self._store.get('external', {}).values():
564+
for fp, fd in summary.get('files', {}).items():
565+
if 'zip_id' not in fd: continue
566+
grouped[fd['zip_id']]['files'][f'|{fp}' if fp.startswith('games') else fp] = fd
567+
568+
for summary in self._store.get('external', {}).values():
569+
for dp, dd in summary.get('folders', {}).items():
570+
if 'zip_id' not in dd: continue
571+
grouped[dd['zip_id']]['folders'][f'|{dp}' if dp.startswith('games') else dp] = dd
572+
573+
for zip_id, summary in self._store.get('filtered_zip_data', {}).items():
574+
for fp, fd in summary.get('files', {}).items():
575+
grouped[zip_id]['files'][f'|{fp}' if fp.startswith('games') else fp] = fd
576+
for dp, dd in summary.get('folders', {}).items():
577+
grouped[zip_id]['folders'][f'|{dp}' if dp.startswith('games') else dp] = dd
578+
579+
for zip_id, data in grouped.items():
580+
data['hash'] = self._store.get('zips', {}).get(zip_id, {}).get('summary_file', {}).get('hash', NO_HASH_IN_STORE_CODE)
575581

576-
for file_path, file_description in zip_description['files'].items():
577-
if 'zip_id' in file_description and file_description['zip_id'] == zip_id:
578-
# @TODO: This if should not be necessary if we store all the zip information on the store
579-
# Explicit asking for games and cheats is a hack, as this should only be declared in
580-
# the database information. Remove ASAP
581-
if file_path.startswith('games') or file_path.startswith('cheats'):
582-
files['|' + file_path] = file_description
583-
else:
584-
files[file_path] = file_description
585-
586-
for folder_path, folder_description in zip_description['folders'].items():
587-
if 'zip_id' in folder_description and folder_description['zip_id'] == zip_id:
588-
# @TODO: This if should not be necessary if we store all the zip information on the store
589-
# Explicit asking for games and cheats is a hack, as this should only be declared in
590-
# the database information. Remove ASAP
591-
if folder_path.startswith('games') or folder_path.startswith('cheats'):
592-
folders['|' + folder_path] = folder_description
593-
else:
594-
folders[folder_path] = folder_description
595-
596-
if len(files) == 0 and len(folders) == 0:
597-
return None
598-
599-
summary_hash = self._store.get('zips', {}).get(zip_id, {}).get('summary_file', {}).get('hash', NO_HASH_IN_STORE_CODE)
600-
return {'files': files, 'folders': folders, 'hash': summary_hash}
582+
return grouped
601583

602584
@property
603585
def filtered_zip_data(self):

0 commit comments

Comments
 (0)