Skip to content

Commit

Permalink
Improving folder precache optimization.
Browse files Browse the repository at this point in the history
  • Loading branch information
theypsilon committed Feb 11, 2025
1 parent 24b2dd2 commit 3489063
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 13 deletions.
35 changes: 24 additions & 11 deletions src/downloader/file_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def is_folder(self, path: str) -> bool:
"""interface"""

@abstractmethod
def precache_is_file_with_folders(self, folders: List[PathPackage]) -> None:
def precache_is_file_with_folders(self, folders: List[PathPackage], recheck: bool = False) -> None:
"""interface"""

@abstractmethod
Expand Down Expand Up @@ -195,8 +195,8 @@ def are_files(self, file_pkgs: List[PathPackage]) -> Tuple[List[PathPackage], Li
def is_folder(self, path):
return self._fs.is_folder(path)

def precache_is_file_with_folders(self, folders: List[PathPackage]):
return self._fs.precache_is_file_with_folders(folders)
def precache_is_file_with_folders(self, folders: List[PathPackage], recheck: bool = False):
return self._fs.precache_is_file_with_folders(folders, recheck)

def download_target_path(self, path):
return self._fs.download_target_path(path)
Expand Down Expand Up @@ -291,8 +291,9 @@ def print_debug(self) -> None:
def is_folder(self, path: str) -> bool:
return os.path.isdir(self._path(path))

def precache_is_file_with_folders(self, folders: List[PathPackage]) -> None:
for folder_pkg in folders:
def precache_is_file_with_folders(self, folders: List[PathPackage], recheck: bool = False) -> None:
not_checked_folders = folders if recheck else self._shared_state.consult_not_checked_folders(folders)
for folder_pkg in not_checked_folders:
try:
self._shared_state.add_many_files([f.path for f in os.scandir(folder_pkg.full_path) if f.is_file()])
except FileNotFoundError:
Expand Down Expand Up @@ -580,17 +581,29 @@ class FsSharedState:
def __init__(self) -> None:
self.interrupting_operations = False
self._files: Set[str] = set()
self._lock = threading.Lock()
self._files_lock = threading.Lock()
self._cached_folders = set()
self._cached_folders_lock = threading.Lock()

def consult_not_checked_folders(self, folders: List["PathPackage"]) -> List["PathPackage"]:
    """Return the folders not yet recorded as cached, recording them now.

    Each folder is identified by its ``full_path``. A folder whose path is
    not in ``self._cached_folders`` is added to that set and included in
    the result; a folder whose path is already present is left out. The
    check and the insertion happen under ``self._cached_folders_lock``.

    Note that a path is recorded as soon as it is handed out, i.e. before
    the caller has done anything with the returned folders.

    Args:
        folders: Folder packages to consult.

    Returns:
        A new list, in input order, with the folders that were not recorded
        before this call. A path repeated within ``folders`` is returned
        only once (its first occurrence).
    """
    # Same short-circuit as contained_file_pkgs / add_many_files: nothing to
    # consult means there is no reason to contend for the lock.
    if not folders:
        return []

    not_checked = []
    with self._cached_folders_lock:
        for folder_pkg in folders:
            full_path = folder_pkg.full_path
            if full_path in self._cached_folders:
                continue
            self._cached_folders.add(full_path)
            not_checked.append(folder_pkg)

    return not_checked

def contains_file(self, path: str) -> bool:
with self._lock:
with self._files_lock:
return path in self._files

def contained_file_pkgs(self, pkgs: List[PathPackage]) -> Tuple[List[PathPackage], List[PathPackage]]:
if len(pkgs) == 0: return [], []
contained = []
foreigns = []
with self._lock:
with self._files_lock:
for p in pkgs:
if p.full_path in self._files:
contained.append(p)
Expand All @@ -600,13 +613,13 @@ def contained_file_pkgs(self, pkgs: List[PathPackage]) -> Tuple[List[PathPackage

def add_many_files(self, paths: List[str]) -> None:
if len(paths) == 0: return
with self._lock:
with self._files_lock:
self._files.update(paths)

def add_file(self, path: str) -> None:
with self._lock:
with self._files_lock:
self._files.add(path)

def remove_file(self, path: str) -> None:
with self._lock:
with self._files_lock:
if path in self._files: self._files.remove(path)
2 changes: 1 addition & 1 deletion src/downloader/jobs/open_zip_contents_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def operate_on(self, job: OpenZipContentsJob) -> WorkerResult: # type: ignore[o
job.downloaded_files.extend(job.files_to_unzip)

logger.bench('Precaching is_file...')
self._ctx.file_system.precache_is_file_with_folders(job.recipient_folders)
self._ctx.file_system.precache_is_file_with_folders(job.recipient_folders, recheck=True)

logger.bench('OpenZipContentsWorker validating...')

Expand Down
2 changes: 1 addition & 1 deletion src/test/fake_file_system_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def is_folder(self, path):
def read_file_contents(self, path):
return self.state.files[self._path(path)]['content']

def precache_is_file_with_folders(self, folders: list[str]):
def precache_is_file_with_folders(self, _folders: list[str], _recheck: bool = False):
pass

def write_file_contents(self, path, content):
Expand Down

0 comments on commit 3489063

Please sign in to comment.