Skip re-scanning folders during is_file precaching unless a recheck is requested.

* FsSharedState records, under a dedicated lock, the full_path of every folder
  already handed out for scanning; consult_not_checked_folders returns only the
  folders it has not handed out before.
* precache_is_file_with_folders gains a recheck parameter (default False). With
  recheck=True every given folder is scanned again; OpenZipContentsWorker now
  passes recheck=True.
* The lock guarding the known-files set is renamed from _lock to _files_lock.
* The test fake keeps the interface's parameter names (folders, recheck) so that
  keyword calls such as recheck=True also work against it.

NOTE(review): line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch, and the index hashes are retained from
that copy; confirm the patch applies cleanly before relying on it.

diff --git a/src/downloader/file_system.py b/src/downloader/file_system.py
index e7aa956..94b6ae4 100644
--- a/src/downloader/file_system.py
+++ b/src/downloader/file_system.py
@@ -90,7 +90,7 @@ def is_folder(self, path: str) -> bool:
         """interface"""
 
     @abstractmethod
-    def precache_is_file_with_folders(self, folders: List[PathPackage]) -> None:
+    def precache_is_file_with_folders(self, folders: List[PathPackage], recheck: bool = False) -> None:
         """interface"""
 
     @abstractmethod
@@ -195,8 +195,8 @@ def are_files(self, file_pkgs: List[PathPackage]) -> Tuple[List[PathPackage], Li
     def is_folder(self, path):
         return self._fs.is_folder(path)
 
-    def precache_is_file_with_folders(self, folders: List[PathPackage]):
-        return self._fs.precache_is_file_with_folders(folders)
+    def precache_is_file_with_folders(self, folders: List[PathPackage], recheck: bool = False):
+        return self._fs.precache_is_file_with_folders(folders, recheck)
 
     def download_target_path(self, path):
         return self._fs.download_target_path(path)
@@ -291,8 +291,10 @@ def print_debug(self) -> None:
     def is_folder(self, path: str) -> bool:
         return os.path.isdir(self._path(path))
 
-    def precache_is_file_with_folders(self, folders: List[PathPackage]) -> None:
-        for folder_pkg in folders:
+    def precache_is_file_with_folders(self, folders: List[PathPackage], recheck: bool = False) -> None:
+        # recheck=True scans every given folder; otherwise only folders not consulted before are scanned.
+        not_checked_folders = folders if recheck else self._shared_state.consult_not_checked_folders(folders)
+        for folder_pkg in not_checked_folders:
             try:
                 self._shared_state.add_many_files([f.path for f in os.scandir(folder_pkg.full_path) if f.is_file()])
             except FileNotFoundError:
@@ -580,17 +582,34 @@ class FsSharedState:
     def __init__(self) -> None:
         self.interrupting_operations = False
         self._files: Set[str] = set()
-        self._lock = threading.Lock()
+        self._files_lock = threading.Lock()
+        self._cached_folders: Set[str] = set()
+        self._cached_folders_lock = threading.Lock()
+
+    def consult_not_checked_folders(self, folders: List[PathPackage]) -> List[PathPackage]:
+        """Return the folders no earlier call has seen, marking them as seen.
+
+        Membership is keyed by full_path and is checked and updated under a
+        lock, so a given folder is returned by at most one call.
+        """
+        precaching_folders = []
+        with self._cached_folders_lock:
+            for folder_pkg in folders:
+                if folder_pkg.full_path not in self._cached_folders:
+                    self._cached_folders.add(folder_pkg.full_path)
+                    precaching_folders.append(folder_pkg)
+
+        return precaching_folders
 
     def contains_file(self, path: str) -> bool:
-        with self._lock:
+        with self._files_lock:
             return path in self._files
 
     def contained_file_pkgs(self, pkgs: List[PathPackage]) -> Tuple[List[PathPackage], List[PathPackage]]:
         if len(pkgs) == 0: return [], []
         contained = []
         foreigns = []
-        with self._lock:
+        with self._files_lock:
             for p in pkgs:
                 if p.full_path in self._files:
                     contained.append(p)
@@ -600,13 +619,13 @@ def contained_file_pkgs(self, pkgs: List[PathPackage]) -> Tuple[List[PathPackage
 
     def add_many_files(self, paths: List[str]) -> None:
         if len(paths) == 0: return
-        with self._lock:
+        with self._files_lock:
             self._files.update(paths)
 
     def add_file(self, path: str) -> None:
-        with self._lock:
+        with self._files_lock:
             self._files.add(path)
 
     def remove_file(self, path: str) -> None:
-        with self._lock:
+        with self._files_lock:
             if path in self._files: self._files.remove(path)
diff --git a/src/downloader/jobs/open_zip_contents_worker.py b/src/downloader/jobs/open_zip_contents_worker.py
index 203c757..6b1aec0 100644
--- a/src/downloader/jobs/open_zip_contents_worker.py
+++ b/src/downloader/jobs/open_zip_contents_worker.py
@@ -73,7 +73,7 @@ def operate_on(self, job: OpenZipContentsJob) -> WorkerResult: # type: ignore[o
             job.downloaded_files.extend(job.files_to_unzip)
 
         logger.bench('Precaching is_file...')
-        self._ctx.file_system.precache_is_file_with_folders(job.recipient_folders)
+        self._ctx.file_system.precache_is_file_with_folders(job.recipient_folders, recheck=True)
 
         logger.bench('OpenZipContentsWorker validating...')
 
diff --git a/src/test/fake_file_system_factory.py b/src/test/fake_file_system_factory.py
index 32f78e9..bc98298 100644
--- a/src/test/fake_file_system_factory.py
+++ b/src/test/fake_file_system_factory.py
@@ -169,7 +169,7 @@ def is_folder(self, path):
     def read_file_contents(self, path):
         return self.state.files[self._path(path)]['content']
 
-    def precache_is_file_with_folders(self, folders: list[str]):
+    def precache_is_file_with_folders(self, folders: list[str], recheck: bool = False):
         pass
 
     def write_file_contents(self, path, content):