From a5e099ab6134f8f4ce1413af1e1a796174618c2d Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Tue, 25 Oct 2022 13:47:44 -0400 Subject: [PATCH 1/4] Improve push command performance --- lean/components/cloud/push_manager.py | 76 ++++++++++++--------------- 1 file changed, 35 insertions(+), 41 deletions(-) diff --git a/lean/components/cloud/push_manager.py b/lean/components/cloud/push_manager.py index 786d5fd0..b1004e48 100644 --- a/lean/components/cloud/push_manager.py +++ b/lean/components/cloud/push_manager.py @@ -43,6 +43,8 @@ def __init__(self, self._project_manager = project_manager self._project_config_manager = project_config_manager self._last_file = None + self._cloud_projects = [] + self._lean_environments = None def push_projects(self, projects_to_push: List[Path], organization_id: Optional[str] = None) -> None: """Pushes the given projects from the local drive to the cloud. @@ -56,17 +58,13 @@ def push_projects(self, projects_to_push: List[Path], organization_id: Optional[ for project in projects_to_push for library in self._project_manager.get_project_libraries(project)] projects = sorted(projects) - - cloud_projects = self._api_client.projects.get_all() - environments = self._api_client.lean.environments() - pushed_projects = {} for index, project in enumerate(projects, start=1): relative_path = project.relative_to(Path.cwd()) try: self._logger.info(f"[{index}/{len(projects)}] Pushing '{relative_path}'") - pushed_project = self._push_project(project, cloud_projects, organization_id, environments) + pushed_project = self._push_project(project, organization_id) pushed_projects[project] = pushed_project except Exception as ex: self._logger.debug(traceback.format_exc().strip()) @@ -75,19 +73,14 @@ def push_projects(self, projects_to_push: List[Path], organization_id: Optional[ else: self._logger.warn(f"Cannot push '{relative_path}': {ex}") - pushed_cloud_projects = pushed_projects.values() - cloud_projects = [project for project in cloud_projects if project.projectId not in pushed_cloud_projects] - cloud_projects.extend(pushed_cloud_projects) + self._update_cloud_library_references(pushed_projects) - self._update_cloud_library_references(pushed_projects, cloud_projects) - - def _update_cloud_library_references(self, projects: Dict[Path, QCProject], - cloud_projects: List[QCProject]) -> None: + def _update_cloud_library_references(self, projects: Dict[Path, QCProject]) -> None: for path, project in projects.items(): local_libraries_cloud_ids = self._get_local_libraries_cloud_ids(path) - self._add_new_libraries(project, local_libraries_cloud_ids, cloud_projects) - self._remove_outdated_libraries(project, local_libraries_cloud_ids, cloud_projects) + self._add_new_libraries(project, local_libraries_cloud_ids) + self._remove_outdated_libraries(project, local_libraries_cloud_ids) def _get_local_libraries_cloud_ids(self, project_dir: Path) -> List[int]: project_config = self._project_config_manager.get_project_config(project_dir) @@ -100,14 +93,10 @@ def _get_local_libraries_cloud_ids(self, project_dir: Path) -> List[int]: return local_libraries_cloud_ids - @staticmethod - def _get_library_name(library_cloud_id: int, cloud_projects: List[QCProject]) -> str: - return [project.name for project in cloud_projects if project.projectId == library_cloud_id][0] + def _get_library_name(self, library_cloud_id: int) -> str: + return self._get_cloud_project(library_cloud_id).name - def _add_new_libraries(self, - project: QCProject, - local_libraries_cloud_ids: List[int], - cloud_projects: List[QCProject]) -> None: + def _add_new_libraries(self, project: QCProject, local_libraries_cloud_ids: List[int]) -> None: libraries_to_add = [library_id for library_id in local_libraries_cloud_ids if library_id not in project.libraries] @@ -115,15 +104,12 @@ def _add_new_libraries(self, self._logger.info(f"Adding libraries to project {project.name} in the cloud") for i, library_cloud_id in enumerate(libraries_to_add, start=1): - library_name = self._get_library_name(library_cloud_id, cloud_projects) + library_name = self._get_library_name(library_cloud_id) self._logger.info(f"[{i}/{len(libraries_to_add)}] " f"Adding library {library_name} to project {project.name} in the cloud") self._api_client.projects.add_library(project.projectId, library_cloud_id) - def _remove_outdated_libraries(self, - project: QCProject, - local_libraries_cloud_ids: List[int], - cloud_projects: List[QCProject]) -> None: + def _remove_outdated_libraries(self, project: QCProject, local_libraries_cloud_ids: List[int]) -> None: libraries_to_remove = [library_id for library_id in project.libraries if library_id not in local_libraries_cloud_ids] @@ -131,36 +117,28 @@ def _remove_outdated_libraries(self, self._logger.info(f"Removing libraries from project {project.name} in the cloud") for i, library_cloud_id in enumerate(libraries_to_remove, start=1): - library_name = self._get_library_name(library_cloud_id, cloud_projects) + library_name = self._get_library_name(library_cloud_id) self._logger.info(f"[{i}/{len(libraries_to_remove)}] " f"Removing library {library_name} from project {project.name} in the cloud") self._api_client.projects.delete_library(project.projectId, library_cloud_id) - def _push_project(self, - project: Path, - cloud_projects: List[QCProject], - organization_id: Optional[str], - environments: List[QCLeanEnvironment]) -> QCProject: + def _push_project(self, project: Path, organization_id: Optional[str]) -> QCProject: """Pushes a single local project to the cloud. Raises an error with a descriptive message if the project cannot be pushed. :param project: the local project to push - :param cloud_projects: a list containing all of the user's cloud projects :param organization_id: the id of the organization to push the project to - :param environments: list of available lean environments """ project_name = project.relative_to(Path.cwd()).as_posix() project_config = self._project_config_manager.get_project_config(project) cloud_id = project_config.get("cloud-id") - cloud_project_by_id = next(iter([p for p in cloud_projects if p.projectId == cloud_id]), None) - # Find the cloud project to push the files to - if cloud_project_by_id is not None: + if cloud_id is not None: # Project has cloud id which matches cloud project, update cloud project - cloud_project = cloud_project_by_id + cloud_project = self._get_cloud_project(cloud_id) else: # Project has invalid cloud id or no cloud id at all, create new cloud project new_project = self._api_client.projects.create(project_name, @@ -174,6 +152,7 @@ def _push_project(self, # We need to retrieve the created project again to get all project details cloud_project = self._api_client.projects.get(new_project.projectId) + self._cloud_projects.append(cloud_project) # set organization-id in project config project_config.set("organization-id", cloud_project.organizationId) @@ -182,7 +161,7 @@ def _push_project(self, self._push_files(project, cloud_project) # Finalize pushing by updating locally modified metadata - self._push_metadata(project, cloud_project, environments) + self._push_metadata(project, cloud_project) return cloud_project @@ -225,7 +204,7 @@ def _push_files(self, project: Path, cloud_project: QCProject) -> None: self._last_file = None - def _push_metadata(self, project: Path, cloud_project: QCProject, environments: List[QCLeanEnvironment]) -> None: + def _push_metadata(self, project: Path, cloud_project: QCProject) -> None: """Pushes local project description and parameters to the cloud. Does nothing if the cloud is already up-to-date. @@ -245,7 +224,7 @@ def _push_metadata(self, project: Path, cloud_project: QCProject, environments: local_lean_version = int(project_config.get("lean-engine", "-1")) cloud_lean_version = cloud_project.leanVersionId - default_lean_venv = next((env.id for env in environments if env.path is None), None) + default_lean_venv = self._default_environment local_lean_venv = project_config.get("python-venv", default_lean_venv) cloud_lean_venv = cloud_project.leanEnvironment @@ -267,3 +246,18 @@ def _push_metadata(self, project: Path, cloud_project: QCProject, environments: if update_args != {}: self._api_client.projects.update(cloud_project.projectId, **update_args) self._logger.info(f"Successfully updated {' and '.join(update_args.keys())} for '{cloud_project.name}'") + + def _get_cloud_project(self, project_id: int) -> QCProject: + project = next(iter(p for p in self._cloud_projects if p.projectId == project_id), None) + if project is None: + project = self._api_client.projects.get(project_id) + self._cloud_projects.append(project) + + return project + + @property + def _default_environment(self) -> List[QCLeanEnvironment]: + if self._lean_environments is None: + self._lean_environments = self._api_client.lean.environments() + + return next((env.id for env in self._lean_environments if env.path is None), None) From 493bb88746f5e60b93913ab49847a6c378210908 Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Wed, 26 Oct 2022 11:06:50 -0400 Subject: [PATCH 2/4] Improve push command performance --- lean/components/cloud/push_manager.py | 163 +++++++++++------- tests/commands/cloud/test_push.py | 14 +- .../cloud/test_cloud_project_manager.py | 3 - tests/components/util/test_push_manager.py | 38 ++-- 4 files changed, 127 insertions(+), 91 deletions(-) diff --git a/lean/components/cloud/push_manager.py b/lean/components/cloud/push_manager.py index b1004e48..9cf770e3 100644 --- a/lean/components/cloud/push_manager.py +++ b/lean/components/cloud/push_manager.py @@ -11,15 +11,18 @@ # See the License for the specific language governing permissions and # limitations under the License. +import multiprocessing import traceback from pathlib import Path from typing import List, Optional, Dict +from joblib import Parallel, delayed + from lean.components.api.api_client import APIClient from lean.components.config.project_config_manager import ProjectConfigManager from lean.components.util.logger import Logger from lean.components.util.project_manager import ProjectManager -from lean.models.api import QCLanguage, QCProject, QCLeanEnvironment +from lean.models.api import QCLanguage, QCProject, QCFullFile from lean.models.utils import LeanLibraryReference @@ -44,7 +47,24 @@ def __init__(self, self._project_config_manager = project_config_manager self._last_file = None self._cloud_projects = [] - self._lean_environments = None + + def _process_push_project(self, + index: int, + project: Path, + organization_id: Optional[str], + total_projects: int) -> Optional[QCProject]: + relative_path = project.relative_to(Path.cwd()) + try: + self._logger.info(f"[{index}/{total_projects}] Pushing '{relative_path}'") + return self._push_project(project, organization_id) + except Exception as ex: + self._logger.debug(traceback.format_exc().strip()) + if self._last_file is not None: + self._logger.warn(f"Cannot push '{relative_path}' (failed on {self._last_file}): {ex}") + else: + self._logger.warn(f"Cannot push '{relative_path}': {ex}") + + return None def push_projects(self, projects_to_push: List[Path], organization_id: Optional[str] = None) -> None: """Pushes the given projects from the local drive to the cloud. @@ -54,33 +74,46 @@ def push_projects(self, projects_to_push: List[Path], organization_id: Optional[ :param projects_to_push: a list of directories containing the local projects that need to be pushed :param organization_id: the id of the organization where the project will be pushed to """ - projects = projects_to_push + [library - for project in projects_to_push - for library in self._project_manager.get_project_libraries(project)] - projects = sorted(projects) - pushed_projects = {} - - for index, project in enumerate(projects, start=1): - relative_path = project.relative_to(Path.cwd()) - try: - self._logger.info(f"[{index}/{len(projects)}] Pushing '{relative_path}'") - pushed_project = self._push_project(project, organization_id) - pushed_projects[project] = pushed_project - except Exception as ex: - self._logger.debug(traceback.format_exc().strip()) - if self._last_file is not None: - self._logger.warn(f"Cannot push '{relative_path}' (failed on {self._last_file}): {ex}") - else: - self._logger.warn(f"Cannot push '{relative_path}': {ex}") + if len(projects_to_push) == 0: + return + + projects_paths = projects_to_push + [library + for project in projects_to_push + for library in self._project_manager.get_project_libraries(project)] + projects_paths = sorted(projects_paths) + + if len(projects_paths) > 1: + parallel = Parallel(n_jobs=max(1, multiprocessing.cpu_count() - 1), prefer="threads") + pushed_projects = parallel( + delayed(self._process_push_project)(index, project, organization_id, len(projects_paths)) + for index, project in enumerate(projects_paths, start=1)) + pushed_projects = {path: project + for path, project in zip(projects_paths, pushed_projects) if project is not None} + else: + path = projects_paths[0] + project = self._process_push_project(1, path, organization_id, len(projects_paths)) + pushed_projects = {} + if project is not None: + pushed_projects[path] = project self._update_cloud_library_references(pushed_projects) + def _update_cloud_project_references(self, path: Path, project: QCProject) -> None: + local_libraries_cloud_ids = self._get_local_libraries_cloud_ids(path) + self._add_new_libraries(project, local_libraries_cloud_ids) + self._remove_outdated_libraries(project, local_libraries_cloud_ids) + def _update_cloud_library_references(self, projects: Dict[Path, QCProject]) -> None: - for path, project in projects.items(): - local_libraries_cloud_ids = self._get_local_libraries_cloud_ids(path) + if len(projects) == 0: + return - self._add_new_libraries(project, local_libraries_cloud_ids) - self._remove_outdated_libraries(project, local_libraries_cloud_ids) + if len(projects) > 1: + parallel = Parallel(n_jobs=max(1, multiprocessing.cpu_count() - 1), prefer="threads") + parallel(delayed(self._update_cloud_project_references)(path, project) + for path, project in projects.items()) + else: + path, project = list(projects.items())[0] + self._update_cloud_project_references(path, project) def _get_local_libraries_cloud_ids(self, project_dir: Path) -> List[int]: project_config = self._project_config_manager.get_project_config(project_dir) @@ -165,6 +198,38 @@ def _push_project(self, project: Path, organization_id: Optional[str]) -> QCProj return cloud_project + def _push_file(self, + local_file_path: Path, + local_file_name: str, + cloud_files: List[QCFullFile], + cloud_project: QCProject) -> None: + try: + if "bin/" in local_file_name or "obj/" in local_file_name or ".ipynb_checkpoints/" in local_file_name: + return + + file_content = local_file_path.read_text(encoding="utf-8") + cloud_file = next(iter([f for f in cloud_files if f.name == local_file_name]), None) + + if cloud_file is None: + new_file = self._api_client.files.create(cloud_project.projectId, local_file_name, file_content) + self._project_manager.update_last_modified_time(local_file_path, new_file.modified) + self._logger.info(f"Successfully created cloud file '{cloud_project.name}/{local_file_name}'") + elif cloud_file.content.strip() != file_content.strip(): + new_file = self._api_client.files.update(cloud_project.projectId, local_file_name, file_content) + self._project_manager.update_last_modified_time(local_file_path, new_file.modified) + self._logger.info(f"Successfully updated cloud file '{cloud_project.name}/{local_file_name}'") + except Exception as e: + self._last_file = local_file_path + raise e + + def _remove_file(self, file: QCFullFile, cloud_project: QCProject) -> None: + try: + self._api_client.files.delete(cloud_project.projectId, file.name) + self._logger.info(f"Successfully removed cloud file '{cloud_project.name}/{file.name}'") + except Exception as e: + self._last_file = Path(file.name) + raise e + def _push_files(self, project: Path, cloud_project: QCProject) -> None: """Pushes the files of a local project to the cloud. @@ -175,32 +240,15 @@ def _push_files(self, project: Path, cloud_project: QCProject) -> None: local_files = self._project_manager.get_source_files(project) local_file_names = [local_file.relative_to(project).as_posix() for local_file in local_files] - for local_file, file_name in zip(local_files, local_file_names): - self._last_file = local_file - - if "bin/" in file_name or "obj/" in file_name or ".ipynb_checkpoints/" in file_name: - continue + with Parallel(n_jobs=max(1, multiprocessing.cpu_count() - 1), prefer="threads") as parallel: + parallel(delayed(self._push_file)(local_file, file_name, cloud_files, cloud_project) + for local_file, file_name in zip(local_files, local_file_names)) - file_content = local_file.read_text(encoding="utf-8") - cloud_file = next(iter([f for f in cloud_files if f.name == file_name]), None) - - if cloud_file is None: - new_file = self._api_client.files.create(cloud_project.projectId, file_name, file_content) - self._project_manager.update_last_modified_time(local_file, new_file.modified) - self._logger.info(f"Successfully created cloud file '{cloud_project.name}/{file_name}'") - elif cloud_file.content.strip() != file_content.strip(): - new_file = self._api_client.files.update(cloud_project.projectId, file_name, file_content) - self._project_manager.update_last_modified_time(local_file, new_file.modified) - self._logger.info(f"Successfully updated cloud file '{cloud_project.name}/{file_name}'") - - # Delete locally removed files in cloud - files_to_remove = [cloud_file for cloud_file in cloud_files - if (not cloud_file.isLibrary and - not any(local_file_name == cloud_file.name for local_file_name in local_file_names))] - for file in files_to_remove: - self._last_file = Path(file.name) - self._api_client.files.delete(cloud_project.projectId, file.name) - self._logger.info(f"Successfully removed cloud file '{cloud_project.name}/{file.name}'") + # Delete locally removed files in cloud + files_to_remove = [cloud_file for cloud_file in cloud_files + if (not cloud_file.isLibrary and + not any(local_file_name == cloud_file.name for local_file_name in local_file_names))] + parallel(delayed(self._remove_file)(file, cloud_project) for file in files_to_remove) self._last_file = None @@ -224,8 +272,7 @@ def _push_metadata(self, project: Path, cloud_project: QCProject) -> None: local_lean_version = int(project_config.get("lean-engine", "-1")) cloud_lean_version = cloud_project.leanVersionId - default_lean_venv = self._default_environment - local_lean_venv = project_config.get("python-venv", default_lean_venv) + local_lean_venv = project_config.get("python-venv", None) cloud_lean_venv = cloud_project.leanEnvironment update_args = {} @@ -237,10 +284,13 @@ def _push_metadata(self, project: Path, cloud_project: QCProject) -> None: update_args["parameters"] = local_parameters if (local_lean_version != cloud_lean_version and - (local_lean_version != -1 or not cloud_project.leanPinnedToMaster)): + (local_lean_version != -1 or not cloud_project.leanPinnedToMaster)): update_args["lean_engine"] = local_lean_version - if local_lean_venv != cloud_lean_venv: + # Initially, python-venv is not defined in the config and the default one will be used. + # After it is changed, in order to use the default one again, it must not be removed from the config, + # but it should be set to the default env id explicitly instead. + if local_lean_venv is not None and local_lean_venv != cloud_lean_venv: update_args["python_venv"] = local_lean_venv if update_args != {}: @@ -254,10 +304,3 @@ def _get_cloud_project(self, project_id: int) -> QCProject: self._cloud_projects.append(project) return project - - @property - def _default_environment(self) -> List[QCLeanEnvironment]: - if self._lean_environments is None: - self._lean_environments = self._api_client.lean.environments() - - return next((env.id for env in self._lean_environments if env.path is None), None) diff --git a/tests/commands/cloud/test_push.py b/tests/commands/cloud/test_push.py index 81f38ea8..96f90003 100644 --- a/tests/commands/cloud/test_push.py +++ b/tests/commands/cloud/test_push.py @@ -117,11 +117,11 @@ def test_cloud_push_removes_locally_removed_files_in_cloud() -> None: client.files.delete = mock.Mock() client.lean.environments = mock.MagicMock(return_value=create_lean_environments()) - cloud_projects = [create_api_project(1, "Python Project")] - client.projects.get_all = mock.MagicMock(return_value=cloud_projects) + cloud_project = create_api_project(1, "Python Project") + client.projects.get = mock.MagicMock(return_value=cloud_project) project_config = mock.Mock() - project_config.get = mock.MagicMock(side_effect=[1, "", {}, cloud_projects[0].leanVersionId, None, []]) + project_config.get = mock.MagicMock(side_effect=[1, "", {}, cloud_project.leanVersionId, None, []]) project_config_manager = mock.Mock() project_config_manager.get_project_config = mock.MagicMock(return_value=project_config) @@ -139,7 +139,7 @@ def test_cloud_push_removes_locally_removed_files_in_cloud() -> None: assert result.exit_code == 0 project_config.get.assert_called() - client.projects.get_all.assert_called() + client.projects.get.assert_called_once_with(cloud_project.projectId) project_manager.get_source_files.assert_called_once() project_config_manager.get_project_config.assert_called() client.files.get_all.assert_called_once() @@ -154,16 +154,14 @@ def test_cloud_push_creates_project_with_optional_organization_id(organization_i cloud_project = create_api_project(1, path) with mock.patch.object(ProjectClient, 'create', return_value=create_api_project(1, path)) as mock_create_project,\ - mock.patch.object(ProjectClient, 'get_all', side_effect=[[], [cloud_project]]) as mock_get_all_projects,\ - mock.patch.object(LeanClient, 'environments', return_value=create_lean_environments()) as mock_get_environments: + mock.patch.object(ProjectClient, 'get', return_value=cloud_project) as mock_get_project: organization_id_option = ["--organization-id", organization_id] if organization_id is not None else [] result = CliRunner().invoke(lean, ["cloud", "push", "--project", path, *organization_id_option]) assert result.exit_code == 0 - mock_get_all_projects.assert_called() + mock_get_project.assert_called_once_with(cloud_project.projectId) mock_create_project.assert_called_once_with(path, QCLanguage.Python, organization_id) - mock_get_environments.assert_called_once() def test_cloud_push_updates_lean_config() -> None: diff --git a/tests/components/cloud/test_cloud_project_manager.py b/tests/components/cloud/test_cloud_project_manager.py index dff0aea8..7d24e58f 100644 --- a/tests/components/cloud/test_cloud_project_manager.py +++ b/tests/components/cloud/test_cloud_project_manager.py @@ -24,12 +24,10 @@ def test_get_cloud_project_pushing_new_project(): create_fake_lean_cli_directory() - cloud_projects = [create_api_project(i, f"Project {i}") for i in range(1, 11)] cloud_project = create_api_project(20, "Python Project") cloud_project.description = "" api_client = mock.Mock() - api_client.projects.get_all = mock.MagicMock(side_effect=[cloud_projects, [*cloud_projects, cloud_project]]) api_client.projects.get.return_value = cloud_project api_client.projects.create.return_value = cloud_project api_client.files.get_all.return_value = [] @@ -42,5 +40,4 @@ def test_get_cloud_project_pushing_new_project(): assert created_cloud_project == cloud_project - api_client.projects.get_all.assert_called_once() api_client.projects.get.assert_called_with(cloud_project.projectId) diff --git a/tests/components/util/test_push_manager.py b/tests/components/util/test_push_manager.py index 2323f5c3..eaa1030f 100644 --- a/tests/components/util/test_push_manager.py +++ b/tests/components/util/test_push_manager.py @@ -29,6 +29,7 @@ def _create_push_manager(api_client: mock.Mock, project_manager: mock.Mock) -> P def _get_base_cloud_projects() -> List[QCProject]: return [create_api_project(i, f"Project: number {i}") for i in range(1, 6)] + def test_push_projects_pushes_libraries_referenced_by_the_projects() -> None: create_fake_lean_cli_directory() @@ -58,7 +59,6 @@ def test_push_projects_pushes_libraries_referenced_by_the_projects() -> None: create_api_project(project_id, project_path.name), ] api_client = mock.Mock() - api_client.projects.get_all = mock.MagicMock(return_value=_get_base_cloud_projects()) api_client.projects.create = mock.MagicMock(side_effect=cloud_projects) api_client.projects.get = mock.MagicMock(side_effect=cloud_projects) api_client.files.get_all = mock.MagicMock(return_value=[]) @@ -74,12 +74,11 @@ def test_push_projects_pushes_libraries_referenced_by_the_projects() -> None: mock.call(python_library_path), mock.call(project_path)]) - api_client.projects.get_all.assert_called_once() api_client.projects.create.assert_has_calls([ mock.call(csharp_library_path.relative_to(lean_cli_root_dir).as_posix(), QCLanguage.CSharp, None), mock.call(python_library_path.relative_to(lean_cli_root_dir).as_posix(), QCLanguage.Python, None), mock.call(project_path.relative_to(lean_cli_root_dir).as_posix(), QCLanguage.Python, None) - ]) + ], any_order=True) api_client.projects.add_library.assert_has_calls([mock.call(python_library_id, csharp_library_id), mock.call(project_id, python_library_id)]) api_client.projects.delete_library.assert_not_called() @@ -101,11 +100,9 @@ def test_push_projects_removes_libraries_in_the_cloud() -> None: project_id = 1000 python_library_id = 1001 - cloud_projects = [ - create_api_project(python_library_id, python_library_relative_path), - create_api_project(project_id, project_path.name), - ] - cloud_projects[-1].libraries = [python_library_id] + cloud_project = create_api_project(project_id, project_path.name) + cloud_library = create_api_project(python_library_id, python_library_relative_path) + cloud_project.libraries = [cloud_library.projectId] project_config_manager = container.project_config_manager() project_config = project_config_manager.get_project_config(project_path) @@ -114,7 +111,7 @@ def test_push_projects_removes_libraries_in_the_cloud() -> None: library_config.set("cloud-id", python_library_id) api_client = mock.Mock() - api_client.projects.get_all = mock.MagicMock(return_value=_get_base_cloud_projects() + cloud_projects) + api_client.projects.get = mock.MagicMock(side_effect=[cloud_project, cloud_library]) api_client.files.get_all = mock.MagicMock(return_value=[]) api_client.lean.environments = mock.MagicMock(return_value=create_lean_environments()) api_client.projects.add_library = mock.Mock() @@ -126,7 +123,6 @@ def test_push_projects_removes_libraries_in_the_cloud() -> None: project_manager.get_project_libraries.assert_called_once_with(project_path) project_manager.get_source_files.assert_called_once_with(project_path) - api_client.projects.get_all.assert_called_once() api_client.projects.add_library.assert_not_called() api_client.projects.delete_library.assert_called_once_with(project_id, python_library_id) @@ -165,10 +161,13 @@ def test_push_projects_adds_and_removes_libraries_simultaneously() -> None: csharp_library_config.set("cloud-id", csharp_library_id) api_client = mock.Mock() - api_client.projects.get_all = mock.MagicMock(return_value=(_get_base_cloud_projects() + - [csharp_library_cloud_project, cloud_project])) + + def projects_get_side_effect(proj_id: int) -> QCProject: + return [p for p in [cloud_project, python_library_cloud_project, csharp_library_cloud_project] + if proj_id == p.projectId][0] + + api_client.projects.get = mock.MagicMock(side_effect=projects_get_side_effect) api_client.projects.create = mock.MagicMock(return_value=python_library_cloud_project) - api_client.projects.get = mock.MagicMock(return_value=python_library_cloud_project) api_client.files.get_all = mock.MagicMock(return_value=[]) api_client.lean.environments = mock.MagicMock(return_value=create_lean_environments()) api_client.projects.add_library = mock.Mock() @@ -180,7 +179,6 @@ def test_push_projects_adds_and_removes_libraries_simultaneously() -> None: project_manager.get_project_libraries.assert_called_once_with(project_path) project_manager.get_source_files.assert_has_calls([mock.call(python_library_path), mock.call(project_path)]) - api_client.projects.get_all.assert_called_once() api_client.projects.create.assert_called_once_with(python_library_path.relative_to(lean_cli_root_dir).as_posix(), QCLanguage.Python, None) @@ -203,9 +201,9 @@ def test_push_projects_pushes_lean_engine_version() -> None: project_config.set("lean-engine", 456) api_client = mock.Mock() - api_client.projects.get_all = mock.MagicMock(return_value=_get_base_cloud_projects() + [cloud_project]) api_client.files.get_all = mock.MagicMock(return_value=[]) api_client.lean.environments = mock.MagicMock(return_value=create_lean_environments()) + api_client.projects.get = mock.MagicMock(return_value=cloud_project) api_client.projects.update = mock.Mock() project_manager = mock.Mock() @@ -233,9 +231,9 @@ def test_push_projects_pushes_lean_engine_version_to_default() -> None: project_config.set("description", cloud_project.description) api_client = mock.Mock() - api_client.projects.get_all = mock.MagicMock(return_value=[cloud_project]) api_client.files.get_all = mock.MagicMock(return_value=[]) api_client.lean.environments = mock.MagicMock(return_value=create_lean_environments()) + api_client.projects.get = mock.MagicMock(return_value=cloud_project) api_client.projects.update = mock.Mock() project_manager = mock.Mock() @@ -264,9 +262,9 @@ def test_push_projects_pushes_lean_environment() -> None: project_config.set("python-venv", 2) api_client = mock.Mock() - api_client.projects.get_all = mock.MagicMock(return_value=[cloud_project]) api_client.files.get_all = mock.MagicMock(return_value=[]) api_client.lean.environments = mock.MagicMock(return_value=create_lean_environments()) + api_client.projects.get = mock.MagicMock(return_value=cloud_project) api_client.projects.update = mock.Mock() project_manager = mock.Mock() @@ -279,7 +277,7 @@ def test_push_projects_pushes_lean_environment() -> None: api_client.projects.update.assert_called_once_with(project_id, python_venv=2) -def test_push_projects_pushes_lean_environment_to_default() -> None: +def test_push_projects_does_not_push_lean_environment_when_unset() -> None: create_fake_lean_cli_directory() project_path = Path.cwd() / "Python Project" @@ -295,9 +293,9 @@ def test_push_projects_pushes_lean_environment_to_default() -> None: project_config.set("lean-engine", cloud_project.leanVersionId) api_client = mock.Mock() - api_client.projects.get_all = mock.MagicMock(return_value=[cloud_project]) api_client.files.get_all = mock.MagicMock(return_value=[]) api_client.lean.environments = mock.MagicMock(return_value=create_lean_environments()) + api_client.projects.get = mock.MagicMock(return_value=cloud_project) api_client.projects.update = mock.Mock() project_manager = mock.Mock() @@ -307,4 +305,4 @@ def test_push_projects_pushes_lean_environment_to_default() -> None: push_manager = _create_push_manager(api_client, project_manager) push_manager.push_projects([project_path]) - api_client.projects.update.assert_called_once_with(project_id, python_venv=1) + api_client.projects.update.assert_not_called() From 78cacdda2b2fca1fb1124b6355091a7591802267 Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Wed, 26 Oct 2022 14:00:39 -0400 Subject: [PATCH 3/4] Improve push command performance --- lean/components/api/project_client.py | 6 +- lean/components/cloud/push_manager.py | 114 ++++++++------------- tests/commands/cloud/test_push.py | 2 +- tests/components/util/test_push_manager.py | 18 +++- 4 files changed, 59 insertions(+), 81 deletions(-) diff --git a/lean/components/api/project_client.py b/lean/components/api/project_client.py index 39228109..82ea3059 100644 --- a/lean/components/api/project_client.py +++ b/lean/components/api/project_client.py @@ -14,7 +14,7 @@ from typing import List, Optional from lean.components.api.api_client import * -from lean.models.api import QCCreatedProject, QCLanguage, QCProject +from lean.models.api import QCLanguage, QCProject class ProjectClient: @@ -47,7 +47,7 @@ def get_all(self) -> List[QCProject]: data = self._api.get("projects/read") return [self._process_project(QCProject(**project)) for project in data["projects"]] - def create(self, name: str, language: QCLanguage, organization_id: Optional[str]) -> QCCreatedProject: + def create(self, name: str, language: QCLanguage, organization_id: Optional[str]) -> QCProject: """Creates a new project. :param name: the name of the project to create @@ -63,7 +63,7 @@ def create(self, name: str, language: QCLanguage, organization_id: Optional[str] parameters["organizationId"] = organization_id data = self._api.post("projects/create", parameters) - return self._process_project(QCCreatedProject(**data["projects"][0])) + return self._process_project(QCProject(**data["projects"][0])) def update(self, project_id: int, diff --git a/lean/components/cloud/push_manager.py b/lean/components/cloud/push_manager.py index 9cf770e3..a360b6dc 100644 --- a/lean/components/cloud/push_manager.py +++ b/lean/components/cloud/push_manager.py @@ -11,18 +11,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import multiprocessing import traceback from pathlib import Path from typing import List, Optional, Dict -from joblib import Parallel, delayed - from lean.components.api.api_client import APIClient from lean.components.config.project_config_manager import ProjectConfigManager from lean.components.util.logger import Logger from lean.components.util.project_manager import ProjectManager -from lean.models.api import QCLanguage, QCProject, QCFullFile +from lean.models.api import QCLanguage, QCProject from lean.models.utils import LeanLibraryReference @@ -82,38 +79,26 @@ def push_projects(self, projects_to_push: List[Path], organization_id: Optional[ for library in self._project_manager.get_project_libraries(project)] projects_paths = sorted(projects_paths) - if len(projects_paths) > 1: - parallel = Parallel(n_jobs=max(1, multiprocessing.cpu_count() - 1), prefer="threads") - pushed_projects = parallel( - delayed(self._process_push_project)(index, project, organization_id, len(projects_paths)) - for index, project in enumerate(projects_paths, start=1)) - pushed_projects = {path: project - for path, project in zip(projects_paths, pushed_projects) if project is not None} - else: - path = projects_paths[0] - project = self._process_push_project(1, path, organization_id, len(projects_paths)) - pushed_projects = {} - if project is not None: - pushed_projects[path] = project + pushed_projects = {} + for index, path in enumerate(projects_paths, start=1): + relative_path = path.relative_to(Path.cwd()) + try: + self._logger.info(f"[{index}/{len(projects_paths)}] Pushing '{relative_path}'") + pushed_projects[path] = self._push_project(path, organization_id) + except Exception as ex: + self._logger.debug(traceback.format_exc().strip()) + if self._last_file is not None: + self._logger.warn(f"Cannot push '{relative_path}' (failed on {self._last_file}): {ex}") + else: + self._logger.warn(f"Cannot push '{relative_path}': {ex}") self._update_cloud_library_references(pushed_projects) - def _update_cloud_project_references(self, path: Path, project: QCProject) -> None: - local_libraries_cloud_ids = self._get_local_libraries_cloud_ids(path) - self._add_new_libraries(project, local_libraries_cloud_ids) - self._remove_outdated_libraries(project, local_libraries_cloud_ids) - def _update_cloud_library_references(self, projects: Dict[Path, QCProject]) -> None: - if len(projects) == 0: - return - - if len(projects) > 1: - parallel = Parallel(n_jobs=max(1, multiprocessing.cpu_count() - 1), prefer="threads") - parallel(delayed(self._update_cloud_project_references)(path, project) - for path, project in projects.items()) - else: - path, project = list(projects.items())[0] - self._update_cloud_project_references(path, project) + for path, project in projects.items(): + local_libraries_cloud_ids = self._get_local_libraries_cloud_ids(path) + self._add_new_libraries(project, local_libraries_cloud_ids) + self._remove_outdated_libraries(project, local_libraries_cloud_ids) def _get_local_libraries_cloud_ids(self, project_dir: Path) -> List[int]: project_config = self._project_config_manager.get_project_config(project_dir) @@ -198,38 +183,6 @@ def _push_project(self, project: Path, organization_id: Optional[str]) -> QCProj return cloud_project - def _push_file(self, - local_file_path: Path, - local_file_name: str, - cloud_files: List[QCFullFile], - cloud_project: QCProject) -> None: - try: - if "bin/" in local_file_name or "obj/" in local_file_name or ".ipynb_checkpoints/" in local_file_name: - return - - file_content = local_file_path.read_text(encoding="utf-8") - cloud_file = next(iter([f for f in cloud_files if f.name == local_file_name]), None) - - if cloud_file is None: - new_file = self._api_client.files.create(cloud_project.projectId, local_file_name, file_content) - self._project_manager.update_last_modified_time(local_file_path, new_file.modified) - self._logger.info(f"Successfully created cloud file '{cloud_project.name}/{local_file_name}'") - elif cloud_file.content.strip() != file_content.strip(): - new_file = self._api_client.files.update(cloud_project.projectId, local_file_name, file_content) - self._project_manager.update_last_modified_time(local_file_path, new_file.modified) - self._logger.info(f"Successfully updated cloud file '{cloud_project.name}/{local_file_name}'") - except Exception as e: - self._last_file = local_file_path - raise e - - def _remove_file(self, file: QCFullFile, cloud_project: QCProject) -> None: - try: - self._api_client.files.delete(cloud_project.projectId, file.name) - self._logger.info(f"Successfully removed cloud file '{cloud_project.name}/{file.name}'") - except Exception as e: - self._last_file = Path(file.name) - raise e - def _push_files(self, project: Path, cloud_project: QCProject) -> None: """Pushes the files of a local project to the cloud. @@ -240,15 +193,32 @@ def _push_files(self, project: Path, cloud_project: QCProject) -> None: local_files = self._project_manager.get_source_files(project) local_file_names = [local_file.relative_to(project).as_posix() for local_file in local_files] - with Parallel(n_jobs=max(1, multiprocessing.cpu_count() - 1), prefer="threads") as parallel: - parallel(delayed(self._push_file)(local_file, file_name, cloud_files, cloud_project) - for local_file, file_name in zip(local_files, local_file_names)) + for local_file, file_name in zip(local_files, local_file_names): + self._last_file = local_file + + if "bin/" in file_name or "obj/" in file_name or ".ipynb_checkpoints/" in file_name: + return + + file_content = local_file.read_text(encoding="utf-8") + cloud_file = next(iter([f for f in cloud_files if f.name == file_name]), None) - # Delete locally removed files in cloud - files_to_remove = [cloud_file for cloud_file in cloud_files - if (not cloud_file.isLibrary and - not any(local_file_name == cloud_file.name for local_file_name in local_file_names))] - parallel(delayed(self._remove_file)(file, cloud_project) for file in files_to_remove) + if cloud_file is None: + new_file = self._api_client.files.create(cloud_project.projectId, file_name, file_content) + self._project_manager.update_last_modified_time(local_file, new_file.modified) + self._logger.info(f"Successfully created cloud file '{cloud_project.name}/{file_name}'") + elif cloud_file.content.strip() != file_content.strip(): + new_file = self._api_client.files.update(cloud_project.projectId, file_name, file_content) + self._project_manager.update_last_modified_time(local_file, new_file.modified) + self._logger.info(f"Successfully updated cloud file '{cloud_project.name}/{file_name}'") + + # Delete locally removed files in cloud + files_to_remove = [cloud_file for cloud_file in cloud_files + if (not cloud_file.isLibrary and + not any(local_file_name == cloud_file.name for local_file_name in local_file_names))] + for file in files_to_remove: + self._last_file = Path(file.name) + self._api_client.files.delete(cloud_project.projectId, file.name) + self._logger.info(f"Successfully removed cloud file '{cloud_project.name}/{file.name}'") self._last_file = None diff --git a/tests/commands/cloud/test_push.py b/tests/commands/cloud/test_push.py index 96f90003..96ea9892 100644 --- a/tests/commands/cloud/test_push.py +++ b/tests/commands/cloud/test_push.py @@ -112,7 +112,7 @@ def test_cloud_push_removes_locally_removed_files_in_cloud() -> None: create_fake_lean_cli_directory() client = mock.Mock() - fake_cloud_files = [QCFullFile(name="removed_file.py", content="", modified=datetime.now(), isLibrary=False)] + fake_cloud_files = [QCFullFile(name="removed_file.py", content="SomeContent", modified=datetime.now(), isLibrary=False)] client.files.get_all = mock.MagicMock(return_value=fake_cloud_files) client.files.delete = mock.Mock() client.lean.environments = mock.MagicMock(return_value=create_lean_environments()) diff --git a/tests/components/util/test_push_manager.py b/tests/components/util/test_push_manager.py index eaa1030f..7d003d09 100644 --- a/tests/components/util/test_push_manager.py +++ b/tests/components/util/test_push_manager.py @@ -59,10 +59,16 @@ def test_push_projects_pushes_libraries_referenced_by_the_projects() -> None: create_api_project(project_id, project_path.name), ] api_client = mock.Mock() - api_client.projects.create = mock.MagicMock(side_effect=cloud_projects) - api_client.projects.get = mock.MagicMock(side_effect=cloud_projects) + + def create_project(proj_name, *args): + return next(iter(p for p in cloud_projects if p.name == proj_name)) + + def get_project(proj_id, *args): + return next(iter(p for p in cloud_projects if p.projectId == proj_id)) + + api_client.projects.create = mock.MagicMock(side_effect=create_project) + api_client.projects.get = mock.MagicMock(side_effect=get_project) api_client.files.get_all = mock.MagicMock(return_value=[]) - api_client.lean.environments = mock.MagicMock(return_value=create_lean_environments()) api_client.projects.add_library = mock.Mock() api_client.projects.delete_library = mock.Mock() @@ -72,7 +78,8 @@ def test_push_projects_pushes_libraries_referenced_by_the_projects() -> None: project_manager.get_project_libraries.assert_called_once_with(project_path) project_manager.get_source_files.assert_has_calls([mock.call(csharp_library_path), mock.call(python_library_path), - mock.call(project_path)]) + mock.call(project_path)], + any_order=True) api_client.projects.create.assert_has_calls([ mock.call(csharp_library_path.relative_to(lean_cli_root_dir).as_posix(), QCLanguage.CSharp, None), @@ -177,7 +184,8 @@ def projects_get_side_effect(proj_id: int) -> QCProject: push_manager.push_projects([project_path]) project_manager.get_project_libraries.assert_called_once_with(project_path) - project_manager.get_source_files.assert_has_calls([mock.call(python_library_path), mock.call(project_path)]) + project_manager.get_source_files.assert_has_calls([mock.call(python_library_path), mock.call(project_path)], + any_order=True) api_client.projects.create.assert_called_once_with(python_library_path.relative_to(lean_cli_root_dir).as_posix(), QCLanguage.Python, From 3ea97a94b048070d3431a2a2a7c54c341e48a1fa Mon Sep 17 00:00:00 2001 From: Jhonathan Abreu Date: Wed, 26 Oct 2022 16:33:17 -0400 Subject: [PATCH 4/4] Remove another project.get call to improve push performance --- lean/components/cloud/push_manager.py | 38 +++++---------------------- tests/commands/cloud/test_push.py | 4 +-- 2 files changed, 8 insertions(+), 34 deletions(-) diff --git a/lean/components/cloud/push_manager.py b/lean/components/cloud/push_manager.py index a360b6dc..60cf4865 100644 --- a/lean/components/cloud/push_manager.py +++ b/lean/components/cloud/push_manager.py @@ -45,24 +45,6 @@ def __init__(self, self._last_file = None self._cloud_projects = [] - def _process_push_project(self, - index: int, - project: Path, - organization_id: Optional[str], - total_projects: int) -> Optional[QCProject]: - relative_path = project.relative_to(Path.cwd()) - try: - self._logger.info(f"[{index}/{total_projects}] Pushing '{relative_path}'") - return self._push_project(project, organization_id) - except Exception as ex: - self._logger.debug(traceback.format_exc().strip()) - if self._last_file is not None: - self._logger.warn(f"Cannot push '{relative_path}' (failed on {self._last_file}): {ex}") - else: - self._logger.warn(f"Cannot push '{relative_path}': {ex}") - - return None - def push_projects(self, projects_to_push: List[Path], organization_id: Optional[str] = None) -> None: """Pushes the given projects from the local drive to the cloud. @@ -159,22 +141,16 @@ def _push_project(self, project: Path, organization_id: Optional[str]) -> QCProj cloud_project = self._get_cloud_project(cloud_id) else: # Project has invalid cloud id or no cloud id at all, create new cloud project - new_project = self._api_client.projects.create(project_name, - QCLanguage[project_config.get("algorithm-language")], - organization_id) + cloud_project = self._api_client.projects.create(project_name, + QCLanguage[project_config.get("algorithm-language")], + organization_id) + self._cloud_projects.append(cloud_project) + project_config.set("cloud-id", cloud_project.projectId) + project_config.set("organization-id", cloud_project.organizationId) organization_message_part = f" in organization '{organization_id}'" if organization_id is not None else "" self._logger.info(f"Successfully created cloud project '{project_name}'{organization_message_part}") - project_config.set("cloud-id", new_project.projectId) - - # We need to retrieve the created project again to get all project details - cloud_project = self._api_client.projects.get(new_project.projectId) - self._cloud_projects.append(cloud_project) - - # set organization-id in project config - project_config.set("organization-id", cloud_project.organizationId) - # Push local files to cloud self._push_files(project, cloud_project) @@ -197,7 +173,7 @@ def _push_files(self, project: Path, cloud_project: QCProject) -> None: self._last_file = local_file if "bin/" in file_name or "obj/" in file_name or ".ipynb_checkpoints/" in file_name: - return + continue file_content = local_file.read_text(encoding="utf-8") cloud_file = next(iter([f for f in cloud_files if f.name == file_name]), None) diff --git a/tests/commands/cloud/test_push.py b/tests/commands/cloud/test_push.py index 96ea9892..0d8aaa7a 100644 --- a/tests/commands/cloud/test_push.py +++ b/tests/commands/cloud/test_push.py @@ -153,14 +153,12 @@ def test_cloud_push_creates_project_with_optional_organization_id(organization_i path = "Python Project" cloud_project = create_api_project(1, path) - with mock.patch.object(ProjectClient, 'create', return_value=create_api_project(1, path)) as mock_create_project,\ - mock.patch.object(ProjectClient, 'get', return_value=cloud_project) as mock_get_project: + with mock.patch.object(ProjectClient, 'create', return_value=create_api_project(1, path)) as mock_create_project: organization_id_option = ["--organization-id", organization_id] if organization_id is not None else [] result = CliRunner().invoke(lean, ["cloud", "push", "--project", path, *organization_id_option]) assert result.exit_code == 0 - mock_get_project.assert_called_once_with(cloud_project.projectId) mock_create_project.assert_called_once_with(path, QCLanguage.Python, organization_id)