def parse_timedelta(database_update_frequency: str):
    """Converts a DD.HH:MM:SS or HH:MM:SS string into a timedelta.

    :param database_update_frequency: the raw timespan string; the days component is optional,
        so both "1.00:00:00" and "12:30:00" are accepted
    :return: the parsed timedelta, or None when the value is empty or contains neither '.' nor ':'
        (i.e. it is not a timespan at all)
    :raises ValueError: when the value contains a separator but is not a valid
        DD.HH:MM:SS or HH:MM:SS timespan
    """
    # An unset or empty value cannot be a timespan; treat it like any other non-timespan value
    if not database_update_frequency:
        return None
    if '.' not in database_update_frequency and ':' not in database_update_frequency:
        return None

    if database_update_frequency.count(".") == 1:
        # Ideally, the format is DD.HH:MM:SS
        days_component, _, time_component = database_update_frequency.partition(".")
    else:
        # However, the format can also be HH:MM:SS
        days_component, time_component = "0", database_update_frequency

    time_parts = time_component.split(":")
    if len(time_parts) != 3:
        # Fail with a readable message instead of a bare tuple-unpacking error
        raise ValueError(f"expected a DD.HH:MM:SS or HH:MM:SS timespan, got '{database_update_frequency}'")

    days = int(days_component)
    hours, minutes, seconds = map(int, time_parts)
    return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)
:param logger: the logger to use to log messages with :param api_client: the APIClient instance to use when communicating with the QuantConnect API :param lean_config_manager: the LeanConfigManager instance to retrieve the data directory from + :param database_update_frequency: the value of the config option database-update-frequency """ self._logger = logger self._api_client = api_client self._lean_config_manager = lean_config_manager + self.database_update_frequency = database_update_frequency def update_database_files(self): """Will update lean data folder database files if required @@ -49,9 +68,21 @@ def update_database_files(self): now = datetime.now() config = self._lean_config_manager.get_lean_config() last_update = config["file-database-last-update"] if "file-database-last-update" in config else '' - if not last_update or now - datetime.strptime(last_update, '%m/%d/%Y') > timedelta(days=1): + + # The last update date can be in '%m/%d/%Y'(old format) or '%m/%d/%Y %H:%M:%S'(new format) + last_update = self.parse_last_update_date(last_update) + if self.database_update_frequency is None: # The user has not set this parameter yet + self.database_update_frequency = "1.00:00:00" + + frequency = parse_timedelta(self.database_update_frequency) + if not frequency: + self._logger.debug(f"Skipping database-update-frequency, frequency is:" + f" {str(self.database_update_frequency)}") + return + self._logger.debug(f"database-update-frequency is: {str(frequency)}") + if not last_update or now - last_update > frequency: data_dir = self._lean_config_manager.get_data_directory() - self._lean_config_manager.set_properties({"file-database-last-update": now.strftime('%m/%d/%Y')}) + self._lean_config_manager.set_properties({"file-database-last-update": now.strftime('%m/%d/%Y %H:%M:%S')}) _store_local_file(self._api_client.data.download_public_file( "https://raw.githubusercontent.com/QuantConnect/Lean/master/Data/symbol-properties/symbol-properties-database.csv"), @@ -64,6 +95,9 
def parse_last_update_date(self, last_update_date: str) -> "datetime | None":
    """Parses a stored last-update date string into a datetime.

    :param last_update_date: the stored date string, either in '%m/%d/%Y' (old format)
        or '%m/%d/%Y %H:%M:%S' (new format)
    :return: the parsed datetime, or None when the value is empty or matches neither format
    """
    # The return annotation is quoted so it is never evaluated at runtime
    if not last_update_date:
        # Nothing stored yet, so there is no date to parse
        return None

    for fmt in ('%m/%d/%Y', '%m/%d/%Y %H:%M:%S'):
        try:
            return datetime.strptime(last_update_date, fmt)
        except ValueError:
            continue

    # Neither format matched: say so explicitly instead of falling off the end of the function
    return None
" + "If unset, default value is 1 day", + False, + general_storage) self.all_options = [ self.user_id, self.api_token, self.default_language, self.engine_image, - self.research_image + self.research_image, + self.database_update_frequency ] def get_option_by_key(self, key: str) -> Option: diff --git a/lean/container.py b/lean/container.py index 234f126d..e2cc918f 100644 --- a/lean/container.py +++ b/lean/container.py @@ -141,7 +141,10 @@ def initialize(self, self.project_manager, self.project_config_manager, self.organization_manager) - self.data_downloader = DataDownloader(self.logger, self.api_client, self.lean_config_manager) + self.data_downloader = DataDownloader(self.logger, + self.api_client, + self.lean_config_manager, + self.cli_config_manager.database_update_frequency.get_value()) self.cloud_project_manager = CloudProjectManager(self.api_client, self.project_config_manager, self.pull_manager, diff --git a/tests/commands/config/test_set.py b/tests/commands/config/test_set.py index 8ff00f94..227fb2aa 100644 --- a/tests/commands/config/test_set.py +++ b/tests/commands/config/test_set.py @@ -14,7 +14,10 @@ from click.testing import CliRunner from lean.commands import lean +from lean.components.cloud.data_downloader import parse_timedelta from lean.container import container +from datetime import timedelta +import pytest def test_config_set_updates_the_value_of_the_option() -> None: @@ -25,6 +28,48 @@ def test_config_set_updates_the_value_of_the_option() -> None: assert container.cli_config_manager.user_id.get_value() == "12345" +@pytest.mark.parametrize("raw_frequency, expected", [("_", None), ("=", None), ("_", None), + ("1.0:0:0", timedelta(days=1)), + ("0.1:0:0", timedelta(hours=1)), + ("0.0:1:0", timedelta(minutes=1)), + ("0.0:0:1", timedelta(seconds=1)), + ("1:0:0", timedelta(hours=1)), + ("0:1:0", timedelta(minutes=1)), + ("0:0:1", timedelta(seconds=1)), + ("01.00:00:00", timedelta(days=1)), + ("00.01:00:00", timedelta(hours=1)), + ("00.00:01:00", 
timedelta(minutes=1)), + ("00.00:00:01", timedelta(seconds=1)), + ("01:00:00", timedelta(hours=1)), + ("00:01:00", timedelta(minutes=1)), + ("00:00:01", timedelta(seconds=1)), + ("1.00:00:00", timedelta(days=1)), + ("00.1:00:00", timedelta(hours=1)), + ("00.00:1:00", timedelta(minutes=1)), + ("00.00:00:1", timedelta(seconds=1)), + ("1:00:00", timedelta(hours=1)), + ("00:1:00", timedelta(minutes=1)), + ("00:00:1", timedelta(seconds=1)), + ("00.1:00:1", timedelta(hours=1, seconds=1)), + ("00.1:1:1", timedelta(hours=1, minutes=1, seconds=1)), + ("1:00:1", timedelta(hours=1, seconds=1)), + ("1:1:1", timedelta(hours=1, minutes=1, seconds=1)), + ("1.1:1:1", timedelta(days=1, hours=1, minutes=1, seconds=1)), + ("10.20:30:40", timedelta(days=10, hours=20, minutes=30, seconds=40)), + ("30.23:59:59", timedelta(days=30, hours=23, minutes=59, seconds=59)), + ("60.23:59:59", timedelta(days=60, hours=23, minutes=59, seconds=59)), + ("20:30:40", timedelta(hours=20, minutes=30, seconds=40)), + ("00:59:59", timedelta(minutes=59, seconds=59)), + ("00:00:59", timedelta(seconds=59))]) +def test_set_database_update_frequency_works_with_different_timespans(raw_frequency: str, expected: timedelta) -> None: + result = CliRunner().invoke(lean, ["config", "set", "database-update-frequency", raw_frequency]) + + assert result.exit_code == 0 + + frequency = parse_timedelta(raw_frequency) + assert frequency == expected + + def test_config_set_aborts_when_no_option_with_given_key_exists() -> None: result = CliRunner().invoke(lean, ["config", "set", "this-option-does-not-exist", "value"])