Skip to content

Commit

Permalink
Add config option to define database update frequency (#540)
Browse files Browse the repository at this point in the history
* First attempt to solve the bug

* Address suggestions

* Fix failing unit test

* Few tweaks

* Nit changes

* Address requested changes

* Address requested changes

* Nit change

* Nit change

* Address review

---------

Co-authored-by: Martin Molinero <[email protected]>
  • Loading branch information
Marinovsky and Martin-Molinero authored Feb 5, 2025
1 parent 6c6fa92 commit ba8da2f
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 5 deletions.
49 changes: 46 additions & 3 deletions lean/components/cloud/data_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,38 @@ def _store_local_file(file_content: bytes, file_path: Path):
f.write(file_content)


def parse_timedelta(database_update_frequency: str):
    """Parse the value of the ``database-update-frequency`` option into a timedelta.

    The supported formats are ``DD.HH:MM:SS`` and, for frequencies shorter than a day,
    ``HH:MM:SS``. Components do not need to be zero-padded.

    :param database_update_frequency: the raw value of the config option
    :return: the parsed timedelta, or None when the value is a non-date marker
        (no digits at all, or neither '.' nor ':' present), e.g. ``-``, ``_`` or ``...``
    :raises ValueError: if the value contains digits and separators but is not in a supported format
    """
    # A value without a single digit cannot encode a duration. Treating it as the
    # "disabled" marker also covers separator-only values such as "..." or ":",
    # which would otherwise reach int() below and raise.
    if not any(char.isdigit() for char in database_update_frequency):
        return None
    if '.' not in database_update_frequency and ':' not in database_update_frequency:
        return None

    if database_update_frequency.count(".") == 1:  # Ideally, the format is DD.HH:MM:SS
        days_component, time_component = database_update_frequency.split(".")
        days = int(days_component)
    else:  # However, the format can also be HH:MM:SS
        days = 0
        time_component = database_update_frequency

    # Unpacking fails with ValueError when there are not exactly three components.
    hours, minutes, seconds = map(int, time_component.split(":"))
    return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)


class DataDownloader:
"""The DataDownloader is responsible for downloading data from QuantConnect Datasets."""

def __init__(self, logger: Logger, api_client: APIClient, lean_config_manager: LeanConfigManager):
def __init__(self,
             logger: Logger,
             api_client: APIClient,
             lean_config_manager: LeanConfigManager,
             database_update_frequency: "str | None" = None):
    """Creates a new DataDownloader instance.

    :param logger: the logger to use to log messages with
    :param api_client: the APIClient instance to use when communicating with the QuantConnect API
    :param lean_config_manager: the LeanConfigManager instance to retrieve the data directory from
    :param database_update_frequency: the value of the config option database-update-frequency,
        or None when the user has not set it (update_database_files then falls back to one day)
    """
    self._logger = logger
    self._api_client = api_client
    self._lean_config_manager = lean_config_manager
    # Kept as the raw string; it is parsed each time update_database_files runs.
    self.database_update_frequency = database_update_frequency

def update_database_files(self):
"""Will update lean data folder database files if required
Expand All @@ -49,9 +68,21 @@ def update_database_files(self):
now = datetime.now()
config = self._lean_config_manager.get_lean_config()
last_update = config["file-database-last-update"] if "file-database-last-update" in config else ''
if not last_update or now - datetime.strptime(last_update, '%m/%d/%Y') > timedelta(days=1):

# The last update date can be in '%m/%d/%Y'(old format) or '%m/%d/%Y %H:%M:%S'(new format)
last_update = self.parse_last_update_date(last_update)
if self.database_update_frequency is None: # The user has not set this parameter yet
self.database_update_frequency = "1.00:00:00"

frequency = parse_timedelta(self.database_update_frequency)
if not frequency:
self._logger.debug(f"Skipping database-update-frequency, frequency is:"
f" {str(self.database_update_frequency)}")
return
self._logger.debug(f"database-update-frequency is: {str(frequency)}")
if not last_update or now - last_update > frequency:
data_dir = self._lean_config_manager.get_data_directory()
self._lean_config_manager.set_properties({"file-database-last-update": now.strftime('%m/%d/%Y')})
self._lean_config_manager.set_properties({"file-database-last-update": now.strftime('%m/%d/%Y %H:%M:%S')})

_store_local_file(self._api_client.data.download_public_file(
"https://raw.githubusercontent.com/QuantConnect/Lean/master/Data/symbol-properties/symbol-properties-database.csv"),
Expand All @@ -64,6 +95,9 @@ def update_database_files(self):
pass
else:
self._logger.error(str(e))
except ValueError as e:
self._logger.debug(f"Value of config option database-update-frequency is invalid: {str(e)}. "
f"Database update will be skipped")
except Exception as e:
self._logger.error(str(e))

Expand Down Expand Up @@ -113,6 +147,15 @@ def _process_bulk(self, file: Path, destination: Path):
from os import remove
remove(file)

def parse_last_update_date(self, last_update_date: str) -> "datetime | None":
    """Parse the stored date of the last database update.

    :param last_update_date: the stored value, either in '%m/%d/%Y' (old format)
        or in '%m/%d/%Y %H:%M:%S' (new format)
    :return: the parsed datetime, or None when the value is empty or matches neither format
    """
    # Nothing stored yet (empty or null value): there is no previous update to report.
    if not last_update_date:
        return None

    for fmt in ('%m/%d/%Y', '%m/%d/%Y %H:%M:%S'):
        try:
            return datetime.strptime(last_update_date, fmt)
        except ValueError:
            continue

    # Neither format matched; callers treat a falsy result as "never updated".
    return None

def remove_suffix(self,input_string, suffix):
if suffix and input_string.endswith(suffix):
return input_string[:-len(suffix)]
Expand Down
12 changes: 11 additions & 1 deletion lean/components/config/cli_config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,23 @@ def __init__(self, general_storage: Storage, credentials_storage: Storage) -> No
f"The Docker image used when running the research environment ({DEFAULT_RESEARCH_IMAGE} if not set).",
False,
general_storage)
self.database_update_frequency = Option("database-update-frequency",
"How often the databases are updated. "
"The format is DD.HH:MM:SS. If the frequency "
"is less than a day can just be HH:MM:SS. "
"Update can be disabled by setting this option to a non-date"
" value (-, _, ..., etc.). "
"If unset, default value is 1 day",
False,
general_storage)

self.all_options = [
self.user_id,
self.api_token,
self.default_language,
self.engine_image,
self.research_image
self.research_image,
self.database_update_frequency
]

def get_option_by_key(self, key: str) -> Option:
Expand Down
5 changes: 4 additions & 1 deletion lean/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,10 @@ def initialize(self,
self.project_manager,
self.project_config_manager,
self.organization_manager)
self.data_downloader = DataDownloader(self.logger, self.api_client, self.lean_config_manager)
self.data_downloader = DataDownloader(self.logger,
self.api_client,
self.lean_config_manager,
self.cli_config_manager.database_update_frequency.get_value())
self.cloud_project_manager = CloudProjectManager(self.api_client,
self.project_config_manager,
self.pull_manager,
Expand Down
45 changes: 45 additions & 0 deletions tests/commands/config/test_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
from click.testing import CliRunner

from lean.commands import lean
from lean.components.cloud.data_downloader import parse_timedelta
from lean.container import container
from datetime import timedelta
import pytest


def test_config_set_updates_the_value_of_the_option() -> None:
Expand All @@ -25,6 +28,48 @@ def test_config_set_updates_the_value_of_the_option() -> None:
assert container.cli_config_manager.user_id.get_value() == "12345"


@pytest.mark.parametrize("raw_frequency, expected", [
    # Non-date values disable the update
    ("_", None),
    ("=", None),
    # DD.HH:MM:SS and HH:MM:SS without zero padding
    ("1.0:0:0", timedelta(days=1)),
    ("0.1:0:0", timedelta(hours=1)),
    ("0.0:1:0", timedelta(minutes=1)),
    ("0.0:0:1", timedelta(seconds=1)),
    ("1:0:0", timedelta(hours=1)),
    ("0:1:0", timedelta(minutes=1)),
    ("0:0:1", timedelta(seconds=1)),
    # Fully zero-padded
    ("01.00:00:00", timedelta(days=1)),
    ("00.01:00:00", timedelta(hours=1)),
    ("00.00:01:00", timedelta(minutes=1)),
    ("00.00:00:01", timedelta(seconds=1)),
    ("01:00:00", timedelta(hours=1)),
    ("00:01:00", timedelta(minutes=1)),
    ("00:00:01", timedelta(seconds=1)),
    # Mixed padding
    ("1.00:00:00", timedelta(days=1)),
    ("00.1:00:00", timedelta(hours=1)),
    ("00.00:1:00", timedelta(minutes=1)),
    ("00.00:00:1", timedelta(seconds=1)),
    ("1:00:00", timedelta(hours=1)),
    ("00:1:00", timedelta(minutes=1)),
    ("00:00:1", timedelta(seconds=1)),
    # Several components set at once
    ("00.1:00:1", timedelta(hours=1, seconds=1)),
    ("00.1:1:1", timedelta(hours=1, minutes=1, seconds=1)),
    ("1:00:1", timedelta(hours=1, seconds=1)),
    ("1:1:1", timedelta(hours=1, minutes=1, seconds=1)),
    ("1.1:1:1", timedelta(days=1, hours=1, minutes=1, seconds=1)),
    ("10.20:30:40", timedelta(days=10, hours=20, minutes=30, seconds=40)),
    ("30.23:59:59", timedelta(days=30, hours=23, minutes=59, seconds=59)),
    ("60.23:59:59", timedelta(days=60, hours=23, minutes=59, seconds=59)),
    ("20:30:40", timedelta(hours=20, minutes=30, seconds=40)),
    ("00:59:59", timedelta(minutes=59, seconds=59)),
    ("00:00:59", timedelta(seconds=59)),
])
def test_set_database_update_frequency_works_with_different_timespans(raw_frequency: str, expected: timedelta) -> None:
    """Setting database-update-frequency succeeds and the raw value parses to the expected timedelta."""
    result = CliRunner().invoke(lean, ["config", "set", "database-update-frequency", raw_frequency])

    assert result.exit_code == 0

    frequency = parse_timedelta(raw_frequency)
    assert frequency == expected


def test_config_set_aborts_when_no_option_with_given_key_exists() -> None:
result = CliRunner().invoke(lean, ["config", "set", "this-option-does-not-exist", "value"])

Expand Down

0 comments on commit ba8da2f

Please sign in to comment.