From cf20a5394a3e769cbe753b99a21c97d39ae33ad5 Mon Sep 17 00:00:00 2001 From: Roman Yavnikov <45608740+Romazes@users.noreply.github.com> Date: Wed, 1 May 2024 18:34:38 +0300 Subject: [PATCH] Add new `data download` command to lean-cli for data retrieval (#447) * feat: download file in json format * feat: download no-/interactive with new command data-provider-historical * feat: constant data types * feat: new property in JsonModule * feat: validate of no-/interactive in download command * feat: constant data folder path * feat: entry point to new download project * feat: working_dir in docker remove: not used path mount feat: const for download config key * feat: get full title with namespace remove: extra const * feat: get basic docker config without path refactor: param target_path in set up csharp options feat: run download project * refactor: split up get_basic_docker_config on small parts * revert: test_downloader file * feat: support additional config in command line when user run `data download` * rename: remove historical prefix in command names * feat: download run test * refactor: switch off interactive mode for `data-provider` param * remove: extra resolution * refactor: download json return like Dict[str, Any] * refactor: no-/interactive data download params * feat: test mock REST get request * refactor: put local methods above main method rename: local methods * feat: new config param options `download` * remove: extra log.info * fix:bug: if provider has more than 2 download providers * feat: additional test download process refactor: encapsulate data download data runner in tests * fix: wrong condition tabulation * refactor: use provider_config live internal variable * Revert "refactor: use provider_config live internal variable" This reverts commit 3c9b704423f43099fc87c0951856bddcbc81ce20. * fix: test initialization of new property in mock config file * feat: add market param rename: input command market/security-type refactor: add prompt msg helper remove: prompt use selection method * refactor:test: data download * feat: create config file to run download provider project + mount rename: data_provider => data_downloader remove: destination-dir args * feat: mock get_organization refactor: switch off interactive for data providers feat: description of some param get_basic_docker_config_without_algo * refactor: styling * fix: test_dataset_requirements * refactor: input key's DataProvider interactive=False * rename: help message Select a historical Provider rename: variable to not confused with data_type (Trade, Quote, etc.) style: additional spaces and new lines * feat: interactive param test * feat: new class QCDataType feat: new Security types in QCSecurityType feat: get_all_members for QC's classes * refactor: use QC default types remove: extra constants fix: tests * remove: extra argument data-download-name in pass to exe dll * refactor: --no-update flag in local using * fix: back compatibility assert test * rename: DownloaderDataProvider.dll * feat: update readme * feat: description of data download command * fix: mounting path of Data Providers' config * feat: if lean_config without environment use lean_config in mount_paths * feat: info to debug log in mount * feat: user friendly prompt end-date * fix: get all config for data downloader refactor: get_complete_lean_config without environment and algo_file remove: test, cuz we have used interactive input for user * refactor: specifications_url (like property) feat: get access to inner support of data sources test: refactor by new format * rename: module specification properties --- README.md | 108 +++++- lean/commands/data/download.py | 294 +++++++++++++-- lean/components/api/data_client.py | 10 +- lean/components/config/lean_config_manager.py | 54 +-- lean/components/docker/lean_runner.py | 354 +++++++++++------- lean/models/api.py | 54 +++ lean/models/click_options.py | 2 + lean/models/data.py | 5 + lean/models/json_module.py | 5 + tests/commands/data/test_download.py | 201 +++++++++- 10 files changed, 885 insertions(+), 202 deletions(-) diff --git a/README.md b/README.md index 4d64a20b..acb08d97 100644 --- a/README.md +++ b/README.md @@ -781,12 +781,26 @@ _See code: [lean/commands/create_project.py](lean/commands/create_project.py)_ ### `lean data download` -Purchase and download data from QuantConnect Datasets. +Purchase and download data directly from QuantConnect or download from Support Data Providers ``` Usage: lean data download [OPTIONS] - Purchase and download data from QuantConnect Datasets. + Purchase and download data directly from QuantConnect or download from Support Data Providers + + 1. Acquire Data from QuantConnect Datasets: Purchase and seamlessly download data directly from QuantConnect. + + 2. Streamlined Access from Support Data Providers: + + - Choose your preferred historical data provider. + + - Initiate hassle-free downloads from our supported providers. + + We have 2 options: + + - interactive (follow instruction in lean-cli) + + - no interactive (write arguments in command line) An interactive wizard will show to walk you through the process of selecting data, accepting the CLI API Access and Data Agreement and payment. After this wizard the selected data will be downloaded automatically. @@ -799,12 +813,90 @@ Usage: lean data download [OPTIONS] https://www.quantconnect.com/datasets Options: - --dataset TEXT The name of the dataset to download non-interactively - --overwrite Overwrite existing local data - -y, --yes Automatically confirm payment confirmation prompts - --lean-config FILE The Lean configuration file that should be used (defaults to the nearest lean.json) - --verbose Enable debug logging - --help Show this message and exit. + --data-provider-historical [Interactive Brokers|Oanda|Bitfinex|Coinbase Advanced Trade|Binance|Kraken|IQFeed|Polygon|FactSet|IEX|AlphaVantage|CoinApi|ThetaData|QuantConnect|Local|Terminal Link|Bybit] + The name of the downloader data provider. + --ib-user-name TEXT Your Interactive Brokers username + --ib-account TEXT Your Interactive Brokers account id + --ib-password TEXT Your Interactive Brokers password + --ib-weekly-restart-utc-time TEXT + Weekly restart UTC time (hh:mm:ss). Each week on Sunday your algorithm is restarted at + this time, and will require 2FA verification. This is required by Interactive Brokers. + Use this option explicitly to override the default value. + --oanda-account-id TEXT Your OANDA account id + --oanda-access-token TEXT Your OANDA API token + --oanda-environment [Practice|Trade] + The environment to run in, Practice for fxTrade Practice, Trade for fxTrade + --bitfinex-api-key TEXT Your Bitfinex API key + --bitfinex-api-secret TEXT Your Bitfinex API secret + --coinbase-api-key TEXT Your Coinbase Advanced Trade API key + --coinbase-api-secret TEXT Your Coinbase Advanced Trade API secret + --binance-exchange-name [Binance|BinanceUS|Binance-USDM-Futures|Binance-COIN-Futures] + Binance exchange name [Binance, BinanceUS, Binance-USDM-Futures, Binance-COIN-Futures] + --binance-api-key TEXT Your Binance API key + --binanceus-api-key TEXT Your Binance API key + --binance-api-secret TEXT Your Binance API secret + --binanceus-api-secret TEXT Your Binance API secret + --kraken-api-key TEXT Your Kraken API key + --kraken-api-secret TEXT Your Kraken API secret + --kraken-verification-tier [Starter|Intermediate|Pro] + Your Kraken Verification Tier + --iqfeed-iqconnect TEXT The path to the IQConnect binary + --iqfeed-username TEXT Your IQFeed username + --iqfeed-password TEXT Your IQFeed password + --iqfeed-version TEXT The product version of your IQFeed developer account + --iqfeed-host TEXT The IQFeed host address + --polygon-api-key TEXT Your Polygon.io API Key + --factset-auth-config-file FILE + The path to the FactSet authentication configuration file + --iex-cloud-api-key TEXT Your iexcloud.io API token publishable key + --iex-price-plan [Launch|Grow|Enterprise] + Your IEX Cloud Price plan + --alpha-vantage-api-key TEXT Your Alpha Vantage Api Key + --alpha-vantage-price-plan [Free|Plan30|Plan75|Plan150|Plan300|Plan600|Plan1200] + Your Alpha Vantage Premium API Key plan + --coinapi-api-key TEXT Your coinapi.io Api Key + --coinapi-product [Free|Startup|Streamer|Professional|Enterprise] + CoinApi pricing plan (https://www.coinapi.io/market-data-api/pricing) + --thetadata-ws-url TEXT The ThetaData host address + --thetadata-rest-url TEXT The ThetaData host address + --thetadata-subscription-plan [Free|Value|Standard|Pro] + Your ThetaData subscription price plan + --terminal-link-connection-type [DAPI|SAPI] + Terminal Link Connection Type [DAPI, SAPI] + --terminal-link-server-auth-id TEXT + The Auth ID of the TerminalLink server + --terminal-link-environment [Production|Beta] + The environment to run in + --terminal-link-server-host TEXT + The host of the TerminalLink server + --terminal-link-server-port INTEGER + The port of the TerminalLink server + --terminal-link-openfigi-api-key TEXT + The Open FIGI API key to use for mapping options + --bybit-api-key TEXT Your Bybit API key + --bybit-api-secret TEXT Your Bybit API secret + --bybit-vip-level [VIP0|VIP1|VIP2|VIP3|VIP4|VIP5|SupremeVIP|Pro1|Pro2|Pro3|Pro4|Pro5] + Your Bybit VIP Level + --dataset TEXT The name of the dataset to download non-interactively + --overwrite Overwrite existing local data + -y, --yes Automatically confirm payment confirmation prompts + --data-type [Trade|Quote|OpenInterest] + Specify the type of historical data + --resolution [Tick|Second|Minute|Hour|Daily] + Specify the resolution of the historical data + --security-type [Equity|Index|Forex|Cfd|Future|Crypto|CryptoFuture|Option|IndexOption|Commodity|FutureOption] + Specify the security type of the historical data + --market TEXT Specify the market name for tickers (e.g., 'USA', 'NYMEX', 'Binance') + --tickers TEXT Specify comma separated list of tickers to use for historical data request. + --start-date TEXT Specify the start date for the historical data request in the format yyyyMMdd. + --end-date TEXT Specify the end date for the historical data request in the format yyyyMMdd. (defaults + to today) + --image TEXT The LEAN engine image to use (defaults to quantconnect/lean:latest) + --update Pull the LEAN engine image before running the Downloader Data Provider + --no-update Use the local LEAN engine image instead of pulling the latest version + --lean-config FILE The Lean configuration file that should be used (defaults to the nearest lean.json) + --verbose Enable debug logging + --help Show this message and exit. ``` _See code: [lean/commands/data/download.py](lean/commands/data/download.py)_ diff --git a/lean/commands/data/download.py b/lean/commands/data/download.py index 59209053..bbc24a17 100644 --- a/lean/commands/data/download.py +++ b/lean/commands/data/download.py @@ -10,15 +10,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from datetime import datetime +from json import dump -from typing import Iterable, List, Optional -from click import command, option, confirm, pass_context, Context - +from docker.types import Mount +from typing import Any, Dict, Iterable, List, Optional +from click import command, option, confirm, pass_context, Context, Choice from lean.click import LeanCommand, ensure_options +from lean.components.util.json_modules_handler import config_build_for_name +from lean.constants import DEFAULT_ENGINE_IMAGE from lean.container import container -from lean.models.api import QCDataInformation, QCDataVendor, QCFullOrganization, QCDatasetDelivery -from lean.models.data import Dataset, DataFile, Product +from lean.models.api import QCDataInformation, QCDataVendor, QCFullOrganization, QCDatasetDelivery, QCResolution, QCSecurityType, QCDataType +from lean.models.click_options import get_configs_for_options, options_from_json +from lean.models.data import Dataset, DataFile, DatasetDateOption, DatasetTextOption, DatasetTextOptionTransform,OptionResult, Product from lean.models.logger import Option +from lean.models.cli import cli_data_downloaders _data_information: Optional[QCDataInformation] = None _presigned_terms=""" @@ -95,7 +101,6 @@ def _get_data_files(organization: QCFullOrganization, products: List[Product]) - unique_data_files = sorted(list(set(chain(*[product.get_data_files() for product in products])))) return _map_data_files_to_vendors(organization, unique_data_files) - def _display_products(organization: QCFullOrganization, products: List[Product]) -> None: """Previews a list of products in pretty tables. @@ -159,7 +164,6 @@ def _get_security_master_warn(url: str) -> str: f"You can add the subscription at https://www.quantconnect.com/datasets/{url}/pricing" ]) - def _select_products_interactive(organization: QCFullOrganization, datasets: List[Dataset], force: bool, ask_for_more_data: bool) -> List[Product]: """Asks the user for the products that should be purchased and downloaded. @@ -408,16 +412,154 @@ def _get_available_datasets(organization: QCFullOrganization) -> List[Dataset]: return available_datasets +def _get_historical_data_provider() -> str: + return container.logger.prompt_list("Select a historical data provider", [Option(id=data_downloader.get_name(), label=data_downloader.get_name()) for data_downloader in cli_data_downloaders]) + + +def _get_download_specification_from_config(data_provider_config_json: Dict[str, Any], default_param: List[str], + key_config_data: str) -> List[str]: + """ + Get parameter from data provider config JSON or return default parameters. + + Args: + - data_provider_config_json (Dict[str, Any]): Configuration JSON. + - default_param (List[str]): Default parameters. + - key_config_data (str): Key to look for in the config JSON. + + Returns: + - List[str]: List of parameters. + """ + + if data_provider_config_json and "module-specification" in data_provider_config_json: + if "download" in data_provider_config_json["module-specification"]: + return data_provider_config_json["module-specification"]["download"].get(key_config_data, default_param) + + return default_param + + +def _get_user_input_or_prompt(user_input_data: str, available_input_data: List[str], data_provider_name: str, + prompt_message_helper: str) -> str: + """ + Get user input or prompt for selection based on data types. + + Args: + - user_input_data (str): User input data. + - available_input_data (List[str]): List of available input data options. + - data_provider_name (str): Name of the data provider. + + Returns: + - str: Selected data type or prompted choice. + + Raises: + - ValueError: If user input data is not in supported data types. + """ + + if not user_input_data: + # Prompt user to select a ticker's security type + options = [Option(id=data_type, label=data_type) for data_type in available_input_data] + return container.logger.prompt_list(prompt_message_helper, options) + + elif user_input_data.lower() not in [available_data.lower() for available_data in available_input_data]: + # Raise ValueError for unsupported data type + raise ValueError( + f"The {data_provider_name} data provider does not support {user_input_data}. " + f"Please choose a supported data from: {available_input_data}." + ) -@command(cls=LeanCommand, requires_lean_config=True, allow_unknown_options=True) + return user_input_data + + +def _configure_date_option(date_value: str, option_id: str, option_label: str) -> OptionResult: + """ + Configure the date based on the provided date value, option ID, and option label. + + Args: + - date_value (str): Existing date value. + - option_id (str): Identifier for the date option. + - option_label (str): Label for the date option. + + Returns: + - str: Configured date. + """ + + date_option = DatasetDateOption(id=option_id, label=option_label, + description=f"Enter the {option_label} " + f"for the historical data request in the format YYYYMMDD.") + + if not date_value: + if option_id == "end": + return date_option.configure_interactive_with_default(datetime.today().strftime("%Y%m%d")) + else: + return date_option.configure_interactive() + + return date_option.configure_non_interactive(date_value) + + +@command(cls=LeanCommand, requires_lean_config=True, allow_unknown_options=True, name="download") +@option("--data-provider-historical", + type=Choice([data_downloader.get_name() for data_downloader in cli_data_downloaders], case_sensitive=False), + help="The name of the downloader data provider.") +@options_from_json(get_configs_for_options("download")) @option("--dataset", type=str, help="The name of the dataset to download non-interactively") @option("--overwrite", is_flag=True, default=False, help="Overwrite existing local data") @option("--force", is_flag=True, default=False, hidden=True) @option("--yes", "-y", "auto_confirm", is_flag=True, default=False, help="Automatically confirm payment confirmation prompts") +@option("--data-type", type=Choice(QCDataType.get_all_members(), case_sensitive=False), help="Specify the type of historical data") +@option("--resolution", type=Choice(QCResolution.get_all_members(), case_sensitive=False), + help="Specify the resolution of the historical data") +@option("--security-type", type=Choice(QCSecurityType.get_all_members(), case_sensitive=False), + help="Specify the security type of the historical data") +@option("--market", type=str, default="USA", + help="Specify the market name for tickers (e.g., 'USA', 'NYMEX', 'Binance')") +@option("--tickers", + type=str, + help="Specify comma separated list of tickers to use for historical data request.") +@option("--start-date", + type=str, + help="Specify the start date for the historical data request in the format yyyyMMdd.") +@option("--end-date", + type=str, + help="Specify the end date for the historical data request in the format yyyyMMdd. (defaults to today)") +@option("--image", + type=str, + help=f"The LEAN engine image to use (defaults to {DEFAULT_ENGINE_IMAGE})") +@option("--update", + is_flag=True, + default=False, + help="Pull the LEAN engine image before running the Downloader Data Provider") +@option("--no-update", + is_flag=True, + default=False, + help="Use the local LEAN engine image instead of pulling the latest version") @pass_context -def download(ctx: Context, dataset: Optional[str], overwrite: bool, force: bool, auto_confirm: bool, **kwargs) -> None: - """Purchase and download data from QuantConnect Datasets. +def download(ctx: Context, + data_provider_historical: Optional[str], + dataset: Optional[str], + overwrite: bool, + force: bool, + auto_confirm: bool, + data_type: Optional[str], + resolution: Optional[str], + security_type: Optional[str], + market: Optional[str], + tickers: Optional[str], + start_date: Optional[str], + end_date: Optional[str], + image: Optional[str], + update: bool, + no_update: bool, + **kwargs) -> None: + """Purchase and download data directly from QuantConnect or download from Support Data Providers + + 1. Acquire Data from QuantConnect Datasets: Purchase and seamlessly download data directly from QuantConnect.\n + 2. Streamlined Access from Support Data Providers:\n + - Choose your preferred historical data provider.\n + - Initiate hassle-free downloads from our supported providers. + + We have 2 options:\n + - interactive (follow instruction in lean-cli)\n + - no interactive (write arguments in command line) An interactive wizard will show to walk you through the process of selecting data, accepting the CLI API Access and Data Agreement and payment. @@ -433,20 +575,124 @@ def download(ctx: Context, dataset: Optional[str], overwrite: bool, force: bool, """ organization = _get_organization() - is_interactive = dataset is None - if not is_interactive: - ensure_options(["dataset"]) - datasets = _get_available_datasets(organization) - products = _select_products_non_interactive(organization, datasets, ctx, force) - else: - datasets = _get_available_datasets(organization) - products = _select_products_interactive(organization, datasets, force, ask_for_more_data=not auto_confirm) + if data_provider_historical is None: + data_provider_historical = _get_historical_data_provider() - _confirm_organization_balance(organization, products) - _verify_accept_agreement(organization, is_interactive) + if data_provider_historical == 'QuantConnect': + is_interactive = dataset is None + if not is_interactive: + ensure_options(["dataset"]) + datasets = _get_available_datasets(organization) + products = _select_products_non_interactive(organization, datasets, ctx, force) + else: + datasets = _get_available_datasets(organization) + products = _select_products_interactive(organization, datasets, force, ask_for_more_data=not auto_confirm) - if is_interactive and not auto_confirm: - _confirm_payment(organization, products) + _confirm_organization_balance(organization, products) + _verify_accept_agreement(organization, is_interactive) - all_data_files = _get_data_files(organization, products) - container.data_downloader.download_files(all_data_files, overwrite, organization.id) + if is_interactive and not auto_confirm: + _confirm_payment(organization, products) + + all_data_files = _get_data_files(organization, products) + container.data_downloader.download_files(all_data_files, overwrite, organization.id) + else: + data_downloader_provider = next(data_downloader for data_downloader in cli_data_downloaders + if data_downloader.get_name() == data_provider_historical) + + data_provider_config_json = None + if data_downloader_provider.specifications_url is not None: + data_provider_config_json = container.api_client.data.download_public_file_json( + data_downloader_provider.specifications_url) + + data_provider_support_security_types = _get_download_specification_from_config(data_provider_config_json, + QCSecurityType.get_all_members(), + "security-types") + data_provider_support_data_types = _get_download_specification_from_config(data_provider_config_json, + QCDataType.get_all_members(), + "data-types") + data_provider_support_resolutions = _get_download_specification_from_config(data_provider_config_json, + QCResolution.get_all_members(), + "resolutions") + data_provider_support_markets = _get_download_specification_from_config(data_provider_config_json, + [market], "markets") + + security_type = _get_user_input_or_prompt(security_type, data_provider_support_security_types, + data_provider_historical, "Select a Ticker's security type") + data_type = _get_user_input_or_prompt(data_type, data_provider_support_data_types, + data_provider_historical, "Select a Data type") + resolution = _get_user_input_or_prompt(resolution, data_provider_support_resolutions, + data_provider_historical, "Select a Resolution") + market = _get_user_input_or_prompt(market, data_provider_support_markets, + data_provider_historical,"Select a Market") + + if not tickers: + tickers = ','.join(DatasetTextOption(id="id", + label="Enter comma separated list of tickers to use for historical data request.", + description="description", + transform=DatasetTextOptionTransform.Lowercase, + multiple=True).configure_interactive().value) + + start_date = _configure_date_option(start_date, "start", "Please enter a Start Date in the format") + end_date = _configure_date_option(end_date, "end", "Please enter a End Date in the format") + + if start_date.value >= end_date.value: + raise ValueError("Historical start date cannot be greater than or equal to historical end date.") + + logger = container.logger + lean_config = container.lean_config_manager.get_complete_lean_config(None, None, None) + + data_downloader_provider = config_build_for_name(lean_config, data_downloader_provider.get_name(), + cli_data_downloaders, kwargs, logger, interactive=True) + data_downloader_provider.ensure_module_installed(organization.id) + container.lean_config_manager.set_properties(data_downloader_provider.get_settings()) + # mounting additional data_downloader config files + paths_to_mount = data_downloader_provider.get_paths_to_mount() + + engine_image = container.cli_config_manager.get_engine_image(image) + + if str(engine_image) != DEFAULT_ENGINE_IMAGE: + # Custom engine image should not be updated. + logger.warn(f'A custom engine image: "{engine_image}" is being used!') + + container.update_manager.pull_docker_image_if_necessary(engine_image, update, no_update) + + downloader_data_provider_path_dll = "/Lean/DownloaderDataProvider/bin/Debug" + + run_options = container.lean_runner.get_basic_docker_config_without_algo(lean_config, + debugging_method=None, + detach=False, + image=engine_image, + target_path=downloader_data_provider_path_dll, + paths_to_mount=paths_to_mount) + + config_path = container.temp_manager.create_temporary_directory() / "config.json" + with config_path.open("w+", encoding="utf-8") as file: + dump(lean_config, file) + + run_options["working_dir"] = downloader_data_provider_path_dll + + dll_arguments = ["dotnet", "QuantConnect.DownloaderDataProvider.Launcher.dll", + "--data-type", data_type, + "--start-date", start_date.value.strftime("%Y%m%d"), + "--end-date", end_date.value.strftime("%Y%m%d"), + "--security-type", security_type, + "--market", market, + "--resolution", resolution, + "--tickers", tickers] + + run_options["commands"].append(' '.join(dll_arguments)) + + # mount our created above config with work directory + run_options["mounts"].append( + Mount(target=f"{downloader_data_provider_path_dll}/config.json", + source=str(config_path), + type="bind", + read_only=True) + ) + + success = container.docker_manager.run_image(engine_image, **run_options) + + if not success: + raise RuntimeError( + "Something went wrong while running the downloader data provider, see the logs above for more information") diff --git a/lean/components/api/data_client.py b/lean/components/api/data_client.py index 60355fab..da50fb93 100644 --- a/lean/components/api/data_client.py +++ b/lean/components/api/data_client.py @@ -13,7 +13,7 @@ from lean.components.api.api_client import * from lean.models.api import QCDataInformation -from typing import List, Callable +from typing import List, Callable, cast class DataClient: @@ -92,6 +92,14 @@ def download_public_file(self, data_endpoint: str) -> bytes: :return: the content of the file """ return self._http_client.get(data_endpoint).content + + def download_public_file_json(self, data_endpoint: str) -> Dict[str, Any]: + """Downloads the content of a downloadable public file in json format. + + :param data_endpoint: the url of the public file + :return: the content of the file in json format + """ + return cast(Dict[str, Any], self._http_client.get(data_endpoint).json()) def list_files(self, prefix: str) -> List[str]: """Lists all remote files with a given prefix. diff --git a/lean/components/config/lean_config_manager.py b/lean/components/config/lean_config_manager.py index a33e7bc7..5b533386 100644 --- a/lean/components/config/lean_config_manager.py +++ b/lean/components/config/lean_config_manager.py @@ -223,7 +223,9 @@ def get_complete_lean_config(self, """ config = self.get_lean_config() - config["environment"] = environment + if environment and len(environment) > 0: + config["environment"] = environment + config["close-automatically"] = True config["composer-dll-directory"] = "." @@ -241,7 +243,6 @@ def get_complete_lean_config(self, config_defaults = { "job-user-id": self._cli_config_manager.user_id.get_value(default="0"), "api-access-token": self._cli_config_manager.api_token.get_value(default=""), - "job-project-id": self._project_config_manager.get_local_id(algorithm_file.parent), "job-organization-id": get_organization(config), "ib-host": "127.0.0.1", @@ -256,29 +257,32 @@ def get_complete_lean_config(self, if config.get(key, "") == "": config[key] = value - if algorithm_file.name.endswith(".py"): - config["algorithm-type-name"] = algorithm_file.name.split(".")[0] - config["algorithm-language"] = "Python" - config["algorithm-location"] = f"/LeanCLI/{algorithm_file.name}" - else: - from re import findall - algorithm_text = algorithm_file.read_text(encoding="utf-8") - config["algorithm-type-name"] = findall(r"class\s*([^\s:]+)\s*:\s*QCAlgorithm", algorithm_text)[0] - config["algorithm-language"] = "CSharp" - config["algorithm-location"] = f"{algorithm_file.parent.name}.dll" - - project_config = self._project_config_manager.get_project_config(algorithm_file.parent) - config["parameters"] = project_config.get("parameters", {}) - - # Add libraries paths to python project - project_language = project_config.get("algorithm-language", None) - if project_language == "Python": - library_references = project_config.get("libraries", []) - python_paths = config.get("python-additional-paths", []) - python_paths.extend([(Path("/") / library["path"]).as_posix() for library in library_references]) - if len(python_paths) > 0: - python_paths.append("/Library") - config["python-additional-paths"] = python_paths + if algorithm_file and len(algorithm_file.name) > 0: + config.get("job-project-id", self._project_config_manager.get_local_id(algorithm_file.parent)) + + if algorithm_file.name.endswith(".py"): + config["algorithm-type-name"] = algorithm_file.name.split(".")[0] + config["algorithm-language"] = "Python" + config["algorithm-location"] = f"/LeanCLI/{algorithm_file.name}" + else: + from re import findall + algorithm_text = algorithm_file.read_text(encoding="utf-8") + config["algorithm-type-name"] = findall(r"class\s*([^\s:]+)\s*:\s*QCAlgorithm", algorithm_text)[0] + config["algorithm-language"] = "CSharp" + config["algorithm-location"] = f"{algorithm_file.parent.name}.dll" + + project_config = self._project_config_manager.get_project_config(algorithm_file.parent) + config["parameters"] = project_config.get("parameters", {}) + + # Add libraries paths to python project + project_language = project_config.get("algorithm-language", None) + if project_language == "Python": + library_references = project_config.get("libraries", []) + python_paths = config.get("python-additional-paths", []) + python_paths.extend([(Path("/") / library["path"]).as_posix() for library in library_references]) + if len(python_paths) > 0: + python_paths.append("/Library") + config["python-additional-paths"] = python_paths # No real limit for the object store by default if "storage-limit-mb" not in config: diff --git a/lean/components/docker/lean_runner.py b/lean/components/docker/lean_runner.py index 4c247869..4bbe3849 100644 --- a/lean/components/docker/lean_runner.py +++ b/lean/components/docker/lean_runner.py @@ -197,9 +197,6 @@ def get_basic_docker_config(self, :return: the Docker configuration containing basic configuration to run Lean :param paths_to_mount: additional paths to mount to the container """ - from docker.types import Mount - from uuid import uuid4 - from json import dumps project_dir = algorithm_file.parent project_config = self._project_config_manager.get_project_config(project_dir) @@ -207,92 +204,88 @@ def get_basic_docker_config(self, # Force the use of the LocalDisk map/factor providers if no recent zip present and not using ApiDataProvider data_dir = self._lean_config_manager.get_data_directory() - if lean_config.get("data-provider", None) != "QuantConnect.Lean.Engine.DataFeeds.ApiDataProvider": - self._force_disk_provider_if_necessary(lean_config, - "map-file-provider", - "QuantConnect.Data.Auxiliary.LocalZipMapFileProvider", - "QuantConnect.Data.Auxiliary.LocalDiskMapFileProvider", - data_dir / "equity" / "usa" / "map_files") - self._force_disk_provider_if_necessary(lean_config, - "factor-file-provider", - "QuantConnect.Data.Auxiliary.LocalZipFactorFileProvider", - "QuantConnect.Data.Auxiliary.LocalDiskFactorFileProvider", - data_dir / "equity" / "usa" / "factor_files") + self._handle_data_providers(lean_config, data_dir) - # Create the output directory if it doesn't exist yet - if not output_dir.exists(): - output_dir.mkdir(parents=True) + storage_dir = self._lean_config_manager.get_cli_root_directory() / "storage" + # Create the output directory if it doesn't exist yet # Create the storage directory if it doesn't exist yet - storage_dir = self._lean_config_manager.get_cli_root_directory() / "storage" - if not storage_dir.exists(): - storage_dir.mkdir(parents=True) + self._ensure_directories_exist([output_dir, storage_dir]) - lean_config["debug-mode"] = self._logger.debug_logging_enabled - lean_config["data-folder"] = "/Lean/Data" - lean_config["results-destination-folder"] = "/Results" - lean_config["object-store-root"] = "/Storage" + lean_config.update({ + "debug-mode": self._logger.debug_logging_enabled, + "data-folder": "/Lean/Data", + "results-destination-folder": "/Results", + "object-store-root": "/Storage" + }) - # The dict containing all options passed to `docker run` - # See all available options at https://docker-py.readthedocs.io/en/stable/containers.html - run_options: Dict[str, Any] = { - "detach": detach, - "commands": [], - "environment": docker_project_config.get("environment", {}), - "stop_signal": "SIGINT" if debugging_method is None else "SIGKILL", - "mounts": [], - "volumes": {}, - "ports": docker_project_config.get("ports", {}) - } + run_options = self._initialize_run_options(detach, docker_project_config, debugging_method) - # mount the paths passed in - self.mount_paths(paths_to_mount, lean_config, run_options) + self._mount_common_directories(run_options, paths_to_mount, lean_config, data_dir, storage_dir, project_dir, output_dir) - # mount the project and library directories - self.mount_project_and_library_directories(project_dir, run_options) + # Update all hosts that need to point to the host's localhost to host.docker.internal so they resolve properly + # TODO: we should remove it or add to config json + for key in ["terminal-link-server-host"]: + if key not in lean_config: + continue - # Mount the data directory - run_options["volumes"][str(data_dir)] = { - "bind": "/Lean/Data", - "mode": "rw" - } + if lean_config[key] == "localhost" or lean_config[key] == "127.0.0.1": + lean_config[key] = "host.docker.internal" - # Mount the output directory - run_options["volumes"][str(output_dir)] = { - "bind": "/Results", - "mode": "rw" - } + # Set up modules + set_up_common_csharp_options_called = self._setup_installed_packages(run_options, image) - # Mount the local object store directory - run_options["volumes"][str(storage_dir)] = { - "bind": "/Storage", - "mode": "rw" - } + # Set up language-specific run options + self.setup_language_specific_run_options(run_options, project_dir, algorithm_file, + set_up_common_csharp_options_called, release, + image) - # Mount all local files referenced in the Lean config - cli_root_dir = self._lean_config_manager.get_cli_root_directory() - files_to_mount = [ - ("transaction-log", cli_root_dir), - ("terminal-link-symbol-map-file", cli_root_dir / DEFAULT_DATA_DIRECTORY_NAME / "symbol-properties") - ] - for key, base_path in files_to_mount: - if key not in lean_config or lean_config[key] == "": - continue + self._mount_lean_config_and_finalize(run_options, lean_config, output_dir) - lean_config_entry = Path(lean_config[key]) - local_path = lean_config_entry if lean_config_entry.is_absolute() else base_path / lean_config_entry - if not local_path.exists(): - local_path.parent.mkdir(parents=True, exist_ok=True) - local_path.touch() + return run_options - run_options["mounts"].append(Mount(target=f"/Files/{key}", - source=str(local_path), - type="bind", - read_only=False)) + def get_basic_docker_config_without_algo(self, + lean_config: Dict[str, Any], + debugging_method: Optional[DebuggingMethod], + detach: bool, + image: DockerImage, + target_path: str, + paths_to_mount: Optional[Dict[str, str]] = None) -> Dict[str, Any]: + """Creates a basic Docker config to run the engine with. - lean_config[key] = f"/Files/{key}" + This method constructs the parts of the Docker config that is the same for both the engine and the optimizer. + + :param lean_config: the LEAN configuration to use + :param debugging_method: the debugging method if debugging needs to be enabled, None if not + :param detach: whether LEAN should run in a detached container + :param image: The docker image that will be used + :param target_path: The target path inside the Docker container where the C# project should be located. + :param paths_to_mount: additional paths to mount to the container + :return: the Docker configuration containing basic configuration to run Lean + """ + + docker_project_config = {"docker": {}} + # Force the use of the LocalDisk map/factor providers if no recent zip present and not using ApiDataProvider + data_dir = self._lean_config_manager.get_data_directory() + self._handle_data_providers(lean_config, data_dir) + + # Create the storage directory if it doesn't exist yet + storage_dir = self._lean_config_manager.get_cli_root_directory() / "storage" + self._ensure_directories_exist([storage_dir]) + + lean_config.update({ + "debug-mode": self._logger.debug_logging_enabled, + "data-folder": "/Lean/Data", + "results-destination-folder": "/Results", + "object-store-root": "/Storage" + }) + + run_options = self._initialize_run_options(detach, docker_project_config, debugging_method) + + self._mount_common_directories(run_options, paths_to_mount, lean_config, data_dir, storage_dir, None, None) # Update all hosts that need to point to the host's localhost to host.docker.internal so they resolve properly + # TODO: we should remove it or add to config json for key in ["terminal-link-server-host"]: if key not in lean_config: continue @@ -300,80 +293,174 @@ def get_basic_docker_config(self, if lean_config[key] == "localhost" or lean_config[key] == "127.0.0.1": lean_config[key] = "host.docker.internal" - set_up_common_csharp_options_called = False - # Set up modules + self._setup_installed_packages(run_options, image, target_path) + + self._mount_lean_config_and_finalize(run_options, lean_config, None) + + return run_options + + def _mount_lean_config_and_finalize(self, run_options: Dict[str, Any], lean_config: Dict[str, Any], output_dir: Optional[Path]): + """Mounts Lean config and finalizes.""" + from docker.types import Mount + from uuid import uuid4 + from json import dumps + + # Save the final Lean config to a temporary file so we can mount it into the container + config_path = self._temp_manager.create_temporary_directory() / "config.json" + with config_path.open("w+", encoding="utf-8") as file: + file.write(dumps(lean_config, indent=4)) + + # Mount the Lean config + run_options["mounts"].append(Mount(target=f"{LEAN_ROOT_PATH}/config.json", + source=str(config_path), + type="bind", + read_only=True)) + + # Assign the container a name and store it in the output directory's configuration + run_options["name"] = lean_config.get("container-name", f"lean_cli_{str(uuid4()).replace('-', '')}") + + # set the hostname + if "hostname" in lean_config: + run_options["hostname"] = lean_config["hostname"] + + if output_dir: + output_config = self._output_config_manager.get_output_config(output_dir) + output_config.set("container", run_options["name"]) + if "backtest-name" in lean_config: + output_config.set("backtest-name", lean_config["backtest-name"]) + if "environment" in lean_config and "environments" in lean_config: + environment = lean_config["environments"][lean_config["environment"]] + if "live-mode-brokerage" in environment: + output_config.set("brokerage", environment["live-mode-brokerage"].split(".")[-1]) + + def _setup_installed_packages(self, run_options: Dict[str, Any], image: DockerImage, target_path: str = "/Lean/Launcher/bin/Debug"): + """Sets up installed packages.""" installed_packages = self._module_manager.get_installed_packages() - if len(installed_packages) > 0: - self._logger.debug(f"LeanRunner.run_lean(): installed packages {len(installed_packages)}") - self.set_up_common_csharp_options(run_options) - set_up_common_csharp_options_called = True + if installed_packages: + self._logger.debug(f"LeanRunner._setup_installed_packages(): installed packages {len(installed_packages)}") + self.set_up_common_csharp_options(run_options, target_path) # Mount the modules directory - run_options["volumes"][MODULES_DIRECTORY] = { - "bind": "/Modules", - "mode": "ro" - } + run_options["volumes"][MODULES_DIRECTORY] = {"bind": "/Modules", "mode": "ro"} # Add the modules directory as a NuGet source root run_options["commands"].append("dotnet nuget add source /Modules") - # Create a C# project used to resolve the dependencies of the modules run_options["commands"].append("mkdir /ModulesProject") run_options["commands"].append("dotnet new sln -o /ModulesProject") - framework_ver = self._docker_manager.get_image_label(image, 'target_framework', - DEFAULT_LEAN_DOTNET_FRAMEWORK) + framework_ver = self._docker_manager.get_image_label(image, 'target_framework', DEFAULT_LEAN_DOTNET_FRAMEWORK) run_options["commands"].append(f"dotnet new classlib -o /ModulesProject -f {framework_ver} --no-restore") run_options["commands"].append("rm /ModulesProject/Class1.cs") # Add all modules to the project, automatically resolving all dependencies for package in installed_packages: - self._logger.debug(f"LeanRunner.run_lean(): Adding module {package} to the project") + self._logger.debug(f"LeanRunner._setup_installed_packages(): Adding module {package} to the project") run_options["commands"].append(f"rm -rf /root/.nuget/packages/{package.name.lower()}") - run_options["commands"].append( - f"dotnet add /ModulesProject package {package.name} --version {package.version}") + run_options["commands"].append(f"dotnet add /ModulesProject package {package.name} --version {package.version}") # Copy all module files to /Lean/Launcher/bin/Debug, but don't overwrite anything that already exists - run_options["commands"].append( - "python /copy_csharp_dependencies.py /Compile/obj/ModulesProject/project.assets.json") + run_options["commands"].append("python /copy_csharp_dependencies.py /Compile/obj/ModulesProject/project.assets.json") + + return bool(installed_packages) + + def _mount_common_directories(self, + run_options: Dict[str, Any], + paths_to_mount: Optional[Dict[str, str]], + lean_config: Dict[str, Any], + data_dir: Path, + storage_dir: Path, + project_dir: Optional[Path], + output_dir: Optional[Path]): + """ + Mounts common directories. + + 1. mount the paths passed in + 2. mount the project and library directories (param: `project_dir` is not None) + 3. mount the data directory + 4. mount the output directory (param: `output_dir` is not None) + 5. mount the local object store directory + 6. mount all local files referenced in the Lean config + """ + from docker.types import Mount - # Set up language-specific run options - self.setup_language_specific_run_options(run_options, project_dir, algorithm_file, - set_up_common_csharp_options_called, release, - image) + # 1 + self.mount_paths(paths_to_mount, lean_config, run_options) - # Save the final Lean config to a temporary file so we can mount it into the container - config_path = self._temp_manager.create_temporary_directory() / "config.json" - with config_path.open("w+", encoding="utf-8") as file: - file.write(dumps(lean_config, indent=4)) + # 2 + if project_dir: + self.mount_project_and_library_directories(project_dir, run_options) - # Mount the Lean config - run_options["mounts"].append(Mount(target=f"{LEAN_ROOT_PATH}/config.json", - source=str(config_path), - type="bind", - read_only=True)) + # 3 + run_options["volumes"][str(data_dir)] = {"bind": "/Lean/Data", "mode": "rw"} - # Assign the container a name and store it in the output directory's configuration - if "container-name" in lean_config: - run_options["name"] = lean_config["container-name"] - else: - run_options["name"] = f"lean_cli_{str(uuid4()).replace('-', '')}" + # 4 + if output_dir: + run_options["volumes"][str(output_dir)] = {"bind": "/Results", "mode": "rw"} + # 5 + run_options["volumes"][str(storage_dir)] = {"bind": "/Storage", "mode": "rw"} - # set the hostname - if "hostname" in lean_config: - run_options["hostname"] = lean_config["hostname"] + # 6 + cli_root_dir = self._lean_config_manager.get_cli_root_directory() + files_to_mount = [ + ("transaction-log", cli_root_dir), + ("terminal-link-symbol-map-file", cli_root_dir / DEFAULT_DATA_DIRECTORY_NAME / "symbol-properties") + ] + for key, base_path in files_to_mount: + if key not in lean_config or lean_config[key] == "": + continue - output_config = self._output_config_manager.get_output_config(output_dir) - output_config.set("container", run_options["name"]) - if "backtest-name" in lean_config: - output_config.set("backtest-name", lean_config["backtest-name"]) - if "environment" in lean_config and "environments" in lean_config: - environment = lean_config["environments"][lean_config["environment"]] - if "live-mode-brokerage" in environment: - output_config.set("brokerage", environment["live-mode-brokerage"].split(".")[-1]) + lean_config_entry = Path(lean_config[key]) + local_path = lean_config_entry if lean_config_entry.is_absolute() else base_path / lean_config_entry + if not local_path.exists(): + local_path.parent.mkdir(parents=True, exist_ok=True) + local_path.touch() - return run_options + run_options["mounts"].append(Mount(target=f"/Files/{key}", + source=str(local_path), + type="bind", + read_only=False)) + + lean_config[key] = f"/Files/{key}" + + + def _initialize_run_options(self, detach: bool, docker_project_config: Dict[str, Any], debugging_method: Optional[DebuggingMethod]): + """ + Initializes run options. + + The dict containing all options passed to `docker run` + See all available options at https://docker-py.readthedocs.io/en/stable/containers.html + """ + return { + "detach": detach, + "commands": [], + "environment": docker_project_config.get("environment", {}), + "stop_signal": "SIGINT" if debugging_method is None else "SIGKILL", + "mounts": [], + "volumes": {}, + "ports": docker_project_config.get("ports", {}) + } + + def _ensure_directories_exist(self, dirs: List[Path]): + """Ensures directories exist.""" + for dir_path in dirs: + if not dir_path.exists(): + dir_path.mkdir(parents=True) + + def _handle_data_providers(self, lean_config: Dict[str, Any], data_dir: Path): + """Handles data provider logic.""" + if lean_config.get("data-provider", None) != "QuantConnect.Lean.Engine.DataFeeds.ApiDataProvider": + self._force_disk_provider_if_necessary(lean_config, + "map-file-provider", + "QuantConnect.Data.Auxiliary.LocalZipMapFileProvider", + "QuantConnect.Data.Auxiliary.LocalDiskMapFileProvider", + data_dir / "equity" / "usa" / "map_files") + self._force_disk_provider_if_necessary(lean_config, + "factor-file-provider", + "QuantConnect.Data.Auxiliary.LocalZipFactorFileProvider", + "QuantConnect.Data.Auxiliary.LocalDiskFactorFileProvider", + data_dir / "equity" / "usa" / "factor_files") def set_up_python_options(self, project_dir: Path, run_options: Dict[str, Any], image: DockerImage) -> None: """Sets up Docker run options specific to Python projects. @@ -547,12 +634,22 @@ def set_up_csharp_options(self, project_dir: Path, run_options: Dict[str, Any], run_options["commands"].append( f'python /copy_csharp_dependencies.py "/Compile/obj/{project_file.stem}/project.assets.json"') - def set_up_common_csharp_options(self, run_options: Dict[str, Any]) -> None: - """Sets up common Docker run options that is needed for all C# work. + def set_up_common_csharp_options(self, run_options: Dict[str, Any], target_path: str = "/Lean/Launcher/bin/Debug") -> None: + """ + Sets up common Docker run options that is needed for all C# work. - This method is only called if the user has installed modules and/or if the project to run is written in C#. + This method prepares the Docker run options required to run C# projects inside a Docker container. It is called + when the user has installed specific modules or when the project to run is written in C#. - :param run_options: the dictionary to append run options to + Parameters: + - run_options (Dict[str, Any]): A dictionary to which the Docker run options will be appended. + - target_path (str, optional): The target path inside the Docker container where the C# project should be located. + Default value is "/Lean/Launcher/bin/Debug". + A Python script is typically used to copy the right C# dependencies to this path. + This script ensures that the correct DLLs are copied, even if they are OS-specific. + + Returns: + - None: This function does not return anything. It modifies the `run_options` dictionary in place. """ from docker.types import Mount # Mount a volume to NuGet's cache directory so we only download packages once @@ -623,7 +720,7 @@ def copy_file(library_id, partial_path, file_data): output_name = file_data.get("outputPath", full_path.name) - target_path = Path("/Lean/Launcher/bin/Debug") / output_name + target_path = Path(""" + f'"{target_path}"' + """) / output_name if not target_path.exists(): target_path.parent.mkdir(parents=True, exist_ok=True) shutil.copy(full_path, target_path) @@ -799,14 +896,15 @@ def mount_paths(self, paths_to_mount, lean_config, run_options): environment = {} if "environment" in lean_config and "environments" in lean_config: environment = lean_config["environments"][lean_config["environment"]] - + else: + environment = lean_config mounts = run_options["mounts"] for key, pathStr in paths_to_mount.items(): path = Path(pathStr).resolve() target = f"/Files/{Path(path).name}" - self._logger.info(f"Mounting {path} to {target}") + self._logger.debug(f"Mounting {path} to {target}") mounts.append(Mount(target=target, source=str(path), diff --git a/lean/models/api.py b/lean/models/api.py index 3033815c..4d552a6e 100644 --- a/lean/models/api.py +++ b/lean/models/api.py @@ -407,15 +407,54 @@ class QCMinimalOrganization(WrappedBaseModel): preferred: bool +class QCDataType(str, Enum): + Trade = "Trade" + Quote = "Quote" + OpenInterest = "OpenInterest" + + @classmethod + def get_all_members(cls): + """ + Retrieve all members (values) of the QCDataType enumeration. + + Returns: + list: A list containing all the values of the QCDataType enumeration. + + Example: + >>> all_data_types = QCDataType.get_all_members() + >>> print(all_data_types) + ['Trade', 'Quote', 'OpenInterest'] + """ + return list(cls.__members__.values()) + class QCSecurityType(str, Enum): Equity = "Equity" + Index = "Index" Forex = "Forex" CFD = "Cfd" Future = "Future" Crypto = "Crypto" + CryptoFuture = "CryptoFuture" Option = "Option" + IndexOption = "IndexOption" + Commodity = "Commodity" FutureOption = "FutureOption" + @classmethod + def get_all_members(cls): + """ + Retrieve all members (values) of the QCSecurityType enumeration. + + Returns: + list: A list containing all the values of the QCSecurityType enumeration. + + Example: + >>> all_security_types = QCSecurityType.get_all_members() + >>> print(all_security_types) + ['Equity', 'Index', 'Forex', 'Cfd', 'Future', 'Crypto', 'CryptoFuture', 'Option', 'IndexOption', 'Commodity', 'FutureOption'] + """ + return list(cls.__members__.values()) + class QCResolution(str, Enum): Tick = "Tick" @@ -436,6 +475,21 @@ def by_name(cls, name: str) -> 'QCResolution': return v raise ValueError(f"QCResolution has no member named '{name}'") + @classmethod + def get_all_members(cls): + """ + Retrieve all members (values) of the QCResolution enumeration. + + Returns: + list: A list containing all the values of the QCResolution enumeration. + + Example: + >>> all_resolutions = QCResolution.get_all_members() + >>> print(all_resolutions) + ['Tick', 'Second', 'Minute', 'Hour', 'Daily'] + """ + return list(cls.__members__.values()) + class QCLink(WrappedBaseModel): link: str diff --git a/lean/models/click_options.py b/lean/models/click_options.py index 5e56f444..15d95349 100644 --- a/lean/models/click_options.py +++ b/lean/models/click_options.py @@ -29,6 +29,8 @@ def get_configs_for_options(env: str) -> List[Configuration]: brokerage = cli_data_downloaders elif env == "research": brokerage = cli_data_downloaders + elif env == "download": + brokerage = cli_data_downloaders else: raise ValueError("Acceptable values for 'env' are: 'live-cloud', 'live-cli', 'backtest', 'research'") diff --git a/lean/models/data.py b/lean/models/data.py index 19d4dcec..4e9e72f8 100644 --- a/lean/models/data.py +++ b/lean/models/data.py @@ -230,6 +230,11 @@ def configure_interactive(self) -> OptionResult: date = prompt(f"{self.label} (yyyyMMdd)", type=DateParameter()) return OptionResult(value=date, label=date.strftime("%Y-%m-%d")) + def configure_interactive_with_default(self, default_date: str) -> OptionResult: + date = prompt(f"{self.label} (yyyyMMdd) or just press Enter to use the default date [{default_date}]", + show_default=False, default=default_date, type=DateParameter()) + return OptionResult(value=date, label=date.strftime("%Y-%m-%d")) + def configure_non_interactive(self, user_input: str) -> OptionResult: for date_format in ["%Y%m%d", "%Y-%m-%d"]: try: diff --git a/lean/models/json_module.py b/lean/models/json_module.py index dca7b556..14bde708 100644 --- a/lean/models/json_module.py +++ b/lean/models/json_module.py @@ -37,6 +37,7 @@ def __init__(self, json_module_data: Dict[str, Any], module_type: str, platform: self._product_id: int = json_module_data["product-id"] if "product-id" in json_module_data else 0 self._id: str = json_module_data["id"] self._display_name: str = json_module_data["display-id"] + self._specifications_url: str = json_module_data["specifications"] if "specifications" in json_module_data else None self._installs: bool = json_module_data["installs"] if ("installs" in json_module_data and platform == MODULE_CLI_PLATFORM) else False self._lean_configs: List[Configuration] = [] @@ -261,6 +262,10 @@ def _save_property(self, settings: Dict[str, Any]): from lean.container import container container.lean_config_manager.set_properties(settings) + @property + def specifications_url(self): + return self._specifications_url + class LiveInitialStateInput(str, Enum): Required = "required" diff --git a/tests/commands/data/test_download.py b/tests/commands/data/test_download.py index be34f851..a5c1e6b5 100644 --- a/tests/commands/data/test_download.py +++ b/tests/commands/data/test_download.py @@ -11,15 +11,21 @@ from lean.container import container from lean.models.api import QCDataset, QCOrganizationCredit, QCOrganizationData from tests.test_helpers import create_api_organization +from click.testing import CliRunner +from lean.commands import lean +from tests.test_helpers import create_fake_lean_cli_directory +from tests.conftest import initialize_container test_files = Path(os.path.join(os.path.dirname(os.path.realpath(__file__)), "testFiles")) + # Load in our test files into fake filesystem @pytest.fixture def setup(fs): fs.add_real_directory(test_files, read_only=False) yield fs + def test_bulk_extraction(setup): fake_tar = Path(os.path.join(test_files, "20220222_coinapi_crypto_ftx_price_aggregation.tar")) out = Path("/tmp/out") @@ -32,10 +38,170 @@ def test_bulk_extraction(setup): assert os.path.exists(file) +def _get_data_provider_config(is_crypto_configs: bool = False) -> Dict[str, Any]: + """ + Retrieve the configuration settings for a financial data provider. + + This method encapsulates the configuration settings typically found in a data provider config JSON file, + as referenced by a file named .json in an example from a GitHub repository. + + Returns: + Dict[str, Any]: Configuration settings including supported data types, resolutions, and asset classes. + """ + + if is_crypto_configs: + return { + "module-specification": { + "download": { + "data-types": ["Trade", "Quote"], + "resolutions": ["Minute", "Hour", "Daily"], + "security-types": ["Crypto", "CryptoFuture"], + "markets": ["Binance", "Kraken"] + } + } + } + + data_provider_config_file_json: Dict[str, Any] = { + "module-specification": { + "download": { + "data-types": ["Trade", "Quote"], + "resolutions": ["Second", "Minute", "Hour", "Daily"], + "security-types": ["Equity", "Option", "Index", "IndexOption"], + "markets": ["NYSE", "USA"] + } + } + } + + return data_provider_config_file_json + + +def _create_lean_data_download(data_provider_name: str, + data_type: str, + resolution: str, + security_type: str, + tickers: List[str], + start_date: str, + end_date: str, + data_provider_config_file_json: Dict[str, Any], + market: str = None, + extra_run_command: List[str] = None): + """ + Create a data download command for the Lean algorithmic trading engine. + + This method constructs and invokes a Lean CLI command to download historical data from a specified data provider. + It utilizes a mock data provider configuration JSON and may include extra run commands if provided. + + Args: + data_provider_name (str): Name of the data provider. + data_type (str): Type of data to download (e.g., Trade, Quote). + resolution (str): Time resolution of the data (e.g., Second, Minute). + security_type (str): Type of security (e.g., Equity, Equity Options). + tickers (List[str]): List of tickers to download data for. + start_date (str): Start date of the data download in YYYY-MM-DD format. + end_date (str): End date of the data download in YYYY-MM-DD format. + data_provider_config_file_json (Dict[str, Any]): Mock data provider configuration JSON. + extra_run_command (List[str], optional): Extra run commands to be included in the Lean CLI command. + + Returns: + CompletedProcess: Result of the Lean CLI command execution. + """ + # add additional property in module config file + for data_provider in cli_data_downloaders: + data_provider.__setattr__("_specifications_url", "") + + create_fake_lean_cli_directory() + container = initialize_container() + + with mock.patch.object(container.lean_runner, "get_basic_docker_config_without_algo", + return_value={"commands": [], "mounts": []}): + with mock.patch.object(container.api_client.data, "download_public_file_json", + return_value=data_provider_config_file_json): + with mock.patch.object(container.api_client.organizations, "get", return_value=create_api_organization()): + run_parameters = [ + "data", "download", + "--data-provider-historical", data_provider_name, + "--data-type", data_type, + "--resolution", resolution, + "--security-type", security_type, + "--tickers", ','.join(tickers), + "--start-date", start_date, + "--end-date", end_date, + ] + if market: + run_parameters.extend(["--market", market]) + if extra_run_command: + run_parameters += extra_run_command + + return CliRunner().invoke(lean, run_parameters) + + +@pytest.mark.parametrize("data_provider,market,is_crypto,security_type,tickers,data_provider_parameters", + [("Polygon", "NYSE", False, "Equity", ["AAPL"], ["--polygon-api-key", "123"]), + ("Binance", "Binance", True, "CryptoFuture", ["BTCUSDT"], + ["--binance-exchange-name", "BinanceUS", "--binanceus-api-key", "123", + "--binanceus-api-secret", "123"]), + ("CoinApi", "Kraken", True, "Crypto", ["BTCUSDC", "ETHUSD"], + ["--coinapi-api-key", "123", "--coinapi-product", "Free"]), + ("Interactive Brokers", "USA", False, "Index", ["INTL", "NVDA"], + ["--ib-user-name", "123", "--ib-account", "Individual", "--ib-password", "123"])]) +def test_download_data_non_interactive(data_provider: str, market: str, is_crypto: bool, security_type: str, + tickers: List[str], data_provider_parameters: List[str]): + run_data_download = _create_lean_data_download( + data_provider, "Trade", "Minute", security_type, tickers, "20240101", "20240202", + _get_data_provider_config(is_crypto), market, data_provider_parameters) + assert run_data_download.exit_code == 0 + + +@pytest.mark.parametrize("data_type,resolution", + [("Trade", "Hour"), ("trade", "hour"), ("TRADE", "HOUR"), ("TrAdE", "HoUr")]) +def test_download_data_non_interactive_insensitive_input_param(data_type: str, resolution: str): + run_data_download = _create_lean_data_download( + "Polygon", data_type, resolution, "Equity", ["AAPL"], "20240101", "20240202", + _get_data_provider_config(False), "NYSE", ["--polygon-api-key", "123"]) + assert run_data_download.exit_code == 0 + + +@pytest.mark.parametrize("data_provider,wrong_security_type", + [("Polygon", "Future"), ("Polygon", "Crypto"), ("Polygon", "Forex")]) +def test_download_data_non_interactive_wrong_security_type(data_provider: str, wrong_security_type: str): + run_data_download = _create_lean_data_download(data_provider, "Trade", "Hour", wrong_security_type, ["AAPL"], + "20240101", "20240202", _get_data_provider_config(), + extra_run_command=["--polygon-api-key", "123"]) + assert run_data_download.exit_code == 1 + + error_msg = str(run_data_download.exc_info[1]) + assert data_provider in error_msg + assert wrong_security_type in error_msg + + +@pytest.mark.parametrize("data_provider,start_date,end_date", + [("Polygon", "20240101", "20230202"), ("Polygon", "2024-01-01", "2023-02-02")]) +def test_download_data_non_interactive_wrong_start_end_date(data_provider: str, start_date: str, end_date: str): + run_data_download = _create_lean_data_download(data_provider, "Trade", "Hour", "Equity", ["AAPL"], start_date, + end_date, _get_data_provider_config(), "USA", + extra_run_command=["--polygon-api-key", "123"]) + assert run_data_download.exit_code == 1 + + error_msg = str(run_data_download.exc_info[1]) + assert f"Historical start date cannot be greater than or equal to historical end date." in error_msg + + +@pytest.mark.parametrize("wrong_data_type", ["OpenInterest"]) +def test_download_data_non_interactive_wrong_data_type(wrong_data_type: str): + run_data_download = _create_lean_data_download("Polygon", wrong_data_type, "Hour", "Equity", ["AAPL"], "20240101", + "20240202", _get_data_provider_config(), + extra_run_command=["--polygon-api-key", "123"]) + assert run_data_download.exit_code == 1 + + error_msg = str(run_data_download.exc_info[1]) + assert wrong_data_type in error_msg + + def test_non_interactive_bulk_select(): - # TODO + # TODO pass + def test_interactive_bulk_select(): pytest.skip("This test is interactive") @@ -51,21 +217,23 @@ def test_interactive_bulk_select(): products = _select_products_interactive(organization, testSets) # No assertion, since interactive has multiple results + def test_dataset_requirements(): - organization = create_api_organization() - datasource = json.loads(bulk_datasource) - testSet = Dataset(name="testSet", + organization = create_api_organization() + datasource = json.loads(bulk_datasource) + testSet = Dataset(name="testSet", vendor="testVendor", categories=["testData"], options=datasource["options"], paths=datasource["paths"], requirements=datasource.get("requirements", {})) - - for id, name in testSet.requirements.items(): - assert not organization.has_security_master_subscription(id) - assert id==39 -bulk_datasource=""" + for id, name in testSet.requirements.items(): + assert not organization.has_security_master_subscription(id) + assert id == 39 + + +bulk_datasource = """ { "requirements": { "39": "quantconnect-us-equity-security-master" @@ -251,15 +419,16 @@ def test_dataset_requirements(): } """ + def test_validate_datafile() -> None: + try: + value = "/^equity\\/usa\\/(factor_files|map_files)\\/[^\\/]+.zip$/m" + target = re.compile(value[value.index("/") + 1:value.rindex("/")]) + vendor = QCDataVendor(vendorName="Algoseek", regex=target) + DataFile(file='equity/usa/daily/aal.zip', vendor=vendor) + except Exception as err: + pytest.fail(f"{err}") - try: - value = "/^equity\\/usa\\/(factor_files|map_files)\\/[^\\/]+.zip$/m" - target = re.compile(value[value.index("/") + 1:value.rindex("/")]) - vendor = QCDataVendor(vendorName="Algoseek", regex=target) - DataFile(file='equity/usa/daily/aal.zip', vendor=vendor) - except Exception as err: - pytest.fail(f"{err}") def test_filter_pending_datasets() -> None: from lean.commands.data.download import _get_available_datasets, _get_data_information