Skip to content

Commit

Permalink
Merge pull request #70 from QuantConnect/feature-bulk-downloader
Browse files Browse the repository at this point in the history
Feature Bulk Downloading
  • Loading branch information
Martin-Molinero authored Mar 1, 2022
2 parents a9291fe + 27b04ee commit 539c2f1
Show file tree
Hide file tree
Showing 6 changed files with 312 additions and 23 deletions.
14 changes: 13 additions & 1 deletion lean/components/cloud/data_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# limitations under the License.

import multiprocessing
import tarfile
from pathlib import Path
from datetime import *
from typing import Any, List, Callable
Expand Down Expand Up @@ -99,12 +100,17 @@ def download_files(self, data_files: List[Any], overwrite: bool, organization_id
self._lean_config_manager.set_properties({
"factor-file-provider": "QuantConnect.Data.Auxiliary.LocalZipFactorFileProvider"
})

progress.stop()
except KeyboardInterrupt as e:
progress.stop()
raise e

def _process_bulk(self, file: Path, destination: Path):
tar = tarfile.open(file)
tar.extractall(destination)
tar.close()

def _download_file(self,
relative_file: str,
overwrite: bool,
Expand Down Expand Up @@ -132,6 +138,7 @@ def _download_file(self,
callback()
return


try:
file_content = self._api_client.data.download_file(relative_file, organization_id)
except RequestFailedError as error:
Expand All @@ -140,4 +147,9 @@ def _download_file(self,
return

_store_local_file(file_content, local_path)

# Special case: bulk files need unpacked
if "setup/" in relative_file and relative_file.endswith(".tar"):
self._process_bulk(local_path, data_directory)

callback()
61 changes: 54 additions & 7 deletions lean/models/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,31 @@ class DatasetOneOfCondition(DatasetCondition):
def check(self, option_results: Dict[str, OptionResult]) -> bool:
if self.option not in option_results:
return True
# TODO: bug? ^ returns true even if option hasn't been resolved

return option_results[self.option].value in self.values

class DatasetOrCondition(DatasetCondition):
options: List[DatasetCondition]

def check(self, option_results: Dict[str, OptionResult]) -> bool:
# Check each option, if any return true then its true
for option in self.options:
if option.check(option_results):
return True

return False

class DatasetAndCondition(DatasetCondition):
options: List[DatasetCondition]

def check(self, option_results: Dict[str, OptionResult]) -> bool:
# Check each option, if any return false, then its false
for option in self.options:
if not option.check(option_results):
return False

return True

class DatasetOption(WrappedBaseModel, abc.ABC):
id: str
Expand All @@ -71,10 +93,22 @@ def parse_condition(cls, value: Optional[Any]) -> Any:
return value

condition_types = {
"oneOf": DatasetOneOfCondition
"oneof": DatasetOneOfCondition,
"and": DatasetAndCondition,
"or": DatasetOrCondition
}

return condition_types[value["type"]](**value)
conditionType = value["type"].lower()

# Special AND/OR (Composition of conditions)
if conditionType == "and" or conditionType == "or" :
for i in range(0, len(value["options"])):
option = value["options"][i]

# Recurse as needed to flush out conditional tree
value["options"][i] = cls.parse_condition(option)

return condition_types[conditionType](**value)

def configure_interactive(self) -> OptionResult:
"""Prompt the user for input to configure this option.
Expand Down Expand Up @@ -191,7 +225,6 @@ def get_placeholder(self) -> str:
else:
return f"value (example: {min(keys, key=len)})"


class DatasetDateOption(DatasetOption):
start_end: bool = False

Expand Down Expand Up @@ -228,10 +261,22 @@ def parse_condition(cls, value: Optional[Any]) -> Any:
return value

condition_types = {
"oneOf": DatasetOneOfCondition
"oneof": DatasetOneOfCondition,
"and": DatasetAndCondition,
"or": DatasetOrCondition
}

return condition_types[value["type"]](**value)
conditionType = value["type"].lower()

# Special AND/OR (Composition of conditions)
if conditionType == "and" or conditionType == "or" :
for i in range(0, len(value["options"])):
option = value["options"][i]

# Recurse as need to flush out conditional tree
value["options"][i] = cls.parse_condition(option)

return condition_types[conditionType](**value)


class Dataset(WrappedBaseModel):
Expand All @@ -254,9 +299,11 @@ def parse_options(cls, values: List[Any]) -> List[Any]:
for option in values:
if isinstance(option, DatasetOption):
options.append(option)

# TODO: This is a hack around, does not respect option conditions for start-end
elif option["type"] == "start-end":
description_suffix = ""
required_resolutions = ["tick", "second", "minute"]
required_resolutions = ["tick", "second", "minute", "minute/second/tick"]

resolution = next((o for o in options if o.id == "resolution"), None)
if resolution is not None and isinstance(resolution, DatasetSelectOption):
Expand Down Expand Up @@ -333,7 +380,7 @@ def get_data_files(self) -> List[str]:

multiple_option = next((o for o in self.dataset.options if isinstance(o, DatasetTextOption) and o.multiple),
None)
if multiple_option is not None:
if multiple_option is not None and multiple_option.id in self.option_results:
result = self.option_results[multiple_option.id]

for index in range(len(result.value)):
Expand Down
16 changes: 2 additions & 14 deletions tests/commands/cloud/test_optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,10 @@
from lean.commands import lean
from lean.components.config.optimizer_config_manager import NodeType, OptimizerConfigManager
from lean.container import container
from lean.models.api import QCOptimization, QCOptimizationBacktest, QCOptimizationEstimate, QCFullOrganization, \
QCOrganizationData, QCOrganizationCredit
from lean.models.api import QCOptimization, QCOptimizationBacktest, QCOptimizationEstimate
from lean.models.optimizer import (OptimizationConstraint, OptimizationExtremum, OptimizationParameter,
OptimizationTarget)
from tests.test_helpers import create_api_project, create_fake_lean_cli_directory
from tests.test_helpers import create_api_project, create_fake_lean_cli_directory, create_api_organization


def create_api_optimization() -> QCOptimization:
Expand All @@ -36,17 +35,6 @@ def create_api_optimization() -> QCOptimization:
runtimeStatistics={})


def create_api_organization() -> QCFullOrganization:
return QCFullOrganization(id="1",
name="a",
seats=1,
type="type",
credit=QCOrganizationCredit(movements=[], balance=1000000),
products=[],
data=QCOrganizationData(signedTime=None, current=False),
members=[])


def create_api_optimization_backtest(id: int,
success: bool,
meets_constraints: bool,
Expand Down
Binary file not shown.
Loading

0 comments on commit 539c2f1

Please sign in to comment.