Skip to content

Commit cfc9da0

Browse files
committed
Beta support for configurable dependency resolution & Biocontainers.
Consider the included tool ``seqtk_seq.cwl``. It includes the following SoftwareRequirement hint: ``` hints: SoftwareRequirement: packages: - package: seqtk version: - r93 ``` I'm not happy that ``version`` is a list - but I can live with it for now I guess. If cwltool is executed with the hidden ``--beta-conda-dependencies`` flag, this requirement will be processed by galaxy-lib, Conda will be installed, and seqtk will be installed, and a Conda environment including seqtk will be set up for the job. ``` virtualenv .venv . .venv/bin/activate python setup.py install pip install galaxy-lib cwltool --beta-conda-dependencies tests/seqtk_seq.cwl tests/seqtk_seq_job.json ``` Additional flags are available to configure dependency resolution in a more fine-grained way - using Conda however has a number of advantages that make it particularly well suited to CWL. Conda packages are distributed as binaries that work across Mac and Linux and work on relatively old versions of Linux (great for HPC). Conda also doesn't require root and supports installation of multiple different versions of a package - again these factors make it great for HPC and non-Docker targets. The Biocontainers project (previously Biodocker) dovetails nicely with this. Every version of every Bioconda package has a corresponding best-practice (very lightweight, very small) Docker container on quay.io (assembled by @bgruening and colleagues). There are over 1800 such containers currently. Continuing with the example above, the new ``--beta-use-biocontainers`` flag instructs cwltool to fetch the corresponding Biocontainers container from quay.io automatically or build one to use locally (required for instance for tools with multiple software requirements - fat tools). ``` cwltool --beta-use-biocontainers tests/seqtk_seq.cwl tests/seqtk_seq_job.json ``` These containers contain the same binaries that the package would use locally (outside of Docker). 
Therefore this technique allows cross platform reproducibility/remixability across CWL, Galaxy, and CLI - both inside and outside of Docker. My sincerest hope is that we move away from CWL-specific Dockerfiles. For less effort, a community bioconda package can be made and the result can be used in many more contexts. The Docker image will then be maintained by the community Biocontainer project.
1 parent 5c8ee81 commit cfc9da0

28 files changed

+954
-4
lines changed

Diff for: cwltool/builder.py

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ def __init__(self): # type: () -> None
3636
self.stagedir = None # type: Text
3737
self.make_fs_access = None # type: Type[StdFsAccess]
3838
self.build_job_script = None # type: Callable[[List[str]], Text]
39+
self.find_default_container = None # type: Callable[[], Text]
3940

4041
def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]):
4142
# type: (Dict[Text, Any], Any, Union[int, List[int]], List[int]) -> List[Dict[Text, Any]]

Diff for: cwltool/job.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434

3535
PYTHON_RUN_SCRIPT = """
3636
import json
37+
import os
3738
import sys
3839
import subprocess
3940
@@ -42,6 +43,7 @@
4243
commands = popen_description["commands"]
4344
cwd = popen_description["cwd"]
4445
env = popen_description["env"]
46+
env["PATH"] = os.environ.get("PATH")
4547
stdin_path = popen_description["stdin_path"]
4648
stdout_path = popen_description["stdout_path"]
4749
stderr_path = popen_description["stderr_path"]
@@ -140,9 +142,12 @@ def run(self, dry_run=False, pull_image=True, rm_container=True,
140142
if docker_req and kwargs.get("use_container") is not False:
141143
env = os.environ
142144
img_id = docker.get_from_requirements(docker_req, docker_is_req, pull_image)
143-
elif kwargs.get("default_container", None) is not None:
144-
env = os.environ
145-
img_id = kwargs.get("default_container")
145+
if img_id is None:
146+
find_default_container = self.builder.find_default_container
147+
default_container = find_default_container and find_default_container()
148+
if default_container:
149+
img_id = default_container
150+
env = os.environ
146151

147152
if docker_is_req and img_id is None:
148153
raise WorkflowException("Docker is required for running this tool.")

Diff for: cwltool/main.py

+125-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import hashlib
1313
import pkg_resources # part of setuptools
1414
import functools
15+
import string
1516

1617
import rdflib
1718
from typing import (Union, Any, AnyStr, cast, Callable, Dict, Sequence, Text,
@@ -31,6 +32,13 @@
3132
from .builder import adjustFileObjs, adjustDirObjs
3233
from .stdfsaccess import StdFsAccess
3334
from .pack import pack
35+
from .utils import get_feature
36+
try:
37+
from galaxy.tools.deps.requirements import ToolRequirement
38+
from galaxy.tools import deps
39+
except ImportError:
40+
ToolRequirement = None # type: ignore
41+
deps = None
3442

3543
_logger = logging.getLogger("cwltool")
3644

@@ -144,6 +152,15 @@ def arg_parser(): # type: () -> argparse.ArgumentParser
144152
exgroup.add_argument("--quiet", action="store_true", help="Only print warnings and errors.")
145153
exgroup.add_argument("--debug", action="store_true", help="Print even more logging")
146154

155+
# help="Dependency resolver configuration file describing how to adapt 'SoftwareRequirement' packages to current system."
156+
parser.add_argument("--beta-dependency-resolvers-configuration", default=None, help=argparse.SUPPRESS)
157+
# help="Default root directory used by dependency resolvers configuration."
158+
parser.add_argument("--beta-dependencies-directory", default=None, help=argparse.SUPPRESS)
159+
# help="Use biocontainers for tools without an explicitly annotated Docker container."
160+
parser.add_argument("--beta-use-biocontainers", default=None, help=argparse.SUPPRESS, action="store_true")
161+
# help="Short cut to use Conda to resolve 'SoftwareRequirement' packages."
162+
parser.add_argument("--beta-conda-dependencies", default=None, help=argparse.SUPPRESS, action="store_true")
163+
147164
parser.add_argument("--tool-help", action="store_true", help="Print command line help for tool")
148165

149166
parser.add_argument("--relative-deps", choices=['primary', 'cwd'],
@@ -634,8 +651,20 @@ def main(argsl=None,
634651
stdout.write(json.dumps(processobj, indent=4))
635652
return 0
636653

654+
conf_file = getattr(args, "beta_dependency_resolvers_configuration", None) # Text
655+
use_conda_dependencies = getattr(args, "beta_conda_dependencies", None) # Text
656+
657+
make_tool_kwds = vars(args)
658+
659+
build_job_script = None # type: Callable[[Any, List[str]], Text]
660+
if conf_file or use_conda_dependencies:
661+
dependencies_configuration = DependenciesConfigruation(args) # type: DependenciesConfigruation
662+
make_tool_kwds["build_job_script"] = dependencies_configuration.build_job_script
663+
664+
make_tool_kwds["find_default_container"] = functools.partial(find_default_container, args)
665+
637666
tool = make_tool(document_loader, avsc_names, metadata, uri,
638-
makeTool, vars(args))
667+
makeTool, make_tool_kwds)
639668

640669
if args.print_rdf:
641670
printrdf(tool, document_loader.ctx, args.rdf_serializer, stdout)
@@ -748,5 +777,100 @@ def locToPath(p):
748777
_logger.removeHandler(stderr_handler)
749778
_logger.addHandler(defaultStreamHandler)
750779

780+
781+
COMMAND_WITH_DEPENDENCIES_TEMPLATE = string.Template("""#!/bin/bash
782+
$handle_dependencies
783+
python "run_job.py" "job.json"
784+
""")
785+
786+
787+
def find_default_container(args, builder):
    """Return the container image to use when the tool does not name one.

    Resolution order:

    1. ``args.default_container`` if it is set (returned unchanged).
    2. If ``args.beta_use_biocontainers`` is set, ask galaxy-lib's
       ``ContainerRegistry`` for the best Docker container description
       matching the tool's ``SoftwareRequirement`` packages and return its
       ``identifier``.
    3. Otherwise ``None``.

    :param args: parsed command line options; must expose
        ``default_container`` and ``beta_use_biocontainers``.
    :param builder: the job's builder, used only to look up the
        ``SoftwareRequirement`` via ``_get_dependencies``.
    :returns: a container identifier, or ``None`` when nothing applies or
        the registry finds no matching container.
    :raises Exception: if biocontainers are requested but galaxy-lib cannot
        be imported.
    """
    if args.default_container:
        return args.default_container
    if not args.beta_use_biocontainers:
        return None

    # galaxy-lib is an optional dependency, so it is imported lazily and
    # only when the biocontainers feature is actually requested.
    try:
        from galaxy.tools.deps.containers import ContainerRegistry, AppInfo, ToolInfo, DOCKER_CONTAINER_TYPE
    except ImportError:
        raise Exception("galaxy-lib not found")

    app_info = AppInfo(
        involucro_auto_init=True,
        enable_beta_mulled_containers=True,
    )  # type: AppInfo
    container_registry = ContainerRegistry(app_info)  # type: ContainerRegistry
    requirements = _get_dependencies(builder)
    tool_info = ToolInfo(requirements=requirements)  # type: ToolInfo
    container_description = container_registry.find_best_container_description([DOCKER_CONTAINER_TYPE], tool_info)

    # Report through the "cwltool" logger instead of a bare ``print``
    # statement: that kept debug output off stdout and is valid syntax on
    # both Python 2 and Python 3.
    import logging
    logging.getLogger("cwltool").debug(
        "Biocontainers container description: %s", container_description)

    if container_description:
        return container_description.identifier

    return None
809+
810+
811+
class DependenciesConfigruation(object):
    """Dependency resolution settings derived from the ``--beta-*`` options.

    An instance is handed to ``deps.build_dependency_manager`` as its
    configuration object and exposes:

    * ``use_tool_dependencies`` - whether dependency resolution is enabled.
    * ``tool_dependency_dir`` - root directory for resolved dependencies.
    * ``dependency_resolvers_config_file`` - resolver configuration file,
      or ``None`` when the Conda shortcut is used.
    * ``config_dict`` - extra settings (Conda auto install / auto init).

    NOTE: the class name is misspelled ("Configruation"); it is kept as-is
    because callers refer to it by this name.
    """

    def __init__(self, args):
        # type: (argparse.Namespace) -> None
        """Work out the resolution mode from the parsed arguments.

        An existing resolver configuration file takes precedence over the
        Conda shortcut. When neither applies, resolution is disabled.
        """
        conf_file = getattr(args, "beta_dependency_resolvers_configuration", None)
        tool_dependency_dir = getattr(args, "beta_dependencies_directory", None)
        conda_dependencies = getattr(args, "beta_conda_dependencies", None)

        # Initialise every attribute up front so that all of them exist in
        # every mode; previously the disabled mode left two of them unset
        # and reading them raised AttributeError.
        self.use_tool_dependencies = False
        self.tool_dependency_dir = tool_dependency_dir
        self.dependency_resolvers_config_file = None

        if conf_file is not None and os.path.exists(conf_file):
            self.use_tool_dependencies = True
            if not tool_dependency_dir:
                # Default to the directory that holds the configuration file.
                tool_dependency_dir = os.path.abspath(os.path.dirname(conf_file))
            self.tool_dependency_dir = tool_dependency_dir
            self.dependency_resolvers_config_file = conf_file
        elif conda_dependencies:
            if not tool_dependency_dir:
                # Default to a directory under the current working directory.
                tool_dependency_dir = os.path.abspath("./cwltool_deps")
            self.tool_dependency_dir = tool_dependency_dir
            self.use_tool_dependencies = True

    @property
    def config_dict(self):
        """Extra settings enabling automatic Conda install and init.

        NOTE(review): presumably read by galaxy-lib's dependency manager
        when it is built from this object - confirm against galaxy-lib.
        """
        return {
            'conda_auto_install': True,
            'conda_auto_init': True,
        }

    def build_job_script(self, builder, command):
        # type: (Any, List[str]) -> Text
        """Render the bash job script that prepares dependencies first.

        :param builder: the job's builder; supplies the
            ``SoftwareRequirement`` and ``tmpdir`` (used as job directory).
        :param command: accepted for interface compatibility; not used when
            rendering the script.
        :returns: the text of ``COMMAND_WITH_DEPENDENCIES_TEMPLATE`` with
            the dependency shell commands substituted in.
        :raises Exception: if galaxy-lib could not be imported.
        """
        if deps is None:
            raise Exception("galaxy-lib not found")
        tool_dependency_manager = deps.build_dependency_manager(self)  # type: deps.DependencyManager
        dependencies = _get_dependencies(builder)
        handle_dependencies = ""  # str
        if dependencies:
            handle_dependencies = "\n".join(tool_dependency_manager.dependency_shell_commands(dependencies, job_directory=builder.tmpdir))

        template_kwds = dict(handle_dependencies=handle_dependencies)  # type: Dict[str, str]
        job_script = COMMAND_WITH_DEPENDENCIES_TEMPLATE.substitute(template_kwds)
        return job_script
853+
854+
855+
def _get_dependencies(builder):
    # type: (Any) -> List[ToolRequirement]
    """Translate the tool's ``SoftwareRequirement`` into ToolRequirements.

    Each entry of the requirement's ``packages`` list becomes one
    ``ToolRequirement`` of type ``"package"``. The package name is the part
    of ``package`` after the last ``#``. When ``version`` is a list only its
    first element is used, and an empty list means no version.

    Returns an empty list when the builder has no ``SoftwareRequirement`` or
    the requirement lists no packages.
    """
    requirement, _ = get_feature(builder, "SoftwareRequirement")
    if not requirement:
        return []

    package_entries = requirement.get("packages")
    if not package_entries:
        return []

    tool_requirements = []  # type: List[ToolRequirement]
    for entry in package_entries:
        version = entry.get("version", None)
        if isinstance(version, list):
            # Only the first listed version is honoured.
            version = version[0] if version else None
        spec = dict(
            name=entry["package"].split("#")[-1],
            version=version,
            type="package",
        )
        tool_requirements.append(ToolRequirement.from_dict(spec))
    return tool_requirements
874+
751875
if __name__ == "__main__":
752876
sys.exit(main(sys.argv[1:]))

Diff for: cwltool/process.py

+12
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,18 @@ def _init_job(self, joborder, **kwargs):
502502

503503
builder.resources = self.evalResources(builder, kwargs)
504504

505+
build_job_script = kwargs.get("build_job_script", None) # type: Callable[[Builder, List[str]], Text]
506+
curried_build_job_script = None # type: Callable[[List[str]], Text]
507+
if build_job_script:
508+
curried_build_job_script = lambda commands: build_job_script(builder, commands)
509+
builder.build_job_script = curried_build_job_script
510+
511+
find_default_container = kwargs.get("find_default_container", None) # type: Callable[[Builder], Text]
512+
curried_find_default_container = None # type: Callable[[], Text]
513+
if find_default_container:
514+
curried_find_default_container = lambda: find_default_container(builder)
515+
builder.find_default_container = curried_find_default_container
516+
505517
return builder
506518

507519
def evalResources(self, builder, kwargs):

Diff for: tests/2.fasta

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
>Sequence 561 BP; 135 A; 106 C; 98 G; 222 T; 0 other;
2+
gttcgatgcc taaaatacct tcttttgtcc ctacacagac cacagttttc ctaatggctt
3+
tacaccgact agaaattctt gtgcaagcac taattgaaag cggttggcct agagtgttac
4+
cggtttgtat agctgagcgc gtctcttgcc ctgatcaaag gttcattttc tctactttgg
5+
aagacgttgt ggaagaatac aacaagtacg agtctctccc ccctggtttg ctgattactg
6+
gatacagttg taataccctt cgcaacaccg cgtaactatc tatatgaatt attttccctt
7+
tattatatgt agtaggttcg tctttaatct tcctttagca agtcttttac tgttttcgac
8+
ctcaatgttc atgttcttag gttgttttgg ataatatgcg gtcagtttaa tcttcgttgt
9+
ttcttcttaa aatatttatt catggtttaa tttttggttt gtacttgttc aggggccagt
10+
tcattattta ctctgtttgt atacagcagt tcttttattt ttagtatgat tttaatttaa
11+
aacaattcta atggtcaaaa a

Diff for: tests/2.fastq

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
@EAS54_6_R1_2_1_413_324
2+
CCCTTCTTGTCTTCAGCGTTTCTCC
3+
+
4+
;;3;;;;;;;;;;;;7;;;;;;;88
5+
@EAS54_6_R1_2_1_540_792
6+
TTGGCAGGCCAAGGCCGATGGATCA
7+
+
8+
;;;;;;;;;;;7;;;;;-;;;3;83
9+
@EAS54_6_R1_2_1_443_348
10+
GTTGCTTCTGGCGTGGGTGGGGGGG
11+
+EAS54_6_R1_2_1_443_348
12+
;;;;;;;;;;;9;7;;.7;393333

Diff for: tests/seqtk_seq.cwl

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
cwlVersion: v1.0
2+
class: CommandLineTool
3+
id: "seqtk_seq"
4+
doc: "Convert to FASTA (seqtk)"
5+
inputs:
6+
- id: input1
7+
type: File
8+
inputBinding:
9+
position: 1
10+
prefix: "-a"
11+
outputs:
12+
- id: output1
13+
type: File
14+
outputBinding:
15+
glob: out
16+
baseCommand: ["seqtk", "seq"]
17+
arguments: []
18+
stdout: out
19+
hints:
20+
SoftwareRequirement:
21+
packages:
22+
- package: seqtk
23+
version:
24+
- r93

Diff for: tests/seqtk_seq_job.json

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"input1": {
3+
"class": "File",
4+
"location": "2.fastq"
5+
}
6+
}

Diff for: typeshed/2.7/galaxy/__init__.pyi

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Stubs for galaxy (Python 3.5)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
5+
from typing import Any
6+
7+
PROJECT_NAME = ... # type: str
8+
PROJECT_OWNER = ... # type: str
9+
PROJECT_USERAME = ... # type: str
10+
PROJECT_URL = ... # type: str
11+
PROJECT_AUTHOR = ... # type: str
12+
PROJECT_EMAIL = ... # type: str
13+
RAW_CONTENT_URL = ... # type: Any

Diff for: typeshed/2.7/galaxy/tools/__init__.pyi

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Stubs for galaxy.tools (Python 3.5)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+

Diff for: typeshed/2.7/galaxy/tools/deps/__init__.pyi

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Stubs for galaxy.tools.deps (Python 3.5)
2+
#
3+
# NOTE: This dynamically typed stub was automatically generated by stubgen.
4+
5+
from typing import Any, Optional
6+
from .resolvers import NullDependency as NullDependency
7+
from .resolvers.conda import CondaDependencyResolver as CondaDependencyResolver
8+
from .resolvers.galaxy_packages import GalaxyPackageDependencyResolver as GalaxyPackageDependencyResolver
9+
from .resolvers.tool_shed_packages import ToolShedPackageDependencyResolver as ToolShedPackageDependencyResolver
10+
11+
log = ... # type: Any
12+
EXTRA_CONFIG_KWDS = ... # type: Any
13+
CONFIG_VAL_NOT_FOUND = ... # type: Any
14+
15+
def build_dependency_manager(config: Any): ... # type: DependencyManager
16+
17+
class NullDependencyManager:
18+
dependency_resolvers = ... # type: Any
19+
def uses_tool_shed_dependencies(self): ...
20+
def dependency_shell_commands(self, requirements: Any, **kwds) -> List[str]: ...
21+
def find_dep(self, name, version: Optional[Any] = ..., type: str = ..., **kwds): ...
22+
23+
class DependencyManager:
24+
extra_config = ... # type: Any
25+
default_base_path = ... # type: Any
26+
resolver_classes = ... # type: Any
27+
dependency_resolvers = ... # type: Any
28+
def __init__(self, default_base_path, conf_file: Optional[Any] = ..., **extra_config) -> None: ...
29+
def dependency_shell_commands(self, requirements: Any, **kwds) -> List[str]: ...
30+
def requirements_to_dependencies(self, requirements, **kwds): ...
31+
def uses_tool_shed_dependencies(self): ...
32+
def find_dep(self, name, version: Optional[Any] = ..., type: str = ..., **kwds): ...

0 commit comments

Comments
 (0)