Skip to content

Commit 8f753d4

Browse files
authored
Merge pull request #182 from cgat-developers/AC-kubernetes
Ac kubernetes
2 parents adf1793 + 1a990a8 commit 8f753d4

File tree

11 files changed

+872
-102
lines changed

11 files changed

+872
-102
lines changed

.codecov.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
codecov:
2+
notify:
3+
require_ci_to_pass: yes
4+
5+
coverage:
6+
precision: 2
7+
round: down
8+
paths:
9+
- ./cgatcore/**
10+
ignore:
11+
- tests/**

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# CGAT-core
2+
3+
[![codecov](https://codecov.io/gh/cgat-developers/cgat-core/branch/main/graph/badge.svg)](https://codecov.io/gh/cgat-developers/cgat-core)
14

25
![CGAT-core](https://github.com/cgat-developers/cgat-core/blob/master/docs/img/CGAT_logo.png)
36
----------------------------------------
@@ -24,4 +27,3 @@ Installation
2427
The following sections describe how to install the [cgatcore](https://cgat-developers.github.io/cgat-core/) framework.
2528

2629
The preferred method to install the cgatcore is using conda, by following the instructions on [read the docs](https://cgat-core.readthedocs.io/en/latest/getting_started/Installation.html). However, there are a few other methods to install cgatcore, including pip and our own bash script installer.
27-

cgatcore/pipeline/base_executor.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# cgatcore/pipeline/base_executor.py
2+
import os
3+
import tempfile
4+
5+
6+
class BaseExecutor:
    """Base class for executors that defines the interface for running jobs.

    Concrete backends (local, SGE, Slurm, Torque, Kubernetes, ...) subclass
    this and implement :meth:`run` and, where applicable,
    :meth:`collect_metric_data`. Instances are usable as context managers.
    """

    def __init__(self, **kwargs):
        """Initialize the executor with configuration options.

        Args:
            **kwargs: Arbitrary configuration options, stored in
                ``self.config``. Recognised here: ``job_script_dir`` — the
                directory job scripts are written into (defaults to the
                system temporary directory).
        """
        self.config = kwargs
        self.task_name = "base_task"  # Should be overridden by subclasses
        self.default_total_time = 0  # Should be overridden by subclasses

    def run(self, statement, *args, **kwargs):
        """Run the given job statement. This should be implemented by subclasses."""
        raise NotImplementedError("Subclasses must implement this method")

    def collect_metric_data(self, *args, **kwargs):
        """Collect metric data if needed."""
        raise NotImplementedError("Subclasses must implement this method")

    def collect_benchmark_data(self, statements, resource_usage=None):
        """Collect benchmark data for job execution.

        Args:
            statements (list): List of executed statements
            resource_usage (list, optional): Resource usage data

        Returns:
            dict: Benchmark data including task name and execution time
        """
        return {
            "task": self.task_name,
            "total_t": self.default_total_time,
            "statements": statements,
            "resource_usage": resource_usage or []
        }

    def build_job_script(self, statement):
        """Build a simple job script for execution.

        Args:
            statement (str): The command or script to be executed.

        Returns:
            tuple: A tuple containing the full command (as a string) and
                the path where the job script is stored.
        """
        job_script_dir = self.config.get("job_script_dir", tempfile.gettempdir())
        os.makedirs(job_script_dir, exist_ok=True)

        # Fix: a fixed "job_script.sh" name would be silently overwritten
        # when several jobs build scripts in the same directory; mkstemp
        # gives each call its own uniquely-named script.
        fd, script_path = tempfile.mkstemp(
            prefix="job_script_", suffix=".sh", dir=job_script_dir)
        with os.fdopen(fd, "w") as script_file:
            script_file.write(f"#!/bin/bash\n\n{statement}\n")

        os.chmod(script_path, 0o755)  # Make it executable
        return statement, script_path

    def __enter__(self):
        """Enter the runtime context related to this object."""
        # Any initialisation logic needed for the executor can be added here
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Exit the runtime context related to this object."""
        # Cleanup logic, if any, can be added here
        pass

cgatcore/pipeline/execution.py

Lines changed: 65 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,12 @@ def process_data(infile, outfile):
8282
from cgatcore.pipeline.files import get_temp_filename, get_temp_dir
8383
from cgatcore.pipeline.parameters import substitute_parameters, get_params
8484
from cgatcore.pipeline.cluster import get_queue_manager, JobInfo
85+
from cgatcore.pipeline.executors import SGEExecutor, SlurmExecutor, TorqueExecutor, LocalExecutor
86+
try:
87+
from cgatcore.pipeline.kubernetes import KubernetesExecutor
88+
except ImportError:
89+
KubernetesExecutor = None # Fallback if Kubernetes is not available
90+
8591

8692
# talking to a cluster
8793
try:
@@ -424,6 +430,50 @@ def interpolate_statement(statement, kwargs):
424430
return statement
425431

426432

433+
def get_executor(options=None):
    """Return an executor instance based on the configured queue manager.

    Parameters:
    - options (dict): Dictionary containing execution options,
      including "cluster_queue_manager". Defaults to ``get_params()``.

    Returns:
    - Executor instance appropriate for the specified queue manager.
    """
    if options is None:
        options = get_params()

    # Testing mode and explicit to_cluster=False both force local execution.
    if options.get("testing", False) or not options.get("to_cluster", True):
        return LocalExecutor(**options)

    queue_manager = options.get("cluster_queue_manager", None)

    # Kubernetes has no required command-line binary; it is available
    # whenever the optional import succeeded.
    if queue_manager == "kubernetes" and KubernetesExecutor is not None:
        return KubernetesExecutor(**options)

    # DRMAA-style backends: each is usable only if its submit binary
    # is on PATH.
    cluster_backends = {
        "sge": (SGEExecutor, "qsub"),
        "slurm": (SlurmExecutor, "sbatch"),
        "torque": (TorqueExecutor, "qsub"),
    }
    selected = cluster_backends.get(queue_manager)
    if selected is not None:
        executor_class, submit_binary = selected
        if shutil.which(submit_binary) is not None:
            return executor_class(**options)

    # NOTE(review): silently falling back to local execution may hide a
    # misconfigured cluster; consider raising instead.
    return LocalExecutor(**options)
475+
476+
427477
def join_statements(statements, infile, outfile=None):
428478
'''join a chain of statements into a single statement.
429479
@@ -1318,32 +1368,6 @@ class LocalArrayExecutor(LocalExecutor):
13181368
pass
13191369

13201370

1321-
def make_runner(**kwargs):
    """factory function returning an object capable of executing
    a list of command line statements.
    """

    # A job array is requested when the caller passed a non-None job_array.
    as_array = kwargs.get("job_array") is not None

    # run on cluster if:
    # * to_cluster is not defined or set to True
    # * command line option without_cluster is set to False
    # * an SGE session is present
    if will_run_on_cluster(kwargs):
        runner_class = GridArrayExecutor if as_array else GridExecutor
    else:
        runner_class = LocalArrayExecutor if as_array else LocalExecutor

    return runner_class(**kwargs)
1345-
1346-
13471371
def run(statement, **kwargs):
13481372
"""run a command line statement.
13491373
@@ -1442,7 +1466,7 @@ def run(statement, **kwargs):
14421466
"""
14431467
logger = get_logger()
14441468

1445-
# combine options using priority
1469+
# Combine options using priority
14461470
options = dict(list(get_params().items()))
14471471
caller_options = get_caller_locals()
14481472
options.update(list(caller_options.items()))
@@ -1451,7 +1475,7 @@ def run(statement, **kwargs):
14511475
del options["self"]
14521476
options.update(list(kwargs.items()))
14531477

1454-
# inject params named tuple from TaskLibrary functions into option
1478+
# Inject params named tuple from TaskLibrary functions into option
14551479
# dict. This allows overriding options set in the code with options set
14561480
# in a .yml file
14571481
if "params" in options:
@@ -1460,7 +1484,7 @@ def run(statement, **kwargs):
14601484
except AttributeError:
14611485
pass
14621486

1463-
# insert parameters supplied through simplified interface such
1487+
# Insert parameters supplied through simplified interface such
14641488
# as job_memory, job_options, job_queue
14651489
options['cluster']['options'] = options.get(
14661490
'job_options', options['cluster']['options'])
@@ -1483,34 +1507,33 @@ def run(statement, **kwargs):
14831507

14841508
options["task_name"] = calling_module + "." + get_calling_function()
14851509

1486-
# build statements using parameter interpolation
1510+
# Build statements using parameter interpolation
14871511
if isinstance(statement, list):
1488-
statement_list = []
1489-
for stmt in statement:
1490-
statement_list.append(interpolate_statement(stmt, options))
1512+
statement_list = [interpolate_statement(stmt, options) for stmt in statement]
14911513
else:
14921514
statement_list = [interpolate_statement(statement, options)]
14931515

14941516
if len(statement_list) == 0:
1495-
logger.warn("no statements found - no execution")
1517+
logger.warn("No statements found - no execution")
14961518
return []
14971519

14981520
if options.get("dryrun", False):
14991521
for statement in statement_list:
1500-
logger.info("dry-run: {}".format(statement))
1522+
logger.info("Dry-run: {}".format(statement))
15011523
return []
15021524

1503-
# execute statement list
1504-
runner = make_runner(**options)
1505-
with runner as r:
1506-
benchmark_data = r.run(statement_list)
1525+
# Use get_executor to get the appropriate executor
1526+
executor = get_executor(options) # Updated to use get_executor
1527+
1528+
# Execute statement list within the context of the executor
1529+
with executor as e:
1530+
benchmark_data = e.run(statement_list)
15071531

1508-
# log benchmark_data
1532+
# Log benchmark data
15091533
for data in benchmark_data:
15101534
logger.info(json.dumps(data))
15111535

1512-
BenchmarkData = collections.namedtuple(
1513-
'BenchmarkData', sorted(benchmark_data[0]))
1536+
BenchmarkData = collections.namedtuple('BenchmarkData', sorted(benchmark_data[0]))
15141537
return [BenchmarkData(**d) for d in benchmark_data]
15151538

15161539

0 commit comments

Comments
 (0)