Skip to content

Commit bd5fd41

Browse files
committed
fix manifest error by adding checkpoint
1 parent 1f9416e commit bd5fd41

File tree

6 files changed

+63
-15
lines changed

6 files changed

+63
-15
lines changed

config/testing/params_refcon.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22
reference_container_store: "."
33
reference_container_names:
4-
- test_v0
4+
- test_v1
55
use_reference_container: True
66

77

workflow/Snakefile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ rule run_all:
99
# add output of final rule(s) here
1010
# to trigger complete run
1111
[],
12-
1312

1413

1514
onsuccess:

workflow/rules/commons/02_pyutils.smk

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,9 @@ def load_accounting_information(wildcards):
244244
logerr(f"Size of accounting file list: {len(created_files)}")
245245
except FileNotFoundError:
246246
if VERBOSE:
247-
logerr(f"Accounting file does not exist (yet): {account_file}")
247+
warn_msg = f"Accounting file does not exist (yet): {account_file}\n"
248+
warn_msg += "Please RERUN the workflow in DRY RUN MODE to create the file accounts!"
249+
logerr(warn_msg)
248250
return sorted(created_files)
249251

250252

@@ -484,16 +486,35 @@ def collect_git_labels():
484486
# =======================================================
485487

486488

489+
def trigger_refcon_manifest_caching(wildcards):
    """
    Input function that triggers the checkpoint "refcon_cache_manifests",
    which merges the manifest caches of all reference containers into
    one file. Requesting the checkpoint output here is needed to get a
    start-to-end run, otherwise "refcon_find_container" would produce
    an error.

    Args:
        wildcards: wildcards of the requesting rule; forwarded
            unchanged to the checkpoint's get() call.

    Returns:
        str: path to the merged reference container manifest cache,
        i.e. the checkpoint output named "cache".

    Raises:
        AssertionError: if the checkpoint output does not resolve to
            the expected location below DIR_PROC.
    """
    refcon_manifest_cache = str(
        checkpoints.refcon_cache_manifests.get(**wildcards).output.cache
    )
    expected_path = DIR_PROC.joinpath(".cache", "refcon", "refcon_manifests.cache")
    # The following assert safeguards against future changes of the
    # checkpoint output path; both sides are resolved so that relative
    # and absolute spellings of the same location compare equal.
    observed_path = pathlib.Path(refcon_manifest_cache).resolve()
    assert observed_path == expected_path.resolve(), (
        "Unexpected path to reference container manifest cache: "
        f"{observed_path} (expected: {expected_path.resolve()})"
    )
    return refcon_manifest_cache
504+
505+
487506
def refcon_find_container(manifest_cache, ref_filename):
488507

489508
if not pathlib.Path(manifest_cache).is_file():
490509
if DRYRUN:
491510
return ""
492511
else:
493-
raise FileNotFoundError(
494-
f"Reference container manifest cache does not exist: {manifest_cache}"
495-
)
496-
512+
if VERBOSE:
513+
warn_msg = "Warning: reference container manifest cache "
514+
warn_msg += "does not exist yet. Returning empty reference "
515+
warn_msg += "container path."
516+
logerr(warn_msg)
517+
return ""
497518

498519
manifests = pandas.read_csv(manifest_cache, sep="\t", header=0)
499520

workflow/rules/commons/03_smkutils.smk

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,13 +98,33 @@ rule create_manifest:
9898
import collections
9999
import pandas
100100

101+
if not all([af.is_file() for af in ACCOUNTING_FILES.values()]):
102+
err_msg = "Accounting files have not been created yet, cannot create workflow manifest.\n"
103+
err_msg += "Please rerun the workflow twice in dry run mode:\n"
104+
err_msg += "snakemake --dry-run (or: -n) [...other options...]"
105+
logerr(err_msg)
106+
raise RuntimeError(
107+
"Cannot proceed with workflow execution w/o accouting files."
108+
)
109+
110+
if len(input.manifest_files) == 0:
111+
warn_msg = "No files recorded for inclusion in workflow manifest.\n"
112+
warn_msg += "Are you sure you did not forget annotating rules with:\n"
113+
warn_msg += "commons/02_pyutils.smk::register_input()\n"
114+
warn_msg += "commons/02_pyutils.smk::register_result()\n"
115+
logerr(warn_msg)
116+
101117
records = collections.defaultdict(dict)
102118
for line in fileinput.input(ACCOUNTING_FILES.values(), mode="r"):
103119
path_id, path_record = process_accounting_record(line)
104120
records[path_id].update(path_record)
105121

106122
df = pandas.DataFrame.from_records(list(records.values()))
107-
df.sort_values(["file_category", "file_name"], ascending=True)
123+
if df.empty:
124+
logerr("Manifest DataFrame is empty - aborting")
125+
raise RuntimeError("Manifest DataFrame is empty")
126+
127+
df.sort_values(["file_category", "file_name"], ascending=True, inplace=True)
108128
reordered_columns = [
109129
"file_name",
110130
"file_category",

workflow/rules/commons/05_refcon.smk

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ if USE_REFERENCE_CONTAINER:
1414
shell:
1515
"{input.sif} manifest > {output.manifest}"
1616

17-
1817
rule refcon_run_get_file:
1918
"""
2019
Snakemake interacts with Singularity containers using "exec",
@@ -26,7 +25,7 @@ if USE_REFERENCE_CONTAINER:
2625
(i.e., treat them like a regular file)
2726
"""
2827
input:
29-
cache=DIR_PROC.joinpath(".cache", "refcon", "refcon_manifests.cache"),
28+
cache=trigger_refcon_manifest_caching,
3029
output:
3130
DIR_GLOBAL_REF.joinpath("{filename}"),
3231
envmodules:
@@ -39,8 +38,7 @@ if USE_REFERENCE_CONTAINER:
3938
shell:
4039
"{params.refcon_path} get {wildcards.filename} {output}"
4140

42-
43-
rule refcon_cache_manifests:
41+
checkpoint refcon_cache_manifests:
4442
input:
4543
manifests=expand(
4644
DIR_PROC.joinpath(".cache", "refcon", "{refcon_name}.manifest"),

workflow/snaketests.smk

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ rule create_test_file:
3131
# END OF RUN BLOCK
3232

3333

34+
3435
rule test_log_functions:
3536
"""
3637
Test pyutil logging functions
@@ -51,6 +52,7 @@ rule test_log_functions:
5152
# END OF RUN BLOCK
5253

5354

55+
5456
rule test_find_script_success:
5557
input:
5658
expand(rules.test_log_functions.output, logtype=["err", "out"]),
@@ -69,6 +71,7 @@ rule test_find_script_success:
6971
# END OF RUN BLOCK
7072

7173

74+
7275
rule test_find_script_fail:
7376
input:
7477
rules.test_find_script_success.output,
@@ -88,6 +91,7 @@ rule test_find_script_fail:
8891
# END OF RUN BLOCK
8992

9093

94+
9195
rule test_rsync_f2d:
9296
input:
9397
rules.create_test_file.output,
@@ -106,6 +110,7 @@ rule test_rsync_f2d:
106110
# END OF RUN BLOCK
107111

108112

113+
109114
rule test_rsync_f2f:
110115
input:
111116
rules.create_test_file.output,
@@ -118,6 +123,7 @@ rule test_rsync_f2f:
118123
# END OF RUN BLOCK
119124

120125

126+
121127
rule test_rsync_fail:
122128
input:
123129
rules.create_test_file.output,
@@ -138,6 +144,7 @@ rule test_rsync_fail:
138144
# END OF RUN BLOCK
139145

140146

147+
141148
rule test_git_labels:
142149
input:
143150
rules.test_rsync_f2d.output,
@@ -155,15 +162,18 @@ rule test_git_labels:
155162
# END OF RUN BLOCK
156163

157164

165+
158166
if USE_REFERENCE_CONTAINER:
159167
CONTAINER_TEST_FILES = [
160168
DIR_GLOBAL_REF.joinpath("genome.fasta.fai"),
169+
DIR_GLOBAL_REF.joinpath("exclusions.bed"),
170+
DIR_GLOBAL_REF.joinpath("hg38_full.fasta.fai"),
161171
DIR_PROC.joinpath(".cache", "refcon", "refcon_manifests.cache"),
162172
]
163-
REGISTER_REFERENCE_FILE = CONTAINER_TEST_FILES[0]
173+
REGISTER_REFERENCE_FILES = CONTAINER_TEST_FILES[:3]
164174
else:
165175
CONTAINER_TEST_FILES = []
166-
REGISTER_REFERENCE_FILE = []
176+
REGISTER_REFERENCE_FILES = []
167177

168178

169179
rule trigger_tests:
@@ -174,7 +184,7 @@ rule trigger_tests:
174184
DIR_RES.joinpath("testing", "all-ok.txt"),
175185
params:
176186
acc_out=lambda wildcards, output: register_result(output),
177-
acc_ref=lambda wildcards, input: register_reference(REGISTER_REFERENCE_FILE),
187+
acc_ref=lambda wildcards, input: register_reference(REGISTER_REFERENCE_FILES),
178188
run:
179189
with open(output[0], "w") as testfile:
180190
testfile.write("ok")

0 commit comments

Comments
 (0)