Skip to content

Commit d82b5e9

Browse files
committed
log passes
1 parent f707614 commit d82b5e9

File tree

4 files changed: 17 additions and 13 deletions.

bin/identify_tophit_and_contaminants2.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -483,6 +483,7 @@ def process_reports(afanc_json_path, kraken_json_path, supposed_species, unmix_m
483483
parser.add_argument('myco_dir', metavar='myco_dir', type=str, help='Path to myco directory')
484484
parser.add_argument('prev_species_json', metavar='prev_species_json', type=str, help='Path to previous species json file. Can be set to \'null\'')
485485
parser.add_argument('permissive', metavar='permissive', type=str, help="Is either \'yes\' or \'no\', given in response to the question: do you want to carry on to Clockwork regardless of errors?")
486+
parser.add_argument('pass_number', metavar='pass_number', type=int, help="Pass number. Refers to what pass of decontamination the pipeline is on")
486487
args = parser.parse_args()
487488

488489
# REQUIREMENTS
@@ -495,6 +496,7 @@ def process_reports(afanc_json_path, kraken_json_path, supposed_species, unmix_m
495496
myco_dir = sys.argv[6]
496497
prev_species_json = sys.argv[7]
497498
permissive = sys.argv[8]
499+
pass_number = sys.argv[9]
498500

499501
# read assembly summary
500502
urls, tax_ids = read_assembly_summary(assembly_file)
@@ -509,6 +511,6 @@ def process_reports(afanc_json_path, kraken_json_path, supposed_species, unmix_m
509511
f.write(url + "\n")
510512

511513
# print final file
512-
out_file2 = sample_id + '_species_in_sample.json'
514+
out_file2 = sample_id + '_species_in_sample_pass_' + str(pass_number) + '.json'
513515
with open(out_file2, 'w') as f:
514516
json.dump(out, f, indent = 4)

docker/Dockerfile.preprocessing-0.9.8.1

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
FROM ubuntu:focal
22

3+
34
LABEL maintainer="[email protected]" \
45
about.summary="container for the preprocessing workflow"
56

modules/decontaminationModules.nf

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ process identifyBacterialContaminants {
3434
report_json = "${sample_name}_report.json"
3535

3636
"""
37-
identify_tophit_and_contaminants2.py ${afanc_json} ${kraken_json} ${refseq} ${params.species} ${params.unmix_myco} ${resources} null ${params.permissive}
37+
identify_tophit_and_contaminants2.py ${afanc_json} ${kraken_json} ${refseq} ${params.species} ${params.unmix_myco} ${resources} null ${params.permissive} ${pass}
3838
3939
contam_to_remove=\$(jq -r '.summary_questions.are_there_contaminants' ${sample_name}_species_in_sample.json)
4040
acceptable_species=\$(jq -r '.summary_questions.is_the_top_species_appropriate' ${sample_name}_species_in_sample.json)
@@ -340,40 +340,40 @@ process summarise {
340340
tuple val(sample_name), path("${sample_name}_species_in_sample.json"), stdout, emit: summary_json
341341
stdout emit: do_we_break
342342
path "${sample_name}_err.json", emit: summary_log optional true
343-
path "${sample_name}_report.json", emit: summary_report optional true
343+
path "${sample_name}_pass_${pass}_report.json", emit: summary_report optional true
344344
val(pass), emit: pass_number
345345

346346
script:
347347
error_log = "${sample_name}_err.json"
348-
report_json = "${sample_name}_report.json"
349-
348+
report_json = "${sample_name}_pass_${pass}_report.json"
349+
species_in_sample = "${sample_name}_species_in_sample_pass_${pass}.json"
350350
"""
351-
identify_tophit_and_contaminants2.py ${afanc_json} ${kraken_json} ${refseq} ${params.species} ${params.unmix_myco} ${resources} ${prev_species_json} ${params.permissive}
351+
identify_tophit_and_contaminants2.py ${afanc_json} ${kraken_json} ${refseq} ${params.species} ${params.unmix_myco} ${resources} ${prev_species_json} ${params.permissive} ${pass}
352352
353353
354-
contam_to_remove=\$(jq -r '.summary_questions.are_there_contaminants' ${sample_name}_species_in_sample.json)
355-
acceptable_species=\$(jq -r '.summary_questions.is_the_top_species_appropriate' ${sample_name}_species_in_sample.json)
356-
top_hit=\$(jq -r '.top_hit.name' ${sample_name}_species_in_sample.json)
354+
contam_to_remove=\$(jq -r '.summary_questions.are_there_contaminants' ${species_in_sample})
355+
acceptable_species=\$(jq -r '.summary_questions.is_the_top_species_appropriate' ${species_in_sample})
356+
top_hit=\$(jq -r '.top_hit.name' ${species_in_sample})
357357
358358
if [ \$contam_to_remove == 'yes' ]; then
359359
if [ "${params.permissive}" == "no" ]; then
360360
printf "${sample_name}"
361-
echo '{"error":"sample remains contaminated, even after attempting to resolve this"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${software_json} ${error_log} ${sample_name}_species_in_sample.json > ${report_json}
361+
echo '{"error":"sample remains contaminated, even after attempting to resolve this"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${software_json} ${error_log} ${species_in_sample} > ${report_json}
362362
else
363363
if [ "${pass}" == 2 ]; then
364364
printf "NOW_ALIGN_TO_REF_${sample_name}"
365365
else
366366
printf "${sample_name}"
367367
fi
368-
echo '{"warning":"sample remains contaminated, even after attempting to resolve this"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${software_json} ${error_log} ${sample_name}_species_in_sample.json > ${report_json}
368+
echo '{"warning":"sample remains contaminated, even after attempting to resolve this"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${software_json} ${error_log} ${species_in_sample} > ${report_json}
369369
fi
370370
fi
371371
372372
if [ \$contam_to_remove == 'no' ] && [ \$acceptable_species == 'yes' ]; then
373373
printf "NOW_ALIGN_TO_REF_${sample_name}"
374374
elif [ \$contam_to_remove == 'no' ] && [ \$acceptable_species == 'no' ]; then
375375
jq -n --arg key "\$top_hit" '{"error": ("top hit " + \$key + " does not have a reference genome. Sample will not proceed beyond preprocessing workflow.")}' > ${error_log} && \
376-
jq -s ".[0] * .[1] * .[2]" ${software_json} ${error_log} ${sample_name}_species_in_sample.json > ${report_json}
376+
jq -s ".[0] * .[1] * .[2]" ${software_json} ${error_log} ${species_in_sample} > ${report_json}
377377
printf "DO_NOT_PROCEED_${sample_name}"
378378
fi
379379
"""

modules/preprocessingModules.nf

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -374,7 +374,8 @@ process afanc {
374374
cp ${sample_name}/${sample_name}.json ${sample_name}_afanc_original.json
375375
reformat_afanc_json.py ${sample_name}/${sample_name}.json
376376
377-
identify_tophit_and_contaminants2.py ${afanc_report} ${kraken_json} $refseq_path ${params.species} ${params.unmix_myco} $resource_dir null ${params.permissive}
377+
identify_tophit_and_contaminants2.py ${afanc_report} ${kraken_json} $refseq_path ${params.species} ${params.unmix_myco} $resource_dir null ${params.permissive} ${pass}
378+
mv "${sample_name}"_species_in_sample_pass_0.json "${sample_name}"_species_in_sample.json
378379
379380
echo '{"error":"Kraken's top family hit either wasn't Mycobacteriaceae, or there were < 100k Mycobacteriaceae reads. Sample will not proceed further than afanc."}' | jq '.' > ${error_log} && printf "no" && jq -s ".[0] * .[1] * .[2]" ${software_json} ${error_log} ${sample_name}_species_in_sample.json > ${report_json}
380381

0 commit comments

Comments
 (0)