Skip to content

Commit

Permalink
log passes
Browse files: browse the repository at this point in the history
  • Loading branch information
WhalleyT committed Aug 22, 2024
1 parent f707614 commit d82b5e9
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 13 deletions.
4 changes: 3 additions & 1 deletion bin/identify_tophit_and_contaminants2.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ def process_reports(afanc_json_path, kraken_json_path, supposed_species, unmix_m
parser.add_argument('myco_dir', metavar='myco_dir', type=str, help='Path to myco directory')
parser.add_argument('prev_species_json', metavar='prev_species_json', type=str, help='Path to previous species json file. Can be set to \'null\'')
parser.add_argument('permissive', metavar='permissive', type=str, help="Is either \'yes\' or \'no\', given in response to the question: do you want to carry on to Clockwork regardless of errors?")
parser.add_argument('pass_number', metavar='pass_number', type=int, help="Pass number. Refers to what pass of decontamination the pipeline is on")
args = parser.parse_args()

# REQUIREMENTS
Expand All @@ -495,6 +496,7 @@ def process_reports(afanc_json_path, kraken_json_path, supposed_species, unmix_m
myco_dir = sys.argv[6]
prev_species_json = sys.argv[7]
permissive = sys.argv[8]
pass_number = sys.argv[9]

# read assembly summary
urls, tax_ids = read_assembly_summary(assembly_file)
Expand All @@ -509,6 +511,6 @@ def process_reports(afanc_json_path, kraken_json_path, supposed_species, unmix_m
f.write(url + "\n")

# print final file
out_file2 = sample_id + '_species_in_sample.json'
out_file2 = sample_id + '_species_in_sample_pass_' + str(pass_number) + '.json'
with open(out_file2, 'w') as f:
json.dump(out, f, indent = 4)
1 change: 1 addition & 0 deletions docker/Dockerfile.preprocessing-0.9.8.1
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
FROM ubuntu:focal


LABEL maintainer="[email protected]" \
about.summary="container for the preprocessing workflow"

Expand Down
22 changes: 11 additions & 11 deletions modules/decontaminationModules.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ process identifyBacterialContaminants {
report_json = "${sample_name}_report.json"

"""
identify_tophit_and_contaminants2.py ${afanc_json} ${kraken_json} ${refseq} ${params.species} ${params.unmix_myco} ${resources} null ${params.permissive}
identify_tophit_and_contaminants2.py ${afanc_json} ${kraken_json} ${refseq} ${params.species} ${params.unmix_myco} ${resources} null ${params.permissive} ${pass}
contam_to_remove=\$(jq -r '.summary_questions.are_there_contaminants' ${sample_name}_species_in_sample.json)
acceptable_species=\$(jq -r '.summary_questions.is_the_top_species_appropriate' ${sample_name}_species_in_sample.json)
Expand Down Expand Up @@ -340,40 +340,40 @@ process summarise {
tuple val(sample_name), path("${sample_name}_species_in_sample.json"), stdout, emit: summary_json
stdout emit: do_we_break
path "${sample_name}_err.json", emit: summary_log optional true
path "${sample_name}_report.json", emit: summary_report optional true
path "${sample_name}_pass_${pass}_report.json", emit: summary_report optional true
val(pass), emit: pass_number

script:
error_log = "${sample_name}_err.json"
report_json = "${sample_name}_report.json"

report_json = "${sample_name}_pass_${pass}_report.json"
species_in_sample = "${sample_name}_species_in_sample_pass_${pass}.json"
"""
identify_tophit_and_contaminants2.py ${afanc_json} ${kraken_json} ${refseq} ${params.species} ${params.unmix_myco} ${resources} ${prev_species_json} ${params.permissive}
identify_tophit_and_contaminants2.py ${afanc_json} ${kraken_json} ${refseq} ${params.species} ${params.unmix_myco} ${resources} ${prev_species_json} ${params.permissive} ${pass}
contam_to_remove=\$(jq -r '.summary_questions.are_there_contaminants' ${sample_name}_species_in_sample.json)
acceptable_species=\$(jq -r '.summary_questions.is_the_top_species_appropriate' ${sample_name}_species_in_sample.json)
top_hit=\$(jq -r '.top_hit.name' ${sample_name}_species_in_sample.json)
contam_to_remove=\$(jq -r '.summary_questions.are_there_contaminants' ${species_in_sample})
acceptable_species=\$(jq -r '.summary_questions.is_the_top_species_appropriate' ${species_in_sample})
top_hit=\$(jq -r '.top_hit.name' ${species_in_sample})
if [ \$contam_to_remove == 'yes' ]; then
if [ "${params.permissive}" == "no" ]; then
printf "${sample_name}"
echo '{"error":"sample remains contaminated, even after attempting to resolve this"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${software_json} ${error_log} ${sample_name}_species_in_sample.json > ${report_json}
echo '{"error":"sample remains contaminated, even after attempting to resolve this"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${software_json} ${error_log} ${species_in_sample} > ${report_json}
else
if [ "${pass}" == 2 ]; then
printf "NOW_ALIGN_TO_REF_${sample_name}"
else
printf "${sample_name}"
fi
echo '{"warning":"sample remains contaminated, even after attempting to resolve this"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${software_json} ${error_log} ${sample_name}_species_in_sample.json > ${report_json}
echo '{"warning":"sample remains contaminated, even after attempting to resolve this"}' | jq '.' > ${error_log} && jq -s ".[0] * .[1] * .[2]" ${software_json} ${error_log} ${species_in_sample} > ${report_json}
fi
fi
if [ \$contam_to_remove == 'no' ] && [ \$acceptable_species == 'yes' ]; then
printf "NOW_ALIGN_TO_REF_${sample_name}"
elif [ \$contam_to_remove == 'no' ] && [ \$acceptable_species == 'no' ]; then
jq -n --arg key "\$top_hit" '{"error": ("top hit " + \$key + " does not have a reference genome. Sample will not proceed beyond preprocessing workflow.")}' > ${error_log} && \
jq -s ".[0] * .[1] * .[2]" ${software_json} ${error_log} ${sample_name}_species_in_sample.json > ${report_json}
jq -s ".[0] * .[1] * .[2]" ${software_json} ${error_log} ${species_in_sample} > ${report_json}
printf "DO_NOT_PROCEED_${sample_name}"
fi
"""
Expand Down
3 changes: 2 additions & 1 deletion modules/preprocessingModules.nf
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,8 @@ process afanc {
cp ${sample_name}/${sample_name}.json ${sample_name}_afanc_original.json
reformat_afanc_json.py ${sample_name}/${sample_name}.json
identify_tophit_and_contaminants2.py ${afanc_report} ${kraken_json} $refseq_path ${params.species} ${params.unmix_myco} $resource_dir null ${params.permissive}
identify_tophit_and_contaminants2.py ${afanc_report} ${kraken_json} $refseq_path ${params.species} ${params.unmix_myco} $resource_dir null ${params.permissive} ${pass}
mv "${sample_name}"_species_in_sample_pass_0.json "${sample_name}"_species_in_sample.json
echo '{"error":"Kraken's top family hit either wasn't Mycobacteriaceae, or there were < 100k Mycobacteriaceae reads. Sample will not proceed further than afanc."}' | jq '.' > ${error_log} && printf "no" && jq -s ".[0] * .[1] * .[2]" ${software_json} ${error_log} ${sample_name}_species_in_sample.json > ${report_json}
Expand Down

0 comments on commit d82b5e9

Please sign in to comment.