Skip to content

Commit e31e30e

Browse files
Rachel Colquhoun (rmcolq)
Rachel Colquhoun
authored and committed
deduplicate by biosample AFTER mutation calling
1 parent 4dbefa7 commit e31e30e

File tree

2 files changed

+18
-7
lines changed

2 files changed

+18
-7
lines changed

modules/deduplicate_cog_uk.nf

+14-4
Original file line number | Diff line number | Diff line change
@@ -321,25 +321,35 @@ process uk_remove_duplicates_rootbiosample_by_date {
321321
}
322322

323323

324-
workflow deduplicate_cog_uk {
324+
workflow deduplicate_by_cogid_cog_uk {
325325
take:
326326
uk_fasta
327327
uk_metadata
328328
main:
329329
uk_annotate_with_unmapped_genome_completeness(uk_fasta, uk_metadata)
330330
uk_remove_duplicates_COGID_by_proportionN(uk_fasta, uk_annotate_with_unmapped_genome_completeness.out)
331331
uk_unify_headers(uk_remove_duplicates_COGID_by_proportionN.out.uk_fasta_updated, uk_remove_duplicates_COGID_by_proportionN.out.uk_metadata_updated)
332-
uk_remove_duplicates_biosamplesourceid_by_date(uk_unify_headers.out, uk_remove_duplicates_COGID_by_proportionN.out.uk_metadata_updated)
332+
emit:
333+
fasta = uk_unify_headers.out
334+
metadata = uk_remove_duplicates_COGID_by_proportionN.out.uk_metadata_updated
335+
}
336+
337+
workflow deduplicate_by_biosample_cog_uk {
338+
take:
339+
uk_fasta
340+
uk_metadata
341+
main:
342+
uk_remove_duplicates_biosamplesourceid_by_date(uk_fasta, uk_metadata)
333343
uk_remove_duplicates_rootbiosample_by_date(uk_remove_duplicates_biosamplesourceid_by_date.out.uk_fasta_updated, uk_remove_duplicates_biosamplesourceid_by_date.out.uk_metadata_updated)
334344
emit:
335345
fasta = uk_remove_duplicates_rootbiosample_by_date.out.uk_fasta_updated
336346
metadata = uk_remove_duplicates_rootbiosample_by_date.out.uk_metadata_updated
337-
all_fasta = uk_unify_headers.out
338347
}
339348

340349

341350
workflow {
342351
uk_fasta = file(params.uk_fasta)
343352
uk_metadata = file(params.uk_metadata)
344-
deduplicate_cog_uk(uk_fasta, uk_metadata)
353+
deduplicate_by_cogid_cog_uk(uk_fasta, uk_metadata)
354+
deduplicate_by_biosample_cog_uk(deduplicate_by_cogid_cog_uk.out.fasta, deduplicate_by_cogid_cog_uk.out.metadata)
345355
}

workflows/process_cog_uk.nf

+4-3
Original file line number | Diff line number | Diff line change
@@ -18,11 +18,12 @@ workflow process_cog_uk {
1818
main:
1919
preprocess_cog_uk(uk_fasta, uk_metadata, uk_accessions)
2020
pangolin_cog_uk(preprocess_cog_uk.out.fasta, preprocess_cog_uk.out.metadata)
21-
deduplicate_cog_uk(preprocess_cog_uk.out.fasta, pangolin_cog_uk.out.metadata)
21+
deduplicate_by_cogid_cog_uk(preprocess_cog_uk.out.fasta, pangolin_cog_uk.out.metadata)
2222
align_and_variant_call_cog_uk(deduplicate_by_cogid_cog_uk.out.fasta)
23-
filter_and_trim_cog_uk(align_and_variant_call_cog_uk.out.fasta, deduplicate_cog_uk.out.metadata)
23+
deduplicate_by_biosample_cog_uk(align_and_variant_call_cog_uk.out.fasta,deduplicate_by_cogid_cog_uk.out.metadata)
24+
filter_and_trim_cog_uk(deduplicate_by_biosample_cog_uk.out.fasta, deduplicate_by_biosample_cog_uk.out.metadata)
2425
emit:
25-
unaligned_fasta = deduplicate_cog_uk.out.all_fasta
26+
unaligned_fasta = deduplicate_by_cogid_cog_uk.out.fasta
2627
aligned_fasta = align_and_variant_call_cog_uk.out.fasta
2728
trimmed_fasta = filter_and_trim_cog_uk.out.fasta
2829
metadata = filter_and_trim_cog_uk.out.metadata

0 commit comments

Comments
 (0)