Skip to content

Commit 229dd9c

Browse files
Rachel Colquhoun (rmcolq)
Rachel Colquhoun
authored and committed
bugfixes as keeping full metadata
1 parent 825db6b commit 229dd9c

File tree

2 files changed

+18
-9
lines changed

2 files changed

+18
-9
lines changed

Diff for: modules/deduplicate_cog_uk.nf

+14-7
Original file line number | Diff line number | Diff line change
@@ -138,9 +138,12 @@ process uk_unify_headers {
138138
open("${uk_fasta.baseName}.UH.fa", "w") as fasta_out:
139139
reader = csv.DictReader(csv_in, delimiter=",", quotechar='\"', dialect = "unix")
140140
for row in reader:
141-
record = alignment[row["fasta_header"]]
142-
fasta_out.write(">" + row["sequence_name"] + "\\n")
143-
fasta_out.write(str(record.seq) + "\\n")
141+
if row["why_excluded"]:
142+
continue
143+
if row["fasta_header"] in alignment:
144+
record = alignment[row["fasta_header"]]
145+
fasta_out.write(">" + row["sequence_name"] + "\\n")
146+
fasta_out.write(str(record.seq) + "\\n")
144147
"""
145148
}
146149

@@ -216,15 +219,17 @@ process uk_remove_duplicates_biosamplesourceid_by_date {
216219
writer.writeheader()
217220
218221
for row in reader:
222+
if row["why_excluded"]:
223+
writer.writerow(row)
224+
continue
219225
fasta_header = row["sequence_name"]
220226
if fasta_header in tokeep:
221227
writer.writerow(row)
222228
seqrec = alignment[fasta_header]
223229
fasta_out.write(">" + seqrec.id + "\\n")
224230
fasta_out.write(str(seqrec.seq) + "\\n")
225231
else:
226-
if not row["why_excluded"]:
227-
row["why_excluded"] = "duplicate biosample_source_id"
232+
row["why_excluded"] = "duplicate biosample_source_id"
228233
writer.writerow(row)
229234
"""
230235
}
@@ -300,15 +305,17 @@ process uk_remove_duplicates_rootbiosample_by_date {
300305
writer.writeheader()
301306
302307
for row in reader:
308+
if row["why_excluded"]:
309+
writer.writerow(row)
310+
continue
303311
fasta_header = row["sequence_name"]
304312
if fasta_header in tokeep:
305313
writer.writerow(row)
306314
seqrec = alignment[fasta_header]
307315
fasta_out.write(">" + seqrec.id + "\\n")
308316
fasta_out.write(str(seqrec.seq) + "\\n")
309317
else:
310-
if not row["why_excluded"]:
311-
row["why_excluded"] = "duplicate root_biosample_source_id"
318+
row["why_excluded"] = "duplicate root_biosample_source_id"
312319
writer.writerow(row)
313320
"""
314321
}

Diff for: modules/filter_and_trim_cog_uk.nf

+4-2
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,9 @@ process uk_filter_low_coverage_sequences {
4747
writer.writeheader()
4848
4949
for row in reader:
50+
if row["why_excluded"]:
51+
writer.writerow(row)
52+
continue
5053
id = row["sequence_name"]
5154
if id in alignment:
5255
seq = str(alignment[id].seq)
@@ -56,8 +59,7 @@ process uk_filter_low_coverage_sequences {
5659
fasta_out.write(">" + id + "\\n")
5760
fasta_out.write(seq + "\\n")
5861
else:
59-
if not row["why_excluded"]:
60-
row["why_excluded"] = "low mapped_completeness"
62+
row["why_excluded"] = "low mapped_completeness"
6163
writer.writerow(row)
6264
"""
6365
}

0 commit comments

Comments
 (0)