@@ -138,9 +138,12 @@ process uk_unify_headers {
138
138
open("${ uk_fasta.baseName} .UH.fa", "w") as fasta_out:
139
139
reader = csv.DictReader(csv_in, delimiter=",", quotechar='\" ', dialect = "unix")
140
140
for row in reader:
141
- record = alignment[row["fasta_header"]]
142
- fasta_out.write(">" + row["sequence_name"] + "\\ n")
143
- fasta_out.write(str(record.seq) + "\\ n")
141
+ if row["why_excluded"]:
142
+ continue
143
+ if row["fasta_header"] in alignment:
144
+ record = alignment[row["fasta_header"]]
145
+ fasta_out.write(">" + row["sequence_name"] + "\\ n")
146
+ fasta_out.write(str(record.seq) + "\\ n")
144
147
"""
145
148
}
146
149
@@ -216,15 +219,17 @@ process uk_remove_duplicates_biosamplesourceid_by_date {
216
219
writer.writeheader()
217
220
218
221
for row in reader:
222
+ if row["why_excluded"]:
223
+ writer.writerow(row)
224
+ continue
219
225
fasta_header = row["sequence_name"]
220
226
if fasta_header in tokeep:
221
227
writer.writerow(row)
222
228
seqrec = alignment[fasta_header]
223
229
fasta_out.write(">" + seqrec.id + "\\ n")
224
230
fasta_out.write(str(seqrec.seq) + "\\ n")
225
231
else:
226
- if not row["why_excluded"]:
227
- row["why_excluded"] = "duplicate biosample_source_id"
232
+ row["why_excluded"] = "duplicate biosample_source_id"
228
233
writer.writerow(row)
229
234
"""
230
235
}
@@ -300,15 +305,17 @@ process uk_remove_duplicates_rootbiosample_by_date {
300
305
writer.writeheader()
301
306
302
307
for row in reader:
308
+ if row["why_excluded"]:
309
+ writer.writerow(row)
310
+ continue
303
311
fasta_header = row["sequence_name"]
304
312
if fasta_header in tokeep:
305
313
writer.writerow(row)
306
314
seqrec = alignment[fasta_header]
307
315
fasta_out.write(">" + seqrec.id + "\\ n")
308
316
fasta_out.write(str(seqrec.seq) + "\\ n")
309
317
else:
310
- if not row["why_excluded"]:
311
- row["why_excluded"] = "duplicate root_biosample_source_id"
318
+ row["why_excluded"] = "duplicate root_biosample_source_id"
312
319
writer.writerow(row)
313
320
"""
314
321
}
0 commit comments