@@ -128,12 +128,12 @@ def make_codon_aln(self):
128
128
if self .nt_seq is not None and self .aa_seq is not None :
129
129
codon_aln = []
130
130
codons = ['' .join (t ) for t in zip (* [iter (self .nt_seq )] * 3 )]
131
- for i in range ( len ( codons ) ):
131
+ for aa , codon in zip ( self . aa_seq , codons ):
132
132
# Check if stop codon
133
- if codons [ i ] == 'TAA' or codons [ i ] == 'TGA' or codons [ i ] == 'TAG' :
133
+ if codon == 'TAA' or codon == 'TGA' or codon == 'TAG' :
134
134
codon_aln .append ('-*-' )
135
135
else :
136
- codon_aln .append ('-{}-' .format (self . aa_seq [ i ] ))
136
+ codon_aln .append ('-{}-' .format (aa ))
137
137
self .codon_aln = '' .join (codon_aln )
138
138
return self .codon_aln
139
139
@@ -194,10 +194,10 @@ def set_regions(virus, base, nt_coords, nt_seq, aa_coords, aa_seq):
194
194
:param base: The base of the sequence
195
195
:param nt_coords: Path to the csv file containing the global coordinates of the nucleotide region.
196
196
The file stream has one genomic entry per line and has the following format: region_name,start,end
197
- :param nt_seq: The file stream containing the reference nucleotide sequence in read mode
197
+ :param nt_seq: The nucleotide sequence
198
198
:param aa_coords: Path to the csv file containing the global coordinates of the protein region.
199
199
The file stream has one genomic entry per line and has the following format: region_name,start,end
200
- :param aa_seqe: The file stream containing the reference nucleotide sequence in read mode
200
+ :param aa_seq: A list of [name, sequence] lists, one per protein region, containing the amino acid sequences
201
201
:return: A list of GenomeRegions
202
202
"""
203
203
@@ -225,26 +225,24 @@ def set_regions(virus, base, nt_coords, nt_seq, aa_coords, aa_seq):
225
225
genome_regions .append (seq_region )
226
226
227
227
# Parse protein coordinates file
228
- prot_names = []
229
228
prot_coords = []
230
229
for aa_line in aa_coords :
231
230
aa_line = aa_line .strip ()
232
231
aa_line = aa_line .split (',' )
233
232
prot_coords .append ([int (aa_line [1 ]), int (aa_line [2 ])])
234
- prot_names .append (aa_line [0 ])
235
233
236
- for i in range ( len ( prot_names ) ):
234
+ for i , coords in enumerate ( prot_coords ):
237
235
for seq_region in genome_regions :
238
- if prot_names [i ] in seq_region .region_name :
236
+ if aa_seq [i ][ 0 ]. startswith ( seq_region .region_name ) :
239
237
# Set global and local protein coordinates
240
- seq_region .set_coords (prot_coords [i ], 'prot' )
241
- seq_region .set_seq_from_ref (aa_seq , 'prot' )
242
-
238
+ seq_region .set_coords (coords , 'prot' )
239
+ seq_region .set_sequence (aa_seq [i ][1 ], 'prot' )
243
240
seq_region .set_pos_from_pstart (virus )
241
+ seq_region .make_codon_aln ()
244
242
245
243
else :
246
244
# Parse protein region coordinates file
247
- for aa_line in aa_coords :
245
+ for i , aa_line in enumerate ( aa_coords ) :
248
246
aa_line = aa_line .strip ()
249
247
aa_line = aa_line .split (',' )
250
248
prot_coords = [int (aa_line [1 ]), int (aa_line [2 ])]
@@ -253,7 +251,7 @@ def set_regions(virus, base, nt_coords, nt_seq, aa_coords, aa_seq):
253
251
254
252
# Set global and local protein coordinates
255
253
seq_region .set_coords (prot_coords , 'prot' )
256
- seq_region .set_seq_from_ref (aa_seq , 'prot' )
254
+ seq_region .set_sequence (aa_seq [ i ][ 1 ] , 'prot' )
257
255
258
256
# Set relative positions
259
257
seq_region .set_pos_from_cds (virus )
@@ -268,17 +266,17 @@ def set_regions(virus, base, nt_coords, nt_seq, aa_coords, aa_seq):
268
266
for nt_line in nt_coords :
269
267
nt_line = nt_line .strip ()
270
268
nt_line = nt_line .split (',' )
271
- nucl_coords .append ([int (nt_line [1 ]), int (nt_line [2 ])])
272
269
nucl_names .append (nt_line [0 ])
270
+ nucl_coords .append ([int (nt_line [1 ]), int (nt_line [2 ])])
273
271
274
- for i in range ( len ( nucl_names ) ):
272
+ for i , name in enumerate ( nucl_names ):
275
273
for seq_region in genome_regions :
276
- if nucl_names [ i ] in seq_region .region_name :
274
+ if name . startswith ( seq_region .region_name ) :
277
275
# Set global and local nucleotide coordinates
278
276
seq_region .set_coords (nucl_coords [i ], 'nucl' )
279
277
seq_region .set_seq_from_ref (nt_seq , 'nucl' )
280
-
281
278
seq_region .set_pos_from_pstart (virus )
279
+ seq_region .make_codon_aln ()
282
280
283
281
return genome_regions
284
282
@@ -557,7 +555,7 @@ def output_retrieved_region(region, outfile=None):
557
555
print ("\t Nucleotide Sequence:" )
558
556
seq_lines = [region .nt_seq [i :i + 60 ] for i in range (0 , len (region .nt_seq ), 60 )]
559
557
for line in seq_lines :
560
- print ('\t \t {}\n ' .format (line ))
558
+ print ('\t \t {}' .format (line ))
561
559
562
560
if region .aa_seq :
563
561
print ("\t Protein Sequence:" )
@@ -739,8 +737,8 @@ def retrieve(virus, base, ref_regions, region, qstart=1, qend='end'):
739
737
query_region = GenomeRegion (region )
740
738
741
739
# Set local and global coordinates
742
- query_region .set_coords ([qstart , qend ], base )
743
- global_coords = query_region .local_to_global_index (ref_region , [qstart , qend ], base )
740
+ # query_region.set_coords([qstart, qend], base)
741
+ global_coords = GenomeRegion .local_to_global_index (ref_region , [qstart , qend ], base )
744
742
query_region .set_coords (global_coords , base )
745
743
746
744
# Set sequences protein and nucleotide sequences
@@ -878,7 +876,7 @@ def main():
878
876
# Ensure proper configuration files are set
879
877
configs = handle_args (args .virus , args .base , args .ref_nt , args .nt_coords , args .ref_aa , args .aa_coords )
880
878
ref_nt_seq = configs [0 ][0 ][1 ]
881
- ref_aa_seq = configs [1 ][ 0 ][ 1 ]
879
+ ref_aa_seq = configs [1 ]
882
880
nt_coords = configs [2 ]
883
881
aa_coords = configs [3 ]
884
882
reference_sequence = configs [4 ]
0 commit comments