Skip to content

Commit cda6703

Browse files
committed
-Updated test cases
-Refactoring relative positions (#20 in progress)
1 parent c832001 commit cda6703

File tree

2 files changed

+56
-45
lines changed

2 files changed

+56
-45
lines changed

poplars/sequence_locator.py

+25-14
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def __init__(self, region_name, ncoords=None, nt_seq=None, pcoords=None, aa_seq=
3232
self.nt_seq = nt_seq
3333
self.pcoords = pcoords
3434
self.aa_seq = aa_seq
35-
self.cds, self.gstart, self.qstart, self.pstart = None, None, None, None
35+
self.cds_offset, self.gstart, self.qstart, self.pstart = [], [], [], []
3636
self.codon_aln = ''
3737

3838
def get_coords(self, base):
@@ -67,19 +67,30 @@ def set_seq_from_ref(self, sequence, base):
6767
else:
6868
self.aa_seq = sequence[self.pcoords[0] - 1: self.pcoords[1]]
6969

70-
def set_pos_from_cds(self, region_coords):
70+
def set_pos_from_cds(self, ref_reg_name):
7171
"""
72-
Gives the position of a sequence relative to the start of the coding sequence
72+
Gives the position of the query sequence relative to the start of the coding sequence
7373
"""
74-
if self.ncoords is not None:
75-
local_ncoords = self.global_to_local_index(self.get_coords('nucl'))
76-
print('local ncoords {}'.format(local_ncoords))
7774

78-
if self.region_name != '5\'LTR':
79-
len_region = region_coords[1] - region_coords[0]
80-
cds_start = region_coords[0] - local_ncoords[0] + 1
81-
cds_end = cds_start + len_region
82-
self.cds = [cds_start, cds_end]
75+
ref_region = GENOME_REGIONS[ref_reg_name]
76+
if self.ncoords and ref_region.ncoords:
77+
78+
query_start = self.ncoords[0]
79+
query_end = self.ncoords[1]
80+
ref_start = ref_region.ncoords[0]
81+
ref_end = ref_region.ncoords[1]
82+
83+
if query_start == ref_start:
84+
start = 1
85+
else:
86+
start = query_start - ref_start
87+
88+
if query_end == ref_end:
89+
end = query_end - query_start
90+
else:
91+
end = query_end - start
92+
93+
self.cds_offset = [start, end]
8394

8495
def set_pos_from_gstart(self):
8596
self.gstart = self.ncoords
@@ -150,9 +161,9 @@ def set_pcoords_from_ncoords(self):
150161
"""
151162
Sets protein coordinates relative to the protein start, given the nucleotide coordinates
152163
"""
153-
if self.cds is not None:
154-
prot_start = self.cds[0] // 3 + 1
155-
prot_end = self.cds['CDS'][1] // 3
164+
if self.cds_offset is not None:
165+
prot_start = self.cds_offset[0] // 3 + 1
166+
prot_end = self.cds_offset[1] // 3
156167
self.pcoords = [prot_start, prot_end]
157168

158169
def set_ncoords_from_pcoords(self):

poplars/tests/test_sequence_locator.py

+31-31
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import unittest
2-
from io import StringIO
32
from poplars.sequence_locator import *
43

54

@@ -20,15 +19,15 @@ def setUp(self):
2019
open(self.hiv_ncoords_path) as hiv_ncoords, open(self.hiv_pcoords_path) as hiv_pcoords:
2120
self.hiv_nt_seq = convert_fasta(hiv_nt.read().split())[0][1]
2221
self.hiv_aa_seq = convert_fasta(hiv_aa.read().split())[0][1]
23-
self.hiv_ncoords = hiv_ncoords.read()
24-
self.hiv_pcoords = hiv_pcoords.read()
22+
self.hiv_ncoords = hiv_ncoords
23+
self.hiv_pcoords = hiv_pcoords
2524

2625
with open(self.siv_nt_seq_path) as siv_nt, open(self.siv_aa_seq_path) as siv_aa, \
2726
open(self.siv_ncoords_path) as siv_ncoords, open(self.siv_pcoords_path) as siv_pcoords:
2827
self.siv_nt_seq = convert_fasta(siv_nt.read().split())[0][1]
2928
self.siv_aa_seq = convert_fasta(siv_aa.read().split())[0][1]
30-
self.siv_ncoords = siv_ncoords.read()
31-
self.siv_pcoords = siv_pcoords.read()
29+
self.siv_ncoords = siv_ncoords
30+
self.siv_pcoords = siv_pcoords
3231

3332

3433
class TestGetCoords(InputTestCase):
@@ -136,42 +135,44 @@ def testHIVLTR5Start(self):
136135
region.set_coords([1, 634], 'nucl')
137136
region.set_pos_from_cds([1, 634])
138137
expected = []
139-
result = region.rel_pos['CDS']
138+
result = region.cds_offset
140139
self.assertEqual(expected, result)
141140

142141
def testFromSIVLTR5Start(self):
143142
region = GenomeRegion('5\'LTR')
144143
region.set_pos_from_cds([1, 1110])
145144
expected = []
146-
result = region.rel_pos['CDS']
145+
result = region.cds_offset
147146
self.assertEqual(expected, result)
148147

149148
def testFromHIVStart(self):
150-
region = GenomeRegion('Gag', [790, 2292])
151-
region.set_pos_from_cds([790, 2292])
149+
ref_region = GenomeRegion('Gag', [790, 2292])
150+
query_region = GenomeRegion('Gag', [790, 2292])
151+
query_region.set_pos_from_cds(ref_region)
152152
expected = [1, 1503]
153-
result = region.rel_pos['CDS']
153+
result = query_region.cds_offset
154154
self.assertEqual(expected, result)
155155

156156
def testFromSIVStart(self):
157-
region = GenomeRegion('Integrase', [4785, 5666])
158-
region.set_pos_from_cds([4785, 5666])
157+
ref_region = GenomeRegion('Integrase', [4785, 5666])
158+
query_region = GenomeRegion('Integrase', [4785, 5666])
159+
query_region.set_pos_from_cds(ref_region)
159160
expected = [1, 882]
160-
result = region.rel_pos['CDS']
161+
result = query_region.cds_offset
161162
self.assertEqual(expected, result)
162163

163164
def testGagfromP1Start(self):
164165
region = GenomeRegion('Gag', [2086, 2133])
165166
region.set_pos_from_cds([2086, 2133])
166167
expected = [1297, 1344]
167-
result = region.rel_pos['CDS']
168+
result = region.cds_offset
168169
self.assertEqual(expected, result)
169170

170171
def testPolFromP1Start(self):
171172
region = GenomeRegion('Pol', [2086, 2133])
172173
region.set_pos_from_cds([2086, 2133])
173174
expected = [2, 49]
174-
result = region.rel_pos['CDS']
175+
result = region.cds_offset
175176
self.assertEqual(expected, result)
176177

177178

@@ -194,14 +195,14 @@ def testCapsidFromAAStart(self):
194195
'PKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPAATLEEMMTACQGVGGPGHKARVL')
195196
region.set_pos_from_pstart()
196197
expected = [1, 231]
197-
result = region.rel_pos['pstart']
198+
result = region.pstart
198199
self.assertEqual(expected, result)
199200

200201
def testLTR5FromAAStart(self):
201202
region = GenomeRegion('5\'LTR')
202203
region.set_pos_from_pstart()
203204
expected = []
204-
result = region.rel_pos['pstart']
205+
result = region.pstart
205206
self.assertEqual(expected, result)
206207

207208
def testGagFromAAStart(self):
@@ -212,7 +213,7 @@ def testGagFromAAStart(self):
212213
[450, 500], 'QSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ')
213214
region.set_pos_from_pstart()
214215
expected = [450, 500]
215-
result = region.rel_pos['pstart']
216+
result = region.pstart
216217
self.assertEqual(expected, result)
217218

218219

@@ -278,7 +279,6 @@ class TestProtCoordsFromNuclCoords(unittest.TestCase):
278279
def testSimpleUse(self):
279280
region = GenomeRegion('Gag', [2086, 2133], 'TTTTTAGGGAAGATCTGGCCTTCCTACAAGGGAAGGCCAGGGAATTTT', None, None)
280281
region.set_pos_from_cds([2086, 2133])
281-
print(region.rel_pos['CDS'])
282282
region.set_pcoords_from_ncoords()
283283
expected = [433, 448]
284284
self.assertEqual(expected, region.pcoords)
@@ -349,7 +349,7 @@ def testSIVInputCoords(self):
349349
aa_coords = configs[3]
350350

351351
with open(nt_coords) as ncoords, open(aa_coords) as pcoords:
352-
result = set_regions('nucl', ncoords, ref_nt_seq, pcoords, ref_aa_seq)
352+
result = make_regions(ncoords, ref_nt_seq, pcoords, ref_aa_seq)
353353

354354
for i, reg in enumerate(result):
355355
self.assertEqual(list(region_names.keys())[i], reg.region_name)
@@ -430,7 +430,7 @@ def testHIVInputCoords(self):
430430
aa_coords = configs[3]
431431

432432
with open(nt_coords) as ncoords, open(aa_coords) as pcoords:
433-
result = set_regions('prot', ncoords, ref_nt_seq, pcoords, ref_aa_seq)
433+
result = make_regions(ncoords, ref_nt_seq, pcoords, ref_aa_seq)
434434

435435
for i, reg in enumerate(result):
436436
self.assertEqual(list(region_names.keys())[i], reg.region_name)
@@ -713,7 +713,7 @@ def testNuclAlignment(self):
713713
'CCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCT'
714714
'TTGGCAACGACCCCTCGTCACAATAA']
715715
expected = [(2133, 2292)]
716-
result = get_region_coordinates(aln)
716+
result = query_region_coordinates(aln)
717717
self.assertEqual(expected, result)
718718

719719
def testProtAlignment(self):
@@ -724,7 +724,7 @@ def testProtAlignment(self):
724724
'-------------------------------------------------------------------------------------------------------'
725725
'----------------------------------------------------------------------------------------']
726726
expected = [(0, 132)]
727-
result = get_region_coordinates(aln)
727+
result = query_region_coordinates(aln)
728728
self.assertEqual(expected, result)
729729

730730

@@ -854,7 +854,7 @@ def testDefaultInput(self):
854854
aa_coords = configs[3]
855855

856856
with open(nt_coords) as ncoords, open(aa_coords) as pcoords:
857-
ref_regions = set_regions('nucl', ncoords, ref_nt_seq, pcoords, ref_aa_seq)
857+
ref_regions = make_regions(ncoords, ref_nt_seq, pcoords, ref_aa_seq)
858858

859859
result = retrieve('hiv', 'nucl', ref_regions, 'p2')
860860
query_region = result[0]
@@ -898,9 +898,9 @@ def testSIVInput(self):
898898
aa_coords = configs[3]
899899

900900
with open(nt_coords) as ncoords, open(aa_coords) as pcoords:
901-
ref_regions = set_regions('nucl', ncoords, ref_nt_seq, pcoords, ref_aa_seq)
901+
ref_regions = make_regions(ncoords, ref_nt_seq, pcoords, ref_aa_seq)
902902

903-
result = retrieve('nucl', ref_regions, 'Nef', 20, 80)
903+
result = retrieve('nucl', 'Nef', 20, 80)
904904
result_region = result[0]
905905
expected_region = 'Nef'
906906
expected_seq = 'TGAGGCGGTCCAGGCCGTCTGGAGATCTGCGACAGAGACTCTTGCGGGCGCGTGGGGAGAC'
@@ -1078,15 +1078,15 @@ def testSimpleNuclEquiv(self):
10781078
aa_coords = configs[3]
10791079

10801080
with open(nt_coords) as ncoords, open(aa_coords) as pcoords:
1081-
ref_regions = set_regions('prot', ncoords, ref_nt_seq, pcoords, ref_aa_seq)
1081+
ref_regions = make_regions(ncoords, ref_nt_seq, pcoords, ref_aa_seq)
10821082
for region in ref_regions:
10831083
region.make_codon_aln()
10841084

10851085
query_region = GenomeRegion('Gag', [2133, 2292], 'GCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATG',
10861086
[364, 377], 'AEAMSQVTNSATIM')
10871087

10881088
expected = 'GCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATG'
1089-
result = set_nucleotide_equivalents(query_region, ref_regions)
1089+
result = set_nucleotide_equivalents(query_region)
10901090
self.assertEqual(expected, result)
10911091

10921092
def testWithGaps(self):
@@ -1095,7 +1095,7 @@ def testWithGaps(self):
10951095
region.make_codon_aln()
10961096
q_region = GenomeRegion('test1', [12, 27], 'TTAAACCCGGGTTTA', [1, 5], 'LNPGL')
10971097
expected = 'TTAAACCCGGGTTTA'
1098-
result = set_nucleotide_equivalents(q_region, ref_regions)
1098+
result = set_nucleotide_equivalents(q_region)
10991099
self.assertEqual(expected, result)
11001100

11011101

@@ -1111,15 +1111,15 @@ def testHIV(self):
11111111
aa_coords = configs[3]
11121112

11131113
with open(nt_coords) as ncoords, open(aa_coords) as pcoords:
1114-
ref_regions = set_regions('nucl', ncoords, ref_nt_seq, pcoords, ref_aa_seq)
1114+
ref_regions = make_regions(ncoords, ref_nt_seq, pcoords, ref_aa_seq)
11151115

11161116
for reg in ref_regions:
11171117
reg.make_codon_aln()
11181118

11191119
query_region = GenomeRegion('Gag', [2133, 2292], 'GCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATG',
11201120
[364, 377], 'AEAMSQVTNSATIM')
11211121
expected = 'AEAMSQVTNSATIM'
1122-
result = set_protein_equivalents(query_region, ref_regions)
1122+
result = set_protein_equivalents(query_region)
11231123
self.assertEqual(expected, result)
11241124

11251125

0 commit comments

Comments
 (0)