Skip to content

Commit 9d174db

Browse files
committed
Updated test cases and cleaned up code (#20 in progress)
1 parent a66f019 commit 9d174db

File tree

2 files changed

+52
-22
lines changed

2 files changed

+52
-22
lines changed

poplars/sequence_locator.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -225,15 +225,17 @@ def set_regions(virus, base, nt_coords, nt_seq, aa_coords, aa_seq):
225225
genome_regions.append(seq_region)
226226

227227
# Parse protein coordinates file
228+
prot_names = []
228229
prot_coords = []
229230
for aa_line in aa_coords:
230231
aa_line = aa_line.strip()
231232
aa_line = aa_line.split(',')
233+
prot_names.append(aa_line[0])
232234
prot_coords.append([int(aa_line[1]), int(aa_line[2])])
233235

234236
for i, coords in enumerate(prot_coords):
235237
for seq_region in genome_regions:
236-
if aa_seq[i][0].startswith(seq_region.region_name):
238+
if prot_names[i].startswith(seq_region.region_name):
237239
# Set global and local protein coordinates
238240
seq_region.set_coords(coords, 'prot')
239241
seq_region.set_sequence(aa_seq[i][1], 'prot')
@@ -716,6 +718,7 @@ def retrieve(virus, base, ref_regions, region, qstart=1, qend='end'):
716718
"""
717719

718720
query_region = None
721+
overlap_regions = {}
719722
for ref_region in ref_regions:
720723

721724
if ref_region.region_name == region:
@@ -757,7 +760,6 @@ def retrieve(virus, base, ref_regions, region, qstart=1, qend='end'):
757760
retrieved_regions = find_matches(virus, base, ref_regions, [query_region.get_coords(base)])
758761

759762
# Exclude the query region itself from the retrieved overlapping regions
760-
overlap_regions = {}
761763
for key in retrieved_regions:
762764
if retrieved_regions[key].region_name != region:
763765
overlap_regions[key] = retrieved_regions[key]

poplars/tests/test_sequence_locator.py

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -797,8 +797,27 @@ def testNuclMatches(self):
797797

798798
coordinates = [(2133, 2292)]
799799
result = find_matches('hiv', 'nucl', ref_regions, coordinates)
800-
region_names = ['Gag', 'p6']
801-
self.assertListEqual(region_names, list(result.keys()))
800+
801+
exp_region_names = ['Gag', 'p6']
802+
exp_pos_from_cds = [[1344, 1504], [1, 159]]
803+
exp_pos_from_qstart = [[1, 160], [2, 160]]
804+
exp_pos_from_gstart = [[2133, 2292], [2134, 2292]]
805+
exp_pos_from_pstart = [[449, 501], [1, 53]]
806+
exp_aa_seq = ['XLQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ',
807+
'LQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ']
808+
exp_nt_seq = ['TCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGA'
809+
'CAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA',
810+
'TCTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAACTCCCCCTCAGAAGCAGGAGCCGATAGA'
811+
'CAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTGGCAACGACCCCTCGTCACAATAA']
812+
813+
self.assertListEqual(exp_region_names, list(result.keys()))
814+
for i, (key, value) in enumerate(result.items()):
815+
self.assertEqual(exp_pos_from_cds[i], value.rel_pos['CDS'])
816+
self.assertEqual(exp_pos_from_qstart[i], value.rel_pos['qstart'])
817+
self.assertEqual(exp_pos_from_gstart[i], value.rel_pos['gstart'])
818+
self.assertEqual(exp_pos_from_pstart[i], value.rel_pos['pstart'])
819+
self.assertEqual(exp_aa_seq[i], value.get_sequence('prot'))
820+
self.assertEqual(exp_nt_seq[i], value.get_sequence('nucl'))
802821

803822

804823
class TestRetrieve(InputTestCase):
@@ -813,7 +832,7 @@ def testDefaultInput(self):
813832
aa_coords = configs[3]
814833

815834
with open(nt_coords) as ncoords, open(aa_coords) as pcoords:
816-
ref_regions = set_regions('hiv', 'nucl', ncoords, self.hiv_nt_seq, pcoords, self.hiv_aa_seq)
835+
ref_regions = set_regions('hiv', 'nucl', ncoords, ref_nt_seq, pcoords, ref_aa_seq)
817836

818837
result = retrieve('hiv', 'nucl', ref_regions, 'p2')
819838
query_region = result[0]
@@ -830,17 +849,26 @@ def testDefaultInput(self):
830849
exp_pos_from_pstart = [[364, 377], [1, 14]]
831850
expected_proteins = ['AEAMSQVTNSATIM', 'AEAMSQVTNSATIM']
832851

833-
for i, region in enumerate(overlap_regions):
834-
self.assertEqual(exp_region_names[i], overlap_regions[i].region_name)
835-
# self.assertEqual(exp_pos_from_cds[i], overlap_regions[i].pos_from_cds)
836-
# self.assertEqual(exp_pos_from_qstart[i], overlap_regions[i].pos_from_qstart)
837-
# self.assertEqual(exp_pos_from_gstart[i], overlap_regions[i].pos_from_gstart)
838-
# self.assertEqual(exp_pos_from_pstart[i], overlap_regions[i].pos_from_pstart)
839-
# self.assertEqual(expected_proteins[i], overlap_regions[i].get_sequence('prot'))
852+
for i, (k, v) in enumerate(overlap_regions.items()):
853+
self.assertListEqual(list(overlap_regions.keys()), exp_region_names)
854+
self.assertEqual(exp_pos_from_cds[i], v.rel_pos['CDS'])
855+
self.assertEqual(exp_pos_from_qstart[i], v.rel_pos['qstart'])
856+
self.assertEqual(exp_pos_from_gstart[i], v.rel_pos['gstart'])
857+
self.assertEqual(exp_pos_from_pstart[i], v.rel_pos['pstart'])
858+
self.assertEqual(expected_proteins[i], v.get_sequence('prot'))
840859

841860
def testSIVInput(self):
842-
ref_regions = set_regions('siv', 'nucl', self.siv_nt_seq, self.siv_ncoords_path,
843-
self.siv_aa_seq, self.siv_pcoords_path)
861+
862+
configs = handle_args('siv', 'nucl', self.siv_nt_seq_path, self.siv_ncoords_path,
863+
self.siv_aa_seq_path, self.siv_pcoords_path)
864+
865+
ref_nt_seq = configs[0][0][1]
866+
ref_aa_seq = configs[1]
867+
nt_coords = configs[2]
868+
aa_coords = configs[3]
869+
870+
with open(nt_coords) as ncoords, open(aa_coords) as pcoords:
871+
ref_regions = set_regions('siv', 'nucl', ncoords, ref_nt_seq, pcoords, ref_aa_seq)
844872

845873
result = retrieve('siv', 'nucl', ref_regions, 'Nef', 20, 80)
846874
result_region = result[0]
@@ -849,21 +877,21 @@ def testSIVInput(self):
849877
self.assertEqual(expected_region, result_region.region_name)
850878
self.assertEqual(expected_seq, result_region.get_sequence('nucl'))
851879

852-
found_regions = result[1]
880+
overlap_regions = result[1]
853881
exp_region_names = ['Env(gp160)', 'gp41', 'Nef']
854882
exp_pos_from_cds = [[2493, 2553], [918, 978], [20, 80]]
855883
exp_pos_from_qstart = [[1, 61], [1, 61], [1, 61]]
856884
exp_pos_from_gstart = [[9096, 9156], [9096, 9156], [9096, 9156]]
857885
exp_pos_from_pstart = [[832, 851], [307, 326], [7, 27]]
858886
expected_proteins = ['VWRSATETLAGAWGD', 'VWRSATETLAGAWGD', 'SGDLRQRLLRARGE']
859887

860-
for i in range(len(found_regions)):
861-
self.assertEqual(exp_region_names[i], found_regions[i].region_name)
862-
self.assertEqual(exp_pos_from_cds[i], found_regions[i].pos_from_cds)
863-
self.assertEqual(exp_pos_from_qstart[i], found_regions[i].pos_from_qstart)
864-
self.assertEqual(exp_pos_from_gstart[i], found_regions[i].pos_from_gstart)
865-
self.assertEqual(exp_pos_from_pstart[i], found_regions[i].pos_from_pstart)
866-
self.assertEqual(expected_proteins[i], found_regions[i].get_sequence('prot'))
888+
for i, (k, v) in enumerate(overlap_regions.items()):
889+
self.assertListEqual(list(overlap_regions.keys()), exp_region_names)
890+
self.assertEqual(exp_pos_from_cds[i], v.rel_pos['CDS'])
891+
self.assertEqual(exp_pos_from_qstart[i], v.rel_pos['qstart'])
892+
self.assertEqual(exp_pos_from_gstart[i], v.rel_pos['gstart'])
893+
self.assertEqual(exp_pos_from_pstart[i], v.rel_pos['pstart'])
894+
self.assertEqual(expected_proteins[i], v.get_sequence('prot'))
867895

868896

869897
class TestHandleArgs(InputTestCase):

0 commit comments

Comments
 (0)