Skip to content

Commit 73fece5

Browse files
committed
-Fixing off-by-one bugs #20
1 parent 3793640 commit 73fece5

File tree

2 files changed

+67
-11
lines changed

2 files changed

+67
-11
lines changed

poplars/sequence_locator.py

+7-8
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def set_pos_from_cds(self, region_coords):
7575
if self.ncoords is not None:
7676
if self.region_name != '5\'LTR':
7777
self.rel_pos['CDS'].append((self.ncoords[0] + 1 - region_coords[0]))
78-
self.rel_pos['CDS'].append((self.ncoords[1] + 1 - region_coords[0]) + 1)
78+
self.rel_pos['CDS'].append((self.ncoords[1] + 1 - region_coords[0]))
7979

8080
def set_pos_from_gstart(self):
8181
self.rel_pos['gstart'] = self.ncoords
@@ -161,7 +161,7 @@ def get_overlap(self, region, coord_pair, base):
161161
end = min(local_pair[1] + 1, len(seq))
162162
if base == 'nucl':
163163
overlap.append(seq[start - 1: end + 1])
164-
overlap.append(self.local_to_global_index(region, [start, end], base))
164+
overlap.append(self.local_to_global_index(region, [start, end + 1], base))
165165
else:
166166
overlap.append(seq[start - 1: end - 1])
167167
overlap.append(self.local_to_global_index(region, [start, end - 1], base))
@@ -455,7 +455,7 @@ def find_matches(base, ref_regions, match_coordinates):
455455

456456
for coord in match_coordinates:
457457
start_aln = coord[0]
458-
end_aln = coord[1] - 1 # -1 to account for match.end() in regex
458+
end_aln = coord[1]
459459

460460
for ref_region in ref_regions:
461461
if ref_region.region_name != "Complete":
@@ -504,9 +504,9 @@ def set_protein_equivalents(query_reg, ref_regions):
504504
for ref_reg in ref_regions:
505505
if ref_reg.region_name == query_reg.region_name and ref_reg.region_name not in non_coding:
506506
if ref_reg.codon_aln is not None and query_reg.pcoords is not None:
507-
prot_equiv = ref_reg.codon_aln[query_reg.ncoords[0]: query_reg.ncoords[1]]
507+
prot_equiv = ref_reg.codon_aln[query_reg.pcoords[0]: query_reg.pcoords[1]]
508508
prot_equiv = re.sub('[-]', '', prot_equiv)
509-
query_reg.set_aa_seq(prot_equiv)
509+
query_reg.set_sequence('prot', prot_equiv)
510510

511511
return prot_equiv
512512

@@ -561,7 +561,7 @@ def output_retrieved_region(region, outfile=None):
561561

562562
if region.rel_pos['gstart']:
563563
print("\t\tNucleotide position relative to genome start:\t{} --> {}"
564-
.format(region.rel_pos['gstart'][0] + 1, region.rel_pos['gstart'][1] + 1))
564+
.format(region.rel_pos['gstart'][0], region.rel_pos['gstart'][1]))
565565

566566
if region.rel_pos['pstart']:
567567
print("\t\tAmino acid position relative to protein start:\t{} --> {}"
@@ -642,7 +642,7 @@ def output_overlap(overlap_regions, outfile=None):
642642

643643
if region.rel_pos['gstart']:
644644
print("\t\tNucleotide position relative to genome start:\t{} --> {}"
645-
.format(region.rel_pos['gstart'][0] + 1, region.rel_pos['gstart'][1] + 1))
645+
.format(region.rel_pos['gstart'][0], region.rel_pos['gstart'][1]))
646646

647647
if region.rel_pos['pstart']:
648648
print("\t\tAmino acid position relative to protein start:\t{} --> {}"
@@ -725,7 +725,6 @@ def retrieve(base, ref_regions, region, qstart=1, qend='end'):
725725
query_region = GenomeRegion(region)
726726

727727
# Set local and global coordinates
728-
# query_region.set_coords([qstart, qend], base)
729728
global_coords = GenomeRegion.local_to_global_index(ref_region, [qstart, qend], base)
730729
query_region.set_coords(global_coords, base)
731730

poplars/tests/test_sequence_locator.py

+60-3
Original file line numberDiff line numberDiff line change
@@ -205,13 +205,13 @@ def testGagFromAAStart(self):
205205
class TestMakeCodonAln(unittest.TestCase):
206206

207207
def testSimpleUse(self):
208-
region = GenomeRegion('p6', [2134, 2298], 'CTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAAC'
208+
region = GenomeRegion('p6', [2134, 2292], 'CTTCAGAGCAGACCAGAGCCAACAGCCCCACCAGAAGAGAGCTTCAGGTCTGGGGTAGAGACAACAAC'
209209
'TCCCCCTCAGAAGCAGGAGCCGATAGACAAGGAACTGTATCCTTTAACTTCCCTCAGGTCACTCTTTG'
210210
'GCAACGACCCCTCGTCACAATAA',
211-
[449, 500], 'LQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ')
211+
[449, 500], 'LQSRPEPTAPPEESFRSGVETTTPPQKQEPIDKELYPLTSLRSLFGNDPSSQ*')
212212
region.make_codon_aln()
213213
expected = '-L--Q--S--R--P--E--P--T--A--P--P--E--E--S--F--R--S--G--V--E--T--T--T--P--P--Q-' \
214-
'-K--Q--E--P--I--D--K--E--L--Y--P--L--T--S--L--R--S--L--F--G--N--D--P--S--S--Q-'
214+
'-K--Q--E--P--I--D--K--E--L--Y--P--L--T--S--L--R--S--L--F--G--N--D--P--S--S--Q--*-'
215215
result = region.codon_aln
216216
self.assertEqual(expected, result)
217217

@@ -1060,5 +1060,62 @@ def testInputAllSIV(self):
10601060
self.assertEqual(expected_ref_prot, result_ref_prot)
10611061

10621062

1063+
class TestSetNucleotideEquivalent(InputTestCase):
1064+
1065+
def testSimpleNuclEquiv(self):
1066+
configs = handle_args('hiv', 'prot', self.hiv_nt_seq_path, self.hiv_ncoords_path,
1067+
self.hiv_aa_seq_path, self.hiv_pcoords_path)
1068+
1069+
ref_nt_seq = configs[0][0][1]
1070+
ref_aa_seq = configs[1]
1071+
nt_coords = configs[2]
1072+
aa_coords = configs[3]
1073+
1074+
with open(nt_coords) as ncoords, open(aa_coords) as pcoords:
1075+
ref_regions = set_regions('prot', ncoords, ref_nt_seq, pcoords, ref_aa_seq)
1076+
for region in ref_regions:
1077+
region.make_codon_aln()
1078+
1079+
query_region = GenomeRegion('Gag', [2133, 2292], 'GCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATG',
1080+
[364, 377], 'AEAMSQVTNSATIM')
1081+
1082+
expected = 'GCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATG'
1083+
result = set_nucleotide_equivalents(query_region, ref_regions)
1084+
self.assertEqual(expected, result)
1085+
1086+
def testWithGaps(self):
1087+
ref_regions = [GenomeRegion('test1', [1, 31], 'GGGGGCCCGGGTTTAAACCCGGGTTTAAATTTC', [1, 11], 'GGPGLNPGLNF')]
1088+
for region in ref_regions:
1089+
region.make_codon_aln()
1090+
q_region = GenomeRegion('test1', [12, 27], 'TTAAACCCGGGTTTA', [1, 5], 'LNPGL')
1091+
expected = 'TTAAACCCGGGTTTA'
1092+
result = set_nucleotide_equivalents(q_region, ref_regions)
1093+
self.assertEqual(expected, result)
1094+
1095+
1096+
class TestSetProteinEquivalent(InputTestCase):
1097+
1098+
def testHIV(self):
1099+
configs = handle_args('hiv', 'nucl', self.hiv_nt_seq_path, self.hiv_ncoords_path,
1100+
self.hiv_aa_seq_path, self.hiv_pcoords_path)
1101+
1102+
ref_nt_seq = configs[0][0][1]
1103+
ref_aa_seq = configs[1]
1104+
nt_coords = configs[2]
1105+
aa_coords = configs[3]
1106+
1107+
with open(nt_coords) as ncoords, open(aa_coords) as pcoords:
1108+
ref_regions = set_regions('nucl', ncoords, ref_nt_seq, pcoords, ref_aa_seq)
1109+
1110+
for reg in ref_regions:
1111+
reg.make_codon_aln()
1112+
1113+
query_region = GenomeRegion('Gag', [2133, 2292], 'GCTGAAGCAATGAGCCAAGTAACAAATTCAGCTACCATAATG',
1114+
[364, 377], 'AEAMSQVTNSATIM')
1115+
expected = 'AEAMSQVTNSATIM'
1116+
result = set_protein_equivalents(query_region, ref_regions)
1117+
self.assertEqual(expected, result)
1118+
1119+
10631120
if __name__ == '__main__':
10641121
unittest.main()

0 commit comments

Comments
 (0)