Skip to content

Commit 454473c

Browse files
committed
-Code clean up for sequence_locator unit tests
-Modifying sequence_locator to store global and local coordinates (#23, #20)
1 parent 40aa56a commit 454473c

File tree

2 files changed

+133
-210
lines changed

2 files changed

+133
-210
lines changed

poplars/sequence_locator.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,8 @@ def local_to_global_index(self, local_pair, base):
170170
"""
171171
Converts a pair of local indices (relative to the region of interest) to global indices
172172
"""
173-
start = local_pair[0] + self.get_global_coords(base)[0] - 1
174-
end = self.get_global_coords(base)[0] + local_pair[1] - 1
173+
start = local_pair[0] + self.get_local_coords(base)[0] - 1
174+
end = self.get_local_coords(base)[0] + local_pair[1] - 1
175175
global_pair = [start, end]
176176
return global_pair
177177

@@ -511,12 +511,12 @@ def set_nucleotide_equivalents(query_reg, ref_regions):
511511
nt_equiv = None
512512
for ref_reg in ref_regions:
513513
if ref_reg.region_name == query_reg.region_name and ref_reg.codon_aln is not None:
514-
if query_reg.nt_coords is not None:
514+
if query_reg.global_ncoords is not None:
515515
query_reg.make_codon_aln()
516516
regex = re.compile(query_reg.codon_aln)
517517
coords = regex.search(ref_reg.codon_aln).span()
518518
nt_equiv = ref_reg.nt_seq[coords[0]: coords[1]]
519-
query_reg.set_nt_seq(nt_equiv)
519+
query_reg.set_sequence('nucl', nt_equiv)
520520

521521
return nt_equiv
522522

@@ -602,44 +602,43 @@ def output(query_regions, outfile=None):
602602
.format(reg.rel_pos['rstart'][0], reg.rel_pos['rstart'][1]))
603603

604604

605-
def retrieve(virus, base, ref_regions, region, outfile=None, local_start=1, local_end='end'):
605+
def retrieve(virus, base, ref_regions, region, outfile=None, qstart=1, qend='end'):
606606
"""
607607
Retrieves a sequence given its coordinates
608608
:param virus: The organism (HIV or SIV)
609609
:param base: The base of the sequence (nucleotide or protein)
610610
:param ref_regions: A list of GenomeRegion objects
611611
:param region: The genomic region
612612
:param outfile: The file stream of the output file
613-
:param local_start: <option> The start coordinate of the query region (given as local coordinate)
614-
:param local_end: <option> The end coordinate of the query region (given as local coordinate)
613+
:param qstart: <option> The start coordinate of the query region (given as local coordinate)
614+
:param qend: <option> The end coordinate of the query region (given as local coordinate)
615615
:return: The genomic region defined by the starting and ending coordinates
616616
"""
617617

618618
query_region = None
619619
for ref_region in ref_regions:
620-
sequence_range = ref_region.get_local_coords(base)
621-
region_start, region_end = sequence_range[0], sequence_range[1]
622-
length = region_end - region_start
623-
624-
if local_end == 'end':
625-
local_end = length
626620

627621
if ref_region.region_name == region:
622+
sequence_range = ref_region.get_local_coords(base)
623+
region_start, region_end = sequence_range[0], sequence_range[1]
624+
625+
if qend == 'end':
626+
qend = region_end - region_start
628627

629-
if local_start != region_start or local_end > region_end:
628+
if qstart < region_start or qend > region_end:
630629
print("Invalid {} coordinates: {} to {}.\nValid range: {} to {}"
631-
.format(region, local_start, local_end, region_start, region_end))
630+
.format(region, qstart, qend, region_start, region_end))
632631
sys.exit(0)
633632

634633
query_region = GenomeRegion(region)
635634

636635
# Set local and global coordinates
637-
query_region.set_local_coords([local_start, local_end], base)
638-
global_coords = query_region.local_to_global_index([local_start, local_end], base)
636+
query_region.set_local_coords([qstart, qend], base)
637+
global_coords = query_region.local_to_global_index([qstart, qend], base)
639638
query_region.set_global_coords(global_coords, base)
640639

641640
# Set sequences protein and nucleotide sequences
642-
seq = ref_region.get_sequence(base)[local_start: local_end + 1]
641+
seq = ref_region.get_sequence(base)[qstart: qend + 1]
643642
query_region.set_sequence(seq, base)
644643

645644
# Set equivalent sequence

0 commit comments

Comments
 (0)