Skip to content

Commit e4af71b

Browse files
committed
-Refactoring and fixing off-by-one errors (#20)
1 parent 7127d48 commit e4af71b

File tree

1 file changed

+33
-27
lines changed

1 file changed

+33
-27
lines changed

poplars/sequence_locator.py

+33-27
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ def find_overlap(self, base, q_coords):
9696
:param q_coords: the coordinates of the query region (global coordinates)
9797
:return overlap: A QueryRegion object that represents the where the query overlaps with the reference region
9898
"""
99-
start, end = 0, 0
10099
ref_coords = self.get_coords(base) # Coordinates are global coordinates
101100

102101
# If the ref_coords are in the range of the query region and the q_coords are in the range of the ref region
@@ -114,26 +113,34 @@ def find_overlap(self, base, q_coords):
114113
else:
115114
start = q_coords[0]
116115

117-
overlap = QueryRegion(self.region_name, self, base, self.genome)
116+
overlap = QueryRegion(self.region_name, self, base, self.genome)
117+
118+
# Set the nucleotide and protein regions
119+
if base == 'nucl':
120+
overlap.set_coords(q_coords, 'nucl')
121+
overlap.set_nt_seq_from_genome()
122+
prot_overlap = self.set_protein_equivalents(overlap)
123+
overlap.set_pos_from_qstart('nucl')
124+
overlap.set_pos_from_rstart('nucl')
125+
126+
# Set protein coordinates if the overlap is in a coding region
127+
if overlap.region_name not in NON_CODING:
128+
overlap.set_coords(prot_overlap[1], 'prot')
129+
overlap.set_sequence('prot', prot_overlap[0])
130+
print(overlap.aa_seq)
131+
overlap.set_pos_from_gstart()
132+
overlap.set_pos_from_cds()
133+
else:
134+
nucl_overlap = self.set_nucleotide_equivalents(overlap)
135+
overlap.set_coords(q_coords, 'prot')
136+
overlap.set_sequence('prot', self.get_sequence('prot')[start: end])
137+
overlap.set_coords(nucl_overlap[1], 'nucl')
138+
overlap.set_sequence(nucl_overlap[0], 'nucl')
118139

119-
# Set the nucleotide and protein regions
120-
if base == 'nucl':
121-
overlap.set_coords(q_coords, 'nucl')
122-
overlap.set_nt_seq_from_genome()
123-
prot_overlap = self.set_protein_equivalents(overlap)
124-
125-
# Set protein coordinates if the overlap is in a coding region
126-
if overlap.region_name not in NON_CODING:
127-
overlap.set_coords(prot_overlap[1], 'prot')
128-
overlap.set_sequence(prot_overlap[0], 'prot')
129-
else:
130-
nucl_overlap = self.set_nucleotide_equivalents(overlap)
131-
overlap.set_coords(q_coords, 'prot')
132-
overlap.set_sequence('prot', self.get_sequence('prot')[start: end])
133-
overlap.set_coords(nucl_overlap[1], 'nucl')
134-
overlap.set_sequence(nucl_overlap[0], 'nucl')
140+
return overlap # return overlap object not dict
135141

136-
return overlap # return overlap object not dict
142+
else:
143+
return None
137144

138145
def set_protein_equivalents(self, overlap):
139146
"""
@@ -361,10 +368,11 @@ def find_matches(self, base, query_matches):
361368
for q_coords in query_matches:
362369
for name in self.ref_genome_regions:
363370
if name != 'Complete':
364-
365371
# Use coordinates to find which regions overlap with the query region
366372
overlap = self.ref_genome_regions[name].find_overlap(base, q_coords)
367-
query_regions[name] = overlap
373+
374+
if overlap is not None:
375+
query_regions[name] = overlap
368376

369377
return query_regions
370378

@@ -635,8 +643,6 @@ def output_overlap(overlap_regions, outfile=None):
635643

636644
for key in overlap_regions:
637645
region = overlap_regions[key]
638-
if region.region_name.startswith('5\'LTR'):
639-
print("\t3'LTR\n")
640646

641647
print("\nRegion:\t{}".format(region.region_name))
642648
print("\n\tNucleotide Sequence:")
@@ -646,9 +652,9 @@ def output_overlap(overlap_regions, outfile=None):
646652

647653
if region.aa_seq is not None:
648654
print("\n\tProtein Sequence:")
649-
seq_lines = [region.nt_seq[i:i + 60] for i in range(0, len(region.nt_seq), 60)]
655+
seq_lines = [region.nt_seq[i:i + 60] for i in range(0, len(region.aa_seq), 60)]
650656
for line in seq_lines:
651-
print('\t\t{}\n'.format(line))
657+
print('\t\t{}'.format(line))
652658

653659
print("\n\tRelative Positions:")
654660

@@ -682,7 +688,7 @@ def output_overlap(overlap_regions, outfile=None):
682688

683689
if region.aa_seq is not None:
684690
outfile.write("\n\tProtein Sequence:\n")
685-
seq_lines = [region.nt_seq[i:i + 60] for i in range(0, len(region.nt_seq), 60)]
691+
seq_lines = [region.nt_seq[i:i + 60] for i in range(0, len(region.aa_seq), 60)]
686692
for line in seq_lines:
687693
print('\t\t{}'.format(line))
688694

@@ -831,7 +837,7 @@ def main():
831837
# Find indices where the query sequence aligns with the reference sequence
832838
match_coords = ref_genome.query_region_coordinates(alignment[-1]) # Query is the last item
833839
query_regions = ref_genome.find_matches(args.base, match_coords)
834-
# output_overlap(query_regions, args.out)
840+
output_overlap(query_regions, args.out)
835841

836842
else:
837843
valid_in = valid_inputs(args.virus, args.start, args.end, args.region)

0 commit comments

Comments
 (0)