|
| 1 | +from collections import defaultdict |
| 2 | +from algorithm.base import Algorithm |
| 3 | + |
class BoyerMooreHorspool(Algorithm):
    """Boyer-Moore-Horspool substring search over a filtered reference.

    The reference is stored with a fixed set of characters removed, so the
    offsets returned by :meth:`search` index into ``self.reference`` (the
    filtered text), not into the string passed to the constructor.
    """

    # Characters removed from the reference before it is stored.
    # NOTE(review): the rationale for this exact set is not visible in this
    # file -- confirm against the data this class is fed.
    _STRIPPED_CHARACTERS = '\tEmCGH7'

    def __init__(self, reference):
        """Store *reference* with the stripped characters removed.

        Args:
            reference: Text (``str``) that will later be searched.
        """
        removal_table = str.maketrans('', '', self._STRIPPED_CHARACTERS)
        self.reference = reference.translate(removal_table)

    @property
    def name(self):
        """Display name of the algorithm."""
        return 'Boyer-Moore-Horspool'

    def set_candidate(self, candidate, **params):
        """Set the pattern to search for.

        Args:
            candidate: Pattern (``str``) to look up in the reference.
            **params: Accepted but not used by this implementation.
        """
        self.candidate = candidate

    def set_skip_table(self):
        """Build the shift table for the current candidate.

        The table maps a character code (``ord``) to the distance between
        the rightmost occurrence of that character in the candidate (the
        final character excluded) and the end of the candidate. Characters
        not present default to the full candidate length.

        Returns:
            ``-1`` after printing an error when the candidate is longer
            than the reference (``self.table_skip`` is left untouched in
            that case); ``None`` otherwise.
        """
        len_reference = len(self.reference)
        len_candidate = len(self.candidate)
        if len_candidate > len_reference:
            print("Error: len candidate > len references {} > {}.".format(len_candidate, len_reference))
            return -1

        self.table_skip = defaultdict(lambda: len_candidate)

        # The final character is skipped on purpose: including it would
        # yield a shift of 0. Later (more rightward) occurrences overwrite
        # earlier ones, which gives the smallest safe shift.
        for offset, char in enumerate(self.candidate[:-1]):
            self.table_skip[ord(char)] = len_candidate - offset - 1
        return None

    def search(self, multiple_search=False) -> list:
        """Return the start offsets of all occurrences of the candidate.

        Args:
            multiple_search: Accepted for interface compatibility; it is
                not consulted, every match is always reported.

        Returns:
            Ascending list of offsets into ``self.reference``. The list is
            empty when the candidate is empty or longer than the reference.
        """
        # An empty candidate would produce a zero shift and never advance,
        # and an oversized one cannot match; both yield no results.
        if self.set_skip_table() == -1 or not self.candidate:
            return []

        reference = self.reference
        candidate = self.candidate
        len_reference = len(reference)
        last = len(candidate) - 1

        offset_lst = []
        # ``offset`` is the reference index aligned with the candidate's
        # last character.
        offset = last
        while offset < len_reference:
            start = offset - last
            if reference[start:offset + 1] == candidate:
                offset_lst.append(start)
            # Horspool rule: shift according to the reference character
            # currently aligned with the end of the candidate.
            offset += self.table_skip[ord(reference[offset])]

        return offset_lst
0 commit comments