Skip to content

Commit d4a7298

Browse files
committed
Suffix array, lcp using kasai
1 parent aaa2bb1 commit d4a7298

File tree

5 files changed

+227
-0
lines changed

5 files changed

+227
-0
lines changed

900-1000q/1044.py

+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
'''
2+
Given a string S, consider all duplicated substrings: (contiguous) substrings of S that occur 2 or more times. (The occurrences may overlap.)
3+
4+
Return any duplicated substring that has the longest possible length. (If S does not have a duplicated substring, the answer is "".)
5+
6+
7+
8+
Example 1:
9+
10+
Input: "banana"
11+
Output: "ana"
12+
Example 2:
13+
14+
Input: "abcd"
15+
Output: ""
16+
17+
18+
Note:
19+
20+
2 <= S.length <= 10^5
21+
S consists of lowercase English letters.
22+
'''
23+
24+
class Suffix(object):
    """Sort record for one suffix: its start index plus a two-part rank key."""

    def __init__(self):
        # Start offset of the suffix; both rank fields stay at -1 until assigned.
        self.index = 0
        self.first_rank = -1
        self.adjacent_rank = -1

    def __lt__(self, other):
        """Order by first_rank, falling back to adjacent_rank on a tie."""
        if self.first_rank != other.first_rank:
            return self.first_rank < other.first_rank
        return self.adjacent_rank < other.adjacent_rank
34+
35+
def create_suffix_array(s):
    """Build the suffix array of ``s`` by prefix doubling.

    The suffixes are first sorted on their first two characters. Each pass
    of the loop then re-ranks them and re-sorts on the pair
    (rank of the first half, rank of the following half), doubling the
    compared prefix length until it covers the whole string.

    :type s: str
    :rtype: list[int] -- start indices of the suffixes of ``s`` in
        lexicographic order
    """
    N = len(s)
    suffix_array = []

    for index, char in enumerate(s):
        suffix_obj = Suffix()
        suffix_obj.index = index
        # Raw code points are never negative, so the -1 "past the end"
        # sentinel sorts before every real character, not only 'a'..'z'.
        suffix_obj.first_rank = ord(char)
        suffix_obj.adjacent_rank = ord(s[index + 1]) if index + 1 < N else -1
        suffix_array.append(suffix_obj)

    suffix_array.sort()

    # no_char is the prefix length that the sort at the end of the pass covers.
    no_char = 4
    # Maps a suffix's start index to its current position in suffix_array.
    index_map = {}
    while no_char < 2 * N:
        # Renumber first_rank densely; suffixes whose old
        # (first_rank, adjacent_rank) pairs are equal share a rank.
        rank = 0
        prev_rank, suffix_array[0].first_rank = suffix_array[0].first_rank, rank
        index_map[suffix_array[0].index] = 0

        for index in range(1, N):
            current = suffix_array[index]
            if (current.first_rank == prev_rank and
                    current.adjacent_rank == suffix_array[index - 1].adjacent_rank):
                current.first_rank = rank
            else:
                rank += 1
                prev_rank, current.first_rank = current.first_rank, rank
            index_map[current.index] = index

        # Floor division keeps the offset an int on Python 2 and Python 3.
        half = no_char // 2
        for index in range(N):
            adjacent_index = suffix_array[index].index + half
            # Store the neighbour's *rank* (an int), not the Suffix object,
            # so the equality test above and Suffix.__lt__ compare numbers.
            if adjacent_index < N:
                suffix_array[index].adjacent_rank = (
                    suffix_array[index_map[adjacent_index]].first_rank)
            else:
                suffix_array[index].adjacent_rank = -1

        suffix_array.sort()
        no_char *= 2

    return [suffix.index for suffix in suffix_array]
71+
72+
def lcp_w_suffix_str(array, s):
    """Compute the LCP table for a suffix array using Kasai's method.

    :param array: start indices of the suffixes of ``s`` in sorted order
    :param s: the string the suffix array was built from
    :return: list where entry ``i`` is the length of the common prefix of
        the suffixes at sorted positions ``i`` and ``i + 1``; the last
        entry is 0
    """
    size = len(array)
    lcp_array = [0] * size

    # rank[start] is the sorted position of the suffix beginning at start.
    rank = [0] * size
    for position, start in enumerate(array):
        rank[start] = position

    matched = 0
    for start in range(size):
        position = rank[start]
        if position == size - 1:
            # The last suffix in sorted order has no successor to compare with.
            matched = 0
            continue

        neighbour = array[position + 1]
        while (start + matched < size and neighbour + matched < size
               and s[start + matched] == s[neighbour + matched]):
            matched += 1

        lcp_array[position] = matched

        # The suffix one character shorter shares at least matched - 1 chars
        # with its own successor, so the next scan can resume from there.
        matched = max(matched - 1, 0)

    return lcp_array
98+
99+
100+
class Solution(object):
    def longestDupSubstring(self, S):
        """
        Return a longest substring of S that occurs at least twice, or ""
        when there is none.

        :type S: str
        :rtype: str
        """
        suffix_array = create_suffix_array(S)
        lcp_array = lcp_w_suffix_str(suffix_array, S)

        # The largest LCP between neighbouring sorted suffixes is the longest
        # repeat; the strict '>' keeps the first such entry.
        best_start, best_len = 0, 0
        for start, common in zip(suffix_array, lcp_array):
            if common > best_len:
                best_start, best_len = start, common

        return S[best_start:best_start + best_len] if best_len else ""

Extra/lcp.py

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from suffix_array import SuffixArray
2+
3+
class LCP(object):
    """Longest-common-prefix table over the sorted suffixes of a string."""

    def __init__(self, s):
        self.s = s
        self.lcp_array = []
        # The suffix array is built eagerly so the table can be computed later.
        self.suffix_array = SuffixArray(s)
        self.suffix_array.create_suffix_array()

    def lcp_w_suffix_str(self):
        """Fill and return ``self.lcp_array`` using Kasai's method.

        Entry ``i`` is the length of the common prefix of the suffixes at
        sorted positions ``i`` and ``i + 1``; the last entry is 0.
        """
        suffixes = self.suffix_array.suffix_array
        size = len(suffixes)
        text = self.s

        self.lcp_array = [0] * size

        # rank[start] is the sorted position of the suffix beginning at start.
        rank = [0] * size
        for position, suffix in enumerate(suffixes):
            rank[suffix.index] = position

        matched = 0
        for start in range(size):
            position = rank[start]
            if position == size - 1:
                # The last suffix in sorted order has no successor.
                matched = 0
                continue

            neighbour = suffixes[position + 1].index
            while (start + matched < size and neighbour + matched < size
                   and text[start + matched] == text[neighbour + matched]):
                matched += 1

            self.lcp_array[position] = matched

            # Carry matched - 1 characters over to the next, shorter suffix.
            matched = max(matched - 1, 0)

        return self.lcp_array
37+
38+
39+
if __name__ == '__main__':
    # Demo: build the LCP table for "banana" and print it.
    lcp = LCP("banana")
    lcp.lcp_w_suffix_str()
    # Call form of print works on Python 2 and Python 3 alike.
    print(lcp.lcp_array)

Extra/suffix_array.py

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
class Suffix(object):
    """One suffix of a string together with the rank pair used to sort it."""

    def __init__(self):
        self.index = 0
        # Both rank components start out unassigned.
        self.first_rank = self.adjacent_rank = -1

    def __lt__(self, other):
        """Compare on first_rank; adjacent_rank breaks ties."""
        if self.first_rank == other.first_rank:
            mine, theirs = self.adjacent_rank, other.adjacent_rank
        else:
            mine, theirs = self.first_rank, other.first_rank
        return mine < theirs
11+
12+
class SuffixArray(object):
    """Suffix array of a string, built with the prefix-doubling method.

    ``suffix_array`` is empty until ``create_suffix_array()`` is called;
    afterwards it holds one ``Suffix`` per suffix of ``s``, ordered
    lexicographically by suffix text.
    """

    def __init__(self, s):
        self.s = s
        self.suffix_array = []

    def print_suffix(self):
        """Print the start index and text of every stored suffix, in order."""
        # Iterating the list itself (rather than range(len(self.s))) prints
        # nothing, instead of raising, when the array has not been built yet.
        for ele in self.suffix_array:
            print("Suffix index {}, Suffix string {}".format(ele.index, self.s[ele.index:]))

    def create_suffix_array(self):
        """Populate ``self.suffix_array`` with the sorted suffixes of ``self.s``.

        The list is rebuilt from scratch on every call, so calling this more
        than once does not accumulate duplicate entries.
        """
        N = len(self.s)
        suffixes = []

        for index, char in enumerate(self.s):
            suffix_obj = Suffix()
            suffix_obj.index = index
            # Raw code points are never negative, so the -1 "past the end"
            # sentinel sorts before every real character, not only 'a'..'z'.
            suffix_obj.first_rank = ord(char)
            suffix_obj.adjacent_rank = ord(self.s[index + 1]) if index + 1 < N else -1
            suffixes.append(suffix_obj)

        suffixes.sort()

        # no_char is the prefix length that the sort at the end of the pass covers.
        no_char = 4
        # Maps a suffix's start index to its current position in suffixes.
        index_map = {}
        while no_char < 2 * N:
            # Renumber first_rank densely; suffixes whose old
            # (first_rank, adjacent_rank) pairs are equal share a rank.
            rank = 0
            prev_rank, suffixes[0].first_rank = suffixes[0].first_rank, rank
            index_map[suffixes[0].index] = 0

            for index in range(1, N):
                current = suffixes[index]
                if (current.first_rank == prev_rank and
                        current.adjacent_rank == suffixes[index - 1].adjacent_rank):
                    current.first_rank = rank
                else:
                    rank += 1
                    prev_rank, current.first_rank = current.first_rank, rank
                index_map[current.index] = index

            # Floor division keeps the offset an int on Python 2 and Python 3.
            half = no_char // 2
            for index in range(N):
                adjacent_index = suffixes[index].index + half
                # Store the neighbour's *rank* (an int), not the Suffix object,
                # so the equality test above and Suffix.__lt__ compare numbers.
                if adjacent_index < N:
                    suffixes[index].adjacent_rank = (
                        suffixes[index_map[adjacent_index]].first_rank)
                else:
                    suffixes[index].adjacent_rank = -1

            suffixes.sort()
            no_char *= 2

        self.suffix_array = suffixes
55+
56+
if __name__ == '__main__':
    # Demo: sort the suffixes of "banana" and list them.
    demo = SuffixArray("banana")
    demo.create_suffix_array()
    demo.print_suffix()

Extra/suffix_array.pyc

2.52 KB
Binary file not shown.

README.md

+7
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,16 @@ Python solution of problems from [LeetCode](https://leetcode.com/).
44

55
### LeetCode Algorithm
66

7+
##### [Extra](./Extra/)
8+
| # | Title | Solution |
9+
|---| ----- | -------- |
10+
|1|Suffix Array|[Python](./Extra/suffix_array.py)|
11+
|2|Longest Common Prefix|[Python](./Extra/lcp.py)|
12+
713
##### [Problems 1000-1100](./1000-1100q/)
814
| # | Title | Solution | Difficulty |
915
|---| ----- | -------- | ---------- |
16+
|1044|[Longest Duplicate Substring](https://leetcode.com/problems/longest-duplicate-substring)|[Python](./1000-1100q/1044.py)|Hard|
1017
|1039|[Minimum Score Triangulation of Polygon](https://leetcode.com/problems/minimum-score-triangulation-of-polygon)|[Python](./1000-1100q/1039.py)|Medium|
1118
|1038|[Binary Search Tree to Greater Sum Tree](https://leetcode.com/problems/binary-search-tree-to-greater-sum-tree/)|[Python](./1000-1100q/1038.py)|Medium|
1219
|1037|[Valid Boomerang](https://leetcode.com/problems/valid-boomerang/)|[Python](./1000-1100q/1037.py)|Easy|

0 commit comments

Comments
 (0)