-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathlongest-duplicate-substring.py
More file actions
137 lines (99 loc) · 4.18 KB
/
longest-duplicate-substring.py
File metadata and controls
137 lines (99 loc) · 4.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
from typing import List
from collections import defaultdict
from functools import reduce
class Solution:
def longestDupSubstring(self, S: str) -> str:
class RollingHash:
def __init__(self, nums: List[int]) -> None:
self._hash = 0
self._mod = 2 << 63 - 1
self._pow = 26 ** (len(nums) - 1) % self._mod
for num in nums:
self.append(num)
def append(self, num: int) -> None:
self._hash = (self._hash * 26 + num) % self._mod
def popleft(self, num: int) -> None:
self._hash -= self._pow * num
def __hash__(self) -> int:
return self._hash
def check_matches(nums: List[int], size: int) -> int:
rolling_hash = RollingHash(nums[:size])
seen = defaultdict(list)
for pos in range(1, len(nums) - size + 1):
seen[hash(rolling_hash)].append(pos - 1)
rolling_hash.popleft(nums[pos - 1])
rolling_hash.append(nums[pos + size - 1])
if hash(rolling_hash) in seen:
# Double check, if we have a collision.
# In this task we don't have collisions, so this just
# increasing running time
for prev_pos in seen[hash(rolling_hash)]:
if S[prev_pos : prev_pos + size] == S[pos : pos + size]:
return pos
return -1
def bisect(S: str) -> str:
left, right = 0, len(S)
start, size = 0, 0
nums = [ord(c) - ord("a") for c in S]
while (middle := (left + right) // 2) != left:
if (match := check_matches(nums, middle)) != -1:
left = middle
start, size = match, middle
else:
right = middle
return S[start : start + size]
return bisect(S)
def longestDupSubstringFast(self, S: str) -> str:
mod = 2 ** 63 - 1
def check_matches(nums: List[int], size: int) -> int:
rolling_hash = reduce(lambda x, y: (x * 26 + y) % mod, nums[:size], 0)
most_significant = (26 ** size) % mod
seen = {rolling_hash}
for pos in range(1, len(nums) - size + 1):
rolling_hash = (
rolling_hash * 26
- most_significant * nums[pos - 1]
+ nums[pos + size - 1]
) % mod
if rolling_hash in seen:
return pos
seen.add(rolling_hash)
return -1
def bisect(S: str) -> str:
left, right = 0, len(S) - 1
start, size = 0, 0
nums = [ord(c) - ord("a") for c in S]
while (middle := (left + right) // 2) != left:
if (match := check_matches(nums, middle)) != -1:
left = middle
start, size = match, middle
else:
right = middle
return S[start : start + size]
return bisect(S)
def longestDupSubstringBruteForce(self, S: str) -> str:
char_to_pos = defaultdict(list)
for pos, char in enumerate(S):
char_to_pos[char].append(pos)
def match_from_pos(left, right, S):
match = 0
while left < len(S) and right < len(S) and S[left] == S[right]:
left += 1
right += 1
match += 1
return match
max_match, start, end = 0, 0, 0
for positions in char_to_pos.values():
for left in range(len(positions)):
for right in range(left + 1, len(positions)):
match = match_from_pos(positions[left], positions[right], S)
if match > max_match:
max_match, start, end = (
match,
positions[left],
positions[left] + match,
)
return S[start:end]
class TestSolution:
def setup(self):
self.sol = Solution()