字符串匹配经典问题整理
KMP算法
class Solution:
    def strStr(self, s: str, pattern: str) -> int:
        """Return the index of the first occurrence of ``pattern`` in ``s``.

        Uses KMP with a shifted failure table: ``fail[i]`` is the length of
        the longest proper border of ``pattern[:i]`` (``fail[0]`` is the
        sentinel -1).  Returns 0 for an empty pattern and -1 when there is
        no occurrence.
        """
        size = len(pattern)
        if not size:
            return 0

        # Build the failure table.
        fail = [0] * size
        fail[0] = -1
        border = 0
        for end in range(2, size):
            prev = pattern[end - 1]
            while border != 0 and pattern[border] != prev:
                border = fail[border]
            if prev == pattern[border]:
                border += 1
                fail[end] = border

        # Scan the text, keeping the number of pattern characters matched.
        matched = 0
        for pos, ch in enumerate(s):
            while matched > 0 and pattern[matched] != ch:
                matched = fail[matched]
            if ch == pattern[matched]:
                matched += 1
            if matched == size:
                return pos - size + 1
        return -1
2、找出数组中的美丽下标 II -- 匹配多个下标
class Solution:
    def beautifulIndices(self, s: str, a: str, b: str, k: int) -> List[int]:
        """Return every index where ``a`` occurs in ``s`` near an occurrence of ``b``.

        An index ``i`` is reported when ``s[i:i + len(a)] == a`` and some
        index ``j`` with ``s[j:j + len(b)] == b`` satisfies ``|i - j| <= k``.
        The result is in increasing order.
        """
        from bisect import bisect_left

        def find_all(text, pattern):
            """Return the sorted start indices of all (overlapping) matches."""
            size = len(pattern)
            if size == 0:
                # The empty pattern matches at every position; returning a
                # list keeps the return type consistent for the caller.
                return list(range(len(text) + 1))

            # Standard prefix function: pi[i] is the length of the longest
            # proper border of pattern[:i + 1].
            pi = [0] * size
            border = 0
            for i in range(1, size):
                while border and pattern[i] != pattern[border]:
                    border = pi[border - 1]
                if pattern[i] == pattern[border]:
                    border += 1
                pi[i] = border

            hits = []
            matched = 0
            for pos, ch in enumerate(text):
                while matched and ch != pattern[matched]:
                    matched = pi[matched - 1]
                if ch == pattern[matched]:
                    matched += 1
                if matched == size:
                    hits.append(pos - size + 1)
                    # Fall back to the longest border so that overlapping
                    # occurrences are still found.
                    matched = pi[size - 1]
            return hits

        a_hits, b_hits = find_all(s, a), find_all(s, b)
        res = []
        for idx in a_hits:
            # First occurrence of b at or after idx - k; it is close enough
            # exactly when it does not exceed idx + k.
            pos = bisect_left(b_hits, idx - k)
            if pos < len(b_hits) and b_hits[pos] <= idx + k:
                res.append(idx)
        return res
3、将单词恢复初始状态所需的最短时间 II -- 自身匹配
class Solution:
    def minimumTimeToInitialState(self, word: str, kk: int) -> int:
        """Return the number of steps until ``word`` is back in its initial state.

        Walks the borders of ``word`` from longest to shortest and returns
        ``removed // kk`` for the first border whose removed prefix length
        is a positive multiple of ``kk``.  If no border qualifies, the
        answer is ``ceil(len(word) / kk)``.
        """

        def first_aligned_border(text):
            size = len(text)
            if size == 0:
                return 0

            # fail[i] is the longest proper border of text[:i]; fail[0] = -1.
            fail = [0] * size
            fail[0] = -1
            border = 0
            for end in range(2, size):
                prev = text[end - 1]
                while border != 0 and text[border] != prev:
                    border = fail[border]
                if prev == text[border]:
                    border += 1
                    fail[end] = border

            last = text[size - 1]
            cur = size
            while cur > 0:
                removed = size - cur
                if removed > 0 and removed % kk == 0:
                    return removed // kk
                # Next shorter border: take the border of text[:cur - 1] and
                # try to extend it with the final character.  When cur is 1
                # this reads fail[0] == -1 and text[-1], which resets cur to 0.
                cur = fail[cur - 1]
                while cur > 0 and text[cur] != last:
                    cur = fail[cur]
                if last == text[cur]:
                    cur += 1
            return -1

        steps = first_aligned_border(word)
        if steps == -1:
            full, rest = divmod(len(word), kk)
            return full + (rest > 0)
        return steps
4、KMP 算法不适合处理带通配符的匹配(例如允许替换字符后再匹配),这类问题可以直接使用暴力匹配:
class Solution:
    def matchReplacement(self, s: str, sub: str, m: List[List[str]]) -> bool:
        """Return True if ``sub`` can match a substring of ``s`` after replacements.

        Each pair ``[old, new]`` in ``m`` allows a character ``old`` of
        ``sub`` to stand for ``new``.  Every window of ``s`` is checked by
        brute force.
        """
        allowed = defaultdict(set)
        for old, new in m:
            allowed[old].add(new)

        width = len(sub)
        for start in range(len(s) - width + 1):
            window = s[start:start + width]
            if all(
                have == want or have in allowed[want]
                for have, want in zip(window, sub)
            ):
                return True
        return False
5、树形kmp -- 二叉树中的链表
# Definition for singly-linked list.
# class ListNode:
#     def __init__(self, val=0, next=None):
#         self.val = val
#         self.next = next
# Definition for a binary tree node.
# class TreeNode:
#     def __init__(self, val=0, left=None, right=None):
#         self.val = val
#         self.left = left
#         self.right = right
class Solution:
    def isSubPath(self, head: ListNode, root: TreeNode) -> bool:
        """Return True if the list starting at ``head`` appears as a downward path in the tree.

        KMP on a tree: every list node receives a ``last`` attribute (its
        failure link), then the tree is walked with the current list node
        as the automaton state.

        Note: this attaches a ``last`` attribute to every node of the
        input list.
        """
        # Default failure link: head for every non-head node, None for head.
        head.last = None
        node = head.next
        while node:
            node.last = head
            node = node.next

        # Refine the links.  k trails p like the border pointer in array
        # KMP; the link of p.next is the node that follows the border.
        k = head
        p = head.next
        while p and p.next:
            while k != head and k.val != p.val:
                k = k.last
            if k.val == p.val:
                k = k.next
                p.next.last = k
            p = p.next

        @cache
        def dfs(p, q):
            """Match from tree node ``p`` with list node ``q`` as the next expected node."""
            if not q:
                return True  # whole list consumed
            if not p:
                return False  # ran off the tree
            if p.val == q.val:
                return dfs(p.left, q.next) or dfs(p.right, q.next)
            # Mismatch: retry the same tree node from the failure link.
            if q.last and dfs(p, q.last):
                return True
            # Otherwise restart the match from the children.
            return dfs(p.left, head) or dfs(p.right, head)

        if not root:
            return False
        return dfs(root, head)
6、无限重复kmp算法 -- 最大重复子字符串
class Solution:
    def maxRepeating(self, sequence: str, word: str) -> int:
        """Return the largest ``k`` such that ``word * k`` is a substring of ``sequence``.

        Runs KMP against the infinite repetition of ``word``.  The failure
        table is stored in a dict and extended lazily, one copy of ``word``
        at a time, by a generator.
        """
        fail = defaultdict(int)

        def extend_table():
            """Fill the failure table for one more copy of ``word`` per resume."""
            period = len(word)
            fail[0] = -1
            border, pos, blocks = 0, 2, 1
            while True:
                # Character at index pos - 1 of the infinite pattern.
                prev = word[pos % period - 1]
                while border != 0 and word[border % period] != prev:
                    border = fail[border]
                if prev == word[border % period]:
                    border += 1
                    fail[pos] = border
                pos += 1
                if pos % period == 0:
                    yield blocks
                    blocks += 1

        table = extend_table()
        ready = next(table)
        period = len(word)
        matched = best = 0
        for ch in sequence:
            while matched > 0 and word[matched % period] != ch:
                matched = fail[matched]
            if ch == word[matched % period]:
                matched += 1
            if matched % period == 0:
                best = max(best, matched // period)
                # Keep the table one copy ahead of the best match so far.
                if best >= ready:
                    ready = next(table)
        return best
7、重复的子字符串
class Solution:
    def repeatedSubstringPattern(self, s: str) -> bool:
        """Return True if ``s`` occurs inside ``s + s`` with both end characters removed."""
        doubled = s + s
        # Dropping the first and last characters rules out the two trivial
        # occurrences at offsets 0 and len(s).
        trimmed = doubled[1:-1]
        return trimmed.find(s) != -1
8、旋转字符串
class Solution(object):
    def rotateString(self, A, B):
        """Return True if ``B`` has the same length as ``A`` and occurs in ``A + A``."""
        if len(A) != len(B):
            return False
        doubled = A + A
        return B in doubled
9、kmp+ 数位dp -- 找到所有好字符串
class Solution:
    def findGoodStrings(self, n: int, s1: str, s2: str, evil: str) -> int:
        """Count length-``n`` strings in ``[s1, s2]`` that do not contain ``evil``.

        Combines a digit-DP over the upper bound with the KMP automaton of
        ``evil``.  The result is taken modulo 10**9 + 7.
        """
        mod = 10 ** 9 + 7
        base = ord('a')
        size = len(evil)

        # fail[i] is the longest proper border of evil[:i]; fail[0] = -1.
        fail = [0] * size
        fail[0] = -1
        border = 0
        for end in range(2, size):
            prev = evil[end - 1]
            while border != 0 and evil[border] != prev:
                border = fail[border]
            if prev == evil[border]:
                border += 1
                fail[end] = border

        @cache
        def count(bound, pos: int, tight: bool, matched) -> int:
            """Count valid completions from ``pos`` that stay <= ``bound``."""
            if matched == size:
                return 0  # evil fully matched: prefix is invalid
            if pos == n:
                return 1
            limit = (ord(bound[pos]) - base) if tight else 25
            total = 0
            for code in range(limit + 1):  # enumerate the letter to place
                letter = chr(base + code)
                state = matched
                while state > 0 and evil[state] != letter:
                    state = fail[state]
                if evil[state] == letter:
                    state += 1
                total += count(bound, pos + 1, tight and code == limit, state)
            return total % mod

        upper = count(s2, 0, True, 0)
        lower = count(s1, 0, True, 0)
        # count() includes the bound itself, so add s1 back when it is good.
        keep_s1 = 1 if evil not in s1 else 0
        return (upper - lower + keep_s1) % mod
10、最长快乐前缀
class Solution:
    def longestPrefix(self, s: str) -> str:
        """Return the longest proper prefix of ``s`` that is also a suffix.

        ``fail[i]`` holds the last index of the longest proper border of
        ``s[:i + 1]``, or -1 when there is none.  Returns ``""`` for an
        empty string or when no border exists.
        """
        n = len(s)
        if n == 0:
            # Without this guard fail[-1] below raises IndexError.
            return ""

        fail = [-1] * n
        for i in range(1, n):
            j = fail[i - 1]
            while j != -1 and s[j + 1] != s[i]:
                j = fail[j]
            if s[j + 1] == s[i]:
                fail[i] = j + 1
        return s[:fail[-1] + 1]
class Solution:
    def removeOccurrences(self, s: str, part: str) -> str:
        """Repeatedly delete the leftmost occurrence of ``part`` from ``s``.

        The text is processed left to right with a stack of kept characters
        and a parallel stack of KMP states, so a deletion restores the
        match state that preceded the removed characters.
        """
        size = len(part)

        # Prefix function of part.
        prefix = [0] * size
        k = 0
        for idx in range(1, size):
            cur = part[idx]
            while k > 0 and cur != part[k]:
                k = prefix[k - 1]
            if cur == part[k]:
                k += 1
            prefix[idx] = k

        kept = []
        states = [0]  # states[i] is the match length after kept[:i]
        for ch in s:
            kept.append(ch)
            k = states[-1]
            while k > 0 and ch != part[k]:
                k = prefix[k - 1]
            if ch == part[k]:
                k += 1
            states.append(k)
            if k == size:
                # Full match: drop the matched suffix from both stacks.
                del states[-size:]
                del kept[-size:]
        return "".join(kept)
12、扩展kmp(z函数)-- 构造字符串的总得分和
class Solution:
    def sumScores(self, s: str) -> int:
        """Return the sum over all suffixes of their common-prefix length with ``s``.

        Computes the Z-function; the whole string contributes ``len(s)``.
        """
        n = len(s)
        z = [0] * n
        total = n
        left = right = 0  # current Z-box [left, right]
        for i in range(1, n):
            # Reuse the value mirrored inside the Z-box, capped by the box.
            span = max(min(z[i - left], right - i + 1), 0)
            while i + span < n and s[span] == s[i + span]:
                left, right = i, i + span
                span += 1
            z[i] = span
            total += span
        return total
字典树
class Trie:
    """Prefix tree over lowercase letters, using a 26-slot child array per node."""

    def __init__(self):
        self.children = [None] * 26
        self.isEnd = False

    def insert(self, word: str) -> None:
        """Add ``word`` to the trie, creating nodes as needed."""
        cur = self
        for letter in word:
            slot = ord(letter) - ord("a")
            nxt = cur.children[slot]
            if nxt is None:
                nxt = Trie()
                cur.children[slot] = nxt
            cur = nxt
        cur.isEnd = True

    def searchPrefix(self, prefix: str):
        """Return the node reached by ``prefix``, or None if the path is missing."""
        cur = self
        for letter in prefix:
            cur = cur.children[ord(letter) - ord("a")]
            if cur is None:
                return None
        return cur

    def search(self, word: str) -> bool:
        """Return True if ``word`` was inserted as a complete word."""
        found = self.searchPrefix(word)
        if found is None:
            return False
        return found.isEnd

    def startsWith(self, prefix: str) -> bool:
        """Return True if some inserted word starts with ``prefix``."""
        return self.searchPrefix(prefix) is not None
class WordDictionary:
    """Trie-backed word set whose lookups accept ``.`` as a one-letter wildcard."""

    def __init__(self):
        """Create an empty node with 26 child slots."""
        self.isEnd = False
        self.ch = [None] * 26

    def addWord(self, word: str) -> None:
        """Insert ``word`` (lowercase letters) into the dictionary."""
        cur = self
        for letter in word:
            slot = ord(letter) - ord('a')
            if not cur.ch[slot]:
                cur.ch[slot] = WordDictionary()
            cur = cur.ch[slot]
        cur.isEnd = True

    def search(self, word: str) -> bool:
        """Return True if any stored word matches ``word``."""
        return self.searchSub(self, word)

    def searchSub(self, p, word):
        """Match ``word`` starting at node ``p``; ``.`` branches into every child."""
        for i, c in enumerate(word):
            if c == '.':
                rest = word[i + 1:]
                return any(self.searchSub(child, rest) for child in p.ch if child)
            nxt = p.ch[ord(c) - ord('a')]
            if not nxt:
                return False
            p = nxt
        return p.isEnd


# Your WordDictionary object will be instantiated and called as such:
# obj = WordDictionary()
# obj.addWord(word)
# param_2 = obj.search(word)
3、统计前后缀下标对 II -- 双字符字典树
class Trie:
    """Trie node keyed by two-character strings, counting the words that pass through."""

    def __init__(self):
        self.child = {}
        self.count = 0

    def add(self, cc):
        """Step to (creating if needed) the child for ``cc``, bump its count and return it."""
        node = self.child.get(cc)
        if node is None:
            node = Trie()
            self.child[cc] = node
        node.count += 1
        return node

    def get(self, cc):
        """Return the child for ``cc`` or None when absent."""
        return self.child.get(cc)


class Solution:
    def countPrefixSuffixPairs(self, words: List[str]) -> int:
        """Count pairs ``i < j`` where ``words[i]`` is both a prefix and a suffix of ``words[j]``.

        Each word is encoded as the sequence of pairs (j-th character,
        j-th character from the end).  Words are processed from last to
        first: a word is first looked up against the later words already
        stored, then stored itself.
        """
        root = Trie()
        res = 0
        for word in reversed(words):
            keys = [front + back for front, back in zip(word, reversed(word))]

            # Query: number of later words whose pair sequence starts with keys.
            node = root
            for key in keys:
                node = node.get(key)
                if node is None:
                    break
            if node is not None:
                res += node.count

            # Insert the current word.
            node = root
            for key in keys:
                node = node.add(key)
        return res
class Solution:
    def countDistinct(self, nums: List[int], k: int, p: int) -> int:
        """Count distinct subarrays containing at most ``k`` elements divisible by ``p``.

        Every valid subarray is inserted into a trie stored as a list of
        dicts (index 0 is the root); the answer is the number of non-root
        nodes.
        """
        nodes = [{}]
        total = len(nums)
        for start in range(total):
            divisible = 0
            cur = 0
            for end in range(start, total):
                value = nums[end]
                if value % p == 0:
                    divisible += 1
                if divisible > k:
                    break
                branch = nodes[cur]
                if value not in branch:
                    nodes.append({})
                    branch[value] = len(nodes) - 1
                cur = branch[value]
        return len(nodes) - 1
5、数组中两个数的最大异或值 -- 异或反向匹配
class Tire:
    """Binary trie node; each number is stored as 32 bits, most significant first."""

    def __init__(self):
        self.ch = [None] * 2

    def build(self, nums):
        """Insert every number of ``nums`` bit by bit, from bit 31 down to bit 0."""
        for num in nums:
            node = self
            for shift in range(31, -1, -1):
                bit = (num >> shift) & 1
                if not node.ch[bit]:
                    node.ch[bit] = Tire()
                node = node.ch[bit]


class Solution:
    def findMaximumXOR(self, nums: List[int]) -> int:
        """Return the maximum XOR of any two elements of ``nums`` (0 if empty)."""
        t = Tire()
        t.build(nums)
        best = 0
        for num in nums:
            node = t
            acc = 0
            for shift in range(31, -1, -1):
                bit = (num >> shift) & 1
                want = bit ^ 1  # the opposite bit sets this XOR bit to 1
                acc <<= 1
                if node.ch[want]:
                    node = node.ch[want]
                    acc |= 1
                else:
                    node = node.ch[bit]
            best = max(best, acc)
        return best
字符串哈希
P = 31
MOD = 10 ** 9 + 7


class Solution:
    def findAnswer(self, parent: List[int], s: str) -> List[bool]:
        """For every node, report whether its post-order subtree string is a palindrome.

        The string of a node is the concatenation of its children's strings
        (in increasing child index) followed by the node's own character.
        ``dfs`` fills ``res`` for the subtree rooted at node 0; for each
        node it returns the string, its reverse, a polynomial hash of
        each, and the length.

        Note: the explicit strings are built alongside the hashes and
        compared as well, and the traversal is recursive.
        """
        children = defaultdict(list)
        for node, par in enumerate(parent):
            if par > -1:
                children[par].append(node)

        res = [False] * len(parent)

        def dfs(node):
            fwd_text, bwd_text = "", ""
            fwd_hash = bwd_hash = size = 0
            for child in children[node]:
                c_fwd, c_bwd, c_fh, c_bh, c_len = dfs(child)
                fwd_text += c_fwd
                bwd_text = c_bwd + bwd_text
                # Forward hash appends the child on the right; the reverse
                # hash places it in front of what has been collected so far.
                fwd_hash = (fwd_hash * pow(P, c_len, MOD) + c_fh) % MOD
                bwd_hash = (bwd_hash + c_bh * pow(P, size, MOD)) % MOD
                size += c_len

            # The node's own character comes last in the forward string.
            code = ord(s[node]) - ord('a')
            fwd_text += s[node]
            bwd_text = s[node] + bwd_text
            fwd_hash = (fwd_hash * P + code) % MOD
            bwd_hash = (bwd_hash + code * pow(P, size, MOD)) % MOD
            size += 1

            res[node] = (fwd_hash == bwd_hash and fwd_text == bwd_text)
            return fwd_text, bwd_text, fwd_hash, bwd_hash, size

        dfs(0)
        return res