25、字符串匹配 Rabin-Karp 算法
1、暴力搜索
/**
 * Naive substring search.
 */
public class Bruteforce {

    private Bruteforce() {
        // Static utility holder; not meant to be instantiated.
    }

    /**
     * Finds the first occurrence of {@code t} inside {@code s} by trying every
     * possible alignment, O(|s| * |t|).
     *
     * @param s the text to search in
     * @param t the pattern to look for
     * @return the index in {@code s} where the first match starts, or -1 if there is none
     */
    public static int bruteforce(String s, String t) {
        final int textLen = s.length();
        final int patternLen = t.length();
        if (textLen < patternLen) {
            return -1;
        }

        // The last alignment that still fits starts at textLen - patternLen.
        final int lastStart = textLen - patternLen;
        for (int start = 0; start <= lastStart; start++) {
            int matched = 0;
            while (matched < patternLen && s.charAt(start + matched) == t.charAt(matched)) {
                matched++;
            }
            if (matched == patternLen) {
                return start;
            }
        }
        return -1;
    }
}
2、改进思路
3、字符串转哈希思想
3.1、段式回文
解决
/**
 * Splits a text into the largest number of chunks such that the i-th chunk
 * from the front equals the i-th chunk from the back (direct comparison version).
 */
public class LongestDecomposition {

    /**
     * Greedily peels the shortest matching prefix/suffix pair off both ends
     * until nothing is left or no pair matches.
     *
     * @param text the text to decompose
     * @return the number of chunks in the decomposition
     */
    public int longestDecomposition(String text) {
        int chunks = 0;
        int left = 0;
        int right = text.length() - 1;

        while (left <= right) {
            boolean peeled = false;
            // Candidate pair: text[left ... l] against text[r ... right].
            for (int l = left, r = right; l < r; l++, r--) {
                final int len = l - left + 1;
                if (text.regionMatches(left, text, r, len)) {
                    chunks += 2;
                    left = l + 1;
                    right = r - 1;
                    peeled = true;
                    break;
                }
            }
            if (!peeled) {
                // The remaining middle part is a single chunk of its own.
                return chunks + 1;
            }
        }
        return chunks;
    }
}
优化
利用 Hash 来加速字符串比较
/**
 * Splits a text into the largest number of chunks such that the i-th chunk
 * from the front equals the i-th chunk from the back. Polynomial hashes of the
 * growing prefix and suffix are used to skip most character-by-character comparisons.
 */
public class LongestDecomposition {

    private static final long MOD = 1_000_000_007L;
    private static final long BASE = 26;

    /**
     * Greedily peels the shortest matching prefix/suffix pair off both ends.
     * A hash match is always confirmed by a real comparison, so collisions
     * cannot produce a wrong answer.
     *
     * @param text the text to decompose; may be empty
     * @return the number of chunks in the decomposition, 0 for an empty text
     */
    public int longestDecomposition(String text) {
        final int n = text.length();
        if (n == 0) {
            // Nothing to split; also avoids indexing into a zero-length power table.
            return 0;
        }

        // pow[i] = (BASE ^ i) % MOD
        final long[] pow = new long[n];
        pow[0] = 1;
        for (int i = 1; i < n; i++) {
            pow[i] = pow[i - 1] * BASE % MOD;
        }

        int chunks = 0;
        int left = 0;
        int right = n - 1;
        while (left <= right) {
            long prefixHash = 0;
            long suffixHash = 0;
            boolean peeled = false;

            // Candidate pair: text[left ... l] against text[r ... right].
            for (int l = left, r = right; l < r; l++, r--) {
                // floorMod keeps both hashes in [0, MOD) even if a character is below 'a',
                // so equal strings always produce equal hash values.
                prefixHash = Math.floorMod(prefixHash * BASE + (text.charAt(l) - 'a'), MOD);
                suffixHash = Math.floorMod((text.charAt(r) - 'a') * pow[right - r] + suffixHash, MOD);

                final int len = l - left + 1;
                if (prefixHash == suffixHash && text.regionMatches(left, text, r, len)) {
                    chunks += 2;
                    left = l + 1;
                    right = r - 1;
                    peeled = true;
                    break;
                }
            }
            if (!peeled) {
                // The remaining middle part is a single chunk of its own.
                return chunks + 1;
            }
        }
        return chunks;
    }
}
3.2、最长快乐前缀
解决
/**
 * Finds the longest proper prefix of a string that is also a suffix of it
 * (direct comparison version).
 */
public class LongestPrefix {

    /**
     * Tries every candidate length from longest to shortest and returns the
     * first prefix that also ends the string.
     *
     * @param s the input string
     * @return the longest proper prefix that is also a suffix, or "" if none exists
     */
    public String longestPrefix(String s) {
        final int n = s.length();
        int len = n - 1;
        while (len >= 1) {
            // Compare s[0 ... len - 1] with s[n - len ... n - 1].
            if (sameRun(s, 0, n - len, len)) {
                return s.substring(0, len);
            }
            len--;
        }
        return "";
    }

    /**
     * Checks whether the {@code count} characters starting at {@code a}
     * equal the {@code count} characters starting at {@code b}.
     */
    private boolean sameRun(String s, int a, int b, int count) {
        int offset = 0;
        while (offset < count) {
            if (s.charAt(a + offset) != s.charAt(b + offset)) {
                return false;
            }
            offset++;
        }
        return true;
    }
}
优化
利用 Hash 来加速字符串比较
(a + b) % M == ((a % M) + (b % M)) % M
(a * b) % M == ((a % M) * (b % M)) % M
(a / b) % M != ((a % M) / (b % M)) % M(一般不成立:除法不能像加法、乘法那样逐项取模)
/**
 * Finds the longest proper prefix of a string that is also a suffix of it.
 * Precomputed polynomial hashes of every prefix and every suffix are used to
 * skip most character-by-character comparisons.
 */
public class LongestPrefix {

    private static final long MOD = 1_000_000_007L;
    private static final long BASE = 26;

    /**
     * Tries every candidate length from longest to shortest. A hash match is
     * always confirmed by a real comparison, so collisions cannot produce a
     * wrong answer.
     *
     * @param s the input string; may be empty
     * @return the longest proper prefix that is also a suffix, or "" if none exists
     */
    public String longestPrefix(String s) {
        final int n = s.length();
        if (n < 2) {
            // No non-empty proper prefix exists; also avoids indexing into empty tables.
            return "";
        }

        // pow[i] = (BASE ^ i) % MOD
        final long[] pow = new long[n];
        pow[0] = 1;
        for (int i = 1; i < n; i++) {
            pow[i] = pow[i - 1] * BASE % MOD;
        }

        // floorMod keeps every hash in [0, MOD) even if a character is below 'a',
        // so equal strings always produce equal hash values.

        // prefixHash[i] = hash(s[0 ... i])
        final long[] prefixHash = new long[n];
        prefixHash[0] = Math.floorMod((long) (s.charAt(0) - 'a'), MOD);
        for (int i = 1; i < n; i++) {
            prefixHash[i] = Math.floorMod(prefixHash[i - 1] * BASE + (s.charAt(i) - 'a'), MOD);
        }

        // suffixHash[i] = hash(s[i ... n - 1])
        final long[] suffixHash = new long[n];
        suffixHash[n - 1] = Math.floorMod((long) (s.charAt(n - 1) - 'a'), MOD);
        for (int i = n - 2; i >= 0; i--) {
            suffixHash[i] = Math.floorMod((s.charAt(i) - 'a') * pow[n - 1 - i] + suffixHash[i + 1], MOD);
        }

        for (int len = n - 1; len >= 1; len--) {
            // s[0 ... len - 1] against s[n - len ... n - 1]
            if (prefixHash[len - 1] == suffixHash[n - len] && s.regionMatches(0, s, n - len, len)) {
                return s.substring(0, len);
            }
        }
        return "";
    }
}
3.3、重复的 DNA 序列
解决
/**
 * Reports every 10-character substring that occurs more than once
 * (substring-as-key version).
 */
public class FindRepeatedDnaSequences {

    /**
     * Slides a 10-character window over {@code s} and records each window
     * that has already been seen before.
     *
     * @param s the sequence to scan
     * @return the distinct 10-character substrings that appear at least twice
     */
    public List<String> findRepeatedDnaSequences(String s) {
        HashSet<String> visited = new HashSet<>();
        HashSet<String> repeated = new HashSet<>();

        final int lastStart = s.length() - 10;
        int start = 0;
        while (start <= lastStart) {
            String window = s.substring(start, start + 10);
            // add() returns false when the window was already recorded.
            if (!visited.add(window)) {
                repeated.add(window);
            }
            start++;
        }
        return new ArrayList<>(repeated);
    }
}
优化
利用滚动 Hash 来加速字符串比较
/**
 * Reports every 10-character substring that occurs more than once, using a
 * rolling hash so that each window is identified by a number instead of a string.
 */
public class FindRepeatedDnaSequences {

    /**
     * Encodes each window as a 10-digit base-10 number (A=1, C=2, G=3, T=4) and
     * updates it in O(1) per step by appending the new digit and removing the
     * leading one.
     *
     * <p>Any character other than A/C/G/T is encoded as 0, and a character with a
     * code of 256 or more falls outside the lookup table.
     *
     * @param s the sequence to scan
     * @return the distinct 10-character substrings that appear at least twice
     */
    public List<String> findRepeatedDnaSequences(String s) {
        final int n = s.length();
        if (n <= 10) {
            return new ArrayList<>();
        }

        final int[] digit = new int[256];
        digit['A'] = 1;
        digit['C'] = 2;
        digit['G'] = 3;
        digit['T'] = 4;

        // Place value of the leading (oldest) digit in a 10-digit window.
        final long leadingPlace = (long) 1e9;

        HashSet<Long> visited = new HashSet<>();
        HashSet<String> repeated = new HashSet<>();

        // Preload the first nine characters: code = code(s[0 ... 8]).
        long code = 0;
        int end = 0;
        while (end < 9) {
            code = code * 10 + digit[s.charAt(end)];
            end++;
        }

        // From here on, code = code(s[end - 9 ... end]) after the append step.
        for (; end < n; end++) {
            final int start = end - 9;
            code = code * 10 + digit[s.charAt(end)];
            // add() returns false when this window code was already recorded.
            if (!visited.add(code)) {
                repeated.add(s.substring(start, end + 1));
            }
            // Drop the leading digit so the next append yields a 10-digit window again.
            code -= digit[s.charAt(start)] * leadingPlace;
        }
        return new ArrayList<>(repeated);
    }
}
4、Rabin-Karp
/**
 * Rabin-Karp substring search based on a rolling hash, O(n).
 */
public class RabinKarp {

    private RabinKarp() {
        // Static utility holder; not meant to be instantiated.
    }

    /**
     * Finds the first occurrence of {@code t} inside {@code s}. The hash of the
     * current window is updated in O(1) per step, and every hash match is
     * confirmed by a real comparison before it is reported.
     *
     * @param s the text to search in
     * @param t the pattern to look for
     * @return the index in {@code s} where the first match starts, 0 for an empty
     *         pattern, or -1 if there is no match
     */
    public static int rabinKarp(String s, String t) {
        final int patternLen = t.length();
        if (patternLen == 0) {
            return 0;
        }
        final int textLen = s.length();
        if (textLen < patternLen) {
            return -1;
        }

        final long base = 256;
        final long mod = (long) (1e9 + 7);

        // One pass over the first patternLen - 1 positions builds three values at once:
        // leadWeight = base ^ (patternLen - 1), plus the partial pattern and window hashes.
        long leadWeight = 1;
        long patternHash = 0;
        long windowHash = 0;
        for (int i = 0; i < patternLen - 1; i++) {
            leadWeight = leadWeight * base % mod;
            patternHash = (patternHash * base + t.charAt(i)) % mod;
            windowHash = (windowHash * base + s.charAt(i)) % mod;
        }
        // Finish the pattern hash with its last character.
        patternHash = (patternHash * base + t.charAt(patternLen - 1)) % mod;

        for (int end = patternLen - 1; end < textLen; end++) {
            final int start = end - patternLen + 1;

            // Append s[end]: windowHash = hash(s[start ... end]).
            windowHash = (windowHash * base + s.charAt(end)) % mod;
            if (windowHash == patternHash && matchesAt(s, start, t)) {
                return start;
            }

            // Remove s[start]; adding mod first keeps the result non-negative.
            final long leading = s.charAt(start) * leadWeight % mod;
            windowHash = (windowHash - leading + mod) % mod;
        }
        return -1;
    }

    /**
     * Checks whether {@code t} occurs in {@code s} starting at index {@code start}.
     */
    private static boolean matchesAt(String s, int start, String t) {
        int k = 0;
        while (k < t.length()) {
            if (s.charAt(start + k) != t.charAt(k)) {
                return false;
            }
            k++;
        }
        return true;
    }
}
5、复杂度分析
本文来自博客园,作者:lidongdongdong~,转载请注明原文链接:https://www.cnblogs.com/lidong422339/p/17325531.html