只是不愿随波逐流 ...|

lidongdongdong~

园龄:2年7个月粉丝:14关注:8

25、字符串匹配 Rabin-Karp 算法

内容来自刘宇波老师算法与数据结构体系课

1、暴力搜索

image

/**
 * Naive substring matching.
 */
public class Bruteforce {

    private Bruteforce() {
    }

    /**
     * Finds the first occurrence of {@code t} inside {@code s} by trying every
     * possible start position, O(|s| * |t|).
     *
     * @param s the text to search in
     * @param t the pattern to look for
     * @return the smallest index at which {@code t} starts in {@code s}, or -1 if
     *         {@code t} does not occur; an empty {@code t} matches at index 0
     */
    public static int bruteforce(String s, String t) {
        int textLen = s.length();
        int patternLen = t.length();
        if (textLen < patternLen) {
            return -1;
        }

        // Last start position at which the pattern still fits inside the text.
        int lastStart = textLen - patternLen;
        for (int start = 0; start <= lastStart; start++) {
            int matched = 0;
            while (matched < patternLen && s.charAt(start + matched) == t.charAt(matched)) {
                matched++;
            }
            if (matched == patternLen) {
                return start;
            }
        }
        return -1;
    }
}

2、改进思路

image
image

3、字符串转哈希思想

3.1、段式回文

1147 - 段式回文

解决

public class LongestDecomposition {

    /**
     * Counts the pieces obtained by repeatedly peeling the shortest matching
     * prefix/suffix pair off both ends of {@code text}.
     *
     * @param text the string to decompose
     * @return the number of pieces; 0 for an empty string
     */
    public int longestDecomposition(String text) {
        return solve(text, 0, text.length() - 1);
    }

    /**
     * Decomposes s[left ... right] from the outside in.
     */
    private int solve(String s, int left, int right) {
        int pieces = 0;
        while (left <= right) {
            int border = shortestBorder(s, left, right);
            if (border == 0) {
                // Nothing matches: whatever remains is one middle piece.
                return pieces + 1;
            }
            pieces += 2;
            left += border;
            right -= border;
        }
        return pieces;
    }

    /**
     * Returns the smallest length len >= 1 such that the first len characters of
     * s[left ... right] equal its last len characters without the two runs
     * overlapping, or 0 if no such length exists.
     */
    private int shortestBorder(String s, int left, int right) {
        int span = right - left + 1;
        for (int len = 1; 2 * len <= span; len++) {
            if (s.regionMatches(left, s, right - len + 1, len)) {
                return len;
            }
        }
        return 0;
    }
}

优化

利用 Hash 来加速字符串比较

image

/**
 * Splits a string into pieces by repeatedly peeling the shortest matching
 * prefix/suffix pair off both ends, using polynomial hashes to skip most
 * character-by-character comparisons.
 */
public class LongestDecomposition {

    /** Modulus applied to every hash value. */
    private static final long MOD = 1_000_000_007L;

    /** Base of the polynomial hash. */
    private static final int BASE = 26;

    /** pow26[i] = (26 ^ i) % MOD; rebuilt on every call to longestDecomposition. */
    private long[] pow26;

    /**
     * Counts the pieces of the greedy outside-in decomposition of {@code text}.
     *
     * @param text the string to decompose
     * @return the number of pieces; 0 for an empty string
     */
    public int longestDecomposition(String text) {
        // An empty string has no pieces; without this guard pow26[0] below would
        // be written into a zero-length array.
        if (text.isEmpty()) {
            return 0;
        }

        pow26 = new long[text.length()];
        pow26[0] = 1;
        for (int i = 1; i < pow26.length; i++) {
            pow26[i] = (pow26[i - 1] * BASE) % MOD;
        }
        return solve(text, 0, text.length() - 1);
    }

    /**
     * Decomposes s[left ... right].
     */
    private int solve(String s, int left, int right) {
        if (left > right) {
            return 0;
        }

        long prevHash = 0; // hash of s[left ... l]
        long postHash = 0; // hash of s[r ... right]
        for (int l = left, r = right; l < r; l++, r--) {
            // Raw char codes are non-negative, so both hashes stay in [0, MOD) and
            // equal substrings always yield equal hashes, whatever the alphabet.
            prevHash = (prevHash * BASE + s.charAt(l)) % MOD;
            postHash = (s.charAt(r) * pow26[right - r] + postHash) % MOD;

            // Equal hashes may still be a collision, so confirm with a real comparison.
            if (prevHash == postHash && equal(s, left, l, r, right)) {
                return 2 + solve(s, l + 1, r - 1);
            }
        }
        // No matching pair: the remaining span is a single middle piece.
        return 1;
    }

    /**
     * s[l1 ... r1] == s[l2 ... r2] ?
     */
    private boolean equal(String s, int l1, int r1, int l2, int r2) {
        for (; l1 <= r1 && l2 <= r2; l1++, l2++) {
            if (s.charAt(l1) != s.charAt(l2)) {
                return false;
            }
        }
        return true;
    }
}

3.2、最长快乐前缀

1392 - 最长快乐前缀

解决

public class LongestPrefix {

    /**
     * Returns the longest proper prefix of {@code s} that is also a suffix of
     * {@code s}, trying candidate lengths from longest to shortest.
     *
     * @param s the input string
     * @return the longest such prefix, or the empty string if there is none
     */
    public String longestPrefix(String s) {
        int total = s.length();
        int candidate = total - 1;
        while (candidate >= 1) {
            // Compare s[0 ... candidate - 1] with the last `candidate` characters.
            if (s.regionMatches(0, s, total - candidate, candidate)) {
                return s.substring(0, candidate);
            }
            candidate--;
        }
        return "";
    }
}

优化

利用 Hash 来加速字符串比较

image

(a + b) % M == ((a % M) + (b % M)) % M
(a * b) % M == ((a % M) * (b % M)) % M
(a / b) % M != ((a % M) / (b % M)) % M    (一般情况下不成立,除法不能直接分别取模)
/**
 * Finds the longest proper prefix of a string that is also one of its
 * suffixes, using precomputed polynomial hashes to skip most
 * character-by-character comparisons.
 */
public class LongestPrefix {

    /** Modulus applied to every hash value. */
    private static final long MOD = 1_000_000_007L;

    /** Base of the polynomial hash. */
    private static final int BASE = 26;

    /**
     * Returns the longest proper prefix of {@code s} that is also a suffix of
     * {@code s}.
     *
     * @param s the input string
     * @return the longest such prefix, or the empty string if there is none
     *         (including when {@code s} itself is empty)
     */
    public String longestPrefix(String s) {
        int n = s.length();
        // Without this guard the table set-up below would index into empty arrays.
        if (n == 0) {
            return "";
        }

        // pow26[i] = (26 ^ i) % MOD
        long[] pow26 = new long[n];
        pow26[0] = 1;
        for (int i = 1; i < n; i++) {
            pow26[i] = (pow26[i - 1] * BASE) % MOD;
        }

        // Raw char codes are non-negative, so every hash stays in [0, MOD) and
        // equal substrings always yield equal hashes, whatever the alphabet.

        // prevHash[i] = hash(s[0 ... i])
        long[] prevHash = new long[n];
        prevHash[0] = s.charAt(0);
        for (int i = 1; i < n; i++) {
            prevHash[i] = (prevHash[i - 1] * BASE + s.charAt(i)) % MOD;
        }

        // postHash[i] = hash(s[i ... n - 1])
        long[] postHash = new long[n];
        postHash[n - 1] = s.charAt(n - 1);
        for (int i = n - 2; i >= 0; i--) {
            postHash[i] = (s.charAt(i) * pow26[n - 1 - i] + postHash[i + 1]) % MOD;
        }

        for (int len = n - 1; len >= 1; len--) {
            // s[0 ... len - 1] == s[n - len ... n - 1] ?
            // Equal hashes may still be a collision, so confirm with a real comparison.
            if (prevHash[len - 1] == postHash[n - len] && equal(s, 0, len - 1, n - len, n - 1)) {
                return s.substring(0, len);
            }
        }
        return "";
    }

    /**
     * s[l1 ... r1] == s[l2 ... r2] ?
     */
    private boolean equal(String s, int l1, int r1, int l2, int r2) {
        for (; l1 <= r1 && l2 <= r2; l1++, l2++) {
            if (s.charAt(l1) != s.charAt(l2)) {
                return false;
            }
        }
        return true;
    }
}

3.3、重复的 DNA 序列

187 - 重复的 DNA 序列

解决

public class FindRepeatedDnaSequences {

    /**
     * Collects every 10-character substring of {@code s} that occurs more than
     * once, by remembering each window's text in a set.
     *
     * @param s the sequence to scan
     * @return the repeated 10-character substrings, each listed once
     */
    public List<String> findRepeatedDnaSequences(String s) {
        HashSet<String> firstSightings = new HashSet<>();
        HashSet<String> repeated = new HashSet<>();

        // Number of 10-character windows; zero or negative when s is too short.
        int windowCount = s.length() - 9;
        for (int start = 0; start < windowCount; start++) {
            String window = s.substring(start, start + 10);
            // add() returns false when the window was already recorded.
            if (!firstSightings.add(window)) {
                repeated.add(window);
            }
        }
        return new ArrayList<>(repeated);
    }
}

优化

利用滚动 Hash 来加速字符串比较

image
image

public class FindRepeatedDnaSequences {

    /**
     * Collects every 10-character substring of {@code s} that occurs more than
     * once, using a rolling base-10 hash: each window is encoded as a decimal
     * number with one digit per character (A=1, C=2, G=3, T=4).
     *
     * @param s the sequence to scan
     * @return the repeated 10-character substrings, each listed once
     */
    public List<String> findRepeatedDnaSequences(String s) {
        int n = s.length();
        if (n <= 10) {
            return new ArrayList<>();
        }

        int[] digitOf = new int[256];
        digitOf['A'] = 1;
        digitOf['C'] = 2;
        digitOf['G'] = 3;
        digitOf['T'] = 4;

        HashSet<Long> seenCodes = new HashSet<>();
        HashSet<String> repeated = new HashSet<>();
        long lowNine = (long) 1e9;

        // Encode the first nine characters; the tenth is appended inside the loop.
        long code = 0;
        int pos = 0;
        while (pos < 9) {
            code = code * 10 + digitOf[s.charAt(pos)];
            pos++;
        }

        // After the append below, code encodes s[end - 9 ... end].
        for (int end = 9; end < n; end++) {
            code = code * 10 + digitOf[s.charAt(end)];
            if (!seenCodes.add(code)) {
                repeated.add(s.substring(end - 9, end + 1));
            }
            // Drop the leading (oldest) digit, keeping the low nine digits.
            code %= lowNine;
        }
        return new ArrayList<>(repeated);
    }
}

4、Rabin-Karp

image

/**
 * Rabin-Karp substring search based on a rolling hash.
 */
public class RabinKarp {

    private RabinKarp() {
    }

    /**
     * Finds the first occurrence of {@code t} inside {@code s}. Every window of
     * {@code s} is hashed incrementally and compared against the hash of
     * {@code t}; a hash match is confirmed by comparing the characters.
     *
     * @param s the text to search in
     * @param t the pattern to look for
     * @return the smallest index at which {@code t} starts in {@code s}, or -1 if
     *         {@code t} does not occur; an empty {@code t} matches at index 0
     */
    public static int rabinKarp(String s, String t) {
        final int patternLen = t.length();
        if (patternLen == 0) {
            return 0;
        }
        final int textLen = s.length();
        if (textLen < patternLen) {
            return -1;
        }

        final int base = 256;
        final long mod = 1_000_000_007L;

        long topWeight = 1;   // base ^ (patternLen - 1) % mod
        long patternHash = 0; // becomes hash(t)
        long windowHash = 0;  // becomes hash(s[0 ... patternLen - 2])
        for (int k = 0; k < patternLen - 1; k++) {
            topWeight = topWeight * base % mod;
            patternHash = (patternHash * base + t.charAt(k)) % mod;
            windowHash = (windowHash * base + s.charAt(k)) % mod;
        }
        patternHash = (patternHash * base + t.charAt(patternLen - 1)) % mod;

        for (int start = 0; start + patternLen <= textLen; start++) {
            int end = start + patternLen - 1;

            // Append s[end]: windowHash is now hash(s[start ... end]).
            windowHash = (windowHash * base + s.charAt(end)) % mod;
            if (windowHash == patternHash && s.regionMatches(start, t, 0, patternLen)) {
                return start;
            }

            // Remove s[start]; adding mod keeps the difference non-negative.
            long leading = s.charAt(start) * topWeight % mod;
            windowHash = (windowHash - leading + mod) % mod;
        }
        return -1;
    }
}

5、复杂度分析

image

posted @   lidongdongdong~  阅读(43)  评论(0)  编辑  收藏  举报
点击右上角即可分享
微信分享提示
评论
收藏
关注
推荐
深色
回顶
展开