多字符串查找算法:kmp与step
1,计算kmp失败回退函数:
/**
 * Builds the KMP failure (fallback) table for {@code pattern}.
 *
 * <p>{@code fail[j]} is the index of the last character of the longest proper prefix of
 * {@code pattern[0..j]} that is also a suffix of it, or {@code -1} when no such prefix exists.
 * After a mismatch following {@code j} matched characters, matching can therefore resume at
 * pattern position {@code fail[j - 1] + 1}.
 *
 * @param pattern the pattern to analyse; rejected when {@code hasLength(pattern)} is false
 *                (per the error message: null or empty -- the helper itself is defined elsewhere)
 * @return an array with one entry per pattern character
 * @throws IllegalArgumentException if the pattern is rejected by {@code hasLength}
 */
public static int[] getKmpFail(String pattern) {
    if (!hasLength(pattern)) {
        throw new IllegalArgumentException("null or empty pattern is not allowed to get kmp fail array.");
    }
    final int length = pattern.length();
    final int[] fail = new int[length];
    // A single character has no proper prefix.
    fail[0] = -1;
    for (int pos = 1; pos < length; pos++) {
        final char current = pattern.charAt(pos);
        // Start from the border of the previous prefix and shrink it until it can be extended.
        int border = fail[pos - 1];
        while (border >= 0 && pattern.charAt(border + 1) != current) {
            border = fail[border];
        }
        // Either the border was extended by one character, or no border exists at all.
        fail[pos] = (pattern.charAt(border + 1) == current) ? border + 1 : -1;
    }
    return fail;
}
2,查找子串:
/**
 * Finds the first occurrence of {@code pattern} that lies completely inside
 * {@code source[from, to)} using the KMP algorithm.
 *
 * @param source  the text to scan
 * @param pattern the pattern to look for
 * @param from    index in {@code source} at which scanning starts (inclusive)
 * @param fail    failure table of {@code pattern}, where {@code fail[k] + 1} is the pattern
 *                position to resume from after a mismatch following {@code k + 1} matched characters
 * @param to      index in {@code source} at which scanning stops (exclusive)
 * @return the start index of the first match, or {@code -1} if there is none in the range
 */
public static int kmpIndexOf(String source, String pattern, int from, int[] fail, int to) {
    final int patternLength = pattern.length();
    int sourcePos = from;
    int matched = 0;
    while (sourcePos < to && matched < patternLength) {
        if (source.charAt(sourcePos) == pattern.charAt(matched)) {
            // Characters agree: advance in both strings.
            sourcePos++;
            matched++;
        } else if (matched == 0) {
            // Nothing matched yet, so simply move on in the source.
            sourcePos++;
        } else {
            // Fall back inside the pattern without re-reading source characters.
            matched = fail[matched - 1] + 1;
        }
    }
    if (matched != patternLength) {
        return -1;
    }
    return sourcePos - patternLength;
}
3,kmp多字符串查找:
public static int[] kmpFirstMatch(String source, String[] patterns, int from, int[][] fails) { int[] indices = { -1, -1 }; int to = source.length(); for (int i = 0; i < patterns.length; i++) {//多次遍历source查找patterns[i] if(to - from < patterns[i].length()) continue; int index = kmpIndexOf(source, patterns[i], from, fails[i], to); if (index != -1 && (index < indices[0] || indices[0] == -1)) { indices[0] = to = index;//缩小范围,to之后的不用查找了 indices[1] = patterns[i].length();//返回目标索引和长度 to++;//if(from == index) break;上面的continue更优化 } } return indices; }
4,step多字符串查找:
public static int[] stepFirstMatch(char[] sources, int from, char[][] chars) { int[] indices = { -1, -1 }; for (int i = from; i < sources.length; i++) {//从头至尾依次查找 for (int j = 0; j < chars.length; j++) { if (sources[i] == chars[j][0]) { int k = 1; while (k < chars[j].length && i + k < sources.length && sources[i + k] == chars[j][k]) k++; if (k == chars[j].length) { indices[0] = i; indices[1] = k; return indices;//找到就立即返回 } } } } return indices; }
step比kmp方式要快一点,大概是基本类型上占些优势。
找到stepFirstMatch的一个合适用法:生成随机充值码时排除相似字符串!
//检查相似字符不能同时出现,生成的随机码串调用checkNoSimilarPairs(randomString)即可。 private static String[] similarPairsString = {"0O", "1I", "2Z", "VY"}; private static char[][] similarPairs = null; static { similarPairs = new char[similarPairsString.length*2][]; for(int i = 0; i < similarPairsString.length; i++) { similarPairs[2*i] = similarPairsString[i].toCharArray(); similarPairs[2*i + 1] = new StringBuilder(similarPairsString[i]).reverse().toString().toCharArray(); } } private boolean checkNoSimilarPairs(String source) { return stepFirstMatch(source.toCharArray(), 0, similarPairs)[0] == -1; }