Loading

【LBLD】滑动窗口算法延伸:RABIN KARP 字符匹配算法

滑动窗口算法延伸:RABIN KARP 字符匹配算法

187. 重复的DNA序列

普通方法:

// Brute-force approach: record every 10-character substring in a hash set and
// report the ones that show up more than once.
class Solution {
public:
    // Returns every 10-character substring of `s` that occurs at least twice,
    // each listed once. The order of the result follows the iteration order of
    // an unordered_set and is therefore unspecified.
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        const int total = s.size();
        std::unordered_set<std::string> visited;   // substrings met so far
        std::unordered_set<std::string> repeated;  // substrings met again

        int start = 0;
        while (start + 10 <= total) {
            std::string window = s.substr(start, 10);
            // insert() reports through .second whether the key was newly added.
            const bool firstTime = visited.insert(window).second;
            if (!firstTime) {
                repeated.insert(window);
            }
            ++start;
        }

        std::vector<std::string> answer(repeated.begin(), repeated.end());
        return answer;
    }
};

滑动哈希:

// Rolling-hash approach: treat each 10-character window as a 10-digit base-4
// number, so every distinct window maps to a distinct integer below 4^10 and
// the "seen" set can store small ints instead of strings.
class Solution {
public:
    // Returns every 10-character substring of `s` that occurs at least twice,
    // each listed once. The order of the result follows the iteration order of
    // an unordered_set and is therefore unspecified.
    //
    // The input is expected to consist of 'A', 'C', 'G' and 'T' only; any other
    // character is encoded as digit 0 and is thus indistinguishable from 'A'.
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        const std::size_t windowLen = 10;  // length of the sequences searched for
        const int radix = 4;               // one base-4 digit per nucleotide

        // Weight of the most significant digit, radix^(windowLen - 1).
        // Computed with integer arithmetic so no floating-point rounding is involved.
        int highWeight = 1;
        for (std::size_t i = 1; i < windowLen; ++i) {
            highWeight *= radix;
        }

        std::unordered_set<int> seen;          // hashes of windows met so far
        std::unordered_set<std::string> res;   // windows met more than once

        int windowHash = 0;
        std::size_t left = 0;
        for (std::size_t right = 0; right < s.size(); ++right) {
            // Grow the window: shift existing digits up and append the new one.
            windowHash = windowHash * radix + digitOf(s[right]);

            if (right - left + 1 == windowLen) {
                // insert() reports through .second whether the hash was new.
                if (!seen.insert(windowHash).second) {
                    res.insert(s.substr(left, windowLen));
                }
                // Shrink the window: drop the most significant digit.
                windowHash -= highWeight * digitOf(s[left]);
                ++left;
            }
        }
        return std::vector<std::string>(res.begin(), res.end());
    }

private:
    // Maps a nucleotide letter to its base-4 digit; unknown characters map to 0.
    static int digitOf(char base) {
        switch (base) {
            case 'C': return 1;
            case 'G': return 2;
            case 'T': return 3;
            case 'A':
            default:  return 0;
        }
    }
};

28. 找出字符串中第一个匹配项的下标

// Rabin-Karp substring search: compare a rolling hash of each haystack window
// against the hash of the needle, and confirm candidate matches by direct
// comparison so hash collisions can never produce a wrong answer.
class Solution {
public:
    // Returns the index of the first occurrence of `needle` in `haystack`,
    // or -1 if it does not occur. An empty needle matches at index 0.
    int strStr(std::string haystack, std::string needle) {
        const std::size_t L = needle.size();
        if (L == 0) return 0;                 // empty pattern matches at the start
        if (haystack.size() < L) return -1;   // pattern cannot fit

        // Large prime modulus keeps collisions rare; with R = 256 every
        // intermediate product stays far below the 64-bit limit.
        const long long Q = 1000000007LL;
        const long long R = 256;

        // R^(L-1) mod Q: weight of the character that leaves the window.
        long long RL = 1;
        for (std::size_t i = 1; i < L; ++i) {
            RL = (RL * R) % Q;
        }

        // Read bytes as unsigned so characters >= 0x80 never yield negative
        // remainders, which would make equal strings hash differently.
        auto code = [](char c) -> long long {
            return static_cast<unsigned char>(c);
        };

        long long needleHash = 0;
        for (char c : needle) {
            needleHash = (needleHash * R + code(c)) % Q;
        }

        long long windowHash = 0;
        std::size_t left = 0;
        for (std::size_t right = 0; right < haystack.size(); ++right) {
            // Grow the window by one character on the right.
            windowHash = (windowHash * R + code(haystack[right])) % Q;

            if (right - left + 1 == L) {
                // Equal hashes only suggest a match; verify the actual characters.
                if (windowHash == needleHash && haystack.compare(left, L, needle) == 0) {
                    return static_cast<int>(left);
                }
                // Shrink the window: remove the leftmost character's contribution.
                // Adding Q before the final reduction keeps the value non-negative.
                windowHash = (windowHash - (code(haystack[left]) * RL) % Q + Q) % Q;
                ++left;
            }
        }
        return -1;
    }
};
posted @ 2023-04-13 20:38  杨谖之  阅读(169)  评论(0)  编辑  收藏  举报