字符串哈希

  • 字符串哈希是指把一个字符串转化成一个整数,并尽量保证不同的字符串得到不同的哈希值(冲突无法完全避免,但概率可以做到极低),这样就可以用哈希值来判断某个字串是否重复出现过。
  • 一般取一固定值P(P取质数),把字符串看作P进制数,并为每种字符分配一个大于0的数值。一般来说,我们分配的数值都远小于P。例如,对于小写字母构成的字符串,可以令 $a=1, b=2, \dots, z=26$。再取一固定值M,求出该P进制数对M的余数(取模),作为该字符串的Hash值。
  • 一般P取P=131或者P=13331,冲突概率低。M通常取 $M=10^9+7$ 作为模;也可以直接使用 **unsigned long long** 类型来存储hash值,利用其自然溢出,相当于对 $2^{64}$ 取模
  • 在出现碰撞的情况下,可以构造两组甚至多组hash值来对比确定是否是同一字符串

链接:https://leetcode-cn.com/problems/repeated-string-match/

class Solution {
public:
    /**
     * Rabin-Karp search for `needle` inside the endless repetition of `haystack`.
     *
     * `haystack` is read cyclically (position p reads haystack[p % n]), so a
     * match is allowed to wrap past the end of the string. Only start offsets
     * in [0, n) are tried, which is enough because the text is periodic.
     *
     * @param haystack text that is treated as repeating forever
     * @param needle   pattern to look for
     * @return the smallest start offset in [0, n) where `needle` matches,
     *         0 when `needle` is empty, and -1 when there is no match
     *         (an empty `haystack` with a non-empty `needle` yields -1).
     */
    int strStr(std::string haystack, std::string needle) {
        const int n = static_cast<int>(haystack.size());
        const int m = static_cast<int>(needle.size());
        if (m == 0) {
            return 0;
        }
        if (n == 0) {
            // Nothing can match, and every `% n` below would divide by zero.
            return -1;
        }

        // Modulus and base are randomised per call so that a fixed input cannot
        // be crafted to collide. Both stay small enough that every product
        // below fits comfortably in a signed 64-bit integer.
        const long long k1 = 1e9 + 7;
        const long long k2 = 1337;
        std::srand(static_cast<unsigned>(std::time(nullptr)));
        const long long mod = std::rand() % k1 + k1;
        const long long base = std::rand() % k2 + k2;

        // Hash characters as unsigned bytes: with a signed `char`, bytes >= 0x80
        // would make the pattern hash negative while the rolling hash is kept
        // non-negative, and genuine matches would be missed.
        const auto byteOf = [](char c) -> long long {
            return static_cast<unsigned char>(c);
        };
        const auto textAt = [&](int pos) -> long long {
            return byteOf(haystack[pos % n]);
        };

        long long target = 0;
        for (char c : needle) {
            target = (target * base + byteOf(c)) % mod;
        }

        // Pre-load the first m-1 characters; `lead` ends up as base^(m-1) % mod,
        // the weight of the oldest character in a full window.
        long long window = 0;
        long long lead = 1;
        for (int i = 0; i < m - 1; ++i) {
            window = (window * base + textAt(i)) % mod;
            lead = (lead * base) % mod;
        }

        // Character-by-character confirmation, so a hash collision can never
        // be reported as a match.
        const auto matchesAt = [&](int start) {
            for (int j = 0; j < m; ++j) {
                if (haystack[(start + j) % n] != needle[j]) {
                    return false;
                }
            }
            return true;
        };

        for (int start = 0; start < n; ++start) {
            // Append the newest character so the window covers [start, start + m).
            window = (window * base + textAt(start + m - 1)) % mod;
            if (window == target && matchesAt(start)) {
                return start;
            }
            // Drop the oldest character and normalise back into [0, mod).
            window = ((window - lead * textAt(start)) % mod + mod) % mod;
        }
        return -1;
    }

    /**
     * Minimum number of times `a` must be repeated so that `b` is a substring
     * of the repetition.
     *
     * @param a string to repeat
     * @param b string that must appear in the repetition
     * @return the minimum repeat count, or -1 when no repetition of `a`
     *         contains `b`.
     */
    int repeatedStringMatch(std::string a, std::string b) {
        const int an = static_cast<int>(a.size());
        const int bn = static_cast<int>(b.size());
        const int index = strStr(a, b);
        if (index == -1) {
            return -1;
        }
        if (an - index >= bn) {
            // b fits entirely inside the first copy of a.
            return 1;
        }
        // One copy holds the first (an - index) characters of b; the remainder
        // needs ceil((bn - (an - index)) / an) further copies.
        return (bn + index - an - 1) / an + 2;
    }
};

链接:https://leetcode-cn.com/problems/longest-duplicate-substring/

class Solution {
public:
    int n;                           // length of the string handled by the last call
    unsigned long long prime = 31;   // base of the polynomial rolling hash

    /**
     * Longest substring of `s` that occurs at least twice (occurrences may
     * overlap).
     *
     * Binary-searches the answer length; for each candidate length a rolling
     * hash (base `prime`, arithmetic modulo 2^64 through unsigned overflow)
     * is slid across `s`. Every hash hit is confirmed against the real text,
     * because hashing modulo 2^64 has known colliding inputs.
     *
     * @param s input string
     * @return one longest duplicated substring, or "" when there is none.
     */
    std::string longestDupSubstring(std::string s) {
        n = s.size();

        // One window of the current width: its rolling hash and start offset.
        struct Window {
            unsigned long long hash;
            int start;
        };

        // Returns the start of a window of `width` characters whose text
        // already appeared at an earlier offset, or -1 if there is none.
        auto find = [&](int width) -> int {
            auto hasher = [](const Window& w) -> std::size_t {
                return static_cast<std::size_t>(w.hash);
            };
            // Two windows are the same key only if their text is identical;
            // the hash comparison is just a cheap early-out.
            auto sameText = [&s, width](const Window& a, const Window& b) {
                return a.hash == b.hash &&
                       s.compare(a.start, width, s, b.start, width) == 0;
            };
            std::unordered_set<Window, decltype(hasher), decltype(sameText)> seen(
                static_cast<std::size_t>(n - width + 1), hasher, sameText);

            unsigned long long hash = 0;
            unsigned long long power = 1;  // becomes prime^width
            for (int i = 0; i < width; i++) {
                hash = hash * prime + (s[i] - 'a');
                power *= prime;
            }
            seen.insert(Window{hash, 0});

            for (int i = width; i < n; i++) {
                // Shift in s[i] and remove s[i - width], whose weight is now prime^width.
                hash = hash * prime - power * (s[i - width] - 'a') + (s[i] - 'a');
                const Window current{hash, i - width + 1};
                // Insertion is refused exactly when an equal-text window exists.
                if (!seen.insert(current).second) {
                    return current.start;
                }
            }
            return -1;
        };

        int lo = 1;
        int hi = n - 1;
        int bestStart = -1;
        int bestLen = 0;
        // A duplicate of length L implies duplicates of every shorter length,
        // so the predicate "a duplicate of this length exists" is monotone.
        while (lo <= hi) {
            const int mid = (lo + hi) / 2;
            const int start = find(mid);
            if (start != -1) {
                bestLen = mid;
                bestStart = start;
                lo = mid + 1;
            } else {
                hi = mid - 1;
            }
        }

        if (bestStart == -1) {
            return "";
        }
        return s.substr(bestStart, bestLen);
    }
};
posted @ 2021-12-23 18:36  kitamu  阅读(263)  评论(0编辑  收藏  举报
Live2D