[LeetCode] string整体做hash key,窗口思想复杂度O(n)。附来自LeetCode的4例题(标题有字数限制,写不下所有例题题目 T.T)
例题 1, anagrams (string as key思想)
Given an array of strings, return all groups of strings that are anagrams.
Note: All inputs will be in lower-case.
// Empty skeleton for the anagram problem: only the signature of anagrams() is given and
// the body is left blank, so this stub returns nothing and is not meant to be called as-is.
class Solution { public: vector<string> anagrams(vector<string> &strs) { } };
题意本身并不是很清晰,开始我的代码总是报Output Limit Exceeded,后来搜了相关文章,明白了题目真正要求的输出格式。
For example:
Input: ["tea","and","ate","eat","den"]
Output: ["tea","ate","eat"]
开始,我的思路是,将每一个string 都和其他比较,互为anagram的就记录到vector<string> res中。最后返回res。
写这段代码时,我对输出的理解还存在错误,以为对于所有anagram group,只要将这个group中的第一个放入返回的vector<string>中即可。所以下面代码中,如果res中后面的元素已经判定和res中靠前的string互为anagram,后面的元素会被从res中移除。
// First attempt: pairwise comparison of the strings (quadratic in the number of strings).
//
// NOTE: this version returns ONE representative per anagram group (the first string of
// each group, in input order), singletons included. That is not the output format the
// judge expects; it is kept only as the naive baseline.
class Solution {
public:
    // @param strs  input strings
    // @return      the first string of every anagram group, in order of first appearance
    std::vector<std::string> anagrams(std::vector<std::string> &strs) {
        std::vector<std::string> res;
        for (std::vector<std::string>::size_type i = 0; i < strs.size(); ++i) {
            // A string is kept only if no earlier kept string is an anagram of it.
            bool alreadyRepresented = false;
            for (std::vector<std::string>::size_type j = 0;
                 j < res.size() && !alreadyRepresented; ++j) {
                alreadyRepresented = isAnagram(res[j], strs[i]);
            }
            if (!alreadyRepresented) {
                res.push_back(strs[i]);
            }
        }
        return res;
    }

private:
    // Returns true when `a` and `b` contain exactly the same bytes with the same
    // multiplicities. The counter table lives on the stack (nothing to free) and is
    // indexed by unsigned char, so any byte value is a valid index.
    static bool isAnagram(const std::string &a, const std::string &b) {
        if (a.length() != b.length()) {
            return false;
        }
        int balance[256] = {0};
        for (std::string::size_type k = 0; k < a.length(); ++k) {
            ++balance[static_cast<unsigned char>(a[k])];
        }
        for (std::string::size_type k = 0; k < b.length(); ++k) {
            // With equal lengths, a counter going negative is the only way to differ.
            if (--balance[static_cast<unsigned char>(b[k])] < 0) {
                return false;
            }
        }
        return true;
    }
};
原因就在于宏观上的O(n²),应该有优化的余地。Annie Kim's Blog中介绍了空间换时间的做法,即定义一个map<string, int>,然后遍历strs的元素,对于strs中的每一个string s,先将s的内容排序,再将排好序的s当作key。
但是这个思路的缺点在于:因为是将string 排序后本身作为key,因此如果题目增加难度,比如string中包含标点和空格,那么这种方法就不能准确判断两个string是否anagram了。另外,如果string非常长,用来做key也不是很方便。
我结合我自己的思路做了一些修改,修改后的思路中,key不是排完序的string,而是依然利用我开始代码里面的dic[26]:先从头到尾扫一遍string,然后给dic对应位置+1,然后将dic元素本身的排列作为key。这样,(1) 在有空格和标点的情况下,依然可以判断两个string是否是anagram,如果有大写字母或者数字,只需要扩张dic的大小即可;而且Key的长度为定值,这里总是26。(2) 不再需要O(mlogm)的时间复杂度,需要O(m+26) = O(m)的复杂度。
// Groups anagrams in a single pass by using a letter-count signature as the map key.
class Solution {
public:
    // @param strs  input strings
    // @return      every string that shares its signature with at least one other input
    //              string. The first member of a group is emitted when its second member
    //              is encountered; later members are appended as they are seen.
    std::vector<std::string> anagrams(std::vector<std::string> &strs) {
        std::vector<std::string> res;
        // signature -> index of the first string with that signature, or -1 once that
        // first string has already been copied into `res`.
        std::map<std::string, int> rec;
        for (std::vector<std::string>::size_type i = 0; i < strs.size(); ++i) {
            const std::string key = generateKey(strs[i]);
            std::map<std::string, int>::iterator slot = rec.find(key);
            if (slot == rec.end()) {
                rec.insert(std::make_pair(key, static_cast<int>(i)));
                continue;
            }
            if (slot->second >= 0) {
                res.push_back(strs[slot->second]);
                slot->second = -1;
            }
            res.push_back(strs[i]);
        }
        return res;
    }

private:
    // Builds the signature of `str`: the occurrence count of each letter 'a'..'z',
    // each written as 8 hex digits, so the key always has the same length.
    // Characters outside 'a'..'z' are ignored.
    //
    // Each counter is encoded in full rather than squeezed into a single char: storing
    // `count + '0'` in one char wraps around, which makes counts that differ by 256
    // produce the same key.
    static std::string generateKey(const std::string &str) {
        unsigned int counts[26] = {0};
        for (std::string::size_type j = 0; j < str.length(); ++j) {
            if (str[j] >= 'a' && str[j] <= 'z') {
                ++counts[str[j] - 'a'];
            }
        }
        static const char kHex[] = "0123456789abcdef";
        std::string key;
        key.reserve(26 * 8);
        for (int k = 0; k < 26; ++k) {
            for (int shift = 28; shift >= 0; shift -= 4) {
                key.push_back(kHex[(counts[k] >> shift) & 0xFu]);
            }
        }
        return key;
    }
};
100 / 100 test cases passed. Runtime: 224 ms
而使用sorted string做key的方法,数据是 100 / 100 test cases passed. Runtime: 228 ms
时间上并没有提高多少,原因应该是test case的string长度都不算大,故O(mlogm)和O(m+26) 差别不大。
不论是引用的思路,还是我的思路,核心都是使用了map<string, int>,当需要在一堆字符串中找出包含相同字符的 group,这种空间换时间的方法可以考虑。
例题 2, Longest Substring Without Repeating Characters (窗口思想)
Given a string, find the length of the longest substring without repeating characters. For example, the longest substring without repeating letters for "abcabcbb" is "abc", which the length is 3. For "bbbbb" the longest substring is "b", with the length of 1.
// Empty skeleton for the longest-substring-without-repeats problem: only the signature of
// lengthOfLongestSubstring() is given and the body is left blank, so this stub is not
// meant to be called as-is.
class Solution { public: int lengthOfLongestSubstring(string s) { } };
这道题需要使用窗口的思想,定义start,end作为窗口的两端,开始时start = end = 0;再定义一个Map,用来检测窗口中是否有重复字符。
// Sliding-window solution for "longest substring without repeating characters".
class Solution {
public:
    // @param s  input string; every byte value is accepted
    // @return   length of the longest run of consecutive bytes in which no byte repeats
    //           (0 for an empty string)
    int lengthOfLongestSubstring(std::string s) {
        // inWindow[b] is true while byte b is inside the window [start, end].
        // The table is a stack array indexed by unsigned char, so bytes >= 0x80 are
        // valid indices and nothing has to be freed.
        bool inWindow[256] = {false};
        std::string::size_type start = 0;
        std::string::size_type best = 0;
        for (std::string::size_type end = 0; end < s.length(); ++end) {
            const unsigned char incoming = static_cast<unsigned char>(s[end]);
            // Shrink from the left until the incoming byte is no longer a duplicate.
            while (inWindow[incoming]) {
                inWindow[static_cast<unsigned char>(s[start])] = false;
                ++start;
            }
            inWindow[incoming] = true;
            if (end - start + 1 > best) {
                best = end - start + 1;
            }
        }
        return static_cast<int>(best);
    }
};
例题 3, Minimum Window Substring (窗口思想)
Given a string S and a string T, find the minimum window in S which will contain all the characters in T in complexity O(n).
For example,
S = "ADOBECODEBANC"
T = "ABC"
Minimum window is "BANC"
If there is no such window in S that covers all characters in T, return the empty string "".
If there are multiple such windows, you are guaranteed that there will always be only one unique minimum window in S.
// Empty skeleton for the minimum-window-substring problem: only the signature of
// minWindow() is given and the body is left blank, so this stub is not meant to be
// called as-is.
class Solution { public: string minWindow(string S, string T) { } };
先不断向右移动end直到当前窗口已经包含T中所有字符,然后向右移动start 直到再移动start的话窗口就不再包含T所有字符了,这个时候记录下窗口大小(end - start) 并和 min 比较即可。最后返回min。
注意:这道题中“S that covers all characters in T”其实意思不够明确,提交代码后,发现如果一个char在T中出现了两次,S也必须出现这样的char两次。我在看题时就有这个疑问,就先按照char个数不算的方式做了,提交后char的个数也是计入的。
// Sliding-window solution for "minimum window substring".
class Solution {
public:
    // @param S  text to search in
    // @param T  characters to cover; multiplicity counts (a character occurring twice
    //           in T must occur at least twice in the window)
    // @return   the shortest substring of S covering T (the leftmost one on ties), or ""
    //           when S or T is empty or no window covers T
    std::string minWindow(std::string S, std::string T) {
        if (T.empty() || S.empty()) {
            return "";
        }

        // need[b] > 0  : the window still lacks that many copies of byte b
        // need[b] <= 0 : the window holds -need[b] copies beyond what T requires
        // Indexed by unsigned char so bytes >= 0x80 are valid indices.
        int need[256] = {0};
        for (std::string::size_type i = 0; i < T.length(); ++i) {
            ++need[static_cast<unsigned char>(T[i])];
        }

        std::string::size_type missing = T.length();  // characters of T not yet covered
        std::string::size_type start = 0;
        std::string::size_type bestStart = 0;
        std::string::size_type bestLen = std::string::npos;

        for (std::string::size_type end = 0; end < S.length(); ++end) {
            const unsigned char incoming = static_cast<unsigned char>(S[end]);
            if (need[incoming] > 0) {
                --missing;
            }
            --need[incoming];
            if (missing != 0) {
                continue;
            }

            // The window covers T: drop surplus characters from the left so that it
            // becomes the tightest window ending at `end`.
            while (need[static_cast<unsigned char>(S[start])] < 0) {
                ++need[static_cast<unsigned char>(S[start])];
                ++start;
            }
            const std::string::size_type len = end - start + 1;
            if (len < bestLen) {
                bestLen = len;
                bestStart = start;
            }

            // Give up the leftmost required character; the window is now one character
            // short, and the scan continues until that character is found again.
            ++need[static_cast<unsigned char>(S[start])];
            ++start;
            ++missing;
        }

        if (bestLen == std::string::npos) {
            return "";
        }
        return S.substr(bestStart, bestLen);
    }
};
例题 4, Substring with Concatenation of All Words (窗口思想 + string as key思想)
You are given a string, S, and a list of words, L, that are all of the same length. Find all starting indices of substring(s) in S that is a concatenation of each word in L exactly once and without any intervening characters.
For example, given:
S: "barfoothefoobarman"
L: ["foo", "bar"]
You should return the indices: [0,9]
(order does not matter).
// Empty skeleton for the substring-with-concatenation-of-all-words problem: only the
// signature of findSubstring() is given and the body is left blank, so this stub is not
// meant to be called as-is.
class Solution { public: vector<int> findSubstring(string S, vector<string> &L) { } };
// Brute-force solution: test every start index of S independently.
class Solution {
public:
    // @param S  text to search in
    // @param L  words, all of the same length
    // @return   every index i (ascending) such that S[i, i + |L| * wordLength) is a
    //           concatenation of all words of L, each used exactly once; empty when S or
    //           L is empty, the words are empty, or S is shorter than the concatenation
    std::vector<int> findSubstring(std::string S, std::vector<std::string> &L) {
        std::vector<int> v;
        if (S.length() == 0 || L.size() == 0) {
            return v;
        }
        const std::string::size_type unit = L[0].length();
        const std::string::size_type len = unit * L.size();  // length of a full match
        if (unit == 0 || S.length() < len) {
            return v;
        }
        // Only start positions that leave room for a whole concatenation are tried.
        for (std::string::size_type j = 0; j + len <= S.length(); ++j) {
            if (judge(S, j, L, unit)) {
                v.push_back(static_cast<int>(j));
            }
        }
        return v;
    }

private:
    // Returns true when the L.size() consecutive words of length `unit` starting at
    // S[start] use every word of L exactly as many times as it occurs in L.
    // The caller guarantees start + unit * L.size() <= S.length().
    static bool judge(const std::string &S, std::string::size_type start,
                      const std::vector<std::string> &L, std::string::size_type unit) {
        // remaining[w] = number of copies of w that may still be consumed.
        std::map<std::string, int> remaining;
        for (std::vector<std::string>::size_type i = 0; i < L.size(); ++i) {
            ++remaining[L[i]];
        }
        for (std::vector<std::string>::size_type i = 0; i < L.size(); ++i, start += unit) {
            const std::map<std::string, int>::iterator slot = remaining.find(S.substr(start, unit));
            if (slot == remaining.end() || slot->second == 0) {
                return false;  // unknown word, or one copy too many
            }
            --slot->second;
        }
        return true;
    }
};
这种解法需要对S中每一个字符都调用judge函数,每一次judge函数都要遍历L中的元素,接着再以unit为步长在S上面最多走L.size() 步。时间复杂度可能会达到 O(S.length() * (L.size() + L.size()))。
我们仔细想一下:以题目中的例子来说,也就是 S: "barfoothefoobarman",L: ["foo", "bar"],假设当前我们要判断 "foothe"是不是涵盖L所有元素,结果当然是false,因为"the"在L中没有value,那么,"thefoo"其实也不需要判断了。因此基于当前一些判断false的结果,后面一部分判断过程可以跳过。
// Sliding-window solution using two tallies: what L requires and what the window holds.
class Solution {
public:
    // @param S  text to search in
    // @param L  words, all of the same length
    // @return   start indices of every concatenation of all words of L, reported
    //           alignment by alignment (offset 0, 1, ..., wordLen - 1) and left to right
    //           within one alignment
    std::vector<int> findSubstring(std::string S, std::vector<std::string> &L) {
        std::vector<int> hits;
        if (L.empty() || S.empty()) {
            return hits;
        }

        const int wordLen = L[0].length();
        const int spanLen = wordLen * L.size();  // length of a complete match
        if (S.length() < wordLen * L.size()) {
            return hits;
        }

        typedef std::map<std::string, int> Tally;
        Tally wanted;  // how many copies of each word L contains
        for (std::vector<std::string>::size_type w = 0; w < L.size(); ++w) {
            ++wanted[L[w]];
        }

        Tally held;  // how many copies of each word the current window contains
        for (int offset = 0; offset < wordLen; ++offset) {
            // The window is S[left, right) and always consists of whole words.
            int left = offset;
            int right = offset + wordLen;
            held.clear();
            while (static_cast<std::string::size_type>(right) <= S.length()) {
                const std::string incoming = S.substr(right - wordLen, wordLen);
                const Tally::const_iterator quota = wanted.find(incoming);
                if (quota == wanted.end()) {
                    // A word outside L can never be part of a match: restart after it.
                    held.clear();
                    left = right;
                    right = left + wordLen;
                    continue;
                }

                if (++held[incoming] > quota->second) {
                    // One copy too many: drop words from the left up to and including
                    // the first copy of the incoming word.
                    std::string dropped;
                    do {
                        dropped = S.substr(left, wordLen);
                        --held[dropped];
                        left += wordLen;
                    } while (dropped != incoming);
                } else if (right - left == spanLen) {
                    // No word exceeds its quota and the window is full: a match.
                    hits.push_back(left);
                    --held[S.substr(left, wordLen)];
                    left += wordLen;
                }
                right += wordLen;
            }
        }
        return hits;
    }
};
// Sliding-window solution using a single tally of the words still available.
class Solution {
public:
    // @param S  text to search in
    // @param L  words, all of the same length
    // @return   start indices of every concatenation of all words of L, reported
    //           alignment by alignment (offset 0, 1, ..., wordLen - 1) and left to right
    //           within one alignment; empty when S or L is empty, the words are empty,
    //           or a single word is longer than S
    std::vector<int> findSubstring(std::string S, std::vector<std::string> &L) {
        std::vector<int> found;
        if (L.empty() || S.empty()) {
            return found;
        }
        const int wordLen = L[0].length();
        if (static_cast<std::string::size_type>(wordLen) > S.length() || wordLen == 0) {
            return found;
        }

        // available[w] = copies of w in L that the current window has not consumed yet.
        std::map<std::string, int> available;
        for (int offset = 0; offset < wordLen; ++offset) {
            available.clear();
            for (std::vector<std::string>::size_type w = 0; w < L.size(); ++w) {
                ++available[L[w]];
            }

            // The window is the `matched` consecutive words that end right before `pos`.
            int matched = 0;
            int pos = offset;
            while (static_cast<std::string::size_type>(pos) < S.length()) {
                const std::string word = S.substr(pos, wordLen);
                const std::map<std::string, int>::iterator slot = available.find(word);
                if (slot != available.end() && slot->second > 0) {
                    --slot->second;
                    ++matched;
                    if (static_cast<std::vector<std::string>::size_type>(matched) == L.size()) {
                        // Every word is consumed: record the match, then release its
                        // first word so the window can slide on by one word.
                        const int head = pos - (matched - 1) * wordLen;
                        found.push_back(head);
                        ++available[S.substr(head, wordLen)];
                        --matched;
                    }
                    pos += wordLen;
                } else if (matched > 0) {
                    // The word does not fit, but earlier words are still held: release
                    // the oldest one and examine the same word again.
                    const int head = pos - matched * wordLen;
                    ++available[S.substr(head, wordLen)];
                    --matched;
                } else {
                    // Nothing is held and the word does not fit: skip it.
                    pos += wordLen;
                }
            }
        }
        return found;
    }
};
这种解法的时间复杂度基本可以算作O(S.length()) 了。
