You are given a string, S, and a list of words, L, that are all of the same length. Find all starting indices of substring(s) in S that are a concatenation of each word in L exactly once and without any intervening characters.
For example, given:
S: "barfoothefoobarman"
L: ["foo", "bar"]
You should return the indices: [0,9]
(order does not matter).
public class Solution {

    /**
     * Finds every index of S at which some ordering of all the words in L,
     * concatenated without gaps, occurs.
     *
     * <p>Each start index is reported at most once, even when L contains
     * duplicate words. An empty S or an empty L yields an empty list, matching
     * the other solutions of this problem.
     *
     * @param S the text to search
     * @param L the words that must each be used exactly once
     * @return matching start indices in ascending order
     */
    public List<Integer> findSubstring(String S, String[] L) {
        List<Integer> result = new LinkedList<Integer>();
        if (S.length() == 0 || L.length == 0) {
            return result;
        }

        int LLength = 0;
        for (int i = 0; i < L.length; i++) {
            LLength += L[i].length();
        }
        int endIndex = S.length() - LLength;

        StringBuffer currentResult = new StringBuffer();
        int[] visited = new int[L.length];
        for (int i = 0; i <= endIndex; i++) {
            dfs(result, currentResult, S, L, visited, 0, i);
        }
        return result;
    }

    /**
     * Extends the partial concatenation in {@code currentResult} with every
     * still-unused word, backtracking after each attempt, and records
     * {@code startIndex} once all words are placed and the concatenation
     * matches S at that position.
     *
     * @param result        collects the matching start indices
     * @param currentResult concatenation built so far; restored before returning
     * @param S             the text to search
     * @param L             the words to place
     * @param visited       visited[i] == 1 while L[i] is part of currentResult
     * @param step          number of words already placed
     * @param startIndex    position in S the concatenation must start at
     */
    public void dfs(List<Integer> result, StringBuffer currentResult, String S, String[] L,
                    int[] visited, int step, int startIndex) {
        // Prune as soon as the partial concatenation stops matching S. startsWith
        // also returns false when the prefix no longer fits, and avoids copying
        // the tail of S on every call.
        if (!S.startsWith(currentResult.toString(), startIndex)) {
            return;
        }

        if (step == L.length) {
            // Different orderings can spell the same text; the caller walks the
            // start indices in ascending order, so checking the last entry is
            // enough to avoid reporting the same index twice.
            if (result.isEmpty() || result.get(result.size() - 1).intValue() != startIndex) {
                result.add(startIndex);
            }
            return;
        }

        int currentLength = currentResult.length();
        for (int i = 0; i < L.length; i++) {
            if (visited[i] == 1 || hasEarlierUnusedTwin(L, visited, i)) {
                continue;
            }
            visited[i] = 1;
            currentResult.append(L[i]);
            dfs(result, currentResult, S, L, visited, step + 1, startIndex);
            visited[i] = 0;
            currentResult.setLength(currentLength);
        }
    }

    /**
     * Tells whether an equal, still-unused word sits before index i. Choosing
     * only the first unused copy of a repeated word keeps the search from
     * exploring identical orderings more than once.
     */
    private boolean hasEarlierUnusedTwin(String[] L, int[] visited, int i) {
        for (int k = 0; k < i; k++) {
            if (visited[k] == 0 && L[k].equals(L[i])) {
                return true;
            }
        }
        return false;
    }
}
这是一个重要的改进。再想,L[i]的长度都固定了,那么只要看startIndex + step * n往后,L[i]这个固定长度的字符串在不在L中,就行了!而不要在L里面遍历。这样用set或者map就可以了!省去m的时间复杂度。
Input: | "abababab", ["a","b","a"] |
Output: | [0,1,2,3,4,5] |
Expected: | [0,2,4] |
public class Solution {

    /**
     * Returns every index of S where all words of L (all assumed to share the
     * length of L[0]) appear back to back, each used exactly once.
     *
     * @param S the text to search
     * @param L the equally long words to concatenate
     * @return matching start indices in ascending order; empty when S or L is empty
     */
    public List<Integer> findSubstring(String S, String[] L) {
        List<Integer> result = new LinkedList<Integer>();
        if (L.length == 0 || S.length() == 0) {
            return result;
        }

        final int wordLength = L[0].length();
        final int lastStart = S.length() - L.length * wordLength;

        // How many times each word has to be consumed.
        Map<String, Integer> wordMap = new HashMap<String, Integer>();
        for (String word : L) {
            Integer seen = wordMap.get(word);
            wordMap.put(word, seen == null ? 1 : seen + 1);
        }

        for (int start = 0; start <= lastStart; start++) {
            dfs(result, S, wordMap, wordLength, L.length, 0, start);
        }
        return result;
    }

    /**
     * Consumes the word-sized chunk number {@code step} after {@code startIndex}
     * from the remaining quota in {@code wordMap}, recurses on the next chunk,
     * and puts the quota back afterwards.
     *
     * @param result     collects the matching start indices
     * @param S          the text to search
     * @param wordMap    remaining quota per word; restored before returning
     * @param wordLength length of every word
     * @param LLength    number of words that must be matched
     * @param step       number of chunks already matched
     * @param startIndex position in S where the candidate match begins
     */
    public void dfs(List<Integer> result, String S, Map<String, Integer> wordMap, int wordLength,
                    int LLength, int step, int startIndex) {
        // Compare against the word count, not wordMap.size(): duplicate words
        // make the map smaller than the word list.
        if (step == LLength) {
            result.add(startIndex);
            return;
        }

        int offset = startIndex + step * wordLength;
        if (offset > S.length()) {
            return;
        }

        String currentWord = S.substring(offset, offset + wordLength);
        Integer remaining = wordMap.get(currentWord);
        if (remaining == null || remaining == 0) {
            return;
        }

        wordMap.put(currentWord, remaining - 1);
        dfs(result, S, wordMap, wordLength, LLength, step + 1, startIndex);
        wordMap.put(currentWord, wordMap.get(currentWord) + 1);
    }
}
public class Solution {

    /**
     * Returns every index of S where all words of L (all assumed to share the
     * length of L[0]) appear back to back, each used exactly once. Every
     * candidate start is checked independently.
     *
     * @param S the text to search
     * @param L the equally long words to concatenate
     * @return matching start indices in ascending order; empty when S or L is empty
     */
    public List<Integer> findSubstring(String S, String[] L) {
        List<Integer> result = new LinkedList<Integer>();
        if (L.length == 0 || S.length() == 0) {
            return result;
        }

        final int wordLength = L[0].length();
        final int lastStart = S.length() - L.length * wordLength;

        // Required number of occurrences per word.
        Map<String, Integer> wordMap = new HashMap<String, Integer>();
        for (String word : L) {
            Integer seen = wordMap.get(word);
            wordMap.put(word, seen == null ? 1 : seen + 1);
        }

        for (int begin = 0; begin <= lastStart; begin++) {
            if (isConcatenationAt(S, begin, L.length, wordLength, wordMap)) {
                result.add(begin);
            }
        }
        return result;
    }

    /**
     * Checks whether the {@code wordCount} chunks of {@code wordLength}
     * characters starting at {@code begin} use only required words and none
     * of them more often than required.
     */
    private boolean isConcatenationAt(String S, int begin, int wordCount, int wordLength,
                                      Map<String, Integer> required) {
        Map<String, Integer> used = new HashMap<String, Integer>();
        for (int k = 0; k < wordCount; k++) {
            int from = begin + k * wordLength;
            String piece = S.substring(from, from + wordLength);

            Integer quota = required.get(piece);
            if (quota == null) {
                return false;
            }

            Integer soFar = used.get(piece);
            int now = soFar == null ? 1 : soFar + 1;
            used.put(piece, now);
            if (now > quota) {
                return false;
            }
        }
        return true;
    }
}
最后推荐一个在Longest Substring Without Repeating Characters问题里提到的sliding window的方法,借鉴于。它的原理学习起来开始很复杂,但是一旦理解就比较简单了。
S: "barfoothefoobarman"
L: ["foo", "bar"]
之前的方法是从S的0-n个char,每次都检查L.length次。实际上,我们在i==0时,检查了bar foo the foo bar man,在i==3的时候,又要检查foo the foo bar man。你看,都重复了。可是有什么好办法吗?
sliding window是这样做的,因为L中有元素都等长这个重要的性质,可以将它们看成Longest Substring Without Repeating Characters问题里的char,将L的总长,也就是foobar作为一个窗口,那么就从每次检查一个char,或者跳过一个char,变成现在的每次检查一个长度为3的String,或者跳过一个长度为3的String。
那么,这个例子中就是bar foo the foo bar man。显然是不全面的,我们还要从第二个字符开始,检查arf oot hef oob arm,还有第三个字符开始,rfo oth efo oba rma。这样就可以了。可以看到外层只需要循环L[0].length()次就可以了。
那么内层的窗口如何滑动呢?让我们回忆一下Longest Substring Without Repeating Characters这个问题。每遇到一个重复字符串,在前面有下标index,左窗口就从index+1开始,右侧窗口继续后移遍历。这里能不能这样做?本题用了类似的方法,但更为复杂。
如果当前word压根不在wordMap里,那么当前结果被整个舍弃,start从j + L[0].length()开始。同时清空重置thisWordMap。
然后看看当前结果长度是不是和L的总长相等了?相等证明就是有效的结果了。start加入result,新的start从start+ L[0].length()开始。
public class Solution {

    /**
     * Sliding-window search for every index of S where all words of L (all
     * assumed to share the length of L[0]) appear back to back, each used
     * exactly once. S is scanned once per alignment (offset modulo the word
     * length), moving one whole word at a time.
     *
     * @param S the text to search
     * @param L the equally long words to concatenate
     * @return matching start indices, grouped by alignment; empty when S or L is empty
     */
    public List<Integer> findSubstring(String S, String[] L) {
        List<Integer> result = new LinkedList<Integer>();
        if (L.length == 0 || S.length() == 0) {
            return result;
        }

        final int wordLength = L[0].length();
        final int windowLength = L.length * wordLength;

        // Required number of occurrences per word (always at least 1).
        Map<String, Integer> wordMap = new HashMap<String, Integer>();
        for (String word : L) {
            Integer seen = wordMap.get(word);
            wordMap.put(word, seen == null ? 1 : seen + 1);
        }

        for (int offset = 0; offset < wordLength; offset++) {
            Map<String, Integer> windowCounts = new HashMap<String, Integer>();
            int start = offset;

            for (int j = offset; j + wordLength <= S.length(); j += wordLength) {
                String currentWord = S.substring(j, j + wordLength);
                Integer allowed = wordMap.get(currentWord);

                if (allowed == null) {
                    // A foreign word invalidates the whole window; restart after it.
                    start = j + wordLength;
                    windowCounts.clear();
                    continue;
                }

                Integer previous = windowCounts.get(currentWord);
                int count = previous == null ? 1 : previous + 1;
                windowCounts.put(currentWord, count);

                if (count > allowed) {
                    // This has to be a loop: for "aaabbbc" with
                    // ["a","a","b","b","c"], dropping a single word would move
                    // start to 1 and accept "aabbbc" although "b" is still over
                    // its quota.
                    while (windowCounts.get(currentWord) > allowed) {
                        String leftmost = S.substring(start, start + wordLength);
                        windowCounts.put(leftmost, windowCounts.get(leftmost) - 1);
                        start += wordLength;
                    }
                } else if (j - start + wordLength == windowLength) {
                    // Full window with no word over quota: record it, then
                    // slide forward by exactly one word.
                    result.add(start);
                    String leftmost = S.substring(start, start + wordLength);
                    windowCounts.put(leftmost, windowCounts.get(leftmost) - 1);
                    start += wordLength;
                }
            }
        }
        return result;
    }
}
用了sliding window的方法,我们看到对于每一种对齐方式,S中的每个单词都仅仅进入窗口一次、最多移出窗口一次。内层的while循环单次最坏要移出O(m)个单词,m为L.length(单词个数,而不是L[0].length());但均摊下来每个单词只会被移出一次,所以平均复杂度为O(n),n为S的长度。
那么为什么这道题目可以使用sliding window的方法?是不是使用上面提到的DFS和迭代可以解决的问题,都能用sliding window去做?答案是否定的。这道题能用,还是因为它有一个重要的性质:L内每个string长度相等,所以每个string都可以被当成一个char来看待。
update 2015/05/29
二刷,用的上面的sliding window解法,更深的理解了。代码也清楚了些。
public class Solution {

    /**
     * Sliding-window search for every index of {@code s} where all entries of
     * {@code words} (all assumed to share the length of words[0]) appear back
     * to back, each used exactly once. The text is scanned once per alignment
     * (offset modulo the word length), moving one whole word at a time.
     *
     * @param s     the text to search
     * @param words the equally long words to concatenate
     * @return matching start indices, grouped by alignment; empty when
     *         {@code s} or {@code words} is empty
     */
    public List<Integer> findSubstring(String s, String[] words) {
        List<Integer> res = new ArrayList<Integer>();
        // Guard before touching words[0]; mirrors the other solutions.
        if (s.length() == 0 || words.length == 0) {
            return res;
        }

        // Required number of occurrences per word.
        Map<String, Integer> wordMap = new HashMap<String, Integer>();
        for (String word : words) {
            if (wordMap.containsKey(word)) {
                wordMap.put(word, wordMap.get(word) + 1);
            } else {
                wordMap.put(word, 1);
            }
        }

        int wordLength = words[0].length();
        for (int i = 0; i < wordLength; i++) {
            int count = 0;   // number of words currently inside the window
            int start = i;   // left edge of the window
            Map<String, Integer> thisWordMap = new HashMap<String, Integer>();

            for (int j = i; j < s.length() - wordLength + 1; j = j + wordLength) {
                String thisWord = s.substring(j, j + wordLength);

                if (!wordMap.containsKey(thisWord)) {
                    // A foreign word invalidates the whole window; restart after it.
                    count = 0;
                    start = j + wordLength;
                    thisWordMap.clear();
                } else if (wordMap.get(thisWord).equals(thisWordMap.get(thisWord))) {
                    // The counts are boxed Integers, so they must be compared with
                    // equals(): == compares references and silently fails for
                    // values outside the Integer cache (above 127).
                    //
                    // The window already holds the full quota of this word. Take
                    // the new occurrence in, then drop words from the left up to
                    // and including the first occurrence of this same word.
                    count++;
                    thisWordMap.put(thisWord, thisWordMap.get(thisWord) + 1);

                    String firstWord = s.substring(start, start + wordLength);
                    while (!firstWord.equals(thisWord) && start <= j) {
                        start += wordLength;
                        thisWordMap.put(firstWord, thisWordMap.get(firstWord) - 1);
                        count--;
                        firstWord = s.substring(start, start + wordLength);
                    }
                    thisWordMap.put(firstWord, thisWordMap.get(firstWord) - 1);
                    start = start + wordLength;
                    count--;
                } else {
                    count++;
                    if (thisWordMap.containsKey(thisWord)) {
                        thisWordMap.put(thisWord, thisWordMap.get(thisWord) + 1);
                    } else {
                        thisWordMap.put(thisWord, 1);
                    }

                    // On a full match the window only slides forward by one word.
                    if (count == words.length) {
                        res.add(start);
                        count--;
                        String firstWord = s.substring(start, start + wordLength);
                        thisWordMap.put(firstWord, thisWordMap.get(firstWord) - 1);
                        start = start + wordLength;
                    }
                }
            }
        }
        return res;
    }
}