187 Repeated DNA Sequences 重复的DNA序列
所有DNA都由一系列缩写为 A、C、G 和 T 的核苷酸组成,例如:“ACGAATTCCG”。在研究DNA时,识别DNA中的重复序列有时非常有用。
编写一个函数来查找DNA分子中所有出现超过一次的10个字母长的序列(子串)。
详见:https://leetcode.com/problems/repeated-dna-sequences/description/
Java实现:
class Solution {
    /**
     * Finds every 10-character substring that occurs more than once in {@code s}.
     *
     * <p>Each window of length 10 is counted in a hash map. A window is reported
     * exactly once, at the moment its second occurrence is seen, so the result
     * is ordered by the position of each sequence's second occurrence.
     *
     * @param s the DNA string to scan
     * @return the repeated 10-character sequences, without duplicates; empty
     *         when {@code s} has fewer than 10 characters
     */
    public List<String> findRepeatedDnaSequences(String s) {
        final int windowLength = 10;
        List<String> repeated = new ArrayList<>();
        if (s.length() < windowLength) {
            return repeated;
        }

        Map<String, Integer> occurrences = new HashMap<>();
        for (int end = windowLength; end <= s.length(); end++) {
            String window = s.substring(end - windowLength, end);
            Integer seenSoFar = occurrences.get(window);
            if (seenSoFar == null) {
                // First sighting of this window.
                occurrences.put(window, 1);
                continue;
            }
            // A previous count of exactly 1 means this is the second sighting:
            // report it now. Later sightings only bump the counter.
            if (seenSoFar == 1) {
                repeated.add(window);
            }
            occurrences.put(window, seenSoFar + 1);
        }
        return repeated;
    }
}
C++实现:
class Solution {
public:
    /**
     * Finds every 10-character substring that occurs more than once in `s`.
     *
     * Each character is reduced to its low three bits (`c & 7`), and a rolling
     * 30-bit integer key is maintained for the current 10-character window.
     * With ASCII input this maps A->1, C->3, G->7, T->4, so distinct windows
     * over the alphabet {A, C, G, T} get distinct keys. Characters outside
     * that alphabet may share low bits with one of these (e.g. lowercase
     * letters) and would then be treated as the same base.
     *
     * A window is reported exactly once, when its key is seen for the second
     * time, so the result is ordered by each sequence's second occurrence.
     *
     * @param s the DNA string to scan
     * @return the repeated 10-character sequences, without duplicates; empty
     *         when `s` has 10 or fewer characters (a single window cannot
     *         repeat)
     */
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        constexpr std::size_t kWindow = 10;
        constexpr int kBitsPerBase = 3;
        // Keeps the low 27 bits, i.e. the 9 most recent bases, so that shifting
        // in one more base yields a 30-bit key that still fits in a signed int.
        constexpr int kKeepMask = 0x7ffffff;

        std::vector<std::string> res;
        if (s.size() <= kWindow) {
            return res;
        }

        std::unordered_map<int, int> seen;
        int key = 0;
        std::size_t i = 0;

        // Prime the key with the first 9 bases; the main loop adds the 10th.
        for (; i + 1 < kWindow; ++i) {
            key = (key << kBitsPerBase) | (s[i] & 7);
        }

        for (; i < s.size(); ++i) {
            // Drop the oldest base and append the newest one.
            key = ((key & kKeepMask) << kBitsPerBase) | (s[i] & 7);
            // operator[] value-initializes a missing count to 0, so a single
            // lookup both inserts and increments; report on the second sighting.
            if (++seen[key] == 2) {
                res.push_back(s.substr(i + 1 - kWindow, kWindow));
            }
        }
        return res;
    }
};
参考:https://www.cnblogs.com/grandyang/p/4284205.html