002 生信基础题
001 给定 DNA 序列 'GATCCAGATCCCCATAC'，找出其中出现频率最高的 2-mer（由相邻两个碱基组成的子串），并统计它出现的次数。
# Method 1: list every overlapping 2-mer of the sequence, then report the
# one that occurs most often.
from collections import Counter

t = 'GATCCAGATCCCCATAC'

# All overlapping substrings of length 2; a sequence of length n has n - 1.
L = [t[i:i + 2] for i in range(len(t) - 1)]

# Tally every 2-mer in one pass instead of re-counting it per comparison.
counts = Counter(L)

# max() keeps the first maximum it meets, so scanning from the end resolves
# ties in favour of the 2-mer that occurs last in the sequence.
x = max(reversed(L), key=counts.__getitem__)

# A single pre-formatted string prints identically on Python 2 and Python 3.
print('{0} appeared {1} times! It is the most frequent 2-mer.'.format(x, counts[x]))
# Method 2: count how often the k-mer starting at each position occurs,
# then report the k-mer(s) with the highest count.
import sys


def PatternCount(Pattern, Text):
    """Return the number of (possibly overlapping) occurrences of Pattern in Text.

    Every start position at which a full-length window fits is examined, so
    overlapping matches are counted: PatternCount("AA", "AAAA") is 3.
    Returns 0 when Pattern is longer than Text.
    """
    width = len(Pattern)
    # The last valid start index is len(Text) - width.
    return sum(
        1
        for start in range(len(Text) - width + 1)
        if Text[start:start + width] == Pattern
    )


def CountDict(Text, k):
    """Map each start index i to the occurrence count of the k-mer Text[i:i+k].

    The result has one entry per window position, so positions that hold the
    same k-mer carry the same count. It is empty when k exceeds len(Text).
    """
    Count = {}
    # Remember counts per distinct k-mer so Text is scanned once per k-mer
    # rather than once per position.
    known = {}
    for i in range(len(Text) - k + 1):
        Pattern = Text[i:i + k]
        if Pattern not in known:
            known[Pattern] = PatternCount(Pattern, Text)
        Count[i] = known[Pattern]
    return Count


def FrequentWords(Text, k):
    """Return the most frequent k-mers of Text, each listed exactly once.

    The k-mers appear in order of their first occurrence in Text. An empty
    list is returned when Text has no window of length k (for example when
    Text is empty or k is larger than len(Text)).
    """
    Count = CountDict(Text, k)
    if not Count:
        # No windows at all: there is no maximum to take.
        return []

    m = max(Count.values())
    FrequentPatterns = []
    seen = set()
    # Walk positions left to right so the output order is deterministic.
    for i in sorted(Count):
        if Count[i] != m:
            continue
        Pattern = Text[i:i + k]
        # A frequent k-mer is hit at each of its occurrences; keep it once.
        if Pattern not in seen:
            seen.add(Pattern)
            FrequentPatterns.append(Pattern)
    return FrequentPatterns


print(FrequentWords("GATCCAGATCCCCATAC", 2))
002 Reverse Complement Problem:
Find the reverse complement of a DNA string.
Input: A DNA string Pattern.
Output: The reverse complement of Pattern.
Sample Input:
AAAACCCGGT
Sample Output:
ACCGGGTTTT