# 核酸序列合法性检验的三种方法 (Three ways to validate a nucleic acid sequence)

def validate_base_sequence(base_sequence, RNAflag=False):
    """Return True if the sequence consists solely of valid bases.

    The check is case-insensitive. Valid bases are A, C, G plus T for DNA,
    or A, C, G plus U when ``RNAflag`` is true. An empty sequence is
    considered valid.

    Args:
        base_sequence: The nucleotide string to check.
        RNAflag: Validate against the RNA alphabet (U instead of T).

    Returns:
        True when every character is a valid base, otherwise False.
    """
    normalized = base_sequence.upper()
    thymine_or_uracil = 'U' if RNAflag else 'T'
    # The sequence is valid exactly when the occurrences of the four
    # permitted bases account for every character in it.
    recognized = sum(
        normalized.count(base) for base in (thymine_or_uracil, 'A', 'C', 'G')
    )
    return recognized == len(normalized)

def validate_base_sequence(base_sequence, RNAflag=False):
    """Return True if the sequence consists solely of valid bases.

    The check is case-insensitive. Valid bases are A, T, C, G for DNA, or
    A, U, C, G when ``RNAflag`` is true. An empty sequence is considered
    valid.

    Args:
        base_sequence: The nucleotide string to check.
        RNAflag: Validate against the RNA alphabet (U instead of T).

    Returns:
        True when every character is a valid base, otherwise False.
    """
    alphabet = 'AUCG' if RNAflag else 'ATCG'
    observed = set(base_sequence.upper())
    # Valid exactly when the distinct characters seen form a subset of the
    # permitted alphabet.
    return observed.issubset(alphabet)

def validate_base_sequence(base_sequence, RNAflag=False):
    """Return True if the sequence consists solely of valid bases.

    The check is case-insensitive. Valid bases are A, T, C, G for DNA, or
    A, U, C, G when ``RNAflag`` is true. An empty sequence is considered
    valid.

    Args:
        base_sequence: The nucleotide string to check.
        RNAflag: Validate against the RNA alphabet (U instead of T).

    Returns:
        True when every character is a valid base, otherwise False.
    """
    valid_bases = 'AUCG' if RNAflag else 'ATCG'
    # A generator expression (rather than a list) lets all() stop at the
    # first invalid base instead of scanning the whole sequence first.
    return all(base in valid_bases for base in base_sequence.upper())

def gc_content(base_sequence):
    """Return the GC content of a DNA sequence as a fraction.

    The sequence is treated case-insensitively and must be a valid DNA
    sequence according to ``validate_base_sequence`` (called with its
    default, non-RNA alphabet).

    Args:
        base_sequence: The DNA string to analyse.

    Returns:
        The number of G and C bases divided by the sequence length, or
        0.0 for an empty sequence.

    Raises:
        AssertionError: If the sequence fails validation.
    """
    # Raise explicitly instead of using an `assert` statement: asserts are
    # stripped under `python -O`, which would silently skip validation.
    # AssertionError and its message are kept so existing callers that
    # catch it keep working.
    if not validate_base_sequence(base_sequence):
        raise AssertionError('not valid base sequence!')
    seq = base_sequence.upper()
    if not seq:
        # No bases to count; return 0.0 rather than dividing by zero.
        return 0.0
    return (seq.count('G') + seq.count('C')) / len(seq)

# posted @ 2019-02-15 16:26  bluemoonsky  阅读(482)  评论(0)  编辑  收藏  举报