读取中文的NER数据

def readfile(filename):
    '''
    Read a UTF-8 encoded NER data file and group its tokens into sentences.

    Each non-blank line is split on single spaces: the first field is used
    as the token and the last field as its label. Blank (or whitespace-only)
    lines mark the boundary between sentences.

    filename : path of the file to read.

    return format : a list of sentences, each sentence being a list of
    [token, label] pairs, e.g.
    [ ['EU', 'B-ORG'], ['rejects', 'O'], ['German', 'B-MISC'], ['call', 'O'], ['to', 'O'], ['boycott', 'O'], ['British', 'B-MISC'], ['lamb', 'O'], ['.', 'O'] ]
    '''
    sentences = []
    sentence = []
    # The context manager closes the file handle even if parsing raises.
    with open(filename, encoding='utf-8') as f:
        for raw_line in f:
            # Strip the line terminator (and any trailing blanks) so the label
            # is returned as 'B-ORG' rather than 'B-ORG\n'.
            line = raw_line.rstrip()
            if not line:
                # Sentence separator: flush the sentence collected so far.
                if sentence:
                    sentences.append(sentence)
                    sentence = []
                continue
            splits = line.split(' ')
            sentence.append([splits[0], splits[-1]])

    # The file may not end with a blank line; keep the last sentence too.
    if sentence:
        sentences.append(sentence)
    return sentences
posted @ 2021-03-05 16:12  NTS100K  阅读(84)  评论(0)  编辑  收藏  举报
浏览器标题切换end