方法一:
# Method 1: count word frequencies with a plain dict, then print the
# non-stop-words from most to least frequent, followed by the full ranking.
import operator

speech_text = '''
He is a good boy
She is a good girl
We are very nice
Hello boy hello boy
hello girl hello girl
hello dog
hello cat
hello pig
'''


def count_words(text):
    """Return ``(word, count)`` pairs for *text*, most frequent first.

    The text is lower-cased and split on whitespace, so "Hello" and
    "hello" are counted together. Words with equal counts keep the order
    in which they first appear, because ``sorted`` is stable.
    """
    dic = {}
    for word in text.lower().split():
        # First sighting starts from 0; later sightings bump the tally.
        dic[word] = dic.get(word, 0) + 1
    return sorted(dic.items(), key=operator.itemgetter(1), reverse=True)


def run_method_one():
    """Print each non-stop-word with its count, then the full ranking."""
    # Imported here so the counting helper stays usable without NLTK.
    from nltk.corpus import stopwords

    # Stop words filter out pronouns and similar filler words. If the
    # corpus is missing, download it as NLTK's error message instructs.
    # The fileid must be lowercase: the corpus file is named 'english',
    # so 'English' fails on case-sensitive file systems.
    stop_words = set(stopwords.words('english'))

    swd = count_words(speech_text)
    for k, v in swd:
        if k not in stop_words:
            print(k, v)

    # Unfiltered ranking, stop words included.
    print(swd)


if __name__ == "__main__":
    run_method_one()
方法二:
# Method 2: the same word-frequency count using collections.Counter,
# dropping stop words and printing the ten most common remaining words.
import operator  # kept from the original listing; unused in this method
from collections import Counter

speech_text = '''
He is a good boy
She is a good girl
We are very nice
Hello boy hello boy
hello girl hello girl
hello dog
hello cat
hello pig
'''


def top_words(text, stop_words, n=10):
    """Return the *n* most common words of *text*, excluding *stop_words*.

    The text is lower-cased and split on whitespace before counting.
    *stop_words* is any iterable of lower-case words to discard; entries
    that never occur in the text are ignored. The result is a list of
    ``(word, count)`` pairs, as produced by ``Counter.most_common``.
    """
    c = Counter(text.lower().split())
    for sw in stop_words:
        # Counter's `del` is a no-op for missing keys, so no guard is needed.
        del c[sw]
    return c.most_common(n)


def run_method_two():
    """Print the ten most frequent non-stop-words of ``speech_text``."""
    # Imported here so the counting helper stays usable without NLTK.
    from nltk.corpus import stopwords

    # Stop words filter out pronouns and similar filler words. If the
    # corpus is missing, download it as NLTK's error message instructs.
    # The fileid must be lowercase: the corpus file is named 'english',
    # so 'English' fails on case-sensitive file systems.
    stop_words = stopwords.words('english')

    print(top_words(speech_text, stop_words, 10))  # print the top 10 entries


if __name__ == "__main__":
    run_method_two()
怕什么真理无穷,进一寸有一寸的欢喜。---胡适