计算英文文章词频的两种方法
# Two ways to count word frequencies in an English text:
#   1. a hand-rolled dict tally, sorted by count;
#   2. collections.Counter with most_common().
# Both ignore English stop words taken from NLTK's stopwords corpus, which
# must already be installed locally (e.g. via nltk.download('stopwords')).
import operator
from collections import Counter
from string import punctuation

from nltk.corpus import stopwords

speech_text = 'xxx'

# Lower-case so counting is case-insensitive and split on whitespace, then
# trim punctuation from both ends of every token so that "freedom," and
# "freedom" are tallied as the same word. Inner characters (the apostrophe
# in "don't") are untouched; tokens that were pure punctuation are dropped.
stripped = (raw.strip(punctuation) for raw in speech_text.lower().split())
speech = [token for token in stripped if token]

# The corpus file id is lower-case: 'English' is not found on case-sensitive
# file systems.
stop_words = stopwords.words('english')
# Build the set once so each membership test below is O(1), not a list scan.
stop_word_set = frozenset(stop_words)

# ---- Method 1: manual tally in a plain dict ----
dic = {}
for word in speech:
    dic[word] = dic.get(word, 0) + 1

# (word, count) pairs, most frequent first.
swd = sorted(dic.items(), key=operator.itemgetter(1), reverse=True)
for k, v in swd:
    if k not in stop_word_set:
        print(k, v)

##########################
# ---- Method 2: collections.Counter ----
c = Counter(speech)
for sw in stop_words:
    # Deleting a key that is absent from a Counter does not raise KeyError,
    # so no membership check is needed here.
    del c[sw]

# Print the result: a bare expression is only echoed in an interactive
# session and would be silently discarded when this runs as a script.
print(c.most_common(10))