Python 统计字词练习

方法一：

 1 import operator
 2 from nltk.corpus import stopwords
 3 stop_words = stopwords.words('English')#目的是去除人称代词等，注意根据编译提示下载相应库
 4 
 5 speech_text = '''
 6 He is a good boy
 7 She is a good girl
 8 We are very nice
 9 Hello boy hello boy 
10 hello girl hello girl
11 hello dog 
12 hello cat
13 hello pig
14 '''
15 speech = speech_text.lower().split()
16 dic = {}
17 for word in speech:
18     if word not in dic:
19         dic[word] = 1 #给词典赋值
20     else:
21         dic[word] = dic[word] + 1
22 swd = sorted(dic.items(), key = operator.itemgetter(1),reverse = True)
23 #stop_words
24 for k,v in swd:
25     if k not in stop_words:
26         print(k,v)
27 
28 print(swd)

方法二：

 1 import operator
 2 from nltk.corpus import stopwords
 3 stop_words = stopwords.words('English')#目的是去除人称代词等，注意根据编译提示下载相应库
 4 
 5 speech_text = '''
 6 He is a good boy
 7 She is a good girl
 8 We are very nice
 9 Hello boy hello boy 
10 hello girl hello girl
11 hello dog 
12 hello cat
13 hello pig
14 '''
15 speech = speech_text.lower().split()
16 from collections import Counter
17 c = Counter(speech)
18 for sw in stop_words:
19     del c[sw]
20 print(c.most_common(10)) #打印前10项

View Code

posted on 2018-09-12 22:44 Worty 阅读(196) 评论(0) 编辑收藏举报