python练习册 每天一个小程序 第0004题

 1 #-*-coding:utf-8-*- 
 2 __author__ = 'Deen' 
 3 '''
 4 题目描述:任一个英文的纯文本文件,统计其中的单词出现的个数。
 5 参考学习链接:
 6     re  http://www.cnblogs.com/tina-python/p/5508402.html#undefined
 7     collections  http://blog.csdn.net/liufang0001/article/details/54618484
 8 '''
 9 import re,collections
10 with open('english.txt','r') as fp:
11     text=fp.read().strip(',')
12     s=re.compile(r'\w+\b')
13     words=s.findall(text)
14     b=list()
15     dic=collections.defaultdict(lambda :0)
16     for word in words:
17         dic[word.lower()] +=1
18     
19     print dic
20 
21 '''
22 import collections,re
23 import sys
24 def cal(filename = 'english.txt'):
25     print 'now processing:' + filename + '......'
26     f = open(filename,'r')
27     data = f.read()
28     dic = collections.defaultdict(lambda :0)
29     data = re.sub(r'[\W\d]',' ',data)
30     data = data.lower()
31     datalist = data.split(' ')
32     for item in datalist:
33         dic[item] += 1
34     del dic['']
35     return dic
36 try:
37     print sorted(cal().items())
38 except:
39     print 'no input file'
40 '''

 

posted on 2017-07-10 22:12  _Deen  阅读(294)  评论(0)  编辑  收藏  举报

导航