综合练习：词频统计

from collections import Counter

# --- English word-frequency count: peng.txt -> console + result.txt ---

# Read the whole text; the context manager closes the file even if the
# read raises.
with open("peng.txt", "r", encoding='utf-8') as f:
    song = f.read()

# Punctuation characters that are turned into spaces before splitting.
sep = ''',.?—!"'''

# Words that are left out of the statistics.
exclude = {'the', 'and', 'i', 'in', "i'm", 'a', 'of', 'an', 'on', 'to', 'with'}

# Replace every separator with a space in a single pass over the text.
song = song.translate(str.maketrans(sep, ' ' * len(sep)))

# Lower-case so that "The" and "the" are counted as the same word.
swl = song.lower().split()

# One counting pass over the word list (instead of list.count() once per
# distinct word), skipping the excluded words.
swd = Counter(w for w in swl if w not in exclude)

# (word, count) pairs, most frequent first; words with equal counts keep
# the order in which they first appear in the text, so runs are repeatable.
fl = swd.most_common()

for i in fl:
    print(i)

# Save the 20 most frequent words. Slicing (rather than indexing 0..19)
# avoids an IndexError when the text has fewer than 20 distinct words.
# The output is written as UTF-8 to match the encoding used for reading.
with open("result.txt", "w", encoding='utf-8') as f:
    f.writelines(word + "  " + str(freq) + "\n" for word, freq in fl[:20])

from collections import Counter

import jieba

# --- Chinese word-frequency count: weicheng.txt -> console + wcCount.txt ---

# Read the whole text; the context manager closes the file even if the
# read raises.
with open('weicheng.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Punctuation (and the ASCII space) deleted from the text before segmentation.
p = '''，。‘’“”：；（）！？、 '''
# Tokens that are left out of the statistics.
a = {
    '的', '\n', '\u3000',
    '曰', '之', '不', '人', '一', '大', '马', '来', '有', '于', '下', '此',
}
# Delete every character of p in a single pass over the text.
text = text.translate(str.maketrans('', '', p))

# Segment once; jieba.lcut already returns a list. The token list is
# printed once (a second identical dump of the same segmentation was dropped).
t = jieba.lcut(text)
print(t)

# Count segmented tokens. Counting substrings of the raw text instead would
# also count a short word inside every longer word that contains it.
count = Counter(w for w in t if w not in a)
wl = list(count)
print(wl)

# (word, count) pairs, most frequent first; ties keep first-seen order.
cl = count.most_common()
print(cl)

# Write the 20 most frequent words. Slicing avoids an IndexError when there
# are fewer than 20 distinct words. The file is opened in append mode, as
# before, so repeated runs accumulate results in wcCount.txt; UTF-8 matches
# the encoding used for reading.
with open('wcCount.txt', 'a', encoding='utf-8') as f:
    f.writelines(word + ':' + str(freq) + '\n' for word, freq in cl[:20])

posted @ 2018-03-28 21:42 商软3许怀鹏222 阅读(151) 评论(0) 收藏举报

刷新页面返回顶部

phoenlix

综合练习：词频统计

公告