第四次作业

英文歌

# Load the entire lyrics file as UTF-8 text. The context manager guarantees
# the handle is closed even if read() raises.
with open('yyy1.txt','r',encoding='utf-8') as ee:
    love=ee.read()
# Echo the raw text before any processing.
print(love)
# Punctuation characters to blank out before splitting. The apostrophe is in
# the set, so a contraction such as "I'm" becomes the two tokens "i" and "m".
c=''',./;'?!'''
# Map every punctuation character to a space in a single pass, then lowercase
# once (instead of re-lowercasing the whole text for each character).
love=love.translate(str.maketrans(c,' '*len(c))).lower()
# Split on runs of whitespace to obtain the word list.
ll=love.split()

from collections import Counter

# Words excluded from the tally. NOTE(review): 'm' and 't' are presumably the
# fragments left over from contractions once apostrophes are stripped.
s0={'the','this','that','and','to','m','t'}
# Count every token in one pass (the previous per-word ll.count() rescanned
# the full list for each distinct word), then drop the excluded words.
# Entries keep first-occurrence order, so the output is reproducible.
s1={word:n for word,n in Counter(ll).items() if word not in s0}
# Distinct words that remain after exclusion.
s=set(s1)
for key,n in s1.items():
    print(key,n)

# (word, count) pairs, ready to be sorted by frequency.
list1=list(s1.items())

def takeSecond(elem):
    """Return the item at index 1 of ``elem``.

    Used as a sort key so that (word, count) pairs are ordered by count.
    """
    second=elem[1]
    return second

# Order the (word, count) pairs in place, highest count first.
list1.sort(reverse=True,key=takeSecond)
# Show at most the 20 most frequent words as a single list.
top20=list1[:20]
print(top20)

中文小说

import jieba
# Load the entire novel. Decoding as UTF-8 produced garbled text, so GBK is
# used instead. The context manager guarantees the handle is closed even if
# read() raises.
with open('dotaer.txt','r',encoding='GBK') as f:
    o=f.read()
# Echo the raw text before any processing.
print(o)
# Segment the novel text with jieba and keep the pieces as a list.
dota=list(jieba.cut(o))

# Tally how often each segment occurs, ignoring segments that are exactly one
# character long.
dota2={}
for token in dota:
    if len(token)!=1:
        dota2[token]=dota2.get(token,0)+1

# (word, count) pairs taken from the tally.
dota3 = list(dota2.items())

# Sort in place, highest count first.
dota3.sort(key=lambda x:x[1],reverse=True)

# Slice rather than index range(20): with fewer than 20 distinct words the
# indexed loop raised IndexError, while a slice simply yields what exists.
for entry in dota3[:20]:
    print(entry)

posted @ 2018-10-17 22:45  梁柏钧  阅读(76)  评论(0)  编辑  收藏  举报