综合练习:词频统计
"""Word-frequency exercise: print the most common words in a text file.

The script reads ``text.txt``, treats a few punctuation characters as word
separators, lower-cases the text, drops a small set of stop words and prints
the 20 most frequent remaining words as ``(word, count)`` tuples.
"""
from collections import Counter
from operator import itemgetter
from typing import Dict, Iterable, List, Tuple

# Earlier inline version of the input text, kept for reference (the script
# now reads its input from INPUT_PATH instead):
#
# l = '''Have A Good Time - Paul Simon
# Yesterday it was my birthday
# I hung one more year on the line
# I should be depressed
# My life's a mess
# But I'm having a good time
# Oo I've been loving and loving and loving
# I'm exhausted from loving so well
# I should go to bed
# But a voice in my head
# Says "Ah what the hell"
# Have a good time
# Have a good time
# Have a good time
# Have a good time
# Paranoia strikes deep in the heartland
# But I think it's all overdone
# Exaggerating this and exaggerating that
# They don't have no fun
# I don't believe what I read in the papers
# They're just out to capture my dime
# I ain't worrying
# And I ain't scurrying;
# I'm having a good time
# Have a good time
# Have a good time
# Have a good time
# Have a good time
# Maybe I'm laughing my way to disaster
# Maybe my race has been run
# Maybe I'm blind to the fate of mankind
# But what can be done
# So God bless the goods we was given
# And God bless the U S of A
# And God bless our standard of living
# Let's keep it that way
# And we'll all have a good time
# Have a good time
# Have a good time
# Have a good time
# Have a good time
# '''

INPUT_PATH = 'text.txt'

# Characters replaced by a space before splitting into words.
# NOTE(review): ';' is a separator, so an HTML entity left in the input
# (e.g. "I&apos;m") is split into two tokens ("i&apos" and "m"). Decode
# entities upstream if that is not wanted.
PUNCTUATION = ''',.;"'''

# Words excluded from the frequency table.
STOP_WORDS = frozenset({'a', 'but', 'in', 'the', 'and', 'so'})

# How many of the most frequent words to print.
TOP_N = 20


def count_words(
    text: str,
    stop_words: Iterable[str] = STOP_WORDS,
    punctuation: str = PUNCTUATION,
) -> Dict[str, int]:
    """Return a mapping of lower-cased word -> number of occurrences.

    Args:
        text: The text to analyse.
        stop_words: Lower-case words to leave out of the result. A stop word
            that never occurs in ``text`` is simply ignored.
        punctuation: Characters that are turned into spaces before the text
            is split on whitespace.

    Returns:
        A ``Counter`` whose keys appear in order of first occurrence.
    """
    excluded = frozenset(stop_words)
    # One pass over the text instead of one ``replace`` call per character.
    separators = str.maketrans({ch: ' ' for ch in punctuation})
    words = text.translate(separators).lower().split()
    return Counter(word for word in words if word not in excluded)


def top_words(counts: Dict[str, int], n: int = TOP_N) -> List[Tuple[str, int]]:
    """Return up to ``n`` ``(word, count)`` pairs, most frequent first.

    Words with equal counts keep the order in which they appear in
    ``counts`` (the sort is stable). Fewer than ``n`` pairs are returned when
    ``counts`` holds fewer than ``n`` words.
    """
    ranked = sorted(counts.items(), key=itemgetter(1), reverse=True)
    return ranked[:n]


def main() -> None:
    """Read INPUT_PATH and print its TOP_N most frequent words."""
    # ``with`` closes the file even if reading fails.
    with open(INPUT_PATH, 'r') as source:
        text = source.read()
    for entry in top_words(count_words(text)):
        print(entry)


if __name__ == '__main__':
    main()
C:\Users\user\PycharmProjects\untitled\venv\Scripts\python.exe C:/Users/user/PycharmProjects/untitled/1111.py
('good', 16)
('time', 16)
('have', 14)
('i', 8)
('my', 6)
('i&apos', 6)
('m', 5)
('s', 4)
('loving', 4)
('to', 4)
('t', 4)
('what', 3)
('maybe', 3)
('of', 3)
('god', 3)
('bless', 3)
('it', 2)
('was', 2)
('should', 2)
('be', 2)
Process finished with exit code 0