爬虫大作业
"""Scrape song lyrics from kuwo.cn and list the most frequent words.

Originally posted 2018-04-30 13:27 by Molemole (204 views, 0 comments).

Workflow:
    1. Fetch an artist's lyric index page and follow every song link.
    2. Append each song's lyric lines to ``LYRICS_PATH``.
    3. Segment the collected text with jieba and count the tokens.
    4. Print the ``TOP_N`` most frequent tokens and write them, one per
       line, to ``TOP_WORDS_PATH``.
"""
from collections import Counter

import jieba
import requests
from bs4 import BeautifulSoup

LYRICS_PATH = "C:/Users/Administrator/PycharmProjects/test/test.txt"
TOP_WORDS_PATH = "C:/Users/Administrator/PycharmProjects/test/test1.txt"
ARTIST_URL = 'http://www.kuwo.cn/geci/a_266986/'
TOP_N = 60
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10


def _fetch_soup(url):
    """Download *url*, decode it as UTF-8 and return the parsed document."""
    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.encoding = 'UTF-8'
    return BeautifulSoup(res.text, 'html.parser')


def songlist(url):
    """Visit every song linked from the lyric index page at *url*.

    Each ``.song`` element's first ``<a href>`` is handed to ``songread``.
    Entries without a usable link are skipped instead of raising.
    """
    soup = _fetch_soup(url)
    # The first ``.song`` element is skipped, as in the original script
    # (presumably a header row -- confirm against the live page).
    for entry in soup.select('.song')[1:]:
        anchors = entry.select('a')
        href = anchors[0].get('href') if anchors else None
        if href:
            songread(href)


def songread(url):
    """Append the lyric lines of the song page at *url* to ``LYRICS_PATH``.

    The file is opened in append mode so lyrics from earlier songs are
    kept; opening with ``'w+'`` here truncated the file on every call and
    left only the last song's lyrics.
    """
    soup = _fetch_soup(url)
    with open(LYRICS_PATH, 'a', encoding='utf8') as lyrics_file:
        lyrics_file.writelines(item.text for item in soup.select('.lrcItem'))


def main():
    """Run the whole scrape -> segment -> count -> export pipeline."""
    # Start from an empty lyrics file so reruns do not accumulate old data.
    with open(LYRICS_PATH, 'w', encoding='utf8'):
        pass

    songlist(ARTIST_URL)

    with open(LYRICS_PATH, 'r', encoding='utf8') as lyrics_file:
        text = lyrics_file.read()

    # Counter tallies in a single pass; most_common() returns at most
    # TOP_N (word, count) pairs, so short texts no longer raise IndexError.
    top_words = Counter(jieba.cut(text)).most_common(TOP_N)

    for pair in top_words:
        print(pair)

    with open(TOP_WORDS_PATH, 'w', encoding='utf8') as out_file:
        out_file.writelines(word + '\n' for word, _count in top_words)


if __name__ == "__main__":
    main()
将高频词汇写入 test1.txt 以后,打开 http://www.picdata.cn/ ,用该在线词云工具生成词云图片。