Web Crawler Final Assignment
import jieba
import requests
from bs4 import BeautifulSoup

LYRICS_FILE = "C:/Users/ZD/PycharmProjects/test/test.txt"

# start with an empty lyrics file
open(LYRICS_FILE, 'w', encoding='utf8').close()

def songlist(url):
    # crawl the lyrics index page and follow each song link
    res = requests.get(url)
    res.encoding = 'UTF-8'
    soup = BeautifulSoup(res.text, 'html.parser')
    songname = soup.select('.song')
    for i in songname[1:]:
        link = i.select('a')[0].attrs['href']
        songread(link)

def songread(url):
    # crawl one song page and append its lyric lines to the file
    # ('a' rather than 'w+', so earlier songs are not overwritten)
    f = open(LYRICS_FILE, 'a', encoding='utf8')
    res = requests.get(url)
    res.encoding = 'UTF-8'
    soup = BeautifulSoup(res.text, 'html.parser')
    song = soup.select('.lrcItem')
    for i in song:
        f.write(i.text)
    f.close()

songlist('http://www.kuwo.cn/geci/a_336/?')

# read the collected lyrics back and segment them with jieba
f = open(LYRICS_FILE, 'r', encoding='utf8')
text = f.read()
f.close()
wordList = list(jieba.cut(text))

# count how often each word appears
wordDic = {}
for i in set(wordList):
    wordDic[i] = wordList.count(i)

# sort by frequency, print the top 60 words and save them for the word cloud
sort_word = sorted(wordDic.items(), key=lambda d: d[1], reverse=True)
for i in range(60):
    print(sort_word[i])

fo = open("C:/Users/ZD/PycharmProjects/test/test1.txt", 'w', encoding='utf8')
for i in range(60):
    fo.write(sort_word[i][0] + '\n')
fo.close()
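A possible refinement, not part of the original code: the frequency count above calls wordList.count(i) once per distinct word, which rescans the whole list each time. collections.Counter from the standard library does the same tally in a single pass; a minimal sketch, reusing the wordList variable from the code above:

from collections import Counter

wordDic = Counter(wordList)            # tallies every word in one pass
sort_word = wordDic.most_common(60)    # (word, count) pairs, highest count first
for word, count in sort_word:
    print(word, count)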
I ran into three problems while building this. The first: after opening the file with w+ and writing the data in, nothing could be read back out; I solved it by opening the file a second time before reading.
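The w+ behaviour comes from the file position: after the writes, the cursor sits at the end of the file, so read() returns an empty string. Reopening the file, as done above, works; seeking back to the start is another option. A minimal sketch (the file name is just a placeholder):

f = open("test.txt", 'w+', encoding='utf8')
f.write("some lyrics")
f.seek(0)           # move the cursor back to the beginning of the file
print(f.read())     # the written data can now be read back
f.close()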
The second: after sorting the list I wanted to pull the str back out of it. Solution: asked 刘东.
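For reference, sorted() applied to wordDic.items() returns a list of (word, count) tuples, so the word itself comes out with index [0], which is what sort_word[i][0] does in the code above. A small illustration with made-up counts:

wordDic = {'love': 3, 'night': 5, 'rain': 1}     # made-up example counts
sort_word = sorted(wordDic.items(), key=lambda d: d[1], reverse=True)
# sort_word is now [('night', 5), ('love', 3), ('rain', 1)]
for word, count in sort_word:                    # tuple unpacking avoids [0]/[1] indexing
    print(word)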
The third: installing wordcloud failed, so I switched to an online word-cloud generator instead.
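For completeness, if the wordcloud package does install later (for example with pip install wordcloud), the top-60 word file could be rendered locally instead of with the online generator. This is only a sketch under that assumption; the font path is a placeholder and must point to a font with Chinese glyphs on the machine:

from wordcloud import WordCloud

with open("C:/Users/ZD/PycharmProjects/test/test1.txt", 'r', encoding='utf8') as f:
    words = f.read()

# font_path is an assumption: it must name a font containing Chinese glyphs,
# otherwise the words render as empty boxes
wc = WordCloud(font_path="C:/Windows/Fonts/simhei.ttf",
               width=800, height=600, background_color='white')
wc.generate(words)
wc.to_file("C:/Users/ZD/PycharmProjects/test/wordcloud.png")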