爬虫大作业(虎扑足球新闻)
from collections import Counter

import requests
from bs4 import BeautifulSoup
import jieba
from PIL import Image, ImageSequence
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator


def changeTitleToDict():
    """Count how often each jieba token occurs in ``yingchao.txt``.

    The file is read as UTF-8, segmented with ``jieba.cut``, and every token
    that is not one of the listed punctuation/space symbols is mapped to its
    number of occurrences. The resulting dict is printed and returned.

    Returns:
        dict: token -> occurrence count.

    Raises:
        FileNotFoundError: if ``yingchao.txt`` does not exist.
    """
    # Context manager so the handle is always closed, even if read() fails.
    with open('yingchao.txt', 'r', encoding='utf-8') as f:
        text = f.read()

    # Tokens that must not appear in the result.
    symbol = {"/", "(", ")", " ", ";", "!", "、", ":"}

    # Counter tallies all tokens in a single pass instead of calling
    # list.count() once per distinct token.
    title_dict = {
        token: count
        for token, count in Counter(jieba.cut(text)).items()
        if token not in symbol
    }
    print(title_dict)
    return title_dict


# Fetch listing pages 1-9 and append the text of every <span class="n1">
# (presumably the news titles -- verify against the page markup) to
# yingchao.txt. The file is opened once in append mode, so text from earlier
# runs is kept; it is closed before changeTitleToDict() reads it back.
with open('yingchao.txt', 'a', encoding='utf-8') as out:
    for page in range(1, 10):
        hupu = 'https://voice.hupu.com/soccer/tag/496-%s.html' % (page)
        # Without a timeout a stalled connection would block forever.
        reslist = requests.get(hupu, timeout=10)
        reslist.encoding = 'utf-8'
        soup_list = BeautifulSoup(reslist.text, 'html.parser')
        for news in soup_list.find_all('span', class_='n1'):
            print(news.text)
            out.write(news.text)

title_dict = changeTitleToDict()
font = r'C:\Windows\Fonts\simhei.ttf'
# Only the distinct tokens are handed to WordCloud; the counts computed above
# are not passed on.
content = ' '.join(title_dict.keys())

# Build the word cloud, using 1.jpg as the shape mask.
image = np.array(Image.open('1.jpg'))
wordcloud = WordCloud(
    background_color='white',
    font_path=font,
    mask=image,
    width=1000,
    height=860,
    margin=2,
).generate(content)

# Recolor the words with colors sampled from 2.jpg.
image2 = np.array(Image.open('2.jpg'))
image_colors = ImageColorGenerator(image2)
wordcloud.recolor(color_func=image_colors)

# Display the generated word cloud, then save it to 3.jpg.
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
wordcloud.to_file('3.jpg')
背景图
字体颜色图
词云图
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步