爬虫大作业

import requests
from bs4 import BeautifulSoup
import json
import jieba.analyse
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud,ImageColorGenerator


# Download the article page and parse it as HTML.
url = "https://item.btime.com/36i90hfhkt3838be1gof3cla1ka?from=haozcxw"
# A timeout keeps the script from hanging forever on an unresponsive server.
res = requests.get(url, timeout=10)
# Fail here on an HTTP error status instead of parsing an error page below.
res.raise_for_status()
res.encoding = 'utf-8'
soup = BeautifulSoup(res.text,'html.parser')


# Take the text of the first element matching each CSS selector.
title = soup.select('.title')[0].text
content = soup.select('.content-text')[0].text
info = soup.select('.edit-info')[0].text

# Extract the whitespace-delimited token that starts with the '责任编辑:' label
# and drop the label from its front.
# str.lstrip() takes a *set of characters*, not a prefix, so it would also eat
# leading characters of the name that happen to occur in the label; slice the
# label off by length instead. When the label is absent, fall back to an empty
# string rather than slicing from index -1.
label = '责任编辑:'
label_pos = info.find(label)
au = ''
if label_pos != -1:
    au = info[label_pos:].split()[0][len(label):]
print(title,content,au)

# Append the article body to content.txt. The context manager guarantees the
# file is flushed and closed even if write() raises.
with open('content.txt', 'a', encoding='utf-8') as f:
    f.write(content)

# Replace every punctuation character listed in strl with a space.
# The replacements are accumulated in ls; restarting from `content` on each
# pass would keep only the replacement of the last character in strl.
strl = ''',。、‘’ '''
ls = content
for i in strl:
    ls = ls.replace(i," ")
# Print the fully cleaned text once, after all characters have been replaced.
print(ls)


# Load the entire contents of content.txt into lyric.
# Reading inside `for line in f` would consume the first line through the
# iterator and then append only the remainder, silently dropping that first
# line (and yielding an empty string for a single-line file). A single read()
# returns everything, and the context manager closes the file afterwards.
with open('content.txt','r', encoding='utf-8') as f:
    lyric = f.read()


# Ask jieba's TextRank for up to 50 keywords together with their weights,
# then map each entry's first element (the word) to its second (the weight).
result = jieba.analyse.textrank(lyric, topK=50, withWeight=True)
keywords = {entry[0]: entry[1] for entry in result}
print(keywords)

# Load the picture and convert it to an array; the array is used both as the
# word-cloud mask and as the source for ImageColorGenerator.
image = Image.open('t01c9f26bac34842d0d.jpg')
graph = np.array(image)

# Build the cloud from the keyword -> weight mapping.
wc = WordCloud(
    font_path='./fonts/simhei.ttf',
    background_color='White',
    max_words=50,
    mask=graph,
)
wc.generate_from_frequencies(keywords)

# Show the cloud, then show it again after recolouring with the generator
# built from the picture array.
image_color = ImageColorGenerator(graph)
plt.imshow(wc)
recolored = wc.recolor(color_func=image_color)
plt.imshow(recolored)
plt.axis("off")
plt.show()

# Save the cloud to disk (this runs after the recolor call above).
wc.to_file('d.jpg')

 

posted @ 2018-04-24 19:09  099吴海经  阅读(164)  评论(0)  编辑  收藏  举报