Python 最小白的爬虫:爬取豆瓣小说

爬取豆瓣小说图片,并以标题命名该图片

# Author:li
import  re
import requests
# Script: download the cover image of every book listed on the first page of
# Douban's Top 250 books and save each one as "<title>.jpg" in the current
# working directory.

# Browser-like User-Agent header so the site does not reject the request as a bot.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.33 Safari/537.36'}
url = 'https://book.douban.com/top250'
# Seconds to wait on each HTTP request; without a timeout a stalled
# connection would block the script forever.
timeout = 10

# Fetch the HTML source of the listing page. Fail loudly on an HTTP error
# instead of silently matching nothing in an error page.
page = requests.get(url, headers=headers, timeout=timeout)
page.raise_for_status()
yuandaima = page.text

# One pattern with two capture groups: (cover image URL, book title).
# Capturing both in a single match keeps each URL paired with its own title
# and scans the page once instead of twice.
item_pattern = r'<.*?class="item">.*?<.*?src="(.*?)" width=.*?>.*?<a href=".*?".*?onclick=.*?title="(.*?)"'
# re.S lets "." match newlines, since each item spans several lines of HTML.
books = re.findall(item_pattern, yuandaima, re.S)

# Download every cover and write it to disk.
for image_url, book_title in books:
    print(book_title)
    # Replace characters that are illegal or unsafe in file names (path
    # separators, Windows-reserved characters, control whitespace) so that
    # open() cannot fail or escape the current directory.
    safe_name = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', book_title).strip() or 'untitled'
    try:
        response = requests.get(image_url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Best effort: one failed image should not abort the remaining downloads.
        print('skipped {}: {}'.format(book_title, exc))
        continue
    with open('{}.jpg'.format(safe_name), 'wb') as f:
        f.write(response.content)

然后可以加上翻页功能:豆瓣 Top250 每页 25 条,通过在 URL 后追加参数 `?start=0`、`?start=25`、`?start=50` …… 依次请求各页即可。

posted @ 2019-02-13 17:11  章十慕珊·  阅读(549)  评论(0)  编辑  收藏  举报