爬虫爬取图片

import re
import requests

'''
上海大学校花史翌竹
'''

'''
http://www.xiaohuar.com/list-1-0.html
http://www.xiaohuar.com/list-1-1.html
http://www.xiaohuar.com/list-1-2.html
http://www.xiaohuar.com/list-1-3.html
http://www.xiaohuar.com/list-1-4.html
'''
# Number of paginated list pages to crawl: list-1-0.html ... list-1-43.html.
PAGE_COUNT = 44
# Seconds to wait on any single HTTP request; requests has no default timeout
# and would otherwise block forever on a stalled connection.
REQUEST_TIMEOUT = 10
# Captures the value of the src attribute of an <img> that closes an <a> tag.
IMG_SRC_PATTERN = re.compile(r'" src="(.*?)" /></a>')


def build_page_url(index):
    """Return the URL of the list page with the given zero-based index."""
    return f'http://www.xiaohuar.com/list-1-{index}.html'


def extract_image_urls(html):
    """Return the image URLs in *html* that start with ``https``.

    Other matches (for example site-relative paths) are skipped because they
    cannot be fetched without joining them to a base URL.
    """
    return [src for src in IMG_SRC_PATTERN.findall(html)
            if src.startswith('https')]


def download_image(img_url):
    """Download *img_url* into the current directory.

    The file is named after the last path segment of the URL.

    Returns:
        True if a file was written, False if the URL yields no file name.

    Raises:
        requests.RequestException: if the request fails or the server
            answers with an HTTP error status.
    """
    img_name = img_url.split('/')[-1]
    if not img_name:
        # URL ends with '/': there is nothing to name the file after.
        return False
    img_response = requests.get(img_url, timeout=REQUEST_TIMEOUT)
    # Do not save an HTML error page under an image file name.
    img_response.raise_for_status()
    with open(img_name, 'wb') as fw:
        fw.write(img_response.content)
    return True


def main():
    """Crawl every list page, save each https image, return the total saved."""
    count = 0
    for i in range(PAGE_COUNT):
        url = build_page_url(i)
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as exc:
            # One unreachable page should not abort the remaining pages.
            print(f'skipping page {url}: {exc}')
            continue

        for img_url in extract_image_urls(response.text):
            try:
                saved = download_image(img_url)
            except requests.RequestException as exc:
                print(f'skipping image {img_url}: {exc}')
                continue
            if saved:
                count += 1
                print(f'爬取了{count}张')
    return count


if __name__ == '__main__':
    main()
posted @ 2019-07-22 20:14  oxtime  阅读(250)  评论(0)  编辑  收藏  举报