爬取校花网
一、 爬取校花网
import re
import requests
"""
@author RansySun
@create 2019-07-22-16:04
"""
# Fetch the listing page whose <img> tags reference the pictures to download.
# The timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get('http://www.xiaohuar.com/huar', timeout=10)
data = response.text
count = 0  # number of images saved so far; also used as the filename prefix

# Example of the markup being matched:
# <img width="210" alt="" src="/d/file/20190709/small5f7964a2667ca292b77aed06cd3546341562683536.jpg">
# Collect the value of every src="..." attribute found in the page text.
data_res = re.findall('src="(.*?)"', data)
for result in data_res:  # type: str
    # Only site-relative paths beginning with "/d" are downloaded.
    if result.startswith("/d"):
        # Turn the relative path into an absolute URL on the same host.
        result = "http://www.xiaohuar.com" + result
        img_response = requests.get(result, timeout=10)
        # The last path segment is the image's own file name.
        img_name = result.split("/")[-1]
        img_data = img_response.content
        # The with-block closes (and thereby flushes) the file even if the
        # write raises, so no handle is leaked per downloaded image.
        with open(f"{count}_{img_name}", "wb") as fw:
            fw.write(img_data)
        count += 1
        print(f"爬取{count}张图片")
在当下的阶段，必将由程序员来主导，甚至比以往更甚。