# Web scraper examples for xiaohuar.com (校花网)
import re
import requests
# Download every site-relative image referenced by the listing page into the
# current working directory.
BASE_URL = 'http://www.xiaohuar.com'
# Seconds. Without an explicit timeout, requests can wait on a stalled
# server indefinitely.
REQUEST_TIMEOUT = 10

response = requests.get('http://www.xiaohuar.com/huar', timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of scraping an error page.
response.raise_for_status()
data = response.text

# Collect the value of every src="..." attribute on the page. The pattern is
# not limited to <img> tags, so it can also pick up other tags' sources.
results = re.findall('src="(.*?)"', data)
for result in results:
    # Absolute URLs are skipped; only site-relative paths are downloaded.
    if result.startswith('http'):
        continue

    # The file name is the last 14 characters of the final path segment.
    img_name = result.split('/')[-1][-14:]
    if not img_name:
        # Empty src or a path ending in '/': there is no file name to open.
        continue

    img_addr = BASE_URL + result
    img_response = requests.get(img_addr, timeout=REQUEST_TIMEOUT)
    if not img_response.ok:
        # Do not store an HTTP error body under an image file name.
        continue

    img_data = img_response.content
    # The context manager flushes and closes the file on exit.
    with open(img_name, 'wb') as f:
        f.write(img_data)
    print("成功")
import re
import requests
import os
# Download the first 29 <img> sources of the listing page into the local
# directory '校花网'.
BASE_URL = 'http://www.xiaohuar.com'
# Seconds. Without an explicit timeout, requests can wait on a stalled
# server indefinitely.
REQUEST_TIMEOUT = 10

# exist_ok avoids the separate exists() check and the race between the check
# and the directory creation.
os.makedirs('校花网', exist_ok=True)

response = requests.get('http://www.xiaohuar.com/s-1-290.html#p1', timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of scraping an error page.
response.raise_for_status()
data = response.text

results = re.findall('<img src="(.*?)"', data)
# Only the first 29 matches are downloaded.
results = results[:29]
for i in results:
    # Site-relative paths need the host prepended; an src that is already an
    # absolute URL must be used unchanged, otherwise the host is glued in
    # front of a second scheme and the request cannot succeed.
    if i.startswith('http'):
        result = i
    else:
        result = BASE_URL + i

    # The file name is the final path segment of the image URL.
    img_name = result.split('/')[-1]
    if not img_name:
        # URL ends in '/': joining would yield the directory itself.
        continue

    img_response = requests.get(result, timeout=REQUEST_TIMEOUT)
    if not img_response.ok:
        # Do not store an HTTP error body under an image file name.
        continue

    img_data = img_response.content
    img_name = os.path.join('校花网', img_name)
    with open(img_name, 'wb') as f:
        f.write(img_data)
    print('success+1')