爬虫之 校花网

爬虫之 校花网

http://www.xiaohuar.com/huar

import re
import requests

from urllib.parse import urljoin

# Download every site-relative ``src`` resource referenced by the listing page
# and save each one into the current working directory.
PAGE_URL = 'http://www.xiaohuar.com/huar'
REQUEST_TIMEOUT = 10  # seconds; requests waits forever when no timeout is given

response = requests.get(PAGE_URL, timeout=REQUEST_TIMEOUT)
response.raise_for_status()  # do not parse an error page as if it were the listing
data = response.text
results = re.findall('src="(.*?)"', data)
for result in results:
    # Absolute URLs are skipped; only site-relative resources are fetched.
    if result.startswith('http'):
        continue

    # Keep only the last 14 characters of the final path segment as file name.
    img_name = result.split('/')[-1][-14:]
    if not img_name:
        # A src ending in '/' yields an empty name, which open() cannot create.
        continue

    # urljoin resolves the path against the page URL, so paths without a
    # leading slash still produce a valid address.
    img_addr = urljoin(PAGE_URL, result)
    try:
        img_response = requests.get(img_addr, timeout=REQUEST_TIMEOUT)
        img_response.raise_for_status()
    except requests.RequestException as exc:
        # One broken resource must not abort the remaining downloads.
        print('skipped', img_addr, exc)
        continue

    # The context manager flushes and closes the file on exit.
    with open(img_name, 'wb') as f:
        f.write(img_response.content)
    print("成功")

http://www.xiaohuar.com/s-1-290.html#p1

import re
import requests
import os
from urllib.parse import urljoin

# Download the first images referenced by the gallery page into the
# '校花网' directory (created on demand).
PAGE_URL = 'http://www.xiaohuar.com/s-1-290.html#p1'
OUTPUT_DIR = '校花网'
MAX_IMAGES = 29  # only the first 29 <img> matches are downloaded
REQUEST_TIMEOUT = 10  # seconds; requests waits forever when no timeout is given

# exist_ok avoids the check-then-create race of exists() followed by mkdir().
os.makedirs(OUTPUT_DIR, exist_ok=True)

response = requests.get(PAGE_URL, timeout=REQUEST_TIMEOUT)
response.raise_for_status()  # do not parse an error page as if it were the gallery
data = response.text
results = re.findall('<img src="(.*?)"', data)
results = results[:MAX_IMAGES]
for i in results:
    # urljoin leaves absolute URLs untouched and resolves relative ones against
    # the page, instead of blindly prefixing the host onto every src value.
    result = urljoin(PAGE_URL, i)

    img_name = result.split('/')[-1]
    if not img_name:
        # A URL ending in '/' has no file name to save under.
        continue
    img_name = os.path.join(OUTPUT_DIR, img_name)

    try:
        img_response = requests.get(result, timeout=REQUEST_TIMEOUT)
        img_response.raise_for_status()
    except requests.RequestException as exc:
        # One broken image must not abort the remaining downloads.
        print('skipped', result, exc)
        continue

    with open(img_name, 'wb') as f:
        f.write(img_response.content)
    print('success+1')
posted @ 2019-07-23 16:19  SweetJack  阅读(748)  评论(0)  编辑  收藏  举报