# Scrape mzitu images through a high-anonymity proxy IP, saving the "xinggan" category page by page into folders next to this script.

import requests
import re
import os

# Request headers sent with every HTTP call below.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
    # NOTE(review): presumably needed to pass the site's hotlink check -- confirm.
    'referer': 'https://www.mzitu.com/xinggan/',
}

# Alternative proxy addresses:
# 47.244.192.12:17518
# 36.57.236.153:25473
#
# requests selects the proxy by the scheme of the URL being fetched. The
# listing pages are https, so an 'https' entry is required; with only an
# 'http' entry those requests would silently bypass the proxy.
proxy_address = 'http://114.99.20.180:43322'  # high-anonymity proxy
proxies = {'http': proxy_address, 'https': proxy_address}

# Seconds to wait for a connection / response before giving up, so a dead
# proxy or host cannot hang the script forever.
request_timeout = 15

for page in range(1, 3):  # scrape listing pages 1 and 2
    response_home = requests.get(
        f'https://www.mzitu.com/xinggan/page/{page}/',
        headers=headers,
        proxies=proxies,
        timeout=request_timeout,
    )
    if not response_home.ok:
        # Do not parse an error page as if it were a listing.
        print(f'page {page}: HTTP {response_home.status_code}, skipped')
        continue

    # Thumbnail URLs are carried in single-quoted data-original attributes.
    url_list = re.findall(r"data-original='(.*?)'", response_home.text)

    # One output folder per listing page, created next to this script.
    page_dir = os.path.join(os.path.dirname(__file__), f'第{page}页妹子图')
    os.makedirs(page_dir, exist_ok=True)

    for url in url_list:
        response = requests.get(
            url, headers=headers, proxies=proxies, timeout=request_timeout
        )
        if not response.ok:
            # Avoid saving an error body to disk under an image filename.
            print(f'{url}: HTTP {response.status_code}, skipped')
            continue

        # Use the last path segment of the URL as the local file name.
        filename = url.rsplit('/', 1)[-1]
        file_path = os.path.join(page_dir, filename)

        # The body is already fully downloaded (no stream=True), so write it
        # in one call; iter_content() defaults to 1-byte chunks.
        with open(file_path, 'wb') as f:
            f.write(response.content)
# posted @ 2020-08-01 19:24  给你加马桶唱疏通  阅读(316)  评论(0)  编辑  收藏  举报