import requests
import re
import os
# Request headers sent with every HTTP call in this script: a desktop-Chrome
# user-agent string and a referer pointing at the listing section.
_user_agent = (
    'Mozilla/5.0 (Windows NT 10.0; WOW64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/75.0.3770.100 Safari/537.36'
)
headers = {
    'user-agent': _user_agent,
    'referer': 'https://www.mzitu.com/xinggan/',
}

# Proxy used for the requests (the author's note: use a high-anonymity proxy).
# Other proxy addresses noted by the author, not used by the code:
#   47.244.192.12:17518
#   36.57.236.153:25473
# NOTE(review): only the 'http' scheme is mapped here, while the listing pages
# are requested over https. requests selects a proxy by URL scheme, so confirm
# whether an 'https' entry was intended.
proxies = dict(http='114.99.20.180:43322')
# Crawl listing pages 1-2 and save every image linked from each page into a
# per-page directory created next to this script.

# requests waits indefinitely when no timeout is given; bound every call.
REQUEST_TIMEOUT = 10  # seconds
# Image URLs are taken from the single-quoted data-original attributes.
IMAGE_URL_PATTERN = re.compile(r"data-original='(.*?)'")
script_dir = os.path.dirname(__file__)

for page in range(1, 3):
    response_home = requests.get(
        f'https://www.mzitu.com/xinggan/page/{page}/',
        headers=headers,
        proxies=proxies,
        timeout=REQUEST_TIMEOUT,
    )
    if not response_home.ok:
        # An error response carries no usable image links; skip this page
        # instead of scanning the error body.
        print(f'skip page {page}: HTTP {response_home.status_code}')
        continue
    url_list = IMAGE_URL_PATTERN.findall(response_home.text)

    page_dir = os.path.join(script_dir, f'第{page}页妹子图')
    # exist_ok=True replaces the isdir()/mkdir() pair and avoids the
    # check-then-create race between the two calls.
    os.makedirs(page_dir, exist_ok=True)

    for url in url_list:
        filename = url.rsplit('/', 1)[-1]
        if not filename:
            # No file name after the last '/', so the target path would be
            # the directory itself and open() would fail.
            print(f'skip url without file name: {url!r}')
            continue

        response = requests.get(
            url,
            headers=headers,
            proxies=proxies,
            timeout=REQUEST_TIMEOUT,
        )
        if not response.ok:
            # Do not save an HTTP error body under an image file name.
            print(f'skip {url}: HTTP {response.status_code}')
            continue

        file_path = os.path.join(page_dir, filename)
        with open(file_path, 'wb') as f:
            # The body is already fully downloaded; write it in one call
            # instead of iterating it with iter_content()'s default
            # one-byte chunks.
            f.write(response.content)