爬虫 爬取妹子图
功能写得很差，简单练手
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Small practice crawler for www.mzitu.com.

Flow: fetch an index page -> extract the detail-page links -> fetch each
detail page -> take the first ``<img src="...">`` on it -> save that image
into ``movie_path`` under a hash-based file name.

Every step is best-effort: a failure is logged and the item is skipped so
one bad page or image does not abort the whole run.
"""
import hashlib
import logging
import os
import re
import time
from typing import Iterator, Optional

import requests  # pip3 install requests

logger = logging.getLogger(__name__)

# Directory the downloaded images are written to.
movie_path = r'D:\爬虫学习\爬虫\妹子图'

_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
)

# Seconds to wait for a server before giving up; without a timeout a single
# stalled connection would block the crawler forever.
_REQUEST_TIMEOUT = 10

_DETAIL_LINK_RE = re.compile(r'li>.*?<a href="(.*?)"', re.S)
_FIRST_IMG_RE = re.compile(r'<img src="(.*?)" ', re.S)


def _fetch(url, headers=None):
    # type: (str, Optional[dict]) -> Optional[requests.Response]
    """Return the response of a GET on *url*, or None if it failed.

    A request error or any status other than 200 is logged and reported as
    None so callers can skip the item.
    """
    try:
        response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        logger.warning('GET %s failed: %s', url, exc)
        return None
    if response.status_code != 200:
        logger.warning('GET %s returned HTTP %s', url, response.status_code)
        return None
    return response


def get_index_page(url):
    # type: (str) -> Optional[str]
    """Return the HTML text of the index page at *url*, or None on failure."""
    response = _fetch(url)
    return None if response is None else response.text


def parse_index(index_page):
    # type: (Optional[str]) -> Iterator[str]
    """Yield the detail-page URLs found in *index_page*.

    A link is yielded only when it contains a ``/`` and has a non-empty
    final path segment (i.e. it does not end with ``/``). Empty or missing
    input yields nothing.
    """
    if not index_page:
        return
    for detail_url in _DETAIL_LINK_RE.findall(index_page):
        # rpartition never raises, unlike rsplit(...)[1] on a link that has
        # no '/' at all (for example href="#").
        _, slash, last_segment = detail_url.rpartition('/')
        if slash and last_segment:
            yield detail_url


def get_parge_url(detail_url):
    # type: (str) -> Optional[str]
    """Return the HTML text of the detail page, or None on failure."""
    response = _fetch(detail_url, headers={
        # NOTE(review): a Referer is normally an absolute URL including the
        # scheme; the value is kept unchanged because the site's check is
        # not visible from here - confirm before altering it.
        "Referer": "www.mzitu.com",
        'User-Agent': _USER_AGENT,
    })
    return None if response is None else response.text


def parse_detail(detail):
    # type: (Optional[str]) -> Optional[str]
    """Return the first image URL found in *detail*, or None if there is none."""
    if not detail:
        return None
    match = _FIRST_IMG_RE.search(detail)
    return match.group(1) if match else None


def get_movie(url, page_url):
    # type: (Optional[str], str) -> None
    """Download the image at *url* and save it into ``movie_path``.

    *page_url* is sent as the Referer header to get past the site's
    hotlink protection. The file name is the MD5 hex digest of the current
    time plus the URL, so repeated downloads do not overwrite each other.
    Failures are logged and the image is skipped.
    """
    if not url:
        logger.warning('No image URL found on %s', page_url)
        return

    response = _fetch(url, headers={
        "Referer": page_url,
        'User-Agent': _USER_AGENT,
    })
    if response is None:
        return

    digest = hashlib.md5()
    digest.update(str(time.time()).encode('utf-8'))
    digest.update(url.encode('utf-8'))
    filepath = os.path.join(movie_path, '%s.jpg' % digest.hexdigest())

    try:
        # Create the target directory on first use; previously a missing
        # directory made every download fail without any message.
        os.makedirs(movie_path, exist_ok=True)
        with open(filepath, 'wb') as image_file:
            image_file.write(response.content)
    except OSError as exc:
        logger.warning('Could not save %s to %s: %s', url, filepath, exc)
        return

    print('%s 下载成功' % url)


def main():
    # type: () -> None
    """Crawl five index pages and download one image per detail page."""
    base_url = 'http://www.mzitu.com/xinggan/page/{0}/'
    # NOTE(review): numbering starts at 0, so the first request is
    # /page/0/ - confirm the site serves that URL as intended.
    for page_number in range(5):
        index_html = get_index_page(base_url.format(page_number))
        if index_html is None:
            # The fetch failure was already logged; move on to the next page.
            continue
        for detail_url in parse_index(index_html):
            detail_html = get_parge_url(detail_url)
            get_movie(parse_detail(detail_html), detail_url)


if __name__ == '__main__':
    main()
结果: