Python 抓取妹纸图

import urllib.request
import re
import os

# Scraper script: for every desktop page URL listed in "456.txt", fetch the
# matching mobile page, extract its image URLs and save each image as
# "<n>.jpg" (n is a running counter) in the download directory.

# Precompile the regexes once so they are not rebuilt on every iteration.
re_img = re.compile(r'<p><img src="(.+?)" alt=""></p>')
re_url = re.compile(r'http://aimm\.92game\.net/xinggan/(\d+)\.html')

headers = {
    # Spoof a mobile user agent so the mobile version of the page is returned.
    'user-agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36',
}
pic_num = 0  # running count of images; also used as the output file name

# "456.txt" is opened BEFORE the chdir below, so it is resolved against the
# directory the script was started from; the `with` guarantees it is closed.
with open("456.txt") as f2:
    os.chdir("G:\\meizhi-image")
    for each_url in f2:
        # Skip blank or unrecognised lines instead of crashing with an
        # IndexError on the first line that does not contain a page URL.
        id_match = re_url.search(each_url)
        if id_match is None:
            continue

        # Convert the desktop URL into the mobile one, which is easier to scrape.
        mobile_url = 'http://m.aimm.92game.net/n.php?id=' + id_match.group(1)
        page_request = urllib.request.Request(mobile_url, headers=headers)
        with urllib.request.urlopen(page_request) as page_response:
            page_html = page_response.read().decode("utf-8")

        for image_url in re_img.findall(page_html):
            pic_num += 1
            path = str(pic_num) + '.jpg'
            print("... ... 第" + str(pic_num) + "只妹纸正在被保存... ...")
            # Close the HTTP response and the output file even if the
            # download or the write fails part-way through.
            with urllib.request.urlopen(image_url) as image_response:
                image_data = image_response.read()
            with open(path, 'wb') as image_file:
                image_file.write(image_data)

Python 3.4 写的第一只爬虫,主要用到 urllib.request 和正则表达式。代码比较渣渣。

posted on 2016-03-22 11:42  python学习笔记  阅读(256)  评论(0)  编辑  收藏  举报

导航