一只小爬虫下载jpg图片到桌面 >>>>>python2.7.x

import re
import urllib2
# Small crawler (Python 2.7): reads a saved HTML page, extracts every
# single-quoted <img src='...jpg'> address and downloads each image into the
# "boot" folder on the desktop.

# Browser-like User-agent sent with every image request.
headers = {'User-agent' : 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0'}

# The page's HTML source must have been saved to 1.txt beforehand.
with open('C:\\Users\\yaxin\\Desktop\\1.txt','r') as source_file:
    data = source_file.read()

# Match image addresses. [^'] keeps the match inside a single src attribute,
# and \.jpg anchors on the file extension instead of any "jpg" substring, so
# the captured group is already the complete URL.
pattern = re.compile(r"<img src='([^']*?\.jpg)")
need = pattern.findall(data)

count = 0
for url in need:
    request = urllib2.Request(url, headers = headers)
    # Download first and only then create the file, so a failed request does
    # not leave an empty .jpg behind.
    try:
        response = urllib2.urlopen(request)
        try:
            data_b = response.read()  # raw image bytes
        finally:
            response.close()
    except urllib2.URLError as err:
        # One unreachable image should not abort the remaining downloads.
        print('failed  ' + url + '  ' + str(err))
        continue
    # File name: the 4 characters that precede ".jpg" in the URL.
    # NOTE(review): URLs sharing those 4 characters overwrite each other.
    with open('C:\\Users\\yaxin\\Desktop\\boot\\%s.jpg' % url[-8:-4], 'wb') as f:
        f.write(data_b)
    count = count + 1
    print('done    ' + str(count))
print('done')  # end-of-run marker

posted on 2016-12-03 16:53  yaxin1989  阅读(357)  评论(0)  编辑  收藏  举报

导航