Python爬取煎蛋网图片
``` py2版本: #-*- coding:utf-8 -*- #from __future__ import unicode_liter import urllib,urllib2,time import re,sys,os headers={ 'Referer':'http://jandan.net/', 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36', } def getimg(n=''): req=urllib2.Request('http://jandan.net/ooxx'+n,headers=headers) html=urllib2.urlopen(req).read() if n=='': #print html reg= '''<div class="text"><span class="righttext"><a href="//jandan.net/ooxx/page-(.*?)#comment-.*?">[\d]*</a></span><p><a href="//.*?"''' else: reg= '''<div class="text"><span class="righttext"><a href="//jandan.net/ooxx/page-.*?#comment-.*?">[\d]*</a></span><p><a href="//(.*?)"''' reg=re.compile(reg) img=re.findall(reg,html) return img a=1 #创建下载图片的文件夹 #dirpath=os.path.splitext(path) 分离文件名与扩展名;默认返回(fname,fextension)元组,可做分片操作 dirpath=os.getcwd()+'/img' if not os.path.exists(dirpath): os.mkdir(dirpath) #下载图片 nu=raw_input('你想获取多少页(每页大概11张图片):') nowpage=getimg()[0] print nowpage for n in range(int(nowpage)-int(nu)+1,int(nowpage)+1): print '获取第%s页'%n for i in getimg('/page-%s'%n): print '正在下载第%s张图片'%a k=r'http://'+i print k urllib.urlretrieve(k, 'img/%s' %i.split('/')[-1]) #time.sleep(2) #出现响应超时的情况时加上这句 a+=1 ''' try: #urllib.urlretrieve(i,'img/%s'%str(i)[-8:-1]) urllib.urlretrieve(i,'img/%s.png'%a) a+=1 except: print "Unexpected error:", sys.exc_info()[0] ''' print '总共下载%s张图片'%(int(a)-1) py3版本: #from __future__ import unicode_liter import urllib.request import re,sys,os headers={ # 'Cookie':'__cfduid=df3295a9ee824f447e48bcda4f871d50f1505877948; _ga=GA1.2.1288199068.1505877788; _gid=GA1.2.1501836844.1505877789; _gat=1', 'Referer':'http://jandan.net/', 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36', } def getimg(n=''): req=urllib.request.Request('http://jandan.net/ooxx'+n,headers=headers) html=urllib.request.urlopen(req).read().decode('utf-8') if n=='': #print html reg= '''<div 
class="text"><span class="righttext"><a href="//jandan.net/ooxx/page-(.*?)#comment-.*?">[\d]*</a></span><p><a href="//.*?"''' else: reg= '''<div class="text"><span class="righttext"><a href="//jandan.net/ooxx/page-.*?#comment-.*?">[\d]*</a></span><p><a href="//(.*?)"''' reg=re.compile(reg) img=re.findall(reg,html) return img a=1 #创建下载图片的文件夹 #dirpath=os.path.splitext(path) 分离文件名与扩展名;默认返回(fname,fextension)元组,可做分片操作 dirpath=os.getcwd()+'/img' if not os.path.exists(dirpath): os.mkdir(dirpath) #下载图片 nu=input('你想获取多少页(每页大概21张图片):') nowpage=getimg()[0] print(nowpage) for n in range(int(nowpage)-int(nu),int(nowpage)): print('获取第%s页'%n) for i in getimg('/page-%s'%n): print('正在下载第%s张图片'%a) k=r'http://'+i print(k) urllib.request.urlretrieve(k, 'img/%s' %i.split('/')[-1]) a+=1 ''' try: #urllib.urlretrieve(i,'img/%s'%str(i)[-8:-1]) urllib.urlretrieve(i,'img/%s.png'%a) a+=1 except: print "Unexpected error:", sys.exc_info()[0] ''' print('总共下载%s张图片'%(int(a)-1)) ```