Python 爬取煎蛋网妹子图片代码
Python 2.7 代码
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 19 20:06:53 2017

@author: chaz

Crawl jandan.net/ooxx comment pages and save every linked image into the
current working directory, using one thread per page.

Written for Python 2.7; the urllib2 import fallback below also lets the
script run unchanged on Python 3.
"""

import contextlib
import posixpath
import re
import sys
import threading

try:
    import urllib2
except ImportError:
    # Python 3 exposes Request/urlopen under urllib.request instead.
    import urllib.request as urllib2


class jdmz():
    """Downloader for the images linked from jandan.net/ooxx pages.

    A single instance may be shared by several threads: the file counter
    is only advanced while holding a lock, so every saved image gets a
    distinct file name.
    """

    # Number used for the most recently saved file. The counter is bumped
    # before each save, so the first file written is "2<ext>".
    count = 1
    # Browser-like User-Agent sent with every request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) '
                             'Gecko/20091201 Firefox/3.5.6'}

    # Captures the scheme-less address of each "view_img_link" anchor.
    _img_link = re.compile(r'<p><a href="//(.*?)" target="_blank" class="view_img_link">')
    # Serialises updates of ``count`` across worker threads.
    _count_lock = threading.Lock()

    def _fetch(self, address):
        """Return the raw body of ``address``, always closing the response."""
        request = urllib2.Request(address, headers=self.headers)
        with contextlib.closing(urllib2.urlopen(request)) as response:
            return response.read()

    def _next_filename(self, img_url):
        """Reserve the next counter value and build the output file name.

        The increment and the read happen under one lock so that two
        threads can never be handed the same number. The extension is
        taken from the URL's last path component ('' when it has none).
        """
        with self._count_lock:
            self.count += 1
            number = self.count
        return str(number) + posixpath.splitext(img_url)[1]

    def getpage(self, pagenum):
        """Download one listing page.

        Args:
            pagenum: page number as a string; it is spliced into the URL.

        Returns:
            The raw response body as returned by ``read()``.
        """
        return self._fetch('http://jandan.net/ooxx/page-' + pagenum + '#comments')

    def getimg(self, pagenum):
        """Return the image addresses (without scheme) found on a page.

        Args:
            pagenum: page number as a string.

        Returns:
            A list of strings such as ``'host/path/name.jpg'``.
        """
        html = self.getpage(pagenum)
        # On Python 3 the body is bytes; decode it so the text pattern
        # applies and the matches can be concatenated with "https://".
        # On Python 2 bytes is str, so this branch is never taken.
        if isinstance(html, bytes) and not isinstance(html, str):
            html = html.decode('utf-8', 'ignore')
        return self._img_link.findall(html)

    def saveimg(self, pagenum):
        """Download every image of a page into the current directory.

        An image that cannot be downloaded is reported on stderr and
        skipped, so one broken link does not abort the rest of the page.

        Args:
            pagenum: page number as a string.
        """
        for img_url in self.getimg(pagenum):
            try:
                data = self._fetch("https://" + img_url)
            except IOError as err:  # URLError/HTTPError and socket errors
                sys.stderr.write('skipping %s: %s\n' % (img_url, err))
                continue
            # The number is reserved only after a successful download, so
            # failed images leave no gap in the numbering.
            with open(self._next_filename(img_url), "wb") as f:
                f.write(data)


def main():
    """Crawl pages 2030-2079, one worker thread per page."""
    mz = jdmz()
    print('starting')
    workers = []
    for page in range(2030, 2080):  # range of pages to crawl
        # For a sequential crawl, call mz.saveimg(str(page)) here instead.
        th = threading.Thread(target=mz.saveimg, args=(str(page),))
        th.start()
        workers.append(th)
    for th in workers:
        th.join()


if __name__ == '__main__':
    main()