多线程下载图片

# -*- coding:utf8 -*-
from bs4 import BeautifulSoup
import os, sys, urllib2, urllib
import thread, threading
class downloader(threading.Thread):
    """Thread that downloads a single URL to a local file.

    Attributes:
        url: Address of the resource to fetch.
        name: Destination file path. Note that ``threading.Thread`` exposes
            ``name`` as the thread's name, so this assignment also renames
            the thread to the destination path.
    """

    def __init__(self, url, name):
        # Thread.__init__ must run first: assigning ``name`` afterwards goes
        # through the Thread name property, which needs an initialised thread.
        threading.Thread.__init__(self)
        self.url = url
        self.name = name

    def run(self):
        """Fetch ``self.url`` and store it at ``self.name``.

        A failed download is reported together with its URL instead of
        escaping the thread as an anonymous traceback.
        """
        print('downloading from %s' % self.url)
        try:
            urllib.urlretrieve(self.url, self.name)
        except IOError as err:
            print('failed to download %s: %s' % (self.url, err))
# Every downloader thread started by page_loop() is appended to this list.
threads=[]
def page_loop(page=1):
    """Crawl the gallery page by page and download every JPEG image found.

    Starting at ``page``, each gallery page is fetched and parsed, and one
    ``downloader`` thread is started per ``<img>`` whose ``src`` contains
    ``jpg``. Files are saved under ``dbmeizi/`` and numbered with the
    module-level counter ``x``; started threads are recorded in the
    module-level ``threads`` list.

    Args:
        page: Page number to start from (anything ``int()`` accepts).

    Raises:
        SystemExit: Always, with status 0, once a page with fewer than five
            ``<img>`` tags is reached. This is the end-of-gallery heuristic,
            so the function never returns normally.
    """
    global x
    # Iterate instead of recursing once per page, so a long gallery cannot
    # exhaust the interpreter's recursion limit.
    while True:
        url = 'http://www.beautylegmm.com/Tiara/beautyleg-936.html?page=%s' % page
        content = urllib2.urlopen(url)
        try:
            soup = BeautifulSoup(content)
        finally:
            # Release the HTTP connection as soon as the page is parsed.
            content.close()
        my_girl = soup.find_all('img')
        # End detection: a page with only a handful of images is treated as
        # being past the last gallery page.
        if len(my_girl) < 5:
            print('已经全部抓取完毕')
            sys.exit(0)
        print('开始抓取')
        # urlretrieve does not create missing directories.
        if not os.path.isdir('dbmeizi'):
            os.makedirs('dbmeizi')
        for girl in my_girl:
            link = girl.get('src')
            # Skip tags without a src and non-JPEG images; otherwise the
            # previous image's URL would be reused (or be undefined).
            if not link or 'jpg' not in link:
                continue
            flink = 'http://www.beautylegmm.com' + link
            print(flink)
            path = 'dbmeizi' + '/' + str(x) + flink[-4:]
            x = x + 1
            t = downloader(flink, path)
            threads.append(t)
            t.start()

        page = int(page) + 1
        print('开始抓取下一页')
        print('the %s page' % page)
   
# Running counter used by page_loop() to number the saved image files.
x = 1
# Start crawling from the default first page.
page_loop()

来自为知笔记(Wiz)

posted @ 2015-02-06 16:34 阳光树林阅读(251) 评论(0) 收藏举报

刷新页面返回顶部

多线程下载图片

公告