生产者和消费者模型

一.锁机制

1.普通锁

import threading,random,time
gMoney=1000  # shared balance that Producer adds to and Consumer subtracts from
gTotalTimes=10  # threshold that gTtimes is compared against by both thread classes
gTtimes=0  # counter incremented by Producer after each deposit
gLock=threading.Lock()  # acquired by both thread classes around access to the globals above

class Producer(threading.Thread):
    """Thread that repeatedly deposits a random amount into the shared balance.

    On every pass a random amount in [100, 1000] is drawn, then, while
    holding ``gLock``, it is added to ``gMoney`` and ``gTtimes`` is
    incremented. The thread sleeps 0.5 s between passes and stops once
    ``gTtimes`` has reached ``gTotalTimes``.
    """

    def run(self):
        global gMoney
        global gTtimes
        while True:
            money=random.randint(100,1000)
            # The with-block releases the lock on every exit path (break,
            # normal fall-through, or an exception raised inside it).
            with gLock:
                if gTtimes>=gTotalTimes:
                    break
                gMoney+=money
                # current_thread() replaces the deprecated currentThread() alias.
                print("%s挣了%d元钱,剩余%d元钱"%(threading.current_thread(),money,gMoney))
                gTtimes+=1
            time.sleep(0.5)

class Consumer(threading.Thread):
    """Thread that repeatedly tries to spend a random amount of the shared balance.

    On every pass a random amount in [100, 1000] is drawn. While holding
    ``gLock`` the amount is subtracted from ``gMoney`` if the balance covers
    it. If it does not, the thread exits when ``gTtimes`` has reached
    ``gTotalTimes``; otherwise it reports the shortfall. The thread sleeps
    0.5 s between passes.
    """

    def run(self):
        global gMoney
        while True:
            money=random.randint(100,1000)
            # The with-block guarantees the lock is released on break and on
            # any exception, replacing the duplicated manual release() calls.
            with gLock:
                if gMoney>=money:
                    gMoney-=money
                    print("%s消费了%d元钱,剩余%d元钱"%(threading.current_thread(),money,gMoney))
                elif gTtimes>=gTotalTimes:
                    # Balance too low and the production cap has been reached.
                    break
                else:
                    print("%s准备消费%d元钱,但是余额不足"%(threading.current_thread(),money))
            time.sleep(0.5)

def main():
    """Start three Consumer threads, then five Producer threads."""
    launch_plan=(
        (Consumer,"消费者线程%d",3),
        (Producer,"生产者线程%d",5),
    )
    for worker_cls,name_pattern,howmany in launch_plan:
        for idx in range(howmany):
            worker=worker_cls(name=name_pattern%idx)
            worker.start()


if __name__=="__main__":
    main()

2.Condition

import threading
import random
from time import sleep


ct = threading.Condition()  # condition variable used by both thread classes below

all_money = 1000  # starting balance: 1000
count = 10  # producers may produce only ten times in total


class producers(threading.Thread):
    """Producer thread for the Condition-based demo.

    While ``count`` is positive, each pass adds a random amount in
    [200, 1000] to ``all_money``, decrements ``count`` and wakes every
    thread waiting on ``ct``. The thread sleeps 0.5 s between passes and
    exits once ``count`` is no longer positive.
    """
    def run(self):
        global all_money
        global count

        while True:
            # Using the condition as a context manager releases it on every
            # exit path, including break and exceptions.
            with ct:
                if count <= 0:  # production quota used up
                    break
                money = random.randint(200,1000)
                all_money += money
                count -= 1
                print('生产者%s生产了%d元,剩余金钱%d元' % (threading.current_thread(), money, all_money))
                # Wake all waiting consumers so they can re-check the balance.
                ct.notify_all()
            sleep(0.5)


class comsumer(threading.Thread):
    """Consumer thread for the Condition-based demo.

    Each pass draws a random amount in [200, 1000] and, while holding
    ``ct``, waits until ``all_money`` covers it before subtracting it.
    If the balance is insufficient and ``count`` is 0, the thread exits.
    The thread sleeps 0.5 s between successful purchases.
    """
    def run(self):
        global all_money

        while True:
            # Using the condition as a context manager releases it on every
            # exit path, including the early return and exceptions.
            with ct:
                money = random.randint(200,1000)
                # Must be a loop, not an if: after wait() returns, the balance
                # is re-checked because it may still be (or again be) too low.
                while money > all_money:
                    if count == 0:  # no production left to wait for
                        return
                    print('消费者%s需要消费%d元,剩余金钱%d元,不足' % (threading.current_thread(), money, all_money))
                    ct.wait()  # releases ct while blocked, re-acquires before returning
                all_money -= money
                print('消费者%s消费了%d元,剩余金钱%d元' % (threading.current_thread(), money, all_money))
            sleep(0.5)


if __name__ == '__main__':
    # Start the three consumer threads first, then the five producer threads.
    for worker_cls, total in ((comsumer, 3), (producers, 5)):
        for idx in range(total):
            worker = worker_cls(name='线程%d'%idx)
            worker.start()

 3.Queue

import threading
from queue import Queue
import time


def set_value(qu):
    """Put 0, 1, 2, ... onto ``qu`` forever, pausing after each put.

    After every ``qu.put`` the function sleeps for two seconds and prints
    how long the pause actually took. It never returns on its own.
    """
    next_item = 0
    while 1:
        qu.put(next_item)
        next_item += 1
        began = time.time()
        time.sleep(2)  # two-second gap between consecutive puts
        elapsed = time.time() - began
        print('阻塞时间为:',elapsed,'秒threading.Thread')


def get_value(qu):
    """Print every item obtained from ``qu.get()`` in an endless loop.

    Never returns on its own; each pass calls ``qu.get()`` once and prints
    the result.
    """
    while 1:
        item = qu.get()
        print('数据:',item)


if __name__ == '__main__':
    # One queue with maxsize 4 is handed to both worker threads.
    qu = Queue(4)
    workers = [
        threading.Thread(target=worker_fn,args=[qu])
        for worker_fn in (set_value, get_value)
    ]

    # Start the thread running set_value first, then the one running get_value.
    for worker in workers:
        worker.start()

 

二.多线程下载表情包

1.不用多线程

import requests,re
from lxml import etree
from urllib import request
def get_page(url):
    """Download the images listed on one page into the ``imgs/`` directory.

    The page at ``url`` is fetched with a browser User-Agent, parsed with
    lxml, and every ``<img>`` matched by the XPath below (class attribute
    different from ``"gif"``) is saved as ``imgs/<alt>.<suffix>``.

    Args:
        url: Address of the list page to scrape.

    Images without a ``data-original`` attribute are skipped, and a missing
    ``alt`` attribute is treated as an empty name, instead of crashing.
    """
    import os  # local import: this script's import line does not provide os

    header={
        'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    response=requests.get(url,headers=header)
    html=etree.HTML(response.text)
    imgs=html.xpath('//div[@class="page-content text-center"]//a[@class="col-xs-6 col-sm-3"]//img[@class!="gif"]')
    # urlretrieve does not create missing directories, so make sure it exists.
    os.makedirs("imgs",exist_ok=True)
    for img in imgs:
        img_url=img.get("data-original")  # image URL; None when the attribute is absent
        if not img_url:
            continue
        suffix=img_url.split(".")[-1]  # text after the last dot of the URL
        alt=img.get("alt") or ""  # image name; empty when the attribute is absent
        alt=re.sub(r'[\?\.\*,!!?。]',"",alt)  # strip punctuation from the name
        filename=alt+"."+suffix
        request.urlretrieve(img_url,"imgs/"+filename)

def main():
    """Scrape list pages 0 through 9, one after another."""
    url_pattern="http://www.doutula.com/photo/list/?page=%s"
    for page_no in range(10):
        page_url=url_pattern % page_no
        get_page(page_url)


if __name__=="__main__":
    main()

 2.利用多线程

 main()

  • 定义两个队列,和创建多线程
  • page_queue():存放每一页的url
  • img_queue():存放每一页里面所有的表情的url

Producer()

  • 从page_queue()队列中取出每一页的url,直到队列为空则break
  • 用xpath提取出每一页的所有图片的url
  • 把每个图片的url和名字存放到img_queue()队列里面

Consumer()

  • 从img_queue()队列中取出图片的url和名字
  • 下载保存
  • 直到page_queue()和img_queue()两个队列都为空则break
import requests
from lxml import etree
from urllib import request
import os
import re
import threading
from queue import Queue

class Producer(threading.Thread):
    """Thread that turns page URLs into (image URL, filename) work items.

    Page URLs are taken from ``page_queue``; each page is fetched and parsed,
    and one ``(img_url, filename)`` tuple per matched image is put onto
    ``img_queue``. The thread ends when ``page_queue`` has no more items.

    Args:
        page_queue: Queue of page URLs to scrape.
        img_queue: Queue receiving ``(img_url, filename)`` tuples.
        *args, **kwargs: Forwarded unchanged to ``threading.Thread``.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36',
        'Referer': 'https://movie.douban.com/'
    }

    def __init__(self, page_queue, img_queue, *args, **kwargs):
        super(Producer, self).__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue

    def run(self):
        """Process page URLs until ``page_queue`` is drained."""
        from queue import Empty  # local import: the file only imports Queue

        while True:
            # get_nowait() combines the emptiness check and the fetch into a
            # single step. A separate empty() check followed by a blocking
            # get() lets another thread take the last URL in between, leaving
            # this thread blocked forever.
            try:
                url = self.page_queue.get_nowait()
            except Empty:
                break
            self.parse_page(url)

    def parse_page(self,url):
        """Fetch ``url`` and queue every matched image found on the page."""
        response = requests.get(url,headers=self.headers)
        html = etree.HTML(response.text)
        imgs = html.xpath("//div[@class='page-content text-center']//img[@class!='gif']")
        for img in imgs:
            # Image address; skip elements that do not carry one.
            img_url = img.get('data-original')
            if not img_url:
                continue
            # Image name; a missing alt attribute becomes an empty name.
            alt = img.get('alt') or ''
            # Strip punctuation from the name.
            alt = re.sub(r'[\??\.,。!!\*]','',alt)
            # File extension taken from the image URL (for example .gif or .jpg).
            suffix = os.path.splitext(img_url)[1]
            # Full file name used when the image is saved.
            filename = alt + suffix
            self.img_queue.put((img_url,filename))


class Consumer(threading.Thread):
    """Thread that downloads the images queued by the producers.

    ``(img_url, filename)`` tuples are taken from ``img_queue`` and saved
    under ``save_dir``. The thread ends when ``img_queue`` has stayed empty
    for ``poll_timeout`` seconds and ``page_queue`` is empty as well.

    Args:
        page_queue: Queue of page URLs still waiting to be scraped.
        img_queue: Queue of ``(img_url, filename)`` tuples to download.
        save_dir: Keyword-only target directory; defaults to the path the
            original script used.
        *args, **kwargs: Forwarded unchanged to ``threading.Thread``.

    NOTE(review): an empty ``page_queue`` does not prove that every producer
    has finished parsing its last page, so a page that takes longer than
    ``poll_timeout`` to parse could still be missed. A sentinel or join-based
    shutdown would close that gap.
    """

    # Seconds to block on an empty img_queue before re-checking page_queue.
    poll_timeout = 1.0

    def __init__(self,page_queue,img_queue,*args,
                 save_dir='C:/Users/Administrator/Desktop/images/',**kwargs):
        super(Consumer, self).__init__(*args,**kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue
        self.save_dir = save_dir

    def run(self):
        """Download queued images until both queues have run dry."""
        from queue import Empty  # local import: the file only imports Queue

        # urlretrieve does not create missing directories.
        os.makedirs(self.save_dir, exist_ok=True)
        while True:
            # A timed get() cannot hang forever the way empty() followed by a
            # blocking get() can when another consumer takes the last item.
            try:
                img_url,filename = self.img_queue.get(timeout=self.poll_timeout)
            except Empty:
                if self.page_queue.empty():
                    break
                continue
            try:
                request.urlretrieve(img_url, os.path.join(self.save_dir, filename))
            except OSError as err:
                # One failed download must not end the whole worker thread.
                print("download failed: %s (%s)" % (img_url, err))
                continue
            print("已下载完一张图片")


def main():
    """Queue every list-page URL, then start ten producers and ten consumers.

    The page queue is sized to hold all page URLs. It is filled completely
    before any worker thread starts, so a smaller bound (the original used
    1000 for 1757 URLs) makes ``put()`` block forever on a full queue.
    """
    first_page = 1
    last_page = 1757
    page_queue = Queue(last_page - first_page + 1)
    img_queue = Queue(10000)

    for x in range(first_page, last_page + 1):
        url = 'http://www.doutula.com/photo/list/?page=%d'%x
        page_queue.put(url)

    for x in range(10):
        t = Producer(page_queue,img_queue)
        t.start()

    for x in range(10):
        t = Consumer(page_queue,img_queue)
        t.start()

if __name__ == '__main__':
    main()

 

posted @ 2018-08-21 18:11  从此重新定义啦  阅读(297)  评论(0)  编辑  收藏  举报