Loading

Python笔记——多线程与队列

在python中内置多线程threading 与queue队列模块

Python 官方解释器是 CPython, 它带有一把全局解释器锁(GIL): 同一进程内同一时刻只有一个线程能执行 Python 字节码, 因此多线程无法利用多核做并行计算, 常被称为"假的多线程"; 但线程在等待 I/O 时会释放 GIL, 所以对于 I/O 密集型操作还是可以有很大提升

import threading  # 导入多线程模块
import time

多线程的简单使用:

——针对于函数调用, target指定函数名, args指定函数参数[列表形式]

def first_thread(repeat=3, delay=1):
    """Print the current thread object several times, pausing between prints.

    Args:
        repeat: Number of lines to print. Defaults to 3.
        delay: Seconds to sleep after each print. Defaults to 1.
    """
    for _ in range(repeat):
        # threading.current_thread() is the supported spelling; the camelCase
        # alias currentThread() is deprecated since Python 3.10.
        print('第一个函数的', threading.current_thread())
        time.sleep(delay)


def second_thread(n):
    """Print the loop counter 0 .. n-1, sleeping one second after each line.

    Args:
        n: Number of iterations to run.
    """
    label = '第二个函数的'
    for step in range(n):
        print(label, step)
        # One-second pause between lines.
        time.sleep(1)


def main():
    """Run first_thread and second_thread(3) on two threads, then list threads."""
    # target= names the callable to run; args= carries its positional arguments.
    workers = [
        threading.Thread(target=first_thread),
        threading.Thread(target=second_thread, args=[3]),
    ]
    # Start both threads.
    for worker in workers:
        worker.start()
    # Print the list returned by threading.enumerate().
    print(threading.enumerate())


if __name__ == '__main__':
    main()

继承多线程类写法:

——定义继承threading.Thread类的类对象, 重写run方法

class FirstThread(threading.Thread):
    """Thread subclass whose run() prints the current thread object repeatedly."""

    # Number of lines run() prints; override on the class or on an instance.
    repeat = 3
    # Seconds run() sleeps after each print.
    delay = 1

    def run(self):
        """Print the current thread ``repeat`` times, sleeping ``delay`` seconds each."""
        for _ in range(self.repeat):
            # threading.current_thread() is the supported spelling; the
            # camelCase alias currentThread() is deprecated since Python 3.10.
            print('第一个函数的', threading.current_thread())
            time.sleep(self.delay)


class SecondThread(threading.Thread):
    """Thread subclass whose run() prints the counter 0, 1, 2, one per second."""

    def run(self):
        """Print three numbered lines, sleeping one second after each."""
        step = 0
        while step < 3:
            print('第二个函数的', step)
            time.sleep(1)
            step += 1


def main():
    """Create one FirstThread and one SecondThread, then start both."""
    workers = (FirstThread(), SecondThread())
    for worker in workers:
        worker.start()


if __name__ == '__main__':
    main()

多线程锁:

当在函数中对全局变量修改时,或者写入数据时, 应当加锁, 防止数据混乱

# Shared counter that every worker thread increments.
count = 0
# Lock guarding updates to `count`.
thread_lock = threading.Lock()


# Without the lock, two threads running this loop at the same time can
# interleave their `count += 1` updates and produce a wrong total.
def add_count():
    """Add 1,000,000 to the global ``count`` while holding ``thread_lock``.

    Prints the value of ``count`` after this call's increments are done.
    """
    global count
    # The `with` statement releases the lock even if the body raises, which
    # the bare acquire()/release() pair did not guarantee.
    with thread_lock:
        for _ in range(1000000):
            count += 1
        print(count)


def main():
    """Start two threads that each run add_count."""
    for _ in range(2):
        worker = threading.Thread(target=add_count)
        worker.start()


if __name__ == '__main__':
    main()

Queue队列 & 多线程生产者与消费者:

queue队列:线程安全的队列(类似线程加锁)

from queue import Queue

# Create a queue with a maximum size of 10.
q = Queue(maxsize=10)

# Print whether the queue is empty, then whether it is full.
for probe in (q.empty, q.full):
    print(probe())

# Put one element on the queue, then take it back off and print it.
q.put(1)
print(q.get())

将线程分工明确: 生产者线程可以负责数据的抓取, 消费者线程可以负责数据的提取和持久化

# -*- coding: utf-8 -*-
# @Author  : LuoXian
# @Date    : 2020/2/15 22:21
# Software : PyCharm
# version: Python 3.8
# @File    : 爬取表情包多线程队列.py
import requests
import re
import threading
from queue import Queue


# 定义生产者,负责获取图片的url地址和名称
class Producer(threading.Thread):
    """Producer thread: fetches listing pages and queues image download jobs.

    Each URL taken from ``page_queue`` is downloaded, every
    ``data-original``/``alt`` pair found in the page text is turned into a
    ``(src, path)`` tuple, and that tuple is put on ``img_queue``.
    """

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36',
    }
    # Compiled once for the class instead of once per fetched page.
    _IMG_PATTERN = re.compile('data-original="(.*?)" alt="(.*?)"')

    def __init__(self, page_queue, img_queue, *args, **kwargs):
        """Store both queues; remaining arguments go to ``threading.Thread``."""
        super().__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue

    def run(self):
        """Process page URLs until ``page_queue`` has none left."""
        # Local import: the file-level import only brings in Queue.
        from queue import Empty

        while True:
            # A non-blocking get avoids the race in "check empty(), then
            # get()": another producer could take the last URL between the
            # two calls and leave this thread blocked forever.
            try:
                url = self.page_queue.get_nowait()
            except Empty:
                break
            r = requests.get(url, headers=self.headers)
            for src, title in self._IMG_PATTERN.findall(r.text):
                # Build "表情包/<title>.<ext>" and drop the characters
                # ? * " : from it.
                path = re.sub(r'[\??\*\":]', '', f'表情包/{title}.{src.split(".")[-1]}')
                # Hand the job to the consumers.
                self.img_queue.put((src, path))


# 定义消费者线程, 负责保存图片
class Consumer(threading.Thread):
    """Consumer thread: downloads each queued image and writes it to disk.

    Items on ``img_queue`` are ``(src, path)`` tuples; ``src`` is fetched and
    its bytes are written to ``path``.
    """

    def __init__(self, page_queue, img_queue, *args, idle_timeout=3.0, **kwargs):
        """Store both queues; remaining arguments go to ``threading.Thread``.

        Args:
            page_queue: Queue of page URLs, checked only to decide when to stop.
            img_queue: Queue of ``(src, path)`` jobs to process.
            idle_timeout: Seconds to wait for a job before checking whether
                the page queue is exhausted.
        """
        super().__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue
        self.idle_timeout = idle_timeout

    def run(self):
        """Save images until no job arrives in time and no pages remain."""
        # Local imports: neither name is imported at file level.
        import os
        from queue import Empty

        while True:
            # A timed get replaces "check both queues, then block on get()".
            # That version could exit while producers were still downloading,
            # or block forever once another consumer took the last job.
            try:
                src, path = self.img_queue.get(timeout=self.idle_timeout)
            except Empty:
                if self.page_queue.empty():
                    # NOTE: a producer slower than idle_timeout on its final
                    # page can still deliver after this thread has exited.
                    break
                continue
            # Create the target directory so open() does not fail on first use.
            directory = os.path.dirname(path)
            if directory:
                os.makedirs(directory, exist_ok=True)
            r = requests.get(src).content
            with open(path, 'wb') as f:
                f.write(r)
            print(path, '       Done...')


def main():
    """Queue listing pages 1-9 and start five producers and five consumers."""
    # Page queue holds up to 100 items, image queue up to 1000.
    page_queue = Queue(100)
    img_queue = Queue(1000)
    # Queue the listing-page URLs.
    for page in range(1, 10):
        page_queue.put(f'http://www.doutula.com/photo/list/?page={page}')
    # Start the five producers first, then the five consumers.
    for worker_cls in (Producer, Consumer):
        for _ in range(5):
            worker = worker_cls(page_queue, img_queue)
            worker.start()


if __name__ == '__main__':
    main()

posted @ 2020-02-16 13:08  luoxian  阅读(34)  评论(0编辑  收藏  举报