Python笔记——多线程与队列
在python中内置多线程threading 与queue队列模块
Python官方默认的解释器是CPython, 它带有一把全局解释器锁(GIL): 同一进程内同一时刻只有一个线程能执行Python字节码, 无法利用多核实现真正的并行, 所以常被称为"假的多线程"; 但线程在等待I/O时会释放GIL, 因此对I/O密集型操作还是可以有很大提升
import threading # 导入多线程模块
import time
多线程的简单使用:
——针对于函数调用, target指定函数名, args指定函数参数[列表形式]
def first_thread():
    """Print the current thread object three times, pausing 1 s after each."""
    for _ in range(3):
        # current_thread() is the supported spelling; the camelCase alias
        # currentThread() is deprecated since Python 3.10.
        print('第一个函数的', threading.current_thread())
        time.sleep(1)
def second_thread(n):
    """Print the indices ``0 .. n-1``, sleeping one second after each.

    Args:
        n: Number of iterations; nothing is printed when ``n <= 0``.
    """
    for step in range(n):
        print('第二个函数的', step)
        time.sleep(1)
def main():
    """Run both demo functions in their own threads and list live threads."""
    # target= names the callable; its positional arguments go in args=.
    workers = [
        threading.Thread(target=first_thread),
        threading.Thread(target=second_thread, args=[3]),
    ]
    # Start the threads in creation order.
    for worker in workers:
        worker.start()
    # Show every Thread object that is currently alive.
    print(threading.enumerate())


if __name__ == '__main__':
    main()
继承多线程类写法:
——定义继承threading.Thread类的子类, 重写run方法
class FirstThread(threading.Thread):
    """Thread subclass whose work is defined by overriding ``run``."""

    def run(self):
        """Print the current thread object three times, 1 s apart."""
        for _ in range(3):
            # current_thread() replaces the deprecated camelCase alias
            # currentThread() (deprecated since Python 3.10).
            print('第一个函数的', threading.current_thread())
            time.sleep(1)
class SecondThread(threading.Thread):
    """Thread subclass that prints a short counter from its ``run`` method."""

    def run(self):
        """Print the indices 0, 1 and 2, sleeping one second after each."""
        for step in range(3):
            print('第二个函数的', step)
            time.sleep(1)
def main():
    """Instantiate both Thread subclasses and start them in order."""
    workers = (FirstThread(), SecondThread())
    for worker in workers:
        worker.start()


if __name__ == '__main__':
    main()
多线程锁:
当在函数中对全局变量修改时,或者写入数据时, 应当加锁, 防止数据混乱
# Shared counter that every worker thread increments.
count = 0
# Lock that serializes all updates of the shared counter.
thread_lock = threading.Lock()


# Without the lock, a second thread started before add_count finishes
# could interleave its increments and corrupt the total.
def add_count():
    """Add 1,000,000 to the global ``count`` and print the new total.

    The whole loop and the print run while holding ``thread_lock``. The
    ``with`` statement releases the lock even if the body raises, which a
    bare acquire()/release() pair does not.
    """
    global count
    with thread_lock:
        for _ in range(1000000):
            count += 1
        print(count)
def main():
    """Launch two threads that each run ``add_count``."""
    for _ in range(2):
        worker = threading.Thread(target=add_count)
        worker.start()


if __name__ == '__main__':
    main()
Queue队列 & 多线程生产者与消费者:
queue队列:线程安全的队列(类似线程加锁)
from queue import Queue
# Build a queue that holds at most 10 items.
q = Queue(maxsize=10)
# empty() reports True while the queue holds no items.
print(q.empty())
# full() reports True once the queue has reached its maximum size.
print(q.full())
# Append one element to the queue.
q.put(1)
# Remove the oldest element from the queue and show it.
print(q.get())
将线程分工明确: 生产者线程负责数据的抓取, 消费者线程负责数据的提取和持久化
# -*- coding: utf-8 -*-
# @Author : LuoXian
# @Date : 2020/2/15 22:21
# Software : PyCharm
# version: Python 3.8
# @File : 爬取表情包多线程队列.py
import requests
import re
import threading
from queue import Queue
# Producer thread: collects image URLs and target file paths from list pages.
class Producer(threading.Thread):
    """Worker thread that turns list-page URLs into ``(src, path)`` jobs.

    Each instance repeatedly takes a page URL from ``page_queue``, downloads
    the page with ``requests``, extracts every ``data-original``/``alt``
    attribute pair with a regular expression and puts one
    ``(image_url, file_path)`` tuple per match on ``img_queue``.
    """

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36',
    }
    # Compiled once for the class instead of once per downloaded page.
    _img_pattern = re.compile('data-original="(.*?)" alt="(.*?)"')
    # Characters removed from the generated file path.
    _illegal_chars = re.compile(r'[\??\*\":]')

    def __init__(self, page_queue, img_queue, *args, **kwargs):
        """Store the two queues; extra arguments go to ``threading.Thread``.

        Args:
            page_queue: Queue of list-page URLs to download.
            img_queue: Queue receiving ``(image_url, file_path)`` tuples.
        """
        super().__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue

    def run(self):
        """Process page URLs until ``page_queue`` has none left."""
        # Imported here because the file only imports ``Queue`` from queue.
        from queue import Empty

        while True:
            # A separate empty() check followed by a blocking get() lets
            # another producer take the last URL in between, leaving this
            # thread blocked forever. get_nowait() does both atomically.
            try:
                url = self.page_queue.get_nowait()
            except Empty:
                break
            r = requests.get(url, headers=self.headers)
            for src, title in self._img_pattern.findall(r.text):
                # File name is the alt text plus the URL's extension.
                path = self._illegal_chars.sub(
                    '', f'表情包/{title}.{src.split(".")[-1]}')
                # Hand the job over to the consumer threads.
                self.img_queue.put((src, path))
# Consumer thread: downloads each queued image and saves it to disk.
class Consumer(threading.Thread):
    """Worker thread that downloads ``(src, path)`` jobs from ``img_queue``.

    The thread stops once no image job arrives within ``poll_timeout``
    seconds and ``page_queue`` is empty.

    NOTE(review): a producer that is still downloading its last page when
    the timeout elapses is not detected, so a consumer can still stop
    early in that window; closing it needs an explicit "producers done"
    signal from the code that starts the threads.
    """

    # Seconds to wait for an image job before re-checking for shutdown.
    poll_timeout = 1.0

    def __init__(self, page_queue, img_queue, *args, **kwargs):
        """Store the two queues; extra arguments go to ``threading.Thread``.

        Args:
            page_queue: Queue of list-page URLs; only checked for emptiness.
            img_queue: Queue supplying ``(image_url, file_path)`` tuples.
        """
        super().__init__(*args, **kwargs)
        self.page_queue = page_queue
        self.img_queue = img_queue

    def run(self):
        """Download and save images until both queues have drained."""
        # Imported here because the file imports neither name at the top.
        import os
        from queue import Empty

        while True:
            # A blocking get() after a separate empty() check can hang
            # forever when another consumer takes the last job or no more
            # images are produced. A timed get() returns so the shutdown
            # condition can be checked again.
            try:
                src, path = self.img_queue.get(timeout=self.poll_timeout)
            except Empty:
                if self.page_queue.empty():
                    break
                continue
            # open() does not create missing directories.
            folder = os.path.dirname(path)
            if folder:
                os.makedirs(folder, exist_ok=True)
            r = requests.get(src).content
            with open(path, 'wb') as f:
                f.write(r)
            print(path, ' Done...')
def main():
    """Queue nine list pages, then start five producers and five consumers."""
    # Bounded queues shared by all worker threads.
    page_queue = Queue(100)
    img_queue = Queue(1000)
    # Enqueue the URLs of list pages 1 through 9.
    for page_no in range(1, 10):
        page_queue.put(f'http://www.doutula.com/photo/list/?page={page_no}')
    # Start all five producers first, then all five consumers.
    for worker_cls in (Producer, Consumer):
        for _ in range(5):
            worker = worker_cls(page_queue, img_queue)
            worker.start()


if __name__ == '__main__':
    main()