【一】threading模块介绍
- 多线程创建和多进程创建很像
- 我的理解是:multiprocessing模块的API是仿照threading模块设计的(两者遵循鸭子类型,接口保持一致)
- 所以threading和multiprocessing模块的使用方法才会那么像
【二】开启线程的两种方式
方式一
from threading import Thread
import time
def task(name):
    """Simulate a two-second job, printing a message before and after it.

    Args:
        name: Label interpolated into the start and end messages.
    """
    print(f'{name}任务开始')
    time.sleep(2)  # stands in for blocking work
    print(f'{name}任务结束')
if __name__ == '__main__':
    # Way 1: hand the callable and its arguments to Thread directly.
    t = Thread(target=task, args=('学习',))
    t.start()
    # start() returns immediately; the main thread does not wait for task.
    print('主线程')
方式二
from threading import Thread
import time
class MyThread(Thread):
    """Thread subclass whose work is defined by overriding run().

    Args:
        name: Thread name; it is also used in the printed messages.
    """

    def __init__(self, name):
        # Let Thread store the name instead of assigning it afterwards.
        super().__init__(name=name)

    def run(self) -> None:
        """Print a start message, sleep two seconds, print an end message."""
        print(f'{self.name}任务开始')
        time.sleep(2)
        print(f'{self.name}任务结束')
if __name__ == '__main__':
    # Way 2: instantiate the Thread subclass; start() invokes run() in a
    # new thread.
    t = MyThread(name='打篮球')
    t.start()
    print('主线程')
- 用法基本和multiprocessing模块,也就是创建进程的方式一模一样
【三】查看ID
- 用threading模块里面的current_thread方法
from threading import Thread,current_thread
import time
def task(name):
    """Print start/end messages and the native ID of the running thread.

    Args:
        name: Label interpolated into the start and end messages.
    """
    print(f'{name}任务开始')
    # native_id is the thread ID reported by current_thread()
    print(current_thread().native_id)
    time.sleep(2)
    print(f'{name}任务结束')
if __name__ == '__main__':
    # Run task in a child thread so it prints that thread's native ID.
    t = Thread(target=task, args=('学习',))
    t.start()
    print('主线程')
from threading import Thread
import time
from os import getpid
def task(name):
    """Print start/end messages and the ID of the process running the thread.

    Args:
        name: Label interpolated into the start and end messages.
    """
    print(f'{name}任务开始')
    print(getpid())
    time.sleep(2)
    print(f'{name}任务结束')
if __name__ == '__main__':
    # Run task in a child thread so it prints the process ID from there.
    t = Thread(target=task, args=('学习',))
    t.start()
    print('主线程')
【四】多线程并发的socket服务端
from threading import Thread
import socket
from socket import SOL_SOCKET, SO_REUSEADDR
# Module-level listening TCP socket used by the accept loop.
server = socket.socket(family=socket.AF_INET, type=socket.SOCK_STREAM)
# Set SO_REUSEADDR before bind() so the address can be reused.
server.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
server.bind(('127.0.0.1', 8080))
server.listen(5)  # backlog of 5
def talk(conn):
    """Echo upper-cased UTF-8 messages back to one client until it leaves.

    The loop ends when the peer closes the connection or any exception is
    raised; the connection is always closed before returning.

    Args:
        conn: Connected socket for a single client.
    """
    try:
        while True:
            data = conn.recv(1024)
            if not data:
                # recv() returns b'' once the peer has closed the
                # connection; without this check the loop never ends.
                break
            msg = data.decode('utf-8')
            print(f'客户端:>>>>{msg}')
            conn.sendall(msg.upper().encode('utf-8'))
    except Exception as error:
        # Report the error and stop serving this client.
        print(error)
    finally:
        conn.close()
def threading_version(conn):
    """Start a new thread that runs talk() for the given connection.

    Args:
        conn: Connected client socket passed through to talk().
    """
    t = Thread(target=talk, args=(conn,))
    t.start()
def main_threading():
    """Accept clients forever, handing each connection to its own thread."""
    while True:
        # The peer address is not used.
        conn, _addr = server.accept()
        threading_version(conn=conn)
if __name__ == '__main__':
    # Start the threaded accept loop when run as a script.
    main_threading()
from socket import *
# With no arguments, socket() creates a TCP socket by default.
# (1) Create the client socket.
client = socket()
# (2) Connect to the server's IP and port.
IP = '127.0.0.1'
PORT = 8080
client.connect((IP, PORT))
# (3) Communication loop.
while True:
    # (3.1) Read a message from the user; blank input is not sent.
    msg_to_server = input('请输入消息:>>>>').strip()
    if not msg_to_server:
        continue
    client.send(msg_to_server.encode('utf-8'))
    # (3.2) Receive the data returned by the server.
    data_from_server = client.recv(1024)
    if not data_from_server:
        # b'' means the server closed the connection; stop instead of
        # prompting again on a dead socket.
        break
    print(data_from_server.decode('utf-8'))
    # 'q' is sent and answered like any other message before quitting.
    if msg_to_server == 'q':
        break
client.close()
【五】单进程,多进程,多线程速度比较
import requests
import os
import time
from lxml import etree
from multiprocessing import Process
from threading import Thread
def timer(func):
    """Decorator that prints the wall-clock duration of each call to func.

    Args:
        func: Callable to wrap.

    Returns:
        A wrapper that forwards all arguments to func, prints the elapsed
        time, and returns func's result.
    """
    from functools import wraps

    @wraps(func)  # keep func's __name__ and __doc__ on the wrapper
    def inner(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        print(f'总耗时{time.time() - start_time}秒')
        # The original dropped this value, so decorated functions
        # always returned None.
        return result
    return inner
def create_url_list(pages=5):
    """Build the listing-page URLs to crawl.

    Page 1 is the bare category URL; later pages use index_<n>.html.

    Args:
        pages: Number of listing pages to include, starting from page 1.
            Defaults to 5, the previously hard-coded count.

    Returns:
        List of page URLs in page order.
    """
    base_url = 'https://pic.netbian.com/4kmeinv/'
    return [
        base_url if page == 1 else f'{base_url}index_{page}.html'
        for page in range(1, pages + 1)
    ]
def spider_data(url_list):
    """Fetch each listing page and collect the image entries found on it.

    Args:
        url_list: Iterable of listing-page URLs.

    Returns:
        List of dicts, each with 'src' (image URL) and 'title' keys.

    Raises:
        IndexError: If a list item has no matching image src or title node.
    """
    image_infos = []
    for url in url_list:
        # A timeout stops one stalled request from hanging the whole crawl.
        response = requests.get(url=url, timeout=10)
        response.encoding = 'gbk'
        html_obj = etree.HTML(response.text)
        li_list = html_obj.xpath('//*[@id="main"]/div[3]/ul/li')
        for li in li_list:
            src = "https://pic.netbian.com/" + li.xpath('./a/img/@src')[0]
            title = li.xpath('./a/b/text()')[0]
            image_infos.append({'src': src, 'title': title})
    return image_infos
def save_data(file_title, file_src):
    """Download one image and write it to image/<file_title>.png.

    The image directory is created next to this file if it is missing.

    Args:
        file_title: Used as the file name (without extension).
            NOTE(review): titles are not sanitised for characters that are
            invalid in file names - confirm against real data.
        file_src: URL of the image to download.
    """
    file_name = 'image'
    file_path = os.path.join(os.path.dirname(__file__), file_name)
    os.makedirs(file_path, exist_ok=True)
    file_path = os.path.join(file_path, f'{file_title}.png')
    # A timeout stops one stalled download from blocking its worker forever.
    data = requests.get(file_src, timeout=10)
    with open(file_path, 'wb') as fp:
        fp.write(data.content)
    print(f'当前图片{file_title}保存完成')
@timer
def main_normal():
    """Crawl the listing pages and save every image one after another."""
    url_list = create_url_list()
    data_list = spider_data(url_list=url_list)
    for data in data_list:
        image_src = data.get('src')
        image_title = data.get('title')
        save_data(image_title, image_src)
@timer
def main_process():
    """Crawl the listing pages and save each image in its own process."""
    url_list = create_url_list()
    data_list = spider_data(url_list=url_list)
    p_list = []
    for data in data_list:
        image_src = data.get('src')
        image_title = data.get('title')
        # One process per image; all are started before any is joined.
        p = Process(target=save_data, args=(image_title, image_src))
        p.start()
        p_list.append(p)
    # Wait for every download so the timer covers the whole batch.
    for p in p_list:
        p.join()
@timer
def main_thread():
    """Crawl the listing pages and save each image in its own thread."""
    url_list = create_url_list()
    data_list = spider_data(url_list=url_list)
    t_list = []
    for data in data_list:
        image_src = data.get('src')
        image_title = data.get('title')
        # One thread per image; all are started before any is joined.
        t = Thread(target=save_data, args=(image_title, image_src))
        t.start()
        t_list.append(t)
    # Wait for every download so the timer covers the whole batch.
    for t in t_list:
        t.join()
if __name__ == '__main__':
    # Uncomment one entry point at a time; the figures are the timings
    # recorded in these notes.
    ...
    # main_normal()   34s
    # main_process()  9.7s
    # main_thread()   7s
- 可以看到在多IO阻塞时的速度比较结果为
- 多线程>多进程>单进程
【六】线程之间数据共享
from threading import Thread
# Module-level value modified by task().
age = 20


def task():
    """Subtract 2 from the module-level age and print the new value."""
    global age
    age -= 2
    print(f'我是子线程的age:>>>>{age}')
if __name__ == '__main__':
    t = Thread(target=task, args=())
    t.start()
    # Wait for the child so the value printed below always reflects its
    # update instead of depending on thread scheduling.
    t.join()
    print(f'我是主线程的age:>>>>{age}')
'''
我是子线程的age:>>>>18
我是主线程的age:>>>>18
'''
- 可以看到我在子线程中修改了主线程的age
- 主线程的age也修改了
- 说明线程之间数据共享
【七】不同的线程的进程ID是相同的
from threading import Thread
import os
# Module-level value modified by task().
age = 20


def task():
    """Subtract 2 from the module-level age, then print it and the PID."""
    global age
    age -= 2
    print(f'我是子线程的age:>>>>{age}')
    print(f'进程ID:>>>>{os.getpid()}')
if __name__ == '__main__':
    t = Thread(target=task, args=())
    t.start()
    # Wait for the child so the value printed below always reflects its
    # update instead of depending on thread scheduling.
    t.join()
    print(f'我是主线程的age:>>>>{age}')
    # Printed from the main thread, for comparison with the PID task prints.
    print(f'进程ID:>>>>{os.getpid()}')
'''
进程ID:>>>>15792
进程ID:>>>>15792
'''
【八】查看线程名称
- 首先要导入threading模块下的current_thread方法
- 通过这个方法.name就可以拿到当前线程的名称
from threading import Thread, current_thread
def task(i):
    """Print the name of the thread that is running this call.

    Args:
        i: Sequence number interpolated into the message.
    """
    print(f'线程程{i}名称:>>>>{current_thread().name}')
if __name__ == '__main__':
    # Keep the Thread objects; start() returns None, so calling it inside
    # the comprehension only produced a list of None.
    t = [Thread(target=task, args=(i,)) for i in range(1, 6)]
    for thread in t:
        thread.start()
    print(f'主线程名称:>>>>{current_thread().name}')
'''
线程程1名称:>>>>Thread-1 (task)
线程程2名称:>>>>Thread-2 (task)
线程程3名称:>>>>Thread-3 (task)
线程程4名称:>>>>Thread-4 (task)
线程程5名称:>>>>Thread-5 (task)
主线程名称:>>>>MainThread
'''
【九】查看活跃的线程数量
- 首先要导入threading模块下的active_count方法
from threading import Thread, current_thread, active_count
import time
def task(i):
    """Print the running thread's name, then sleep for two seconds.

    Args:
        i: Sequence number interpolated into the message.
    """
    print(f'线程程{i}名称:>>>>{current_thread().name}')
    time.sleep(2)  # keeps the thread alive after it has printed
if __name__ == '__main__':
    # Keep the Thread objects; start() returns None, so calling it inside
    # the comprehension only produced a list of None.
    t = [Thread(target=task, args=(i,)) for i in range(1, 6)]
    for thread in t:
        thread.start()
    print(f'主线程名称:>>>>{current_thread().name}')
    # Deliberately not joined: the count is taken while the five workers
    # are still sleeping.
    print(f'当前活动线程的数量{active_count()}')
'''
线程程1名称:>>>>Thread-1 (task)
线程程2名称:>>>>Thread-2 (task)
线程程3名称:>>>>Thread-3 (task)
线程程4名称:>>>>Thread-4 (task)
线程程5名称:>>>>Thread-5 (task)
主线程名称:>>>>MainThread
当前活动线程的数量6
'''
【九】守护线程
- 一般情况下主线程会等待子线程运行结束才结束
- 因为当主线程结束时,就意味着这个进程就结束了
- 可如果其他子线程还在运行,就没办法再拿到这个进程的资源进行正常工作了
- 我们也可以用和设置守护进程一样的方式,把线程设置为守护线程
- 如果把子线程设置为守护线程,主线程就不会等待这个子线程结束了才结束
- 而是根据除守护线程以外的其他线程是否结束而结束
from threading import Thread
import time
def task_1():
    """Print a start message, sleep one second, print an end message."""
    print('任务一开始')
    time.sleep(1)
    print('任务一结束')
def task_2():
    """Print a start message, sleep two seconds, print an end message."""
    print('任务二开始')
    time.sleep(2)
    print('任务二结束')
if __name__ == '__main__':
    # t1 is a daemon thread; t2 is a normal thread.
    t1 = Thread(target=task_1, daemon=True)
    t2 = Thread(target=task_2)
    t1.start()
    t2.start()
    # task_1 sleeps 1s and task_2 sleeps 2s, so the daemon finishes first.
    print('我是主线程')
'''
任务一开始
任务二开始
我是主线程
任务一结束
任务二结束
'''
可以看到,作为守护线程t1,没有为主线程陪葬
因为主线程正在等t2结束
但是如果t1比t2运行时间长,一旦t2运行结束,主线程就会结束运行,然后t1就会跟着主线程一起陪葬
from threading import Thread
import time
def task_1():
    """Print a start message, sleep three seconds, print an end message."""
    print('任务一开始')
    time.sleep(3)
    print('任务一结束')
def task_2():
    """Print a start message, sleep two seconds, print an end message."""
    print('任务二开始')
    time.sleep(2)
    print('任务二结束')
if __name__ == '__main__':
    # t1 is a daemon thread; t2 is a normal thread.
    t1 = Thread(target=task_1, daemon=True)
    t2 = Thread(target=task_2)
    t1.start()
    t2.start()
    # Here task_1 sleeps 3s and task_2 only 2s, so the daemon is still
    # running when the last non-daemon thread ends.
    print('我是主线程')
'''
任务一开始
任务二开始
我是主线程
任务二结束
'''
【十】线程间互斥锁
抢票例子模板
import json
from threading import Thread, Lock
import os
import time
import random
def init_data():
    """Ensure ticket/tickets.json exists next to this file.

    The file is created with 3 tickets only when it does not exist yet; an
    existing file is left untouched.

    Returns:
        Path of the tickets JSON file.
    """
    file_path = os.path.join(os.path.dirname(__file__), 'ticket')
    os.makedirs(file_path, exist_ok=True)
    file_name = os.path.join(file_path, 'tickets.json')
    if not os.path.exists(file_name):
        with open(file_name, 'w', encoding='utf-8') as fp:
            json.dump({'tickets': 3}, fp, ensure_ascii=False)
    return file_name
def search_tickets():
    """Read the current ticket data from the tickets file.

    Returns:
        Tuple of (remaining ticket count, the loaded data dict, file path).
    """
    file_name = init_data()
    with open(file_name, 'r', encoding='utf-8') as fp:
        data = json.load(fp)
    tickets_num = data.get('tickets')
    return tickets_num, data, file_name
def buy_tickets(i):
    """Try to buy one ticket for user i and save the new count.

    The read-check-write sequence is not atomic on its own; the caller in
    these notes runs it while holding a lock.

    Args:
        i: User number interpolated into the printed messages.
    """
    tickets_num, data, file_name = search_tickets()
    if tickets_num > 0:
        tickets_num -= 1
        print(f'用户{i}购票成功')
        data['tickets'] = tickets_num
        with open(file_name, 'w', encoding='utf-8') as fp:
            json.dump(data, fp, ensure_ascii=False)
    else:
        print(f'用户{i}购买失败:>>>>余票不足')
def thread_main(mutex, i):
    """Buy a ticket for user i while holding the shared lock.

    Args:
        mutex: Lock shared by all buyer threads.
        i: User number passed on to buy_tickets().
    """
    # `with` releases the lock even if buy_tickets() raises; a bare
    # acquire()/release() pair would leave the other threads blocked.
    with mutex:
        # The random delay is kept inside the lock, as in the original.
        time.sleep(random.randint(1, 2))
        buy_tickets(i)
if __name__ == '__main__':
    # One lock is shared by all five buyer threads.
    mutex = Lock()
    t_list = [Thread(target=thread_main, args=(mutex, i)) for i in range(1, 6)]
    for t in t_list:
        t.start()
    # Wait for every buyer to finish.
    for t in t_list:
        t.join()
'''
用户1购票成功
用户2购票成功
用户3购票成功
用户4购买失败:>>>>余票不足
用户5购买失败:>>>>余票不足
'''