线程协程和进程
不完整的单例模式
class Foo:
    """Naive singleton: every ``Foo(...)`` call yields the same object.

    No lock is used, so concurrent first calls from several threads can
    still create more than one object. ``__init__`` runs on every call,
    so ``name`` always holds the value passed to the most recent call.
    """

    # The single shared instance; None until the first construction.
    instance = None

    def __init__(self, name):
        self.name = name

    def __new__(cls, *args, **kwargs):
        """Return the cached instance, creating it on the first call."""
        # Compare against None explicitly: a truthiness test would build a
        # fresh object each time if the instance itself evaluated as falsy.
        if cls.instance is None:
            cls.instance = super().__new__(cls)
        return cls.instance


obj1 = Foo('日魔')
obj2 = Foo('SB')
# Both names refer to the same object, so the two reprs are identical
print(obj1,obj2)
爬虫爬图片
# Multithreaded download
import threading

import requests

# URLs to download; fill this in before running the snippet.
url_list = []


def task(arg):
    """Download the resource at URL ``arg`` into the current directory.

    The file is named after the text following the last ``/`` in the URL,
    and the response body is written in binary mode.
    """
    ret = requests.get(arg)
    file_name = arg.rsplit('/', maxsplit=1)[-1]
    with open(file_name, mode='wb') as f:
        f.write(ret.content)


for url in url_list:
    # Create one thread object per URL
    t = threading.Thread(target=task,args=(url,))
    # Start the thread so it can be scheduled
    t.start()
join 和start的题
import threading
# Example 1: a single worker thread that is started and then joined.
# join() makes the main thread wait until _add has finished, so the print
# that follows sees the fully incremented counter (10000000).
# The snippet is disabled by wrapping it in a string literal.
# NOTE: its indentation is missing and must be restored before it can run.
"""
loop = 10000000
number = 0
def _add(count):
global number
for i in range(count):
number += 1
t = threading.Thread(target=_add,args=(loop,))
t.start()
t.join()
print(number)
"""
# Example 2: two threads (_add and _sub) are started but never joined.
# The main thread reaches print(number) while both workers may still be
# running, so the printed value is whatever the counter holds at that moment.
# The snippet is disabled by wrapping it in a string literal.
# NOTE: its indentation is missing and must be restored before it can run.
"""
loop = 10000000
number = 0
def _add(count):
global number
for i in range(count):
number += 1
def _sub(count):
global number
for i in range(count):
number -= 1
t1 = threading.Thread(target=_add,args=(loop,))
t2 = threading.Thread(target=_sub,args=(loop,))
t1.start()
t2.start()
print(number)
"""
# Example 3: both threads are started first, then both are joined.
# print(number) runs only after _add and _sub have finished. The two loops
# run concurrently and update the shared counter without a lock, so the
# result is presumably not guaranteed to be 0 - verify on your interpreter.
# The snippet is disabled by wrapping it in a string literal.
# NOTE: its indentation is missing and must be restored before it can run.
"""
loop = 10000000
number = 0
def _add(count):
global number
for i in range(count):
number += 1
def _sub(count):
global number
for i in range(count):
number -= 1
t1 = threading.Thread(target=_add,args=(loop,))
t2 = threading.Thread(target=_sub,args=(loop,))
t1.start()
t2.start()
t1.join() # t1线程执行完毕,才继续往后走
t2.join() # t2线程执行完毕,才继续往后走
print(number)
"""
# Example 4: t1 is started and joined before t2 is even started.
# The two workers therefore run strictly one after the other: _add counts up
# to 10000000, then _sub counts back down, and print(number) shows 0.
# The snippet is disabled by wrapping it in a string literal.
# NOTE: its indentation is missing and must be restored before it can run.
"""
loop = 10000000
number = 0
def _add(count):
global number
for i in range(count):
number += 1
def _sub(count):
global number
for i in range(count):
number -= 1
t1 = threading.Thread(target=_add,args=(loop,))
t2 = threading.Thread(target=_sub,args=(loop,))
t1.start()
t1.join() # t1线程执行完毕,才继续往后走
t2.start()
t2.join() # t2线程执行完毕,才继续往后走
print(number)
"""
守护线程
import threading
import time


def task(arg):
    """Simulate a slow job: sleep for five seconds, then print a message.

    ``arg`` is accepted but not used.
    """
    time.sleep(5)
    print('任务')


t = threading.Thread(target=task,args=(11,))
# False means a non-daemon thread: the interpreter waits for it at exit,
# so 'END' is printed first and '任务' follows about five seconds later.
# Set the attribute directly; Thread.setDaemon() is deprecated since 3.10.
t.daemon = False
t.start()
print('END')
线程名称
import threading


def task(arg):
    """Print the name of the thread that is executing this function.

    ``arg`` is accepted but not used.
    """
    # current_thread() returns the Thread object running this code;
    # read .name directly because Thread.getName() is deprecated since 3.10.
    name = threading.current_thread().name
    print(name)


for i in range(10):
    t = threading.Thread(target=task,args=(11,))
    # Assign .name directly; Thread.setName() is deprecated since 3.10.
    t.name = '日魔-%s' % i
    t.start()
自定义线程
import threading


class RiMo(threading.Thread):
    """Thread subclass whose work is defined by overriding ``run``."""

    def run(self):
        """Body executed in the new thread after ``start()`` is called."""
        # _args is where Thread keeps the ``args`` tuple from the constructor
        received = self._args
        print('执行此线程', received)


obj = RiMo(args=(100,))
obj.start()
3.2 线程的概念&与进程的区别?
形象的关系
- 工厂 -> 应用程序
- 车间 -> 进程
- 工人 -> 线程
进程和线程的区别?
进程是计算机资源分配的最小单位.
线程是计算机中可以被cpu调度的最小单位.
一个进程中可以有多个线程,同一个进程中的线程可以共享此进程中的资源,一个进程中至少有一个线程(一个应用程序中至少有一个进程)
在Python中因为有GIL锁,它限制在同一时刻一个进程中只能有一个线程被调度
计算密集型:多进程
IO密集型:多线程
默认进程之间无法进行资源共享,如果想要通讯可以基于:文件/网络/Queue.
socketserver的本质
import socket
import threading


def task(connect,address):
    """Serve a single client; placeholder that currently does nothing.

    ``connect`` and ``address`` receive the two values returned by
    ``accept()`` in the loop below.
    """
    return None


listen_on = ('127.0.0.1',9000)
server = socket.socket()
server.bind(listen_on)
server.listen(5)
while True:
    # accept() blocks until a client connects
    conn,addr = server.accept()
    # One thread per connection, so the loop goes straight back to accept()
    t = threading.Thread(target=task,args=(conn,addr))
    t.start()
3.4 线程安全
多个线程同时去操作一个"东西",不要存在数据混乱.
线程安全: logging模块 / 列表
线程不安全: 自己做文件操作 / 同时修改一个数字
使用锁来保证数据安全,来了多个线程,使用锁让他们排队,逐一执行.
线程锁
import threading
import time

num = 0
# Mutex guarding every update of ``num``
lock = threading.Lock()


def task():
    """Increment the shared counter and print it while holding the lock."""
    global num
    # ``with lock`` acquires on entry and releases on exit. The long-hand
    # form is lock.acquire() before the body and lock.release() after it.
    with lock:
        num += 1
        time.sleep(0.2)
        print(num)


for i in range(10):
    t = threading.Thread(target=task)
    t.start()
递归锁
import threading
import time

num = 0
# Re-entrant lock: the thread that already holds it may acquire it again
lock = threading.RLock()


def task():
    """Increment and print the shared counter, taking the RLock twice."""
    global num
    # Context managers pair every acquire with a release even on error;
    # bare acquire()/release() calls would leave the lock held if anything
    # in between raised.
    with lock:
        num += 1
        # Second acquisition by the same thread is allowed with an RLock
        with lock:
            time.sleep(0.2)
            print(num)


for i in range(10):
    t = threading.Thread(target=task)
    t.start()
加强版的单例模式
import threading
import time


class Singleton:
    """Singleton whose first construction is serialised by a lock.

    ``__init__`` still runs on every call, so ``name`` holds the value
    passed to the most recent call.
    """

    # The one shared instance; None until the first construction.
    instance = None
    # Held while the instance is being created.
    lock = threading.RLock()

    def __init__(self, name):
        self.name = name

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, building it under the lock if needed."""
        # Fast path: once the instance exists, skip the lock entirely.
        # Compare with None instead of testing truthiness, so an instance
        # that evaluates as falsy is still recognised as already created.
        if cls.instance is not None:
            return cls.instance
        with cls.lock:
            # Check again: another thread may have created the instance
            # while this one was waiting for the lock.
            if cls.instance is None:
                # Artificial delay that makes the race easy to observe
                time.sleep(0.1)
                cls.instance = super().__new__(cls)
            return cls.instance


def task():
    """Construct the singleton from a worker thread and print it."""
    obj = Singleton('x')
    print(obj)


for i in range(10):
    t = threading.Thread(target=task)
    t.start()

# ... imagine a thousand lines of other code running here ...
data = Singleton('asdfasdf')
print(data)
3.5GIL
GIL,全局解释器锁.
同一时刻保证一个进程中只有一个线程可以被cpu调度,所以在使用Python开发时要注意:
计算密集型,用多进程.
IO密集型,用多线程.
- Python中如果创建多线程无法应用计算机的多核优势.
4.重点总结
- 初识爬虫
- 单例模式 ,重点面试 (需要默写)
- 为什么要加锁?
- 为什么要做判断?
- 进程和线程的区别? ,重点.面试
- GIL锁, 重点面试
- 线程的常用功能: start/join , 重点
线程池
import time
from concurrent.futures import ThreadPoolExecutor
import threading

lock=threading.RLock()


def start(i,n):
    """Pool task: print the lock's id, wait one second, print under the lock.

    ``i`` and ``n`` are accepted but not used.
    """
    print(id(lock))
    time.sleep(1)
    with lock:
        print("任务1")


# Pool of ten worker threads
fool=ThreadPoolExecutor(max_workers=10)
for i in range(100):
    fool.submit(start,i,1)
# submit() does not block, so this prints while tasks are still running
print("end")
# Wait here until every submitted task has finished
fool.shutdown(wait=True)
print("所有任务执行结束")
进程池
# import time
# from concurrent.futures import ProcessPoolExecutor
#
# def tst(i):
# time.sleep(1)
# print("任务")
#
# if __name__ == '__main__':
#
# fool=ProcessPoolExecutor(10)
# for i in range(100):
# fool.submit(tst,i)
# print("end")
# fool.shutdown()
# print("所有任务结束")
# import time
# from concurrent.futures import ProcessPoolExecutor
# def start(i,n):
# time.sleep(1)
# print("任务1")
# if __name__ == '__main__':
# fool=ProcessPoolExecutor(10)
# for i in range(100):
# fool.submit(start,i,1)
#
# print("end")
# fool.shutdown(True)
# print("所有任务执行结束")
队列
from queue import Queue

# FIFO queue with no size limit (maxsize 0 is the default)
q = Queue(maxsize=0)
# put() has no return value, so v2 ends up as None
v2 = q.put(12)
# get() removes and returns the oldest item
v1 = q.get()
print(v1)
协程
from gevent import monkey
# Patch the standard library before anything else is imported
monkey.patch_all()
import gevent
import time


def _print_around_sleep(before, after):
    """Print ``before``, sleep for 0.1 s, then print ``after``."""
    print(before)
    time.sleep(0.1)
    print(after)


def fun1():
    """Print 1, pause for 0.1 s, then print 2."""
    _print_around_sleep(1, 2)


def fun2():
    """Print 3, pause for 0.1 s, then print 4."""
    _print_around_sleep(3, 4)


t1=gevent.spawn(fun1)
t2=gevent.spawn(fun2)
# Wait until both greenlets have finished
gevent.joinall([t1,t2])
协程+io+爬虫
from gevent import monkey
# Patch the standard library before requests is imported
monkey.patch_all()
import gevent
import requests


def _fetch(url):
    """Download ``url`` and print how many bytes the response body holds."""
    print('GET: %s' % url)
    response = requests.get(url)
    size = len(response.content)
    print('%d bytes received from %s.' % (size, url))


def f1(url):
    """Fetch ``url`` and report its size."""
    _fetch(url)


def f2(url):
    """Fetch ``url`` and report its size."""
    _fetch(url)


def f3(url):
    """Fetch ``url`` and report its size."""
    _fetch(url)


# (worker, url) pairs, spawned in this order
targets = (
    (f1, 'https://www.python.org/'),
    (f2, 'https://www.yahoo.com/'),
    (f3, 'https://github.com/'),
)
# Wait until all three greenlets have finished
gevent.joinall([gevent.spawn(worker, url) for worker, url in targets])
发邮件的示例
"""
1. 申请126或163邮箱
2. 开启服务+授权码
3. 通过代码发送
"""
import smtplib
from email.mime.text import MIMEText
from email.utils import formataddr

# Compose the message: plain-text body encoded as UTF-8
msg = MIMEText('老板,我想演男一号,你想怎么着都行。', 'plain', 'utf-8')
msg['From'] = formataddr(["炮手", 'zh309603@163.com'])
msg['To'] = formataddr(["老板", '424662508@qq.com'])
msg['Subject'] = "情爱的导演"

# NOTE(review): the account name and authorization code are hard-coded below.
# Move them to an environment variable or secret store and rotate the code.
# The context manager sends QUIT and closes the connection on exit, even
# when login() or sendmail() raises.
with smtplib.SMTP_SSL("smtp.163.com", 465) as server:
    server.login("zh309603", "zhzzhz123")  # authorization code
    server.sendmail('zh309603@163.com', ['424662508@qq.com', ], msg.as_string())
生产者消费者模型
from queue import Queue
import threading

q=Queue()


def fun():
    """Consumer: take tasks off the queue forever and print each one."""
    while True :
        s=q.get()
        try:
            print(s)
        finally:
            # Mark the item as handled so that q.join() can return
            q.task_done()


def fun1(i):
    """Producer: put the received task ``i`` on the queue."""
    q.put(i)


producers = []
for i in range(10):
    t=threading.Thread(target=fun1,args=(i,))
    t.start()
    producers.append(t)
for i in range(5):
    # Daemon threads: the endless consumer loops must not keep the process
    # alive once all the work is done.
    t=threading.Thread(target=fun,daemon=True)
    t.start()
# Make sure every task has been queued, then wait until all are processed
for t in producers:
    t.join()
q.join()
线程池
import time
from concurrent.futures import ThreadPoolExecutor
import threading

lock=threading.RLock()


def start(i,n):
    """Pool task: print the lock's id, wait one second, print under the lock.

    ``i`` and ``n`` are accepted but not used.
    """
    print(id(lock))
    time.sleep(1)
    with lock:
        print("任务1")


# Pool of ten worker threads
fool=ThreadPoolExecutor(max_workers=10)
for i in range(100):
    fool.submit(start,i,1)
# submit() does not block, so this prints while tasks are still running
print("end")
# Wait here until every submitted task has finished
fool.shutdown(wait=True)
print("所有任务执行结束")
进程池
# import time
# from concurrent.futures import ProcessPoolExecutor
#
# def tst(i):
# time.sleep(1)
# print("任务")
#
# if __name__ == '__main__':
#
# fool=ProcessPoolExecutor(10)
# for i in range(100):
# fool.submit(tst,i)
# print("end")
# fool.shutdown()
# print("所有任务结束")
# import time
# from concurrent.futures import ProcessPoolExecutor
# def start(i,n):
# time.sleep(1)
# print("任务1")
# if __name__ == '__main__':
# fool=ProcessPoolExecutor(10)
# for i in range(100):
# fool.submit(start,i,1)
#
# print("end")
# fool.shutdown(True)
# print("所有任务执行结束")
队列
from queue import Queue

# FIFO queue with no size limit (maxsize 0 is the default)
q = Queue(maxsize=0)
# put() has no return value, so v2 ends up as None
v2 = q.put(12)
# get() removes and returns the oldest item
v1 = q.get()
print(v1)
协程
from gevent import monkey
# Patch the standard library before anything else is imported
monkey.patch_all()
import gevent
import time


def _print_around_sleep(before, after):
    """Print ``before``, sleep for 0.1 s, then print ``after``."""
    print(before)
    time.sleep(0.1)
    print(after)


def fun1():
    """Print 1, pause for 0.1 s, then print 2."""
    _print_around_sleep(1, 2)


def fun2():
    """Print 3, pause for 0.1 s, then print 4."""
    _print_around_sleep(3, 4)


t1=gevent.spawn(fun1)
t2=gevent.spawn(fun2)
# Wait until both greenlets have finished
gevent.joinall([t1,t2])
协程+io+爬虫
from gevent import monkey
# Patch the standard library before requests is imported
monkey.patch_all()
import gevent
import requests


def _fetch(url):
    """Download ``url`` and print how many bytes the response body holds."""
    print('GET: %s' % url)
    response = requests.get(url)
    size = len(response.content)
    print('%d bytes received from %s.' % (size, url))


def f1(url):
    """Fetch ``url`` and report its size."""
    _fetch(url)


def f2(url):
    """Fetch ``url`` and report its size."""
    _fetch(url)


def f3(url):
    """Fetch ``url`` and report its size."""
    _fetch(url)


# (worker, url) pairs, spawned in this order
targets = (
    (f1, 'https://www.python.org/'),
    (f2, 'https://www.yahoo.com/'),
    (f3, 'https://github.com/'),
)
# Wait until all three greenlets have finished
gevent.joinall([gevent.spawn(worker, url) for worker, url in targets])
发邮件的示例
"""
1. 申请126或163邮箱
2. 开启服务+授权码
3. 通过代码发送
"""
import smtplib
from email.mime.text import MIMEText
from email.utils import formataddr

# Compose the message: plain-text body encoded as UTF-8
msg = MIMEText('老板,我想演男一号,你想怎么着都行。', 'plain', 'utf-8')
msg['From'] = formataddr(["炮手", 'zh309603@163.com'])
msg['To'] = formataddr(["老板", '424662508@qq.com'])
msg['Subject'] = "情爱的导演"

# NOTE(review): the account name and authorization code are hard-coded below.
# Move them to an environment variable or secret store and rotate the code.
# The context manager sends QUIT and closes the connection on exit, even
# when login() or sendmail() raises.
with smtplib.SMTP_SSL("smtp.163.com", 465) as server:
    server.login("zh309603", "zhzzhz123")  # authorization code
    server.sendmail('zh309603@163.com', ['424662508@qq.com', ], msg.as_string())
生产者消费者模型
from queue import Queue
import threading

q=Queue()


def fun():
    """Consumer: take tasks off the queue forever and print each one."""
    while True :
        s=q.get()
        try:
            print(s)
        finally:
            # Mark the item as handled so that q.join() can return
            q.task_done()


def fun1(i):
    """Producer: put the received task ``i`` on the queue."""
    q.put(i)


producers = []
for i in range(10):
    t=threading.Thread(target=fun1,args=(i,))
    t.start()
    producers.append(t)
for i in range(5):
    # Daemon threads: the endless consumer loops must not keep the process
    # alive once all the work is done.
    t=threading.Thread(target=fun,daemon=True)
    t.start()
# Make sure every task has been queued, then wait until all are processed
for t in producers:
    t.join()
q.join()
进程,线程,协程的区别:三个都可以提高并发
- 进程是计算机分配资源的最小单位;线程是计算机中cpu 调度的最小单位
- 协程又称为“微线程”,是基于代码创造出来的,而进程和线程都是计算机中真实存在的,一个进程可以有多个线程,一个线程可以创造多个协程
- 计算密集型:多进程
- io密集型:多线程或协程+io切换
- 单纯的协程没有办法提高并发,只是代码之间的来回切换,加上io才有意义