Asynchronous Callbacks, Thread Queues, and Coroutines

Asynchronous callbacks

1. Example: scraping data from websites

① Submit the tasks asynchronously, wait until all of them finish, then parse the results serially

Drawback: the return value of a task cannot be handled as soon as it is ready; you have to wait until every task has finished, collect all the results, and only then parse them one by one.

import requests
from concurrent.futures import ThreadPoolExecutor
from threading import current_thread
import time
import random

def get(url):
    print("%s started" % current_thread().name)
    time.sleep(random.randint(1, 2))
    res = requests.get(url)
    if res.status_code == 200:
        print("%s finished" % current_thread().name)
        return res.content.decode("utf-8")
    # res.text                      # the response body as text
    # res.content.decode("utf-8")   # decode the raw bytes manually so Chinese displays correctly

def parser(res):
    print("%s parsed, result length %s" % (current_thread().name, len(res)))

if __name__ == '__main__':
    tpool = ThreadPoolExecutor(4)
    urls = ["https://www.baidu.com",
            "https://www.sina.com",
            "https://www.tmall.com",
            "https://www.taobao.com",
            "https://www.jd.com",
            "https://www.python.org",
            "https://www.apple.com"]
    objs = []
    for i in urls:
        obj = tpool.submit(get, i)   # submit the task asynchronously
        objs.append(obj)
    tpool.shutdown(wait=True)        # wait until every task has finished
    for obj in objs:                 # parse serially
        parser(obj.result())
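As an aside (not part of the original notes): the standard library's concurrent.futures.as_completed also hands back each future as soon as it finishes, which already avoids the "wait for everything, then parse serially" problem without callbacks. A minimal self-contained sketch:

import time
import random
from concurrent.futures import ThreadPoolExecutor, as_completed

def work(n):
    time.sleep(random.random())              # simulate IO
    return n * n

if __name__ == '__main__':
    with ThreadPoolExecutor(4) as pool:
        futures = [pool.submit(work, i) for i in range(5)]
        for fut in as_completed(futures):    # yields each future in completion order
            print("got result", fut.result())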

② Put scraping and parsing inside the same function, so parsing also runs concurrently; effectively each thread just gets one extra job

Drawback: scraping and parsing are tightly coupled.

import requests
from concurrent.futures import ThreadPoolExecutor
from threading import current_thread
import time
import random

def get(url):
    print("%s started" % current_thread().name)
    time.sleep(random.randint(1, 2))
    res = requests.get(url).content.decode("utf-8")
    print("%s finished" % current_thread().name)
    parser(res)   # parse inside the same task, so parsing runs concurrently too

def parser(res):
    print("%s parsed, result length %s" % (current_thread().name, len(res)))

if __name__ == '__main__':
    tpool = ThreadPoolExecutor(4)
    urls = ["https://www.baidu.com",
            "https://www.sina.com",
            "https://www.tmall.com",
            "https://www.taobao.com",
            "https://www.jd.com",
            "https://www.python.org",
            "https://www.apple.com"]
    for i in urls:
        tpool.submit(get, i)

③ A third approach: keep scraping and parsing separate, and bind a callback with add_done_callback() so each result is parsed as soon as its task finishes.

import requests
from concurrent.futures import ThreadPoolExecutor
from threading import current_thread
import time
import random

def get(url):
    print("%s started" % current_thread().name)
    time.sleep(random.randint(1, 2))
    res = requests.get(url).content.decode("utf-8")
    print("%s finished" % current_thread().name)
    return res

def parser(obj):            # the callback receives the future object, not the return value
    res = obj.result()      # the callback is invoked only after the future already holds its result, so this does not block
    print("%s parsed, result length %s" % (current_thread().name, len(res)))

if __name__ == '__main__':
    tpool = ThreadPoolExecutor(4)
    urls = ["https://www.baidu.com",
            "https://www.sina.com",
            "https://www.tmall.com",
            "https://www.taobao.com",
            "https://www.jd.com",
            "https://www.python.org",
            "https://www.apple.com"]
    for i in urls:
        obj = tpool.submit(get, i)
        obj.add_done_callback(parser)   # once the task finishes, parser is called automatically with the future
 
parser never blocks: what is passed to parser is the future object, and parser is only called after that future has finished and already holds its return value, so calling result() inside it cannot block.
     

The idea of a callback: the main process hands a task to a child process; when the child finishes, it sends a signal back, and the main process then calls its own function with the result.

Asynchronous tasks are usually paired with a callback function.

Use add_done_callback() to bind a callback function to a future object.

Note: with a process pool the callback is executed back in the parent process, while with a thread pool the callback normally runs in the worker thread that completed the task (or immediately in the calling thread, if the future is already done when the callback is added) — in general, not in the main thread.
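A minimal sketch (added here, not from the original notes) that makes this observable: print the process id and thread name inside the task and inside the callback. With ProcessPoolExecutor, the callback prints the parent's pid.

import os
from concurrent.futures import ProcessPoolExecutor
from threading import current_thread

def work(x):
    # runs in a worker process (different pid)
    print("task     pid=%s thread=%s" % (os.getpid(), current_thread().name))
    return x * x

def done(fut):
    # runs back in the parent process (same pid as main)
    print("callback pid=%s thread=%s result=%s" % (os.getpid(), current_thread().name, fut.result()))

if __name__ == '__main__':
    print("main     pid=%s" % os.getpid())
    with ProcessPoolExecutor(2) as pool:
        for i in range(3):
            pool.submit(work, i).add_done_callback(done)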

Thread queues: the difference from process queues is that a process queue (multiprocessing.Queue) can be shared among multiple processes, whereas a queue from the queue module is just an ordinary in-process container and cannot be shared across processes.

A process queue is backed by operating-system level IPC, so every process that holds it reads and writes the same underlying data.
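A minimal sketch (an addition to these notes) showing a multiprocessing.Queue shared between a parent and a child process, which a plain queue.Queue cannot do:

from multiprocessing import Process, Queue

def producer(q):
    q.put("hello from the child process")

if __name__ == '__main__':
    q = Queue()                               # can be handed to other processes
    p = Process(target=producer, args=(q,))
    p.start()
    print(q.get())                            # the parent receives what the child put
    p.join()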

import queue

# 1. FIFO queue: first in, first out
q = queue.Queue()

# 2. LIFO queue (a stack): last in, first out
q = queue.LifoQueue()

# 3. Priority queue
q = queue.PriorityQueue()
# items are usually (priority, data) tuples
# the smaller the priority number, the higher the priority
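A short usage sketch (added here) showing that the item with the smallest priority number comes out of a PriorityQueue first:

import queue

q = queue.PriorityQueue()
q.put((10, "low"))
q.put((1, "high"))
q.put((5, "medium"))

while not q.empty():
    print(q.get())    # (1, 'high'), then (5, 'medium'), then (10, 'low')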

Events

# Event: used to coordinate work between threads,
# e.g. one thread has to know another thread's state before it can continue;
# in other words, a way to pass a signal between threads.
from threading import Event, Thread, current_thread
import time

e = Event()

def check():
    print("%s is checking the server" % current_thread().name)
    time.sleep(3)
    e.set()                         # signal that the check is done

def connect():
    print("%s is connecting" % current_thread().name)
    e.wait()                        # block until the event is set
    print("%s connected" % current_thread().name)

if __name__ == '__main__':
    t1 = Thread(target=check)
    t2 = Thread(target=check)
    c1 = Thread(target=connect)
    c2 = Thread(target=connect)
    t1.start()
    t2.start()
    c1.start()
    c2.start()
    
    
# Variant: wait with a timeout and give up after a few retries.
from threading import Event, Thread, current_thread
import time

e = Event()

def check():
    print("%s is checking the server" % current_thread().name)
    time.sleep(2)
    e.set()

def connect():
    for i in range(3):
        if e.wait(1):               # wait at most 1 second per attempt
            print("%s connected" % current_thread().name)
            break
    else:
        print("%s failed to connect" % current_thread().name)

if __name__ == '__main__':
    t1 = Thread(target=check)
    c1 = Thread(target=connect)
    t1.start()
    c1.start()
 

Coroutines

# Concurrency within a single thread.
# Concurrency means multiple tasks appear to run at the same time.
# The essence of concurrency: switching between tasks plus saving their state.

Advantages:
Coroutine switches are cheap: they happen at the program level and the operating system is completely unaware of them, so they are far more lightweight than thread switches.
Concurrency can be achieved inside a single thread, squeezing more work out of the CPU.
Disadvantages:
A coroutine is essentially single-threaded and cannot use multiple cores; the workaround is for one program to start multiple processes, each process to start multiple threads, and each thread to run coroutines.
Coroutines live inside a single thread, so once one coroutine blocks, the whole thread is blocked.
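A minimal sketch (my addition) of "switching plus saving state" using plain generators: yield suspends a task, remembers where it was, and lets another task run.

def task1():
    for i in range(3):
        print("task1 step %s" % i)
        yield                      # suspend here; local state is saved

def task2():
    for i in range(3):
        print("task2 step %s" % i)
        yield

g1, g2 = task1(), task2()
for _ in range(3):
    next(g1)                       # resume task1 where it left off
    next(g2)                       # then switch to task2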

The greenlet module

greenlet cannot switch automatically when it hits IO; like yield, all switching is done manually with switch().

# greenlet: manual switching only, similar in spirit to yield
from greenlet import greenlet

def eat(name):
    print("%s eat 1" % name)
    # import time
    # time.sleep(3)            # greenlet does NOT switch on IO
    g2.switch("alex")          # switch to g2
    print("%s eat 2" % name)
    g2.switch()

def play(name):
    print("%s play 1" % name)
    g1.switch()                # switch back to g1, which resumes where it stopped
    print("%s play 2" % name)

g1 = greenlet(eat)
g2 = greenlet(play)
g1.switch("egon")              # arguments are only needed on the first switch

The gevent module

from gevent import monkey; monkey.patch_all()    # patch blocking calls; without this, gevent cannot detect IO
import gevent
import time
from threading import current_thread

def eat():
    print("%s eat 1" % current_thread().name)
    time.sleep(3)                                # patched, so this yields to the other greenlet
    print("%s eat 2" % current_thread().name)

def play():
    print("%s play 1" % current_thread().name)
    time.sleep(3)
    print("%s play 2" % current_thread().name)

print(current_thread().name)
g1 = gevent.spawn(eat)                           # submit the task asynchronously
g2 = gevent.spawn(play)
gevent.joinall([g1, g2])                         # wait for both; otherwise the program may exit before they even run
"""Output:
MainThread
DummyThread-1 eat 1    (DummyThread: the fake thread name gevent gives each greenlet)
DummyThread-2 play 1
DummyThread-1 eat 2
DummyThread-2 play 2
"""


