Asynchronous Callbacks, Thread Queues, and Coroutines

Asynchronous callbacks

1. Example: scraping data from websites

① Submit the tasks asynchronously, wait until all of them finish, then parse the results serially

Drawback: the return value of a task cannot be handled as soon as it is ready; you have to wait until every task has finished, collect all the results, and only then parse them one by one.

import requests
from concurrent.futures import ThreadPoolExecutor
from threading import current_thread
import time
import random

def get(url):
    print("%s started" % current_thread().name)
    time.sleep(random.randint(1, 2))
    res = requests.get(url)
    if res.status_code == 200:
        print("%s finished" % current_thread().name)
        return res.content.decode("utf-8")
    # res.text                      # the response body as text
    # res.content.decode("utf-8")   # decode the raw bytes manually so Chinese displays correctly

def parser(res):
    print("%s parsed, result length %s" % (current_thread().name, len(res)))

if __name__ == '__main__':
    tpool = ThreadPoolExecutor(4)
    urls = ["https://www.baidu.com",
            "https://www.sina.com",
            "https://www.tmall.com",
            "https://www.taobao.com",
            "https://www.jd.com",
            "https://www.python.org",
            "https://www.apple.com"]
    objs = []
    for i in urls:
        obj = tpool.submit(get, i)   # submit the task asynchronously
        objs.append(obj)
    tpool.shutdown(wait=True)        # wait until every task has finished
    for obj in objs:                 # parse serially
        parser(obj.result())
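As an aside (not part of the original notes): the standard library's concurrent.futures.as_completed also hands back each future as soon as it finishes, which already avoids the "wait for everything, then parse serially" problem without callbacks. A minimal self-contained sketch:

import time
import random
from concurrent.futures import ThreadPoolExecutor, as_completed

def work(n):
    time.sleep(random.random())              # simulate IO
    return n * n

if __name__ == '__main__':
    with ThreadPoolExecutor(4) as pool:
        futures = [pool.submit(work, i) for i in range(5)]
        for fut in as_completed(futures):    # yields each future in completion order
            print("got result", fut.result())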

② Put scraping and parsing inside the same function, so parsing also runs concurrently; effectively each thread just gets one extra job

Drawback: scraping and parsing are tightly coupled.

import requests
from concurrent.futures import ThreadPoolExecutor
from threading import current_thread
import time
import random

def get(url):
    print("%s started" % current_thread().name)
    time.sleep(random.randint(1, 2))
    res = requests.get(url).content.decode("utf-8")
    print("%s finished" % current_thread().name)
    parser(res)   # parse inside the same task, so parsing runs concurrently too

def parser(res):
    print("%s parsed, result length %s" % (current_thread().name, len(res)))

if __name__ == '__main__':
    tpool = ThreadPoolExecutor(4)
    urls = ["https://www.baidu.com",
            "https://www.sina.com",
            "https://www.tmall.com",
            "https://www.taobao.com",
            "https://www.jd.com",
            "https://www.python.org",
            "https://www.apple.com"]
    for i in urls:
        tpool.submit(get, i)

③ A third approach: keep scraping and parsing separate, and bind a callback with add_done_callback() so each result is parsed as soon as its task finishes.

import requests
from concurrent.futures import ThreadPoolExecutor
from threading import current_thread
import time
import random

def get(url):
    print("%s started" % current_thread().name)
    time.sleep(random.randint(1, 2))
    res = requests.get(url).content.decode("utf-8")
    print("%s finished" % current_thread().name)
    return res

def parser(obj):            # the callback receives the future object, not the return value
    res = obj.result()      # the callback is invoked only after the future already holds its result, so this does not block
    print("%s parsed, result length %s" % (current_thread().name, len(res)))

if __name__ == '__main__':
    tpool = ThreadPoolExecutor(4)
    urls = ["https://www.baidu.com",
            "https://www.sina.com",
            "https://www.tmall.com",
            "https://www.taobao.com",
            "https://www.jd.com",
            "https://www.python.org",
            "https://www.apple.com"]
    for i in urls:
        obj = tpool.submit(get, i)
        obj.add_done_callback(parser)   # once the task finishes, parser is called automatically with the future
 
parser never blocks: what is passed to parser is the future object, and parser is only called after that future has finished and already holds its return value, so calling result() inside it cannot block.
     

The idea of a callback: the main process hands a task to a child process; when the child finishes, it sends a signal back, and the main process then calls its own function with the result.

Asynchronous tasks are usually paired with a callback function.

Use add_done_callback() to bind a callback function to a future object.

Note: with a process pool the callback is executed back in the parent process, while with a thread pool the callback normally runs in the worker thread that completed the task (or immediately in the calling thread, if the future is already done when the callback is added) — in general, not in the main thread.
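A minimal sketch (added here, not from the original notes) that makes this observable: print the process id and thread name inside the task and inside the callback. With ProcessPoolExecutor, the callback prints the parent's pid.

import os
from concurrent.futures import ProcessPoolExecutor
from threading import current_thread

def work(x):
    # runs in a worker process (different pid)
    print("task     pid=%s thread=%s" % (os.getpid(), current_thread().name))
    return x * x

def done(fut):
    # runs back in the parent process (same pid as main)
    print("callback pid=%s thread=%s result=%s" % (os.getpid(), current_thread().name, fut.result()))

if __name__ == '__main__':
    print("main     pid=%s" % os.getpid())
    with ProcessPoolExecutor(2) as pool:
        for i in range(3):
            pool.submit(work, i).add_done_callback(done)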

Thread queues: the difference from process queues is that a process queue (multiprocessing.Queue) can be shared among multiple processes, whereas a queue from the queue module is just an ordinary in-process container and cannot be shared across processes.

A process queue is backed by operating-system level IPC, so every process that holds it reads and writes the same underlying data.
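A minimal sketch (an addition to these notes) showing a multiprocessing.Queue shared between a parent and a child process, which a plain queue.Queue cannot do:

from multiprocessing import Process, Queue

def producer(q):
    q.put("hello from the child process")

if __name__ == '__main__':
    q = Queue()                               # can be handed to other processes
    p = Process(target=producer, args=(q,))
    p.start()
    print(q.get())                            # the parent receives what the child put
    p.join()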

import queue

# 1. FIFO queue: first in, first out
q = queue.Queue()

# 2. LIFO queue (a stack): last in, first out
q = queue.LifoQueue()

# 3. Priority queue
q = queue.PriorityQueue()
# items are usually (priority, data) tuples
# the smaller the priority number, the higher the priority
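A short usage sketch (added here) showing that the item with the smallest priority number comes out of a PriorityQueue first:

import queue

q = queue.PriorityQueue()
q.put((10, "low"))
q.put((1, "high"))
q.put((5, "medium"))

while not q.empty():
    print(q.get())    # (1, 'high'), then (5, 'medium'), then (10, 'low')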

Events

# Event: used to coordinate work between threads,
# e.g. one thread has to know another thread's state before it can continue;
# in other words, a way to pass a signal between threads.
from threading import Event, Thread, current_thread
import time

e = Event()

def check():
    print("%s is checking the server" % current_thread().name)
    time.sleep(3)
    e.set()                         # signal that the check is done

def connect():
    print("%s is connecting" % current_thread().name)
    e.wait()                        # block until the event is set
    print("%s connected" % current_thread().name)

if __name__ == '__main__':
    t1 = Thread(target=check)
    t2 = Thread(target=check)
    c1 = Thread(target=connect)
    c2 = Thread(target=connect)
    t1.start()
    t2.start()
    c1.start()
    c2.start()
    
    
# Variant: wait with a timeout and give up after a few retries.
from threading import Event, Thread, current_thread
import time

e = Event()

def check():
    print("%s is checking the server" % current_thread().name)
    time.sleep(2)
    e.set()

def connect():
    for i in range(3):
        if e.wait(1):               # wait at most 1 second per attempt
            print("%s connected" % current_thread().name)
            break
    else:
        print("%s failed to connect" % current_thread().name)

if __name__ == '__main__':
    t1 = Thread(target=check)
    c1 = Thread(target=connect)
    t1.start()
    c1.start()
 

Coroutines

# Concurrency within a single thread.
# Concurrency means multiple tasks appear to run at the same time.
# The essence of concurrency: switching between tasks plus saving their state.

Advantages:
Coroutine switches are cheap: they happen at the program level and the operating system is completely unaware of them, so they are far more lightweight than thread switches.
Concurrency can be achieved inside a single thread, squeezing more work out of the CPU.
Disadvantages:
A coroutine is essentially single-threaded and cannot use multiple cores; the workaround is for one program to start multiple processes, each process to start multiple threads, and each thread to run coroutines.
Coroutines live inside a single thread, so once one coroutine blocks, the whole thread is blocked.
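A minimal sketch (my addition) of "switching plus saving state" using plain generators: yield suspends a task, remembers where it was, and lets another task run.

def task1():
    for i in range(3):
        print("task1 step %s" % i)
        yield                      # suspend here; local state is saved

def task2():
    for i in range(3):
        print("task2 step %s" % i)
        yield

g1, g2 = task1(), task2()
for _ in range(3):
    next(g1)                       # resume task1 where it left off
    next(g2)                       # then switch to task2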

The greenlet module

greenlet cannot switch automatically when it hits IO; like yield, all switching is done manually with switch().

# greenlet: manual switching only, similar in spirit to yield
from greenlet import greenlet

def eat(name):
    print("%s eat 1" % name)
    # import time
    # time.sleep(3)            # greenlet does NOT switch on IO
    g2.switch("alex")          # switch to g2
    print("%s eat 2" % name)
    g2.switch()

def play(name):
    print("%s play 1" % name)
    g1.switch()                # switch back to g1, which resumes where it stopped
    print("%s play 2" % name)

g1 = greenlet(eat)
g2 = greenlet(play)
g1.switch("egon")              # arguments are only needed on the first switch

The gevent module

from gevent import monkey; monkey.patch_all()    # patch blocking calls; without this, gevent cannot detect IO
import gevent
import time
from threading import current_thread

def eat():
    print("%s eat 1" % current_thread().name)
    time.sleep(3)                                # patched, so this yields to the other greenlet
    print("%s eat 2" % current_thread().name)

def play():
    print("%s play 1" % current_thread().name)
    time.sleep(3)
    print("%s play 2" % current_thread().name)

print(current_thread().name)
g1 = gevent.spawn(eat)                           # submit the task asynchronously
g2 = gevent.spawn(play)
gevent.joinall([g1, g2])                         # wait for both; otherwise the program may exit before they even run
"""Output:
MainThread
DummyThread-1 eat 1    (DummyThread: the fake thread name gevent gives each greenlet)
DummyThread-2 play 1
DummyThread-1 eat 2
DummyThread-2 play 2
"""


