欢迎来到 Kong Xiangqun 的博客

day7.线程-线程队列\进程池和线程池\回调函数\协程

一、线程队列

"""
put 存
get 取
put_nowait 存,超出了队列长度,报错
get_nowait 取,没有数据时,直接报错
linux windows 线程中 put_nowait get_nowait 都支持
"""

1、Queue

"""先进先出,后进后出"""
from queue import Queue

# Unbounded FIFO queue: items come out in the order they went in.
q = Queue()
for item in (1, 2):
    q.put(item)
for _ in range(2):
    print(q.get())
# The queue is empty now: another q.get() would block forever,
# while q.get_nowait() would raise queue.Empty instead of blocking.

# Bounded queue that holds at most three items.
q2 = Queue(maxsize=3)
q2.put(100)
q2.put(101)
# Third and last free slot. put_nowait never blocks; it raises queue.Full
# when there is no room.
q2.put_nowait(104)
# The queue is full now: a further q2.put(...) would block.
"""
1
2

进程已结束,退出代码 0
"""
结果

2、LifoQueue

""" 先进后出,后进先出(栈的特点) """
from queue import LifoQueue
# Bounded LIFO queue (a stack) with room for three items.
lq = LifoQueue(maxsize=3)
for value in (11, 22, 33):
    lq.put(value)
# The queue is full here: lq.put_nowait(44) would raise queue.Full.

# Items come back in reverse insertion order: 33, 22, 11.
for _ in range(3):
    print(lq.get())
# The queue is empty again: one more lq.get() would block.
"""
33
22
11

进程已结束,退出代码 0
"""
结果

3、PriorityQueue 按照优先级顺序进行排序(默认从小到大)

from queue import PriorityQueue
pq = PriorityQueue()

# Plain numbers can be stored and come back smallest first:
#     pq.put(80); pq.put(81); pq.put(18)
# Strings can be stored too and are ordered by their character codes:
#     pq.put("wangwen"); pq.put("wangzhihe"); pq.put("gelong")

# Containers such as tuples work as well. They are compared element by
# element, so with an equal first field the second field decides the order.
for entry in ((18, "wangwen"), (18, "maohonglei"), (18, "wangawei")):
    pq.put(entry)

# Values of types that cannot be compared with each other must not be mixed
# in one queue: pq.put(1) followed by pq.put("abc") fails with an error.

# Retrieve the three entries in priority order.
for _ in range(3):
    print(pq.get())
"""
(18, 'maohonglei')
(18, 'wangawei')
(18, 'wangwen')

进程已结束,退出代码 0
"""
结果

二、进程池和线程池

1、进程池的基本使用

from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
import os,time

def func(i, delay=3):
    """Simulate a slow task and hand its argument back.

    Args:
        i: Task identifier; printed when the task ends and returned as-is.
        delay: Seconds to sleep to simulate work. Defaults to 3.

    Returns:
        The value of ``i``, unchanged.
    """
    print("任务执行中 ... start", os.getpid())
    time.sleep(delay)
    print("任务执行结束 ... end ", i)
    return i


if __name__ == "__main__":
    # os.cpu_count() reports the number of logical CPU cores.
    # (1) Create the pool. Without an argument the pool is sized from the
    #     machine's CPU count; 8 workers are requested explicitly here.
    #     The with-block calls shutdown() on exit, even if a submit fails, so
    #     the main process only continues once every task has finished
    #     (comparable to join).
    with ProcessPoolExecutor(8) as p:
        # (2) Submit the tasks asynchronously. Workers are reused: with ten
        #     tasks and eight workers, some process ids show up twice.
        lst = [p.submit(func, i) for i in range(10)]

        # (3) A task's return value is read with result(), which blocks
        #     until that task is done:
        #         for future in lst:
        #             print(future.result())

    # (4) Reached only after all child processes have finished.
    print("<=======>")
    print(os.getpid())
"""
任务执行中 ... start 21728
任务执行中 ... start 8936
任务执行中 ... start 19124
任务执行中 ... start 13972
任务执行中 ... start 444
任务执行中 ... start 12544
任务执行中 ... start 21672
任务执行中 ... start 3316
任务执行结束 ... end  0
任务执行中 ... start 21728
任务执行结束 ... end  1
任务执行中 ... start 8936
任务执行结束 ... end  2
任务执行结束 ... end  3
任务执行结束 ... end  4
任务执行结束 ... end  5
任务执行结束 ... end  6
任务执行结束 ... end  7
任务执行结束 ... end  8
任务执行结束 ... end  9
<=======>
12728

进程已结束,退出代码 0
"""
结果

2、ThreadPoolExecutor 线程池的基本使用

from concurrent.futures import ThreadPoolExecutor
from threading import current_thread  as cthread
import os,time

def func(i):
    """Report which worker thread runs this task.

    Args:
        i: Value printed on the "end" line.

    Returns:
        The identifier of the thread that executed the call.
    """
    ident = cthread().ident
    print("thread ... start", ident)
    print("thread ... end ", i)
    return ident


if __name__ == "__main__":
    # (1) Create the thread pool. The default worker count depends on the
    #     Python version: os.cpu_count() * 5 on 3.5-3.7 and
    #     min(32, os.cpu_count() + 4) from 3.8 on.
    #     The with-block shuts the pool down on exit, so no worker thread is
    #     left behind.
    with ThreadPoolExecutor() as tp:
        # (2) Submit 100 tasks asynchronously; every task gets the argument 10.
        #     A thread that finishes quickly picks up further tasks, so fewer
        #     threads than tasks are involved.
        lst = [tp.submit(func, 10) for _ in range(100)]

        # (3) Collect the return values; result() blocks until a task is done.
        #     The set keeps one entry per distinct worker thread.
        setvar = {future.result() for future in lst}

    # (4) All worker threads are finished at this point.
    print("主线程执行结束 .... ")
    print(setvar, len(setvar))
"""
10
主线程执行结束 .... 
{6528, 11008, 7556, 5124, 13816, 12044, 15628, 21268, 17556, 18196, 11544, 22556, 19616, 18464, 21536, 19492, 21928, 10920, 12200, 19112, 6828, 16940, 4924, 13628, 16192, 19832, 19400, 6472, 12360, 6732, 7764, 12256, 7804, 17132, 20604, 12660, 22776, 19196} 38
"""
结果
"""
无论是进程池还是线程池,都是由固定的进程数或者线程数来执行所有的任务,并不会额外创建多余的进程或者线程.
"""

3、线程池 map

# The abstract base classes live in collections.abc; the aliases in
# collections were removed in Python 3.10.
from collections.abc import Iterator, Iterable
from concurrent.futures import ThreadPoolExecutor
from threading import current_thread  as cthread


def func(i):
    """Print the current thread's identifier and build a row of stars.

    Args:
        i: Number of ``*`` characters to produce.

    Returns:
        A string made of ``i`` asterisks.
    """
    # At most five of these calls run at the same time (pool size below).
    print("thread ... ", cthread().ident)
    return i * "*"


if __name__ == "__main__":
    # Pool with at most five concurrent threads. Leaving the with-block
    # waits for all worker threads to finish.
    with ThreadPoolExecutor(5) as tp:
        # map() returns the results as an iterator, in input order.
        it = tp.map(func, range(20))
        # Confirm that the return value is an iterator.
        print(isinstance(it, Iterator))
    # Walk through the collected results.
    for i in it:
        print(i)
thread ...  10152
thread ...  10152
thread ...  10152
thread ...  10152
thread ...  21700
True
thread ...  11876
thread ...  11876
thread ...  11876
thread ...  11876
thread ...  11876thread ...  22536
thread ...  22536thread ...  21700
thread ...  thread ...  10152

thread ...  22536
thread ...  22536
thread ...  22536
thread ...  10152
thread ...  10152
21700


*
**
***
****
*****
******
*******
********
*********
**********
***********
************
*************
**************
***************
****************
*****************
******************
*******************

进程已结束,退出代码 0
结果

三、回调函数

"""
回调函数: 回头调用一下
    把函数当成一个参数传递给另外一个函数
    在当前函数执行完毕之后,最后调用一下当参数传递进来的函数    
    add_done_callback(回调函数)
    
功能:
    支付状态: 
    退款状态: 
    转账的状态
    把想要的相关的成员信息写在回调函数之后,
    通过支付接口调用之后,后台会自动把想要的数据加载到回调函数中
    从而看到最后的状态.
"""

1、进程池的回调函数:由主进程执行调用完成的

from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from threading import current_thread  as cthread
import os, time


def func1(i, delay=1):
    """Task for the process pool: report the process id, wait, build stars.

    Args:
        i: Number of ``*`` characters to return; also printed at the end.
        delay: Seconds to sleep to simulate work. Defaults to 1.

    Returns:
        A string made of ``i`` asterisks.
    """
    print("process start ... ", os.getpid())
    time.sleep(delay)
    print("process end ... ", i)
    return "*" * i


def func2(i, delay=1):
    """Task for the thread pool: report the thread id, wait, build stars.

    Args:
        i: Number of ``*`` characters to return; also printed at the end.
        delay: Seconds to sleep to simulate work. Defaults to 1.

    Returns:
        A string made of ``i`` asterisks.
    """
    print("thread start ... ", cthread().ident)
    time.sleep(delay)
    print("thread end ... ", i)
    return "*" * i


def call_back1(obj):
    """Callback for process-pool futures: print the caller's pid and the result.

    Args:
        obj: The finished future; its ``result()`` is printed.
    """
    print("<===回调函数callback进程号===>", os.getpid())
    print(obj.result())


def call_back2(obj):
    """Callback for thread-pool futures: print the caller's thread id and the result.

    Args:
        obj: The finished future; its ``result()`` is printed.
    """
    print("<===回调函数callback线程号===>", cthread().ident)
    print(obj.result())


if __name__ == "__main__":
    # The with-block waits for every task (and its callback) before the
    # final line is printed, and shuts the pool down even on errors.
    with ProcessPoolExecutor() as p:
        for i in range(1, 11):
            res = p.submit(func1, i)
            # Calling res.result() here would block and serialise the tasks.
            # add_done_callback stores the function and calls it with the
            # future once the task is done. For a process pool the callback
            # runs in the main process: it prints the same pid as the final
            # line below.
            res.add_done_callback(call_back1)
    print("主进程执行结束 ... ", os.getpid())
"""
process start ...  7428
process start ...  5348
process start ...  5500
process start ...  9276
process start ...  4692
process start ...  9088
process start ...  8628
process start ...  1724
process start ...  10868
process start ...  9464
process end ...  1
<===回调函数callback进程号===> 4444
*
process end ...  2
<===回调函数callback进程号===> 4444
**
process end ...  3
<===回调函数callback进程号===> 4444
***
process end ...  4
<===回调函数callback进程号===> 4444
****
process end ...  5
<===回调函数callback进程号===> 4444
*****
process end ...  6
<===回调函数callback进程号===> 4444
******
process end ...  7
<===回调函数callback进程号===> 4444
*******
process end ...  8
process end ...  9
<===回调函数callback进程号===> 4444
********
<===回调函数callback进程号===> 4444
*********
process end ...  10
<===回调函数callback进程号===> 4444
**********
主进程执行结束 ...  4444

进程已结束,退出代码 0
"""
结果

2、线程池的回调函数:由当前子线程调用完成的

from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from threading import current_thread  as cthread
import os, time


def func1(i, delay=1):
    """Task for the process pool: report the process id, wait, build stars.

    Args:
        i: Number of ``*`` characters to return; also printed at the end.
        delay: Seconds to sleep to simulate work. Defaults to 1.

    Returns:
        A string made of ``i`` asterisks.
    """
    print("process start ... ", os.getpid())
    time.sleep(delay)
    print("process end ... ", i)
    return "*" * i


def func2(i, delay=1):
    """Task for the thread pool: report the thread id, wait, build stars.

    Args:
        i: Number of ``*`` characters to return; also printed at the end.
        delay: Seconds to sleep to simulate work. Defaults to 1.

    Returns:
        A string made of ``i`` asterisks.
    """
    print("thread start ... ", cthread().ident)
    time.sleep(delay)
    print("thread end ... ", i)
    return "*" * i


def call_back1(obj):
    """Callback for process-pool futures: print the caller's pid and the result.

    Args:
        obj: The finished future; its ``result()`` is printed.
    """
    print("<===回调函数callback进程号===>", os.getpid())
    print(obj.result())


def call_back2(obj):
    """Callback for thread-pool futures: print the caller's thread id and the result.

    Args:
        obj: The finished future; its ``result()`` is printed.
    """
    print("<===回调函数callback线程号===>", cthread().ident)
    print(obj.result())


if __name__ == "__main__":
    # Five worker threads share the ten tasks. The with-block waits for all
    # of them (and their callbacks) before the final line is printed.
    with ThreadPoolExecutor(5) as tp:
        for i in range(1, 11):
            res = tp.submit(func2, i)
            # For a thread pool the callback runs in the worker thread that
            # finished the task. If the future were already done at this
            # point, the callback would run right here in the calling thread.
            res.add_done_callback(call_back2)

    print("主线程执行结束 ... ", cthread().ident)
"""
thread start ...  12340
thread start ...  21840
thread start ...  23076
thread start ...  17100
thread start ...  21844
thread end ...  3
<===回调函数callback线程号===> 23076
***
thread start ...  23076
thread end ... thread end ...   12
<===回调函数callback线程号===> 12340
*
thread start ...  12340

<===回调函数callback线程号===> 21840
**
thread start ...  21840
thread end ...  5
<===回调函数callback线程号===> thread end ... 21844 
4*****

<===回调函数callback线程号===> 17100
****
thread start ...  17100thread start ... 
 21844
thread end ...  6
<===回调函数callback线程号===> 23076
thread end ... thread end ...  8
<===回调函数callback线程号===> 21840
********
 7
<===回调函数callback线程号===> 12340
*******
******
thread end ...  10
<===回调函数callback线程号===> 21844
**********
thread end ...  9
<===回调函数callback线程号===> 17100
*********
主线程执行结束 ...  22496

进程已结束,退出代码 0
"""
结果

 

测试:

class Ceshi:
    """Tiny stand-in that mimics how ``add_done_callback`` invokes a callback."""

    def add_done_callback(self, func):
        """Print two placeholder steps, then call ``func`` with this object."""
        for step in ("执行操作1 .... ", "执行操作2 .... "):
            print(step)
        # The object passes itself to the callback (call_back1234 below).
        func(self)

    def result(self):
        """Return the fixed demo value."""
        return 1234324


def call_back1234(obj):
    """Callback: print whatever ``obj.result()`` returns."""
    value = obj.result()
    print(value)


# Register the callback; add_done_callback runs it immediately and returns
# nothing, so ``res`` is None.
obj = Ceshi()
res = obj.add_done_callback(call_back1234)
测试
执行操作1 .... 
执行操作2 .... 
1234324
结果

四、协程

"""
协程是在单个线程内部、由程序自行调度切换的轻量级任务(微线程),可以看作线程内部的一种具体实现方式
安装协程模块 gevent
"""

1、用协程改写生产者消费者模型

# 生产者
def prodecer():
    """Producer: lazily yield the integers 0 through 99, one at a time."""
    number = 0
    while number < 100:
        yield number
        number += 1

# 消费者
def consumer(gen):
    """Consumer: pull the next ten items from ``gen`` and print each one."""
    remaining = 10
    while remaining:
        print(next(gen))
        remaining -= 1

# Calling the generator function creates the generator object. All three
# consumer rounds share it, so each round continues where the last stopped.
gen = prodecer()
for _ in range(3):
    consumer(gen)
"""
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29

进程已结束,退出代码 0
"""
结果

2、协程的具体实现

from greenlet import greenlet
import time

"""
switch 遇到阻塞时,只能手动调用切换的函数,来规避阻塞
"""
def eat():
    """Task run by greenlet ``g1``: print, hand control to ``g2``, then finish."""
    print("eat 1")
    # Explicit hand-over to g2; eat() pauses here until something switches
    # back to g1.
    g2.switch()
    # greenlet does not switch on its own, so this sleep simply blocks.
    time.sleep(3)
    print("eat 2")

def play():
    """Task run by greenlet ``g2``: print, block, print, then return control to ``g1``."""
    print("play 1")
    # Blocks; nothing else runs during this sleep.
    time.sleep(3)
    print("play 2")
    # Resume eat() right after its g2.switch() call.
    g1.switch()

# Wrap both functions in greenlets; nothing runs until the first switch.
g1 = greenlet(eat)
g2 = greenlet(play)
# Start eat(). Resulting output order: eat 1, play 1, play 2, eat 2.
g1.switch()
"""
eat 1
play 1
play 2
eat 2

进程已结束,退出代码 0
"""
结果

3、gevent

import gevent


def eat():
    """First gevent task: print, sleep cooperatively, print again."""
    print("eat 1")
    # gevent.sleep lets the other greenlet run while this one waits.
    gevent.sleep(3)
    # time.sleep(3)  # plain time.sleep is not recognised by gevent as a switch point
    print("eat 2")


def play():
    """Second gevent task: print, sleep cooperatively, print again."""
    print("play 1")
    # gevent.sleep lets the other greenlet run while this one waits.
    gevent.sleep(3)
    # time.sleep(3)  # plain time.sleep is not recognised by gevent as a switch point
    print("play 2")


# Create greenlet g1 with gevent.spawn.
g1 = gevent.spawn(eat)
# Create greenlet g2 with gevent.spawn.
g2 = gevent.spawn(play)

# Without join the main thread would run to its end without waiting for the
# greenlets.
# Block until g1 has finished.
g1.join()
# Block until g2 has finished.
g2.join()
"""
eat 1
play 1
eat 2
play 2

进程已结束,退出代码 0
"""
结果

4、彻底解决gevent模块不识别阻塞的问题

from gevent import monkey
monkey.patch_all()
import time
import gevent

def eat():
    """First task: print, sleep, print again."""
    print("eat 1")
    # monkey.patch_all() was called before importing time, so this sleep
    # no longer stalls the other greenlet (see the interleaved output).
    time.sleep(3)
    print("eat 2")

def play():
    """Second task: print, sleep, print again."""
    print("play 1")
    # Same patched sleep as in eat().
    time.sleep(3)
    print("play 2")


# Create greenlet g1 with gevent.spawn.
g1 = gevent.spawn(eat)
# Create greenlet g2 with gevent.spawn.
g2 = gevent.spawn(play)

# Without join the main thread would run to its end without waiting for the
# greenlets.
# Block until g1 has finished.
g1.join()
# Block until g2 has finished.
g2.join()

print("主线程执行结束 ... ")
"""
eat 1
play 1
eat 2
play 2
主线程执行结束 ... 

进程已结束,退出代码 0
"""
结果

五、协程案例

1、协程相关方法

"""
(1) spawn(函数,参数1,参数2... ) 启动协程
(2) join 阻塞,直到某个协程任务执行完毕之后,再执行下面代码
(3) joinall 等待所有协程任务都执行完毕之后,放行
  g1.join()  g2.join() => 
  gevent.joinall( [g1,g2] ) (推荐)
(4) value 获取协程任务中的返回值 g1.value  g2.value
"""

引入:

# Two assignments written as two separate statements.
a = 1
b = 2
print(a,b)
# A semicolon lets two statements share one line.
a = 1;b = 2
print(a,b)
# The same trick puts the import and the patch call on a single line.
from gevent import monkey;monkey.patch_all()

1、相应方法使用

from gevent import monkey;monkey.patch_all()
import time
import gevent

def eat():
    """First task: print, sleep, print again, and return a status string."""
    print("eat 1")
    time.sleep(3)
    print("eat 2")
    return "吃完了"

def play():
    """Second task: print, sleep, print again, and return a status string."""
    print("play 1")
    time.sleep(3)
    print("play 2")
    return "玩完了"


# Start both functions as greenlets.
g1 = gevent.spawn(eat)
g2 = gevent.spawn(play)

# Wait until both g1 and g2 have finished before continuing.
gevent.joinall( [g1,g2] )

print("主线程执行结束 .. ")
# .value holds the return value of the finished greenlet's function.
print(g1.value)
print(g2.value)
"""
eat 1
play 1
eat 2
play 2
主线程执行结束 .. 
吃完了
玩完了

进程已结束,退出代码 0
"""
结果

2、利用协程爬取数据

2.1、普通方式

"""
HTTP 状态码:
    200 ok
    404 not found
    400 bad request
"""
import requests
# Fetch the page. The timeout keeps the script from waiting forever when the
# server does not answer; without it requests.get may hang indefinitely.
response = requests.get("http://www.baidu.com/", timeout=10)
print(response)

# HTTP status code of the reply.
print(response.status_code)
# Encoding detected from the page content.
res = response.apparent_encoding
print(res)
# Decode the body with the detected encoding to avoid garbled text.
response.encoding = res
# The page content as text.
res = response.text
print(res)

2.2、爬取网页数据

from gevent import monkey ; monkey.patch_all()
import requests
import time
import gevent

# The same five sites repeated thirteen times: 65 URLs in total, in the
# order baidu, taobao, jingdong, 4399, 7k7k.
url_lst = [
    "http://www.baidu.com/",
    "http://www.taobao.com/",
    "http://www.jingdong.com/",
    "http://www.4399.com/",
    "http://www.7k7k.com/",
] * 13

def get_url(url, timeout=10):
    """Request ``url`` and check that the server answered with HTTP 200.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so one
            unresponsive site cannot stall the whole crawl. Defaults to 10.

    Returns:
        None. The page content is not used yet.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code == 200:
        # Placeholder for processing the page, e.g. print(response.text).
        pass

 

1、正常爬取

# Fetch every URL one after another and measure the total time.
startime = time.time()
for url in url_lst:
    get_url(url)
endtime = time.time()
elapsed = endtime - startime
print("执行时间:", elapsed)
# Sample run: 7.5679497718811035 seconds

 

2、用协程的方式爬取数据

# Fetch every URL in its own greenlet and measure the total time.
startime = time.time()
lst = [gevent.spawn(get_url, url) for url in url_lst]

# Wait until all greenlets have finished.
gevent.joinall(lst)
endtime = time.time()
elapsed = endtime - startime
print("执行时间:", elapsed)
# Sample run: 0.8074321746826172 seconds

 

posted @ 2020-08-23 14:09  kongxiangqun20220317  阅读(189)  评论(0编辑  收藏  举报