爬虫基础-协程基本语法
协程基本语法
import asyncio
async def request(url):
    """Pretend to request ``url`` and return it unchanged.

    No network I/O happens here: the coroutine only prints a "requesting"
    line followed by a "success" line, which is enough to observe when the
    event loop actually executes the body.
    """
    print('正在请求的url是',url)
    print('请求成功',url)
    return url
# Calling an async function does not run its body; it returns a coroutine object.
c = request('www.baidu.com')
# Create the event loop explicitly: asyncio.get_event_loop() is deprecated when
# no loop is running and raises RuntimeError on recent Python versions.
loop = asyncio.new_event_loop()
try:
    # Register the coroutine with the loop and run the loop until it finishes.
    loop.run_until_complete(c)
finally:
    # Release the loop's resources even if the coroutine raised.
    loop.close()
task的使用
import asyncio
async def request(url):
    """Pretend to request ``url`` and return it unchanged.

    No network I/O happens here: the coroutine only prints a "requesting"
    line followed by a "success" line, which is enough to observe when the
    event loop actually executes the body.
    """
    print('正在请求的url是',url)
    print('请求成功',url)
    return url
# Calling an async function does not run its body; it returns a coroutine object.
c = request('www.baidu.com')
# Using a task.
# Create the event loop explicitly: asyncio.get_event_loop() is deprecated when
# no loop is running and raises RuntimeError on recent Python versions.
loop = asyncio.new_event_loop()
try:
    # Wrap the coroutine in a Task bound to this loop.
    task = loop.create_task(c)
    # Printed before the loop runs, so the task has not executed yet.
    print(task)
    loop.run_until_complete(task)
    # Printed after completion, so the repr now reflects the finished task.
    print(task)
finally:
    # Release the loop's resources even if the task raised.
    loop.close()
future的使用
import asyncio
async def request(url):
    """Pretend to request ``url`` and return it unchanged.

    No network I/O happens here: the coroutine only prints a "requesting"
    line followed by a "success" line, which is enough to observe when the
    event loop actually executes the body.
    """
    print('正在请求的url是',url)
    print('请求成功',url)
    return url
# Calling an async function does not run its body; it returns a coroutine object.
c = request('www.baidu.com')
# Using a future.
# Create the event loop explicitly: asyncio.get_event_loop() is deprecated when
# no loop is running and raises RuntimeError on recent Python versions.
loop = asyncio.new_event_loop()
try:
    # Pass the loop explicitly; without it ensure_future() has to look up the
    # current event loop, which does not exist yet at this point.
    task = asyncio.ensure_future(c, loop=loop)
    # Printed before the loop runs, so the task has not executed yet.
    print(task)
    loop.run_until_complete(task)
    # Printed after completion, so the repr now reflects the finished task.
    print(task)
finally:
    # Release the loop's resources even if the task raised.
    loop.close()
绑定回调
import asyncio
async def request(url):
    """Pretend to request ``url`` and return it unchanged.

    No network I/O happens here: the coroutine only prints a "requesting"
    line followed by a "success" line, which is enough to observe when the
    event loop actually executes the body.
    """
    print('正在请求的url是',url)
    print('请求成功',url)
    return url
# Calling an async function does not run its body; it returns a coroutine object.
c = request('www.baidu.com')
# Binding a callback.
def callback_func(task):
    """Print the result of a finished task.

    ``task`` is the object the callback is attached to; only its
    ``result()`` method is used here.
    """
    print(task.result())
# Create the event loop explicitly: asyncio.get_event_loop() is deprecated when
# no loop is running and raises RuntimeError on recent Python versions.
loop = asyncio.new_event_loop()
try:
    # Pass the loop explicitly; without it ensure_future() has to look up the
    # current event loop, which does not exist yet at this point.
    task = asyncio.ensure_future(c, loop=loop)
    # Attach the callback to the task; it is invoked with the task once done.
    task.add_done_callback(callback_func)
    loop.run_until_complete(task)
finally:
    # Release the loop's resources even if the task raised.
    loop.close()
多任务协程
import requests
import asyncio
import time
import aiohttp
# Timestamp taken before any request is scheduled; the elapsed time is
# computed against it at the end of the script.
start = time.time()

# Endpoints of the local test server that are fetched by the tasks below.
urls = [
    'http://127.0.0.1:5000/bobo',
    'http://127.0.0.1:5000/jay',
    'http://127.0.0.1:5000/tom',
]
async def get_page(url):
    """Fetch ``url`` with aiohttp and return the response body as text.

    A new ``aiohttp.ClientSession`` is opened for the request and closed
    again when the coroutine finishes.
    """
    async with aiohttp.ClientSession() as session:
        # session.get() sends a GET request and session.post() a POST request.
        # The arguments largely mirror those of requests; a proxy is passed as
        # proxy='http://ip:port'.
        async with session.get(url) as response:
            # text() gives the body decoded to str, read() gives the raw bytes
            # and json() gives the parsed JSON document. Each of them is a
            # coroutine, so it must be awaited before the data is available.
            page_text = await response.text()
    return page_text
# Create the event loop explicitly: asyncio.get_event_loop() is deprecated when
# no loop is running and raises RuntimeError on recent Python versions.
loop = asyncio.new_event_loop()
try:
    tasks = []
    for url in urls:
        # Bind every task to this loop so no implicit loop lookup is needed.
        task = loop.create_task(get_page(url))
        tasks.append(task)
    # Run all tasks concurrently and block until every one has finished.
    loop.run_until_complete(asyncio.wait(tasks))
finally:
    # Release the loop's resources even if scheduling or waiting failed.
    loop.close()
end = time.time()
print('总耗时',end-start)