httpx 简介
- 一个用于 http 请求的模块,类似于 requests、aiohttp;
- 既能发送同步请求(是指在单进程单线程的代码中,发起一次请求后,在收到返回结果之前,不能发起下一次请求),又能发送异步请求(是指在单进程单线程的代码中,发起一次请求后,在等待网站返回结果的时间里,可以继续发送更多请求)
安装
pip install httpx
基础使用
import httpx
# Blocking request: httpx.get is called synchronously and its result is used right away.
url = 'https://www.baidu.com'
resp = httpx.get(url)
# Status code and headers, one per line.
print(resp.status_code, resp.headers, sep='\n')
# Decode the raw body bytes as UTF-8 before printing.
body = resp.content.decode('utf-8')
print(body)
异步请求方式一:
import httpx
import asyncio
url = 'https://www.baidu.com'


async def main():
    """Send one GET request to ``url`` with an async client and print the response."""
    # The client is scoped to this block by the async context manager.
    async with httpx.AsyncClient() as client:
        resp = await client.get(url)
        print(resp)


asyncio.run(main())
异步请求方式二:需要升级 Python3.8才可以
httpx 还有很多强大的功能,使用方法跟 requests 差不多。
requests 能支持的 httpx 都支持,requests 不能干的事情 httpx 也能干。这个库野心还是很大的,当你的项目既要支持同步请求、也要支持异步请求的时候,httpx 就该上场啦。
import asyncio
import html
import time

import httpx
async def func(limit):
    """Request one page of the listing and print the first row's account name.

    Posts the filter form with ``start=limit`` and a page size of 20, prints
    the ``weibo_name`` of the first returned row, then prints the elapsed
    wall-clock time in seconds.

    Args:
        limit: Value sent as the ``start`` form field.

    Raises:
        KeyError, IndexError: If the JSON response lacks the expected
            ``data -> rows[0] -> cells -> weibo_name`` path.
    """
    async with httpx.AsyncClient() as client:
        start = time.time()
        url = 'http://chuanbo.weiboyi.com/hworder/video/filterlist/source/all'
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',
            'Cookie': 'web_ima...751=1605266260',
        }
        data = {
            'web_csrf_token': '5fae6b336d59d',
            'price_list': '15, 16, 19, 26, 27, 34, 35, 36, 39',
            'weibo_type_filter': '110',
            'default_sort': 'ASC',
            'start': limit,
            'limit': 20,
        }
        resp = await client.post(url, headers=headers, data=data)
        name = resp.json()['data']['rows'][0]['cells']['weibo_name']
        # The name can contain HTML character references such as "&#x5fae;".
        # html.unescape decodes them while leaving plain non-ASCII characters
        # and ordinary semicolons untouched, which the previous
        # replace + unicode_escape round trip corrupted.
        name = html.unescape(name)
        print(name)
        end = time.time()
        print(end - start)
async def main():
    """Run five ``func`` calls as tasks, one per page offset (0, 20, ..., 80)."""
    # Create every task before awaiting any of them so the requests can overlap.
    task_list = [asyncio.create_task(func(i * 20)) for i in range(5)]
    for task in task_list:
        await task


asyncio.run(main())
httpx 模块使用代理
# Proxy mapping: each key is a URL scheme prefix, each value the proxy URL to use for it.
proxies = {
"http://": "http://localhost:8030",
"https://": "http://localhost:8031",
}
# Alternative form with credentials embedded in the proxy URL (username:password@host).
# NOTE: this assignment replaces the mapping defined just above.
proxies = {
"http://": "http://username:password@localhost:8030",
# ... (further scheme entries omitted in the original)
}
async def crawl(semaphore):
async with semaphore:
async with httpx.AsyncClient(proxies=proxies) as client: