Async crawler test

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import aiofiles
import aiohttp
import asyncio

# Limit the number of concurrent downloads
sem = asyncio.Semaphore(3)
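# NOTE: on Python versions before 3.10, creating asyncio primitives such as
# Semaphore at module level (outside a running event loop) can bind them to
# the wrong loop and fail with "attached to a different loop" at runtime.
# If you target older interpreters, create the semaphore inside the coroutine
# instead; the sketch at the end of this post shows that variant.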

async def fetch(session, url):
    print("Sending request:", url)
    # Limit concurrency: at most 3 downloads are in flight at once
    async with sem:
        # Send the request asynchronously; session.get(url) plays the role
        # of requests.get(url)
        async with session.get(url, ssl=False) as response:
            # Read the body asynchronously; response.content.read() plays the
            # role of requests' response.content
            content = await response.content.read()
            # Use the part after the last underscore as the local file name
            file_name = url.rsplit('_')[-1]
            # Write the file asynchronously; aiofiles.open plays the role of open
            async with aiofiles.open(file_name, mode='wb') as file_object:
                await file_object.write(content)
async def main():
    # One shared client session; aiohttp.ClientSession() plays the role of
    # the requests library
    async with aiohttp.ClientSession() as session:
        # URLs of the images to download
        url_list = [
            'https://www3.autoimg.cn/newsdfs/g26/M02/35/A9/120x90_0_autohomecar__ChsEe12AXQ6AOOH_AAFocMs8nzU621.jpg',
            'https://www2.autoimg.cn/newsdfs/g30/M01/3C/E2/120x90_0_autohomecar__ChcCSV2BBICAUntfAADjJFd6800429.jpg',
            'https://www3.autoimg.cn/newsdfs/g26/M0B/3C/65/120x90_0_autohomecar__ChcCP12BFCmAIO83AAGq7vK0sGY193.jpg'
        ]
        # Create one task per URL and wait for all of them to finish
        tasks = [asyncio.create_task(fetch(session, url)) for url in url_list]
        await asyncio.wait(tasks)

if __name__ == '__main__':
    asyncio.run(main())
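
A variant worth knowing: asyncio.gather collects per-task results (or exceptions) in order, so failed downloads can be reported instead of silently swallowed, and creating the semaphore inside the coroutine avoids the loop-binding caveat noted above. This is a minimal sketch reusing the imports from the script above, not part of the original post; the helper names download_all and fetch_one and the 30-second timeout are illustrative assumptions.

async def download_all(url_list):
    # Created inside the running loop, so it also works on Python < 3.10
    sem = asyncio.Semaphore(3)
    timeout = aiohttp.ClientTimeout(total=30)  # illustrative value
    async with aiohttp.ClientSession(timeout=timeout) as session:
        tasks = [asyncio.create_task(fetch_one(session, sem, url))
                 for url in url_list]
        # return_exceptions=True keeps one failure from cancelling the rest
        results = await asyncio.gather(*tasks, return_exceptions=True)
    # results come back in the same order as url_list
    for url, result in zip(url_list, results):
        if isinstance(result, Exception):
            print("failed:", url, result)
        else:
            print("saved:", result)

async def fetch_one(session, sem, url):
    # Same body as fetch above, but the semaphore is passed as a parameter
    async with sem:
        async with session.get(url, ssl=False) as response:
            content = await response.content.read()
            file_name = url.rsplit('_')[-1]
            async with aiofiles.open(file_name, mode='wb') as file_object:
                await file_object.write(content)
            return file_name

It would be run the same way, e.g. asyncio.run(download_all(url_list)).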

This post draws on Wu Peiqi (武沛齐)'s article: https://www.cnblogs.com/wupeiqi/p/12834355.html
For more usage, see this article: https://www.jianshu.com/p/b5e347b3a17c

