asyncio 简单使用
import asyncio


async def func(i, s):
    """Announce that coroutine number ``i`` has started, then sleep.

    Args:
        i: Sequence number shown in the start-up message.
        s: Delay in seconds handed to ``asyncio.sleep``.
    """
    print(f'第{i}个协程启动了')
    await asyncio.sleep(s)


async def main(count=100, delay=2):
    """Run ``count`` copies of ``func`` concurrently and wait for all of them.

    Args:
        count: Number of coroutines to start (defaults to 100).
        delay: Seconds each coroutine sleeps (defaults to 2).
    """
    # gather() wraps every coroutine in a Task itself.  asyncio.wait() must
    # not be used here: passing bare coroutine objects to it was deprecated
    # in Python 3.8 and raises TypeError from 3.11 on, and it also raises
    # ValueError when the collection is empty.
    await asyncio.gather(*(func(i, delay) for i in range(count)))


if __name__ == '__main__':
    # asyncio.run() creates the event loop, runs main() to completion and
    # closes the loop again; it replaces the older two-step
    # get_event_loop() / run_until_complete() pattern.
    asyncio.run(main())
asyncio 爬虫
import os
import asyncio
from urllib.parse import urljoin

import aiohttp
import aiofiles
from lxml import etree


async def details(url, path, session=None):
    """Download one chapter page and save its text as ``<chapter title>.txt``.

    Args:
        url: Address of the chapter page.
        path: Directory the text file is written into.
        session: Optional ``aiohttp.ClientSession`` to reuse.  When omitted,
            a private session is opened and closed for this single request.

    Raises:
        IndexError: If the page contains no chapter-title ``<h1>``.
    """
    if session is None:
        # Stand-alone call: open a private session and run again with it.
        async with aiohttp.ClientSession() as own_session:
            return await details(url, path, own_session)

    # ssl=False turns off certificate verification; it is the supported
    # replacement for the deprecated verify_ssl=False keyword.
    async with session.get(url, ssl=False) as response:
        detail = await response.text(encoding='utf8')

    tree = etree.HTML(detail)
    # Chapter title; '/' is removed because it is a path separator and the
    # title is used as the file name.
    chaptertitle = tree.xpath('.//div[contains(@class,"chaptertitle")]/h1/text()')[0].replace('/', '')
    # Join the text nodes of the body container and drop every pair of
    # U+3000 ideographic spaces.
    BookText = '\r\n'.join(tree.xpath('.//div[@id="BookText"]/text()')).replace('\u3000\u3000', '')
    txt_path = os.path.join(path, f'{chaptertitle}.txt')
    async with aiofiles.open(txt_path, 'w', encoding='utf8') as f:
        await f.write(BookText)
    print(chaptertitle, url, '下载完成')


async def home():
    """Fetch the index page and download every chapter listed on it.

    The index is a flat list of ``<span>`` elements: a span whose ``class``
    attribute is exactly ``v`` is a volume heading, which becomes a directory
    under ``./data``; every other span links to a chapter that is saved into
    the directory of the most recent heading.  Chapters that appear before
    the first heading are ignored.
    """
    url = "https://www.zanghaihua.org/guichuideng/"
    chapter_urls = []
    jobs = []
    # One session is shared by the index request and all chapter requests.
    async with aiohttp.ClientSession() as session:
        async with session.get(url, ssl=False) as response:
            html = await response.text(encoding='utf8')
        tree = etree.HTML(html)
        booklist = tree.xpath('.//div[contains(@class,"booklist")]/span')
        dir_path = None
        for book in booklist:
            if 'v' in book.xpath('@class'):
                # Volume heading: create (or reuse) its output directory.
                title = book.xpath('./a/text()')[0]
                dir_path = os.path.join(os.path.abspath('.'), 'data', title)
                os.makedirs(dir_path, exist_ok=True)
                continue
            hrefs = book.xpath('./a/@href')
            if not hrefs or not dir_path:
                # Span without a link, or chapter listed before any heading.
                continue
            # urljoin leaves absolute hrefs untouched and resolves relative
            # ones against the index URL.
            chapter_url = urljoin(url, hrefs[0])
            chapter_urls.append(chapter_url)
            jobs.append(details(chapter_url, dir_path, session))
        # gather() accepts bare coroutines and an empty list, unlike
        # asyncio.wait().  return_exceptions=True keeps the crawl
        # best-effort: one failing chapter does not abort the others.
        results = await asyncio.gather(*jobs, return_exceptions=True)
    for chapter_url, result in zip(chapter_urls, results):
        if isinstance(result, BaseException):
            print(chapter_url, '下载失败', repr(result))


async def main():
    """Entry coroutine: run the whole crawl."""
    await home()


if __name__ == '__main__':
    import time

    # perf_counter() is a monotonic clock intended for measuring intervals.
    start = time.perf_counter()
    asyncio.run(main())
    print(time.perf_counter() - start)