asyncio 简单使用

import asyncio


async def func(i, s):
    """Print a start-up message for coroutine ``i``, then sleep ``s`` seconds.

    :param i: number shown in the printed message
    :param s: delay passed to ``asyncio.sleep``
    """
    banner = f'第{i}个协程启动了'
    print(banner)
    await asyncio.sleep(s)


async def main():
    """Start 100 ``func`` coroutines concurrently and wait until all finish.

    Each coroutine is wrapped in a task explicitly: passing bare coroutine
    objects to ``asyncio.wait()`` was deprecated in Python 3.8 and raises
    ``TypeError`` from Python 3.11 onwards.
    """
    task = [asyncio.create_task(func(i, 2)) for i in range(100)]

    await asyncio.wait(task)


if __name__ == '__main__':
    # Older spelling with an explicit event loop:
    #     event_loop = asyncio.get_event_loop()
    #     event_loop.run_until_complete(main())
    # The two lines above are equivalent to the single call below.
    asyncio.run(main())

asyncio 爬虫

import os

import asyncio
import aiohttp
import aiofiles

from lxml import etree


async def details(url, path):
    """Download one chapter page and save its text as a UTF-8 ``.txt`` file.

    The chapter title is the first text node of the ``<h1>`` inside the
    ``div`` whose class contains ``chaptertitle``; with ``/`` removed it
    becomes the file name. The body is every text node of ``div#BookText``
    joined with CRLF, with each pair of U+3000 ideographic spaces removed.

    :param url: address of the chapter page to fetch
    :param path: directory in which ``<title>.txt`` is written (it is not
        created here)
    :raises IndexError: if the page has no matching chapter title node
    """
    async with aiohttp.ClientSession() as request:
        # ``ssl=False`` is the supported replacement for the deprecated
        # ``verify_ssl=False`` keyword.
        # NOTE(review): this turns off TLS certificate verification; confirm
        # that is still required for this site.
        async with request.get(url, ssl=False) as response:
            detail = await response.text(encoding='utf8')

    # The page is fully read, so the connection is released before parsing.
    tree = etree.HTML(detail)
    # Chapter title; '/' is dropped because it is a path separator.
    chapter_title = tree.xpath(
        './/div[contains(@class,"chaptertitle")]/h1/text()'
    )[0].replace('/', '')
    paragraphs = tree.xpath('.//div[@id="BookText"]/text()')
    book_text = '\r\n'.join(paragraphs).replace('\u3000\u3000', '')

    txt_path = os.path.join(path, f'{chapter_title}.txt')
    async with aiofiles.open(txt_path, 'w', encoding='utf8') as f:
        await f.write(book_text)
    print(chapter_title, url, '下载完成')


async def home():
    """Fetch the book index page and download every chapter it lists.

    The ``<span>`` children of the ``booklist`` element are walked in order.
    A span whose ``class`` attribute is exactly ``v`` is a volume title: it
    selects ``./data/<title>`` as the current output directory, creating it
    if needed. Any other span is a chapter link, scheduled for download into
    the current directory via ``details``. Chapter links that appear before
    the first volume title are skipped.

    :return: None
    """
    url = "https://www.zanghaihua.org/guichuideng/"
    async with aiohttp.ClientSession() as request:
        # ``ssl=False`` is the supported replacement for the deprecated
        # ``verify_ssl=False`` keyword.
        # NOTE(review): this turns off TLS certificate verification; confirm
        # that is still required for this site.
        async with request.get(url, ssl=False) as response:
            html = await response.text(encoding='utf8')

    # The index page is fully read, so its connection is released before the
    # chapter downloads start.
    tree = etree.HTML(html)
    booklist = tree.xpath('.//div[contains(@class,"booklist")]/span')

    task_lst = []
    dir_path = None
    for book in booklist:
        if 'v' in book.xpath('@class'):
            # Volume title: switch to (and create) its output directory.
            title = book.xpath('./a/text()')[0]
            dir_path = os.path.join(os.path.abspath('.'), 'data', title)
            os.makedirs(dir_path, exist_ok=True)
        else:
            # Plain chapter link.
            href = book.xpath('./a/@href')[0]
            if dir_path:
                # asyncio.wait() no longer accepts bare coroutine objects
                # (deprecated in 3.8, TypeError since 3.11), so wrap each
                # download in a task.
                task_lst.append(asyncio.create_task(details(href, dir_path)))

    # asyncio.wait() raises ValueError when given an empty collection.
    if not task_lst:
        return
    await asyncio.wait(task_lst)


async def main():
    """Entry coroutine for the crawler; it simply awaits ``home()``."""
    await home()


if __name__ == '__main__':
    import time

    # Time the whole run and print the elapsed seconds.
    began = time.time()
    asyncio.run(main())
    elapsed = time.time() - began
    print(elapsed)

 

posted on 2021-07-17 13:46  信奉上帝的小和尚  阅读(73)  评论(0)  编辑  收藏  举报

导航