链家网爬虫同步VS异步执行时间对比
异步执行时间（asyncio + aiohttp，29 页约 6.2 秒）
"""Async crawler: scrape listing titles from Lianjia Shenzhen second-hand pages.

Fetches pages 1-29 concurrently and prints the title text of every listing.
Original author's measured runtime: ~6.24 s (vs ~82.6 s for the sync version).
"""
import time
import asyncio

import aiohttp
from lxml import etree

start_time = time.time()


async def get_url(url, session=None):
    """Fetch one listing page and print each house title found on it.

    url: full page URL to fetch.
    session: optional shared aiohttp.ClientSession. Passing one session for
        all requests reuses pooled connections; when omitted, a temporary
        session is created and properly closed (the original leaked a new,
        never-closed session per call).
    """
    if session is None:
        # Own the session lifetime only when the caller didn't provide one.
        async with aiohttp.ClientSession() as owned_session:
            return await get_url(url, owned_session)

    # `async with` releases the response even if parsing below raises.
    async with session.get(url) as response:
        html = await response.text()

    tree = etree.HTML(html)
    for li in tree.xpath('//*[@id="content"]/div[1]/ul/li'):
        title = li.xpath('.//div[@class="title"]//text()')
        print(title)


async def main():
    """Crawl all pages concurrently through a single pooled session."""
    url = "https://sz.lianjia.com/ershoufang/pg{}"
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.ensure_future(get_url(url.format(page), session))
                 for page in range(1, 30)]  # pages 1..29, matching the sync run
        await asyncio.wait(tasks)


if __name__ == '__main__':
    # asyncio.run creates, runs and closes the event loop in one call,
    # replacing the manual get_event_loop/run_until_complete pair.
    asyncio.run(main())
    end_time = time.time()
    print("执行时间{}".format(end_time - start_time))  # 执行时间6.241659641265869
同步执行时间（requests 顺序抓取，29 页约 82.6 秒）
"""Sync crawler: scrape listing titles from Lianjia Shenzhen second-hand pages.

Fetches pages sequentially with requests and prints every listing title.
Original author's measured runtime: ~82.6 s (vs ~6.24 s for the async version).
"""
import time

import requests
from lxml import etree

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'
}

start_time = time.time()


def get_url(start=1, end=30):
    """Fetch listing pages [start, end) and print each house title.

    start, end: page range, half-open like range(); defaults reproduce the
        original hard-coded pages 1..29.
    """
    url = "https://sz.lianjia.com/ershoufang/pg{}"
    # One Session reuses the underlying TCP connection across all pages
    # instead of opening a fresh one per requests.get call.
    with requests.Session() as session:
        session.headers.update(headers)
        for page in range(start, end):
            result = session.get(url.format(page)).text
            tree = etree.HTML(result)
            for li in tree.xpath('//*[@id="content"]/div[1]/ul/li'):
                title = li.xpath('.//div[@class="title"]//text()')
                print(title)


if __name__ == '__main__':
    get_url()
    end_time = time.time()
    print("执行时间{}".format(end_time - start_time))  # 执行时间82.57950687408447