修改 requests_html.AsyncHTMLSession，使其 run() 方法支持 urls 参数
一、修改源代码
# Replacement for the run() method of AsyncHTMLSession.
def run(self, *coros, urls=None):
    """Run the given coroutine functions to completion and collect results.

    Every call is wrapped in a task bound to ``self.loop`` and the loop is
    run until all tasks have finished.

    Args:
        *coros: Coroutine functions (callables returning an awaitable),
            not already-created coroutine objects.
        urls: Optional URL argument(s).
            * falsy (``None``, ``""``, ``[]``): every coroutine function is
              called with no arguments;
            * ``list`` or ``tuple``: one task is scheduled per
              (coroutine function, url) pair, iterating coroutine functions
              in the outer loop and urls in the inner loop;
            * any other value (e.g. a single ``str``): that value is passed
              to every coroutine function.

    Returns:
        A list with one result per scheduled task, in scheduling order.
        An empty list when nothing was scheduled.

    Raises:
        Exception: once all tasks have finished, the exception of the first
            failed task (in scheduling order) is re-raised.
    """
    if not urls:
        calls = [(coro, ()) for coro in coros]
    elif isinstance(urls, (list, tuple)):
        calls = [(coro, (url,)) for coro in coros for url in urls]
    else:
        calls = [(coro, (urls,)) for coro in coros]

    if not calls:
        # asyncio.wait() raises ValueError on an empty collection.
        return []

    # Bind each task to self.loop explicitly instead of relying on the
    # implicit "current" event loop.
    tasks = [
        asyncio.ensure_future(coro(*args), loop=self.loop)
        for coro, args in calls
    ]
    self.loop.run_until_complete(asyncio.wait(tasks))

    # asyncio.wait() returns an unordered *set* of finished tasks; read the
    # results from the ordered task list so output order matches input order.
    return [task.result() for task in tasks]
二、测试
from requests_html import AsyncHTMLSession

# NOTE: this demo assumes AsyncHTMLSession.run() accepts the `urls` keyword
# (i.e. the modified run() method is in place).
asession = AsyncHTMLSession()


async def get_link(link):
    """Fetch *link* through the shared session and return ``html.absolute_links``."""
    response = await asession.get(link)
    return response.html.absolute_links


# First run with a single URL string, then with a list of URLs; the results
# of each run are printed.
for url in (
    "https://www.cnblogs.com/",
    ["https://www.cnblogs.com/", "https://www.jd.com"],
):
    results = asession.run(get_link, urls=url)
    print(results)
三、或者新建一个继承自 AsyncHTMLSession 的子类 NewAsyncHTMLSession
import asyncio

from requests_html import AsyncHTMLSession


class NewAsyncHTMLSession(AsyncHTMLSession):
    """AsyncHTMLSession whose run() can pass URL argument(s) to the coroutines."""

    def run(self, *coros, urls=None):
        """Run the given coroutine functions to completion and collect results.

        Every call is wrapped in a task bound to ``self.loop`` (presumably
        set up by the parent class -- confirm against requests_html) and the
        loop is run until all tasks have finished.

        Args:
            *coros: Coroutine functions (callables returning an awaitable),
                not already-created coroutine objects.
            urls: Optional URL argument(s).
                * falsy (``None``, ``""``, ``[]``): every coroutine function
                  is called with no arguments;
                * ``list`` or ``tuple``: one task is scheduled per
                  (coroutine function, url) pair, iterating coroutine
                  functions in the outer loop and urls in the inner loop;
                * any other value (e.g. a single ``str``): that value is
                  passed to every coroutine function.

        Returns:
            A list with one result per scheduled task, in scheduling order.
            An empty list when nothing was scheduled.

        Raises:
            Exception: once all tasks have finished, the exception of the
                first failed task (in scheduling order) is re-raised.
        """
        if not urls:
            calls = [(coro, ()) for coro in coros]
        elif isinstance(urls, (list, tuple)):
            calls = [(coro, (url,)) for coro in coros for url in urls]
        else:
            calls = [(coro, (urls,)) for coro in coros]

        if not calls:
            # asyncio.wait() raises ValueError on an empty collection.
            return []

        # Bind each task to self.loop explicitly instead of relying on the
        # implicit "current" event loop.
        tasks = [
            asyncio.ensure_future(coro(*args), loop=self.loop)
            for coro, args in calls
        ]
        self.loop.run_until_complete(asyncio.wait(tasks))

        # asyncio.wait() returns an unordered *set* of finished tasks; read
        # the results from the ordered task list so output order matches
        # input order.
        return [task.result() for task in tasks]


asession = NewAsyncHTMLSession()


async def get_link(link):
    """Fetch *link* through the shared session and return ``html.absolute_links``."""
    res = await asession.get(link)
    return res.html.absolute_links


# Single URL: one task per coroutine function.
url = "https://www.cnblogs.com/"
results = asession.run(get_link, urls=url)
print(results)

# List of URLs: one task per (coroutine function, url) pair.
url = ["https://www.cnblogs.com/", "https://www.jd.com"]
results = asession.run(get_link, urls=url)
print(results)