自动化爬取网易云视频,分为两部分:一是获取网易云视频列表,二是获取视频的真实播放地址。
import asyncio
import datetime
import logging

from lxml import etree
from pyppeteer import launch


async def getMvPlays(url):
    """Return the MV link hrefs found on a NetEase Cloud Music search page.

    Args:
        url: Address of the search page to open in the browser.

    Returns:
        The list of ``href`` attribute values matched by the MV-list XPath,
        exactly as they appear in the page markup.

    Raises:
        IndexError: If the loaded page exposes fewer than two frames.
    """
    # NOTE(review): signal handling is switched off at launch, presumably so
    # the browser can be started outside the main thread -- confirm.
    browser = await launch({
        'handleSIGINT': False,
        'handleSIGTERM': False,
        'handleSIGHUP': False,
    })
    try:
        page = await browser.newPage()
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36')
        await page.goto(url)
        # page.frames holds every frame of the page; the markup is read from
        # the second entry (presumably the first is the top-level document).
        frames = page.frames
        iframe = frames[1]
        page_text = await iframe.content()
        tree = etree.HTML(page_text)
        return tree.xpath('//ul[@class="m-mvlist f-cb"]//h4/a/@href')
    finally:
        # Always shut the browser down, even when navigation or parsing
        # fails, so no browser process is left behind.
        await browser.close()


url = "https://music.163.com/#/search/m/?s=华语&type=1014"
start_time = datetime.datetime.now()

# Create a dedicated event loop and register it as the current one before
# driving the coroutine.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# Run the coroutine directly and exactly once: additionally wrapping it in
# asyncio.ensure_future() would schedule a second, never-awaited scrape (and a
# second browser launch) on the same loop.
arr = loop.run_until_complete(getMvPlays(url))

# Turn every relative href into an absolute URL plus placeholder metadata.
mv_arr = [
    {
        "url": "https://music.163.com" + href,
        "name": "wusen",
        "mv_img": "/static/image/girl.png",
    }
    for href in arr
]

end_time = datetime.datetime.now()
logging.warning("执行时间:{}".format((end_time - start_time).seconds))
这里是为了记录一下容易报错的地方
错误一:
# Create a new event loop and register it as the current loop before any
# coroutine is run.
# NOTE(review): presumably required because the script runs where no default
# event loop exists (e.g. outside the main thread) -- confirm.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
-----------------------------------------------------------------------------------------------------------------------------------------