使用 pyppeteer 自动化爬取网易云音乐的视频(MV),分为两部分:一是获取网易云视频列表,二是获取视频的真实播放地址。

import datetime,logging
import asyncio
from pyppeteer import launch
from lxml import etree
async def getMvPlays(url):
    """Collect the MV links shown on a NetEase Cloud Music search page.

    Launches a browser through pyppeteer, opens *url*, reads the HTML of
    the page's second frame and extracts the ``href`` of every MV entry.

    Args:
        url: Address of the search page to load.

    Returns:
        A list of ``href`` strings taken from the ``<h4><a>`` elements
        inside ``<ul class="m-mvlist f-cb">``. Empty when nothing matches.

    Raises:
        IndexError: If the loaded page exposes fewer than two frames.
    """
    # pyppeteer's own SIGINT/SIGTERM/SIGHUP hooks are switched off.
    # NOTE(review): presumably so this coroutine can run outside the main
    # thread, where signal handlers cannot be installed -- confirm.
    browser = await launch({
        'handleSIGINT': False,
        'handleSIGTERM': False,
        'handleSIGHUP': False,
    })
    try:
        page = await browser.newPage()
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36')
        await page.goto(url)

        # All frames of the page; index 1 is taken to be the iframe that
        # holds the search results (index 0 being the page itself).
        frames = page.frames
        if len(frames) < 2:
            raise IndexError(
                'expected a content iframe at index 1, page has only '
                '{} frame(s)'.format(len(frames))
            )
        page_text = await frames[1].content()
    finally:
        # Always shut the browser down, even when navigation or frame
        # access fails, so no browser process is left behind.
        await browser.close()

    tree = etree.HTML(page_text)
    return tree.xpath('//ul[@class="m-mvlist f-cb"]//h4/a/@href')

# Driver script: scrape the MV list for one search page, wrap every link in
# a small dict, and log how long the whole run took.
url = "https://music.163.com/#/search/m/?s=华语&type=1014"
start_time = datetime.datetime.now()

# Create a fresh event loop and install it as the current one instead of
# relying on an already existing default loop.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# Run the scrape exactly once. The coroutine used to be scheduled a second
# time through asyncio.ensure_future() without ever being awaited, which
# started an extra browser and could leave a pending task on the loop.
arr = loop.run_until_complete(getMvPlays(url))

# One entry per scraped href; "name" and "mv_img" are fixed placeholders.
mv_arr = [
    {
        "url": "https://music.163.com" + href,
        "name": "wusen",
        "mv_img": "/static/image/girl.png",
    }
    for href in arr
]

end_time = datetime.datetime.now()
logging.warning("执行时间:{}".format((end_time - start_time).seconds))
View Code

这里是为了记录一下容易报错的地方

错误一:在非主线程中运行时,当前线程没有默认的事件循环,直接获取事件循环会报错,需要手动创建并设置:

loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)


posted @ 2021-07-06 18:53    阅读(70)  评论(0)  编辑  收藏  举报