3.pot地址写入和读取及电影爬取代码实现

爬虫-python+Qt -电影爬取代码实现

完整代码

'''
pip install lxml -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com

'''

#1-导包QApplication:需要运行qt的gui程序,必须创建一个app对象
import signal
from multiprocessing import Process

from PySide2.QtWidgets import QApplication, QMessageBox
#2-导包QFile,需要打开一个ui文件
from PySide2.QtCore import QFile
#3-py代码需要加载ui文件到内存中
from PySide2.QtUiTools import QUiLoader#需要加载你设计的ui文件
import requests
import os
from lxml import etree
from PySide2.QtCore import QStringListModel
import subprocess
import shutil
import threading
import psutil

#获取xpath
def content_xpath(html):
    content=html.content.decode('utf-8')
    return etree.HTML(content)

class Nmplayer:
    def __init__(self):
        super(Nmplayer, self).__init__()

        #设置UI文件只读
        qfile=QFile('nmgksearch.ui')
        qfile.open(QFile.ReadOnly)
        qfile.close()

        #加载UI文件
        self.ui =QUiLoader().load(qfile)

        #设置默认窗体大小
        # self.ui.resize(500,800)

        #绑定UI事件
        self.ui.potbutton.clicked.connect(self.get_pot_path)
        self.ui.searchbutton.clicked.connect(self.get_main_html)
        self.ui.searchedit.returnPressed.connect(self.get_main_html)
        self.ui.resultslist.doubleClicked.connect(self.choose_movie)
        self.ui.episodelist.doubleClicked.connect(self.choose_episode)
        self.ui.playallbutton.clicked.connect(self.play_all_episode)
        self.ui.clearbutton.clicked.connect(self.clear_source)

        #初始化参数
        self.pot_path = self.ui.potedit.text()
        self.get_pot_path()
        self.search_url='https://www.nmgk.com/index.php?s=vod-s-name'
        self.mother_url='https://www.nmgk.com/'
        self.headers ={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36'
        }
        self.search_name=''
        self.results_name=[]
        self.results_update=[]
        self.results_href=[]
        self.s=''
        self.pic_path='./images/'
        self.movie_info=''
        self.episode_name=[]
        self.episode_href=[]
        self.m3u8_url=''
        self.play_all_string=''
        self.pre_html=''

    def send_requests(self,req):
        # 创建一个会话
        self.s = requests.Session()
        # 获取请求前数据
        prepare = self.s.prepare_request(req)
        self.pre_html = self.s.send(prepare)
        return self.pre_html

    #首页多线程发送请求
    def main_thread(self):
        thread1 = threading.Thread(target=self.get_main_html)
        thread1.start()

    #首页多线程发送请求
    def choose_movie_thread(self):
        thread2 = threading.Thread(target=self.choose_movie)
        thread2.start()



    def get_pot_path(self):
        self.pot_path=self.ui.potedit.text()
        #如果文件不存在,就创建并写入
        if not os.path.exists('pot.config'):
            with open('pot.config','w') as f:
                f.write(self.pot_path)
        #有用户输入内容
        elif self.pot_path !='':
            with open('pot.config','r') as f :
                read_path=f.readline()
                #判断用户输入的是不是跟配置文件一致,如果不是以用户的为主
            if read_path !=self.pot_path:
                #删除原来的文件
                os.remove('pot.config')
                with open('pot.config','w') as f:
                    f.write(self.pot_path)
        else:
            with open('pot.config', 'r') as f:
                self.pot_path=f.readline()

        #回显输入地址
        if self.pot_path:
            self.set_placeholdertext()

    def set_placeholdertext(self):
        self.ui.potedit.setPlaceholderText(self.pot_path)

    #1-搜索页
    #发送请求获取搜索结果
    def get_main_html(self):
        #获取用户输入的搜索内容
        self.search_name=self.ui.searchedit.text()
        data ={
            'wd':self.search_name
        }
        #发送搜索请求,返回结果,结果为一个网页
        req = requests.Request(method='get', url=self.search_url,params=data,headers=self.headers)
        main_pre_html=self.send_requests(req)
        # main_pre_html=requests.get(url=self.search_url,params=data,headers=self.headers)
        #获取xpath
        print(self.pre_html)
        main_html_xpath=content_xpath(main_pre_html)
        #通过xpath定位获取名字,更新,链接
        self.get_results(main_html_xpath)
        self.show_results_list()

    #获取搜索结果
    def get_results(self,htmlx):
        self.results_name=[]
        results_name=htmlx.xpath("//div[@class ='itemname']/a/text()")
        results_update=htmlx.xpath("//div[@class ='cateimg']/a/i/text()")
        self.results_href=htmlx.xpath("//div[@class ='cateimg']/a/@href")
        #构造展示效果,瑞克和莫蒂第四季-----10集全
        for n ,u in zip(results_name,results_update):
            result_str=n+'-----'+u
            self.results_name.append(result_str)

    #展示搜索结果,需要通过字符串列表模型这个类来设置,再通过模型来展示
    def show_results_list(self):
        qlist=QStringListModel()
        qlist.setStringList(self.results_name)
        self.ui.resultslist.setModel(qlist)
        self.ui.resultslist.setToolTip('双击选择')

    #2-详情页
    # 双击搜索结果的记录进行选择,跳转到详情页面
    def choose_movie(self,index):
        #https://www.nmgk.com//vod/8314.html
        movie_href=self.mother_url+self.results_href[index.row()]
        #发送请求,获取详情页面的内容
        req = requests.Request(method='get', url=movie_href,  headers=self.headers)
        movie_pre_html = self.send_requests(req)
        #获取网页xpath结构
        movie_html_xpath=content_xpath(movie_pre_html)
        #获取图片
        self.get_movie_pic(movie_html_xpath)
        #回写图片
        self.set_pixmap()
        #获取描述并回写
        self.get_movie_info(movie_html_xpath)
        #获取集数及对应的链接
        self.get_episode(movie_html_xpath)
        #回写集数
        self.set_episodelist()


    #获取图片,生成图片文件
    def get_movie_pic(self,htmlx):
        #xpath回来是一个列表
        movie_pic_pre_url=htmlx.xpath("//div[@class='video_pic']//img/@src")[0]
        pic_name=movie_pic_pre_url.split('/')[-1]
        movie_pic_url=self.mother_url+movie_pic_pre_url
        self.pic_path +=pic_name
        #发送请求,获取图片信息
        req = requests.Request(method='get', url=movie_pic_url, headers=self.headers)
        res_pic = self.send_requests(req)
        #res_pic.content是一段字节码二进制文件
        #./images/picname,把获取到的二进制文件生成图片存放到images下面
        if not os.path.exists('./images'):
            os.mkdir('./images')
        with open(self.pic_path,'wb') as f :
            f.write(res_pic.content)

    #设置图片到页面展示
    def set_pixmap(self):
        self.ui.piclable.setPixmap(self.pic_path)

    #获取描述
    def get_movie_info(self,htmlx):
        self.movie_info=htmlx.xpath("//div[@class='intro-box-txt']/p[2]/text()")[0]
        self.ui.episodeinfo.setText(self.movie_info)
    #获取集数名称和链接
    def get_episode(self,htmlx):
        self.episode_name=htmlx.xpath("//div[@id='ji_show_1_0']//div[@class='drama_page']/a/text()")
        self.episode_href=htmlx.xpath("//div[@id='ji_show_1_0']//div[@class='drama_page']/a/@href")

    #把集数名称回写到GUI中
    def set_episodelist(self):
        qlist=QStringListModel()
        qlist.setStringList(self.episode_name)
        self.ui.episodelist.setModel(qlist)
        self.ui.episodelist.setToolTip('双击播放单集')

    #3-播放页
    #选择集数进行播放
    def choose_episode(self,index):
        episode_url=self.mother_url+self.episode_href[index.row()]
        #根据选择的集数,发送请求获取信息
        req = requests.Request(method='get', url=episode_url, headers=self.headers)
        episode_pre_html = self.send_requests(req)
        episode_html_xpath=content_xpath(episode_pre_html)
        #获取页面的m3u8地址
        self.get_m3u8(episode_html_xpath)
        #通过m3u8地址进行播放
        self.play_episode()

    def get_m3u8(self,htmlx):
        pre_m3u8 =htmlx.xpath("//div[@id='cms_player']/iframe/@src")[0]
        self.m3u8_url =pre_m3u8.split('=')[-1]

    #只打开一个播放器窗口进行播放
    def play_episode(self):
        try:
            subprocess.Popen(self.pot_path+' '+self.m3u8_url + ' /autoplay')
        except Exception as e:
            QMessageBox.warning(
                self.ui,
                '请看提示',
                '请确认是否输入播放器地址')

    #把所有集数加入到播放器列表中
    def play_all_episode(self):
        #通过遍历发请求获取所有m3u8地址
        try:
            for href in self.episode_href:
                total_url=self.mother_url+href
                req = requests.Request(method='get', url=total_url, headers=self.headers)
                episode_pre_html = self.send_requests(req)
                episode_html_xpath = content_xpath(episode_pre_html)
                self.get_m3u8(episode_html_xpath)
                subprocess.Popen(self.pot_path+ ' ' + self.m3u8_url + ' /add')
        except Exception as e:
            QMessageBox.warning(
                self.ui,
                '请看提示',
                '请确认是否输入播放器地址')

    #清除缓存:清除images,清除播放器列表,关闭播放器
    def clear_source(self):
        if  os.path.exists('./images'):
            shutil.rmtree('./images')
            # 清空播放列表D:\soft\PotPlayer\Playlist
            playlist = os.path.dirname(self.pot_path) + '\Playlist'
            if playlist:
                print(playlist)
                shutil.rmtree(playlist)
            # subprocess.CompletedProcess(self.pot_path,returncode=1)
            # print("----------------------------- show all processes info --------------------------------")
            # # show processes info
            # pids = psutil.pids()
            # for pid in pids:
            #     p = psutil.Process(pid)
            #     # get process name according to pid
            #     process_name = p.name()
            #
            #     print("Process name is: %s, pid is: %s" % (process_name, pid))

        pids = psutil.pids()
        for pid in pids:
            p = psutil.Process(pid)
            # get process name according to pid
            process_name = p.name()
            # kill process "sleep_test1"
            if 'PotPlayerMini64.exe' == process_name:
                print("kill specific process: name(%s)-pid(%s)" % (process_name, pid))
                os.kill(pid, signal.SIGINT)
        #这个方法会弹出cmd命令黑框
        # process_name=self.pot_path.split('\\')[-1]
        # os.system(f'taskkill /f /im {process_name}' )


if __name__ == '__main__':
    app =QApplication()
    app.setStyle('Fusion')
    windows =Nmplayer()
    windows.ui.show()
    app.exec_()

效果

图 1

问题记录

  • 1、
    Traceback (most recent call last):
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 132, in choose_movie
    self.get_movie_pic(movie_html_xpath)
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 141, in get_movie_pic
    movie_pic_url=self.mother_url+movie_pic_pre_url
    TypeError: can only concatenate str (not "list") to str

    • 原因;xpath定位返回是一个列表
    • 解决方案:取列表第一个元素
  • 2、
    Traceback (most recent call last):
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 132, in choose_movie
    self.get_movie_pic(movie_html_xpath)
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 148, in get_movie_pic
    with open(self.pot_path,'wb') as f :
    FileNotFoundError: [Errno 2] No such file or directory: ''

  • 原因:文件不存在

  • 解决方案:增加文件存在判断

  • 3、
    ERROR: Could not find a version that satisfies the requirement lxml (from versions: none)
    ERROR: No matching distribution found for lxml

    • 原因:当前镜像源没有匹配的版本
    • 解决方案:更换镜像源:pip install lxml -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
  • 4、
    raise ValueError("check_hostname requires server_hostname")
    ValueError: check_hostname requires server_hostname

    • 原因:request跟系统不兼容
    • 解决方案:使用session会话进行发请求
  • 5、
    Traceback (most recent call last):
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 186, in choose_episode
    self.play_episode()
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 193, in play_episode
    subprocess.Popen(self.pot_path+' '+self.m3u8_url)
    File "D:\soft\python3.8\lib\subprocess.py", line 854, in init
    self._execute_child(args, executable, preexec_fn, close_fds,
    File "D:\soft\python3.8\lib\subprocess.py", line 1307, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
    OSError: [WinError 87] 参数错误。

    • 原因:未输入播放器地址
    • 解决方案:输入播放器地址,再播放
  • 6、
    Traceback (most recent call last):
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 215, in
    windows =Nmplayer()
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 55, in init
    self.get_pot_path()
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 100, in get_pot_path
    with open('pot.config', 'r') as f:
    FileNotFoundError: [Errno 2] No such file or directory: 'pot.config'

  • 原因:文件不存在

  • 解决方案:增加文件存在判断

  • 7、
    Traceback (most recent call last):
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 96, in get_pot_path
    os.remove('pot.config')
    PermissionError: [WinError 32] 另一个程序正在使用此文件,进程无法访问。: 'pot.config'

  • 原因:打开文件的同时进行删除文件

  • 解决方案:打开文件,读取完成,关闭后再进行删除

  • 8、
    Traceback (most recent call last):
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 190, in choose_episode
    self.play_episode()
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 197, in play_episode
    subprocess.Popen(self.pot_path+' '+self.m3u8_url + ' /autoplay')
    File "D:\soft\python3.8\lib\subprocess.py", line 854, in init
    self._execute_child(args, executable, preexec_fn, close_fds,
    File "D:\soft\python3.8\lib\subprocess.py", line 1307, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
    FileNotFoundError: [WinError 2] 系统找不到指定的文件。

  • 原因:文件不存在

  • 解决方案:增加文件存在判断

  • 9、
    Traceback (most recent call last):
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 237, in clear_source
    os.kill(pid, signal.SIGKILL)
    AttributeError: module 'signal' has no attribute 'SIGKILL'
    kill specific process: name(PotPlayerMini64.exe)-pid(3460)

  • 原因:win10已经没有此信号:signal.SIGKILL

  • 解决方案:更换信号为signal.SIGINT

  • 10、
    Traceback (most recent call last):
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 130, in get_main_html
    main_html_xpath=content_xpath(main_pre_html)
    File "D:/py project/DevTools/爬虫/nmgk.com/nmgkplayer.py", line 31, in content_xpath
    content=html.content.decode('utf-8')
    AttributeError: 'NoneType' object has no attribute 'content'

  • 原因:返回结果为空

  • 解决方案:进行判空

posted @ 2021-07-22 22:23  幸福一家  阅读(412)  评论(0编辑  收藏  举报