Fork me on GitHub

pyqt5中多线程爬虫

 

 

设立爬虫Class,继承PyQt5中的QThread

在run()函数中使用普通线程(threading.Thread)并发爬取

 

 

整体代码:

import sys
import pandas as pd
import json,requests,time,threading
from PyQt5.QtWidgets import QMainWindow, QApplication,QVBoxLayout,QMessageBox
from ui.ui_test import Ui_MainWindow
from PyQt5.QtGui import QStandardItemModel,QStandardItem
from PyQt5.QtCore import QThread,pyqtSignal


class kuandaiSpiderThread(QThread):
    '''
    Qt worker thread that looks up broadband-account details over HTTP.

    run() fans the account list out to a small pool of plain
    ``threading.Thread`` workers, waits for all of them, then emits the
    collected rows and a summary message through the two signals below.
    '''
    # Custom signals, emitted from run() once every worker has finished.
    sinOut = pyqtSignal(list)   # carries the list of successfully fetched rows
    sinOut_lable = pyqtSignal(str)    # carries the summary text for the status label

    # Upper bound on the number of concurrent worker threads.
    MAX_WORKERS = 5
    # Seconds to wait for the server before an account is counted as failed.
    REQUEST_TIMEOUT = 30

    def __init__(self, datas):
        '''
        :param datas: list of account-number strings to query
        '''
        super().__init__()
        self.kuandaihao_list = datas
        # NOTE(review): credentials and session token are hard-coded; the
        # Blade-Auth value looks like a JWT and will presumably expire --
        # consider loading these from configuration instead.
        # The original author noted that requests succeed with or without a cookie.
        self.headers = {
            'Accept': 'application/json, text/plain, */*',
            'Accept-Encoding': 'gzip, deflate, br',
            'Authorization': 'Basic c2YtaWRzOnNmLWlkcy1zZWNyZXQ=',
            'Blade-Auth': 'bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzUxMiJ9.eyJpc3MiOiJpc3N1c2VyIiwiYXVkIjoiYXVkaWVuY2UiLCJ0ZW5hbnRfaWQiOiIwMDAwMDAiLCJ1c2VyX2lkIjoidHRfZHFfd2FuZ3hpbnpoaSIsInRydWVfbmFtZSI6IueOi-W_g-W_lyIsInVzZXJfbmFtZSI6InR0X2RxX3dhbmd4aW56aGkiLCJkZXRhaWwiOnsiaXNBZG1pbiI6ZmFsc2UsIm1hbmFnZWRQcm9kcyI6bnVsbCwiZGlzdHJpY3RJZCI6IkRJU1RSSUNULTAwMDAxLTAwMDE5LTAwMDA4IiwidGVsZXBob25lIjoiMCIsIm1vYmlsZVBob25lIjoiMTU3MTQ2MTAwODYiLCJkZXB0Q29kZSI6Ii0xIiwiZGVwdElkIjoiSklBS0VfU1lTVEVNX0RFUFQtNDAyODgwYjQ1YjhlMjQ0YzAxNWI4ZTM3NDA3ODAwMTEiLCJ1c2VyRGlzdHJpY3RJZCI6IkRJU1RSSUNULTAwMDAxLTAwMDE5LTAwMDA4IiwiYXJlYSI6bnVsbCwib3JnYW5pemF0aW9uIjpudWxsLCJlbWFpbCI6IjAiLCJ1c2VyVHlwZSI6IjAiLCJpc0xvY2tlZCI6IjAiLCJ1c2VySXAiOiIxMC4xMTIuMTI1LjMwIiwibG9naW5UaW1lIjoiMTcwODkwNzYxNDUzOSJ9LCJkZXB0X2lkIjoiSklBS0VfU1lTVEVNX0RFUFQtNDAyODgwYjQ1YjhlMjQ0YzAxNWI4ZTM3NDA3ODAwMTEiLCJ0b2tlbl90eXBlIjoiYWNjZXNzX3Rva2VuIiwiY2xpZW50X2lkIjoic2YtaWRzIiwiZXhwIjoxNzA4OTU4MDE0fQ.JXEZZ3EI-tBUiUvfzRA9q5SZxuDK74LLYQlWIH7zEk_rfszUIaOcDvREGEsnPZejODqnUn0h5UtP1XsnUTZbSg',
            'Connection': 'keep-alive',
            # NOTE(review): fixed length copied from a captured request;
            # presumably recomputed by the HTTP library -- confirm.
            'Content-Length': '74',
            'Content-Type': 'application/json;charset=UTF-8',
            'Cookie': 'JSESSIONID=Bwq6Ts7HFqrEunwSQeIQtZSzEWlO62Iczgm2XpCk',
            'Host': '10.151.71.133:18088',
            'Referer': 'https://10.151.71.133:18088/ids-web/',
            'Sec-Ch-Ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"',
            'Sec-Ch-Ua-Mobile': '?1',
            'Sec-Ch-Ua-Platform': '"Android"',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-origin',
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.289 Mobile Safari/537.36'
        }
        self.url = 'https://10.151.71.133:18088/bgw/sf-ids-service/cls/POOR_QUALITY_TAG_DETAIL/query_user_info'
        self.output_list = []
        self.fail_output_list = []

    @staticmethod
    def _split_into_groups(items, max_groups):
        '''
        Split *items* into at most *max_groups* contiguous slices.

        :return: list of lists; empty when *items* is empty
        '''
        total = len(items)
        if total == 0:
            return []
        # Ceiling division keeps the number of slices <= max_groups.
        step = -(-total // max_groups)
        return [items[start:start + step] for start in range(0, total, step)]

    def spider(self, headers, url, datas):
        '''
        Query every account in *datas* and record the outcome.

        Successful payloads are appended to ``self.output_list``; accounts
        that return no data, or whose request/parse fails, are appended to
        ``self.fail_output_list``. Blank entries are skipped.

        :param headers: HTTP headers sent with every request
        :param url: endpoint that receives the POST
        :param datas: iterable of account-number strings
        '''
        from urllib3.exceptions import InsecureRequestWarning
        import urllib3
        # Requests below use verify=False; silence the per-request warning.
        urllib3.disable_warnings(InsecureRequestWarning)

        for kuandaihao in datas:
            kuandaihao = str(kuandaihao).strip()
            if not kuandaihao:
                continue
            data = {
                'queryParam': {'varMap': {'accountNum': kuandaihao}},
                'singleRow': 'true'
            }
            dumpJsonData = json.dumps(data)
            try:
                response = requests.post(url=url, headers=headers, data=dumpJsonData,
                                         verify=False, timeout=self.REQUEST_TIMEOUT)
                json_data = json.loads(response.text)
            except (requests.RequestException, ValueError) as e:
                # One bad request must not abort the rest of this worker's slice.
                print(f'{kuandaihao}: {e}')
                self.fail_output_list.append(kuandaihao)
                continue

            result = json_data.get('data') if isinstance(json_data, dict) else None
            if result:
                self.output_list.append(result)
                print(result)
            else:
                self.fail_output_list.append(kuandaihao)

    def run(self):
        '''
        Fetch all accounts using up to MAX_WORKERS plain threads, then emit
        the results and a summary message.

        :return:
        '''
        start_time = time.time()
        data_2wei_list = self._split_into_groups(self.kuandaihao_list, self.MAX_WORKERS)

        threads = []
        for each_group in data_2wei_list:
            t = threading.Thread(target=self.spider, args=(self.headers, self.url, each_group,))
            threads.append(t)
            t.start()

        # Block this QThread (not the GUI thread) until every worker is done.
        for t in threads:
            t.join()

        end_time = time.time()
        self.sinOut.emit(self.output_list)  # publish every fetched row
        tips = f'查询完成!成功查询{len(self.output_list)}条数据!使用{end_time - start_time}秒!失败账号为{self.fail_output_list}!'
        self.sinOut_lable.emit(tips)    # publish the completion summary for the status label



class Window(QMainWindow, Ui_MainWindow):
    '''
    Main window: reads account numbers from the text box, runs the spider
    thread, then shows the results in the table view and saves them to
    ``data.xlsx``.
    '''

    def __init__(self):
        super().__init__()
        self.setupUi(self)  # build the widgets defined in the generated UI class
        self.connect_signals()  # wire up signal/slot connections

    def connect_signals(self):
        '''Connect the buttons to their handlers.'''
        self.btn_spider.clicked.connect(self.btn_spider_clicked)  # start the query
        self.btn_get.clicked.connect(self.btn_openfile_clicked)  # open the saved Excel file

    def btn_spider_clicked(self):
        '''
        Start the background spider thread for the accounts typed in the text box.

        :return:
        '''
        self.btn_spider.setEnabled(False)
        get_key = self.textEdit.toPlainText()   # raw text entered by the user
        # One account per line; drop blank lines and surrounding whitespace.
        datas = [line.strip() for line in get_key.splitlines() if line.strip()]
        # Keep a reference on the window so the thread object stays alive;
        # not named ``thread`` because that would shadow QObject.thread().
        self.spider_thread = kuandaiSpiderThread(datas)
        self.spider_thread.sinOut.connect(self.data_show)
        self.spider_thread.sinOut_lable.connect(self.tips_show)
        self.spider_thread.start()

    def tips_show(self, tips):
        '''Show the summary text *tips* in the bottom label.'''
        self.low_lable.setText(tips)

    def data_show(self, kuandai_details):
        '''
        Save the fetched rows to ``data.xlsx`` and display them in the table view.

        :param kuandai_details: list of rows emitted by the spider thread
        '''
        self.btn_spider.setEnabled(True)
        data = pd.DataFrame(kuandai_details)
        try:
            data.to_excel(r'./data.xlsx')
        except Exception as e:
            # UI boundary: report the failure to the user rather than crash the slot.
            print(e)
            QMessageBox.warning(self, '警告', f'请关闭需要保存的excel文件:{e}', QMessageBox.Close, )

        row_count, column_count = data.shape
        self.model = QStandardItemModel(row_count, column_count)
        # Horizontal header labels; converted to str because Qt expects strings.
        self.model.setHorizontalHeaderLabels([str(label) for label in data.columns])
        for row in range(row_count):
            for column in range(column_count):
                # Every cell is rendered as text.
                item = QStandardItem(str(data.iat[row, column]))
                self.model.setItem(row, column, item)
        # The table view already lives in the window's UI; only the model changes.
        self.table_view.setModel(self.model)

    def btn_openfile_clicked(self):
        '''Open ``data.xlsx`` with the system's default application.'''
        import os
        try:
            os.startfile(r'.\data.xlsx')
        except (OSError, AttributeError) as e:
            # OSError: file missing / no handler; AttributeError: os.startfile
            # is only available on Windows.
            QMessageBox.warning(self, '警告', f'无法打开excel文件:{e}', QMessageBox.Close, )



def main():
    '''Create the Qt application, show the main window and run the event loop.'''
    qt_app = QApplication(sys.argv)
    main_window = Window()
    main_window.show()
    # Propagate the event loop's exit status to the process.
    exit_code = qt_app.exec_()
    sys.exit(exit_code)


# Start the GUI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
View Code

 

posted @ 2024-03-02 18:44  iamorz  阅读(19)  评论(0编辑  收藏  举报