爬虫 - 斗鱼弹幕

1. TCP协议 socket连接服务器
2. 发送登录请求, 发送进入弹幕分组请求
5. 每隔45秒发送心跳信息给弹幕服务器
6. 断开连接,客户端发送登出消息,客户端关闭TCP连接

总结: 首先在 `__init__` 中建立 socket 连接和 mongodb 连接 -- 登录 -- 开一个守护线程(keep_alive)发送心跳,
       最后由 get 函数(线程)接收弹幕和礼物消息

    def main(self):
        """Run the crawler: load gift names, log in, then receive messages.

        A daemon thread sends heartbeats in the background while a single
        receiver thread runs ``self.get``; this method blocks until that
        receiver thread finishes.
        """
        # Fetch the gift-id -> gift-name table used to label gift messages.
        self.get_gift_dict()
        # Log in and join the room's danmaku group.
        self.login()
        # Heartbeats run on a daemon thread so they never keep the process
        # alive on their own. ``daemon=True`` replaces the deprecated
        # ``Thread.setDaemon()`` call.
        heartbeat = Thread(target=self.keep_alive, daemon=True)
        heartbeat.start()
        # One receiver thread is enough: the previous ``while True`` loop
        # would only ever re-spawn it in a tight loop if ``get`` returned.
        receiver = Thread(target=self.get)
        receiver.start()
        receiver.join()
            
'''
获得礼物列表的两个链接
url1 = 'https://webconf.douyucdn.cn/resource/common/gift/flash/gift_effect.json'
url2 = 'https://webconf.douyucdn.cn/resource/common/prop_gift_list/prop_gift_config.json'     
'''

完整代码

import socket
import re
import pymongo
import logging
import time
from threading import Thread
import json
import requests
from fake_useragent import UserAgent

# Root logger: INFO and above, timestamped "time - level - message" lines.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
class DouYu(object):
    """Crawler for the danmaku (bullet chat) and gift stream of one Douyu room.

    On construction the object opens a TCP connection to the danmaku server
    and a MongoDB collection named ``DouYu-<room_id>`` in the ``Spider``
    database. ``main()`` then loads the gift table, logs in, starts a
    heartbeat thread and stores every chat / gift message it receives.
    """

    # Packet layout used by ``send_msg`` and expected back from the server:
    # 4-byte length, the same 4-byte length again, 4 bytes of type/flags,
    # then the NUL-terminated UTF-8 body. Both length fields count everything
    # after the first length field (i.e. body + 8).
    _HEADER_SIZE = 12
    _CLIENT_MSG_TYPE = 689
    # Sanity bound used to detect a desynchronised stream.
    _MAX_FRAME_SIZE = 1 << 20
    # Seconds to wait for the gift-configuration HTTP endpoints.
    _HTTP_TIMEOUT = 10

    def __init__(self, room_id):
        """Connect to the danmaku server and to the local MongoDB.

        Args:
            room_id: Id of the Douyu room whose messages are collected.
        """
        self.room_id = room_id
        # Maps gift id (int) -> gift name, e.g. 4 -> rocket.
        self.gift_dic = {}
        # TCP connection to the danmaku server.
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        host = socket.gethostbyname("openbarrage.douyutv.com")
        port = 8601
        self.client.connect((host, port))
        # MongoDB collection holding every chat and gift document of the room.
        db = pymongo.MongoClient('127.0.0.1', 27017)
        self.col = db['Spider']['DouYu-{}'.format(room_id)]

    def _fetch_gift_config(self, url, headers):
        """Download one ``DYConfigCallback(...)`` JSONP resource as a dict."""
        # NOTE(review): certificate verification is disabled here, as in the
        # original code; enable it if the endpoint's certificate is valid.
        response = requests.get(
            url, headers=headers, verify=False, timeout=self._HTTP_TIMEOUT
        )
        # Strip the JSONP wrapper. ``rstrip`` removes a *set* of characters,
        # which is safe because the JSON payload itself ends with '}'.
        payload = response.text.replace('DYConfigCallback(', '').rstrip(');')
        return json.loads(payload)

    def get_gift_dict(self):
        """Fill ``self.gift_dic`` with gift id -> name from both gift lists."""
        headers = {
            'User-Agent': UserAgent().random
        }
        url1 = 'https://webconf.douyucdn.cn/resource/common/gift/flash/gift_effect.json'
        url2 = 'https://webconf.douyucdn.cn/resource/common/prop_gift_list/prop_gift_config.json'
        flash_gifts = self._fetch_gift_config(url1, headers)['data']['flashConfig']
        for gift_id, info in flash_gifts.items():
            self.gift_dic[int(gift_id)] = info['name']
        prop_gifts = self._fetch_gift_config(url2, headers)['data']
        for gift_id, info in prop_gifts.items():
            self.gift_dic[int(gift_id)] = info['name']
        logging.info('getting the gift dic already')

    def send_msg(self, msg):
        """Frame ``msg`` as one protocol packet and send it completely.

        Args:
            msg: Serialized message text such as ``'type@=mrkl/'``.

        Raises:
            OSError: If the socket write fails.
        """
        body = (msg + '\0').encode('utf-8')
        length = len(body) + 8  # body plus the 8 header bytes after the first length
        head = (
            length.to_bytes(4, 'little') * 2
            + self._CLIENT_MSG_TYPE.to_bytes(4, 'little')
        )
        # ``sendall`` retries until every byte is written; a bare ``send``
        # may transmit only part of the header.
        self.client.sendall(head + body)

    def login(self):
        """Send the login request, then join the room's danmaku group."""
        login_msg = 'type@=loginreq/roomid@={}/'.format(self.room_id)
        self.send_msg(login_msg)
        # Join the room's message group.
        join_msg = 'type@=joingroup/rid@={}/gid@=-9999/'.format(self.room_id)
        self.send_msg(join_msg)

        logging.info('login successfully......')

    @classmethod
    def _extract_frames(cls, buffer):
        """Pop every complete packet from ``buffer`` and return their bodies.

        ``buffer`` (a ``bytearray``) is consumed in place; an incomplete
        trailing packet is left for the next ``recv``. Returns a list of
        decoded body strings without the trailing NUL.
        """
        frames = []
        while len(buffer) >= 4:
            length = int.from_bytes(buffer[:4], 'little')
            if length < 8 or length > cls._MAX_FRAME_SIZE:
                # Impossible length: the stream is out of sync, so discard
                # what is buffered instead of waiting forever.
                logging.error('invalid frame length %d, dropping buffer', length)
                buffer.clear()
                break
            total = length + 4
            if len(buffer) < total:
                break
            body = bytes(buffer[cls._HEADER_SIZE:total])
            del buffer[:total]
            frames.append(body.decode('utf-8', 'ignore').rstrip('\0'))
        return frames

    @staticmethod
    def _parse_stt(text):
        """Parse ``key@=value/`` pairs into a dict, undoing ``@S``/``@A`` escapes."""
        fields = {}
        for item in text.split('/'):
            key, sep, value = item.partition('@=')
            if sep:
                fields[key] = value.replace('@S', '/').replace('@A', '@')
        return fields

    def _handle_message(self, text):
        """Store one decoded message if it is a chat or a gift message."""
        fields = self._parse_stt(text)
        msg_type = fields.get('type')
        if msg_type == 'chatmsg':
            chat_dic = {
                'data_type': 'chat',
                'user_id': fields.get('uid'),
                'user_name': fields.get('nn'),
                'text': fields.get('txt'),
                'chat_id': fields.get('cid')
            }
            # ``insert`` was removed in PyMongo 4; ``insert_one`` replaces it.
            self.col.insert_one(chat_dic)
        elif msg_type == 'dgb':
            gift_id = fields.get('gfid', '')
            try:
                gift_name = self.gift_dic[int(gift_id)]
            except (KeyError, ValueError):
                # Unknown gifts are still recorded instead of being dropped.
                gift_name = 'unknown({})'.format(gift_id)
            gift_dic = {
                "data_type": "gift",
                "gift_id": gift_id,
                "gift_name": gift_name,
                "user_id": fields.get('uid'),
                "user_name": fields.get('nn')
            }
            logging.info("{}送出了:{}".format(fields.get('nn'), gift_name))
            self.col.insert_one(gift_dic)

    def get(self):
        """Receive packets until the connection ends, storing chats and gifts.

        Bytes are accumulated across ``recv`` calls because TCP may split or
        merge packets. Returns when the server closes the connection or the
        socket raises an error.
        """
        buffer = bytearray()
        while True:
            try:
                chunk = self.client.recv(2048)
            except OSError as e:
                logging.error(e)
                return
            if not chunk:
                # An empty read means the peer closed the connection.
                logging.info('connection closed by server')
                return
            buffer.extend(chunk)
            for text in self._extract_frames(buffer):
                try:
                    self._handle_message(text)
                except Exception as e:
                    # One bad message must not stop the receive loop.
                    logging.error(e)

    def keep_alive(self):
        """Send a heartbeat every 40 seconds until the socket fails."""
        while True:
            # An older heartbeat form was 'type@=keeplive/tick@=<timestamp>/'.
            msg = 'type@=mrkl/'
            try:
                self.send_msg(msg)
            except OSError as e:
                logging.error(e)
                return
            time.sleep(40)
            logging.info('keep alive......')

    def main(self):
        """Load gift names, log in, start heartbeats and receive messages.

        Blocks until the receiver thread running ``get`` finishes.
        """
        self.get_gift_dict()
        self.login()
        # Heartbeats run on a daemon thread so they never block process exit.
        heartbeat = Thread(target=self.keep_alive, daemon=True)
        heartbeat.start()
        receiver = Thread(target=self.get)
        receiver.start()
        receiver.join()

if __name__ == '__main__':
    # Once logged in and joined to the room, gift and danmaku messages
    # start arriving and are stored by the crawler.
    DouYu(86868).main()

posted on 2019-10-24 13:07  Afrafre  阅读(462)  评论(0)  编辑  收藏  举报