今日头条爬虫关键参数解析

# Browser-driven approach: let the page's own JavaScript compute the request
# parameters instead of re-implementing them.
from selenium import webdriver  # third-party package; install selenium first

firefox = webdriver.Firefox()
firefox.get('https://www.toutiao.com/ch/news_fashion/')  # Toutiao channel page
# Fetch the "as" and "cp" values used in the request link.
ascp = firefox.execute_script('return ascp.getHoney()')
# Fetch the signature value used in the request link.
# NOTE(review): max_behot_time is not defined anywhere in this snippet; it
# must be assigned before this line runs or it raises NameError.
sinature = firefox.execute_script('return TAC.sign(' + str(max_behot_time) + ')')
    def getASCP(max_behot_time=None):
        """Derive the ``as`` / ``cp`` request parameters from a timestamp.

        Args:
            max_behot_time: Integer timestamp to derive the values from. Any
                falsy value (``None`` or ``0``) means "use the current time".

        Returns:
            A ``(AS, CP, c_time)`` tuple, where ``c_time`` is the timestamp
            that was actually used. When the upper-case hex form of the
            timestamp is not exactly 8 characters long, fixed fallback
            ``AS`` / ``CP`` strings are returned instead of computed ones.
        """
        # int() truncates, which matches floor() for the positive values
        # time.time() returns.
        c_time = max_behot_time if max_behot_time else int(time.time())

        hex_time = hex(c_time).upper()[2:]
        digest = hashlib.md5(str(c_time).encode(encoding='utf-8')).hexdigest().upper()

        if len(hex_time) != 8:
            # Fallback constants; c_time is included so that both return
            # paths have the same shape and callers can always unpack three.
            return '479BB4B7254C150', '7E0AC8874BB0985', c_time

        head = digest[:5]
        tail = digest[-5:]
        # Interleave digest characters with hex-timestamp characters.
        # zip() stops after five pairs because head/tail are five chars long.
        s = ''.join(d + h for d, h in zip(head, hex_time))
        r = ''.join(h + d for h, d in zip(hex_time[3:], tail))

        AS = 'A1' + s + hex_time[-3:]
        CP = hex_time[:3] + r + 'E1'
        return AS, CP, c_time

    def get_sinature(self, c_time):
        """Return the signature the page's JavaScript computes for ``c_time``.

        Loads the Toutiao "internet" channel page in ``self.firefox`` and
        evaluates ``TAC.sign(c_time)`` in that page.

        Args:
            c_time: Value interpolated into the ``TAC.sign(...)`` call.

        Returns:
            Whatever ``execute_script`` returns for the ``TAC.sign`` call.
        """
        # `self` was missing from the signature although the body reads
        # self.firefox, so the function could not run as written.
        self.firefox.get('https://www.toutiao.com/ch/internet/')
        script = 'return TAC.sign({})'.format(c_time)
        return self.firefox.execute_script(script)

 

posted @ 2018-03-30 17:09  失落的黎明  阅读(713)  评论(0)  编辑  收藏  举报