Python 自动爬取指定用户视频

import random
import time

import requests
import re


def log(url, save_dir='F:/ceshi/背景', max_empty_retries=30):
    """Print a Douyin user's profile summary and download the user's videos.

    The share link is requested first; the ``sec_uid`` query value is read
    from the final (post-redirect) URL.  The profile endpoint is then queried
    for nickname / video count / follower count / following count, and the
    post-list endpoint is paged through with ``max_cursor``; every listed
    video is fetched and written to ``<save_dir>/<description>.mp4``.

    Args:
        url: Share link entered by the user.
        save_dir: Directory the ``.mp4`` files are written to.  Created if it
            does not exist.  Defaults to the path the script always used.
        max_empty_retries: How many extra times a page is re-requested when
            the response contains an empty ``aweme_list`` before giving up.

    Returns:
        None.  Progress is reported with ``print``.

    Raises:
        ValueError: If no ``sec_uid`` can be found in the redirected URL.
    """
    import os  # local import: only this function needs path handling

    session = requests.session()
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'}

    # Follow the share link; sec_uid is taken from the URL we end up on.
    res = session.get(url=url, headers=headers)
    # [\w-]+ accepts any number of hyphens (the old pattern stopped after two
    # and silently truncated longer ids).
    sec_uid_match = re.search(r'sec_uid=([\w-]+)', res.url)
    if sec_uid_match is None:
        raise ValueError("no sec_uid found in redirected URL: %s" % res.url)
    sec_uid = sec_uid_match.group(1)

    # Profile lookup: total video count, nickname, followers, following.
    profile_url = 'https://www.iesdouyin.com/web/api/v2/user/info/?sec_uid={0}'.format(sec_uid)
    profile_text = session.get(profile_url).text

    def _profile_field(pattern, default='未知'):
        # First capture group of `pattern` in the profile text, or `default`
        # ("unknown") when absent, instead of crashing with IndexError.
        found = re.search(pattern, profile_text)
        return found.group(1) if found else default

    # Nicknames may contain spaces, emoji or punctuation, so capture up to the
    # closing (unescaped) quote rather than only \w characters.
    user_name = _profile_field(r'"nickname":"((?:[^"\\]|\\.)*)"')
    video_count = _profile_field(r'"aweme_count":(\d+)', None)
    followers = _profile_field(r'"follower_count":(\w+)')
    following = _profile_field(r'"following_count":(\w+)')
    total = int(video_count) if video_count is not None else None

    print("用户名:%s" % user_name)
    print("视频数量:%s" % (video_count if video_count is not None else '未知'))
    print("粉丝数量:%s" % followers)
    print("本人关注:%s" % following)

    # One template serves every page; the first page simply uses max_cursor=0.
    # The _signature value is hard-coded exactly as in the original script.
    page_url = ("https://www.iesdouyin.com/web/api/v2/aweme/post/?sec_uid={0}&count=21"
                "&max_cursor={1}&aid=1128&_signature=dF8skQAAK0iTKNSXi9av.XRfLI&dytk=")

    os.makedirs(save_dir, exist_ok=True)
    attempts = max(0, max_empty_retries) + 1
    used_names = set()  # file names written during this run (collision guard)
    max_cursor = 0
    index = 0
    while True:
        # The endpoint can answer with an empty aweme_list; retry the same
        # cursor a bounded number of times with a short random pause instead
        # of spinning forever without delay.
        videos = []
        page = {}
        for attempt in range(attempts):
            if attempt:
                time.sleep(random.uniform(0.5, 1.5))
            page = session.get(url=page_url.format(sec_uid, max_cursor), headers=headers).json()
            videos = page.get('aweme_list') or []
            if videos:
                break
        if not videos:
            print("连续%d次未获取到视频列表,停止下载" % attempts)
            break

        # Cursor for the next page.
        max_cursor = page['max_cursor']
        for s in videos:
            index += 1
            stats = s['statistics']
            # Video link (the original author describes it as watermark-free).
            video_url = s['video']['play_addr_lowbr']['url_list'][0]
            # Description with #hashtags and @mentions removed.
            text = re.sub(r'[#@]\w+', '', s['desc'])
            print(
                str(index) + "、视频名称为:{0},点赞数为:{1},评论数为:{2},分享数量为:{3},视频无水印地址为:{4}".format(
                    text, str(stats["digg_count"]), str(stats["comment_count"]),
                    str(stats["share_count"]), video_url))

            # Build a file name that is valid on Windows and unique within
            # this run: forbidden characters become "_", an empty description
            # falls back to the running index, and duplicates get the index
            # appended so earlier downloads are not overwritten.
            file_name = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', text).strip()
            if not file_name:
                file_name = str(index)
            elif file_name in used_names:
                file_name = '%s_%d' % (file_name, index)
            used_names.add(file_name)

            ir = session.get(video_url, headers=headers)
            # Context manager guarantees the handle is flushed and closed.
            with open(os.path.join(save_dir, '%s.mp4' % file_name), 'wb') as out:
                out.write(ir.content)

        # ">=" rather than "==" so an overshoot still ends the loop.  When the
        # count was not found, the bounded retry above ends the loop instead.
        if total is not None and index >= total:
            break



if __name__ == '__main__':
    # Script entry point: read a share link from stdin and hand it to log().
    log(input("请输入分享链接:"))

 

posted @ 2020-07-25 16:54  我忘不掉  阅读(496)  评论(0)  编辑  收藏  举报