# python 某音文件下载
"""Download Douyin videos given their detail-page URLs.

Workflow:
  1. ``down_file`` obtains a browser driver from the project-local
     ``commonMethod`` module and walks the list of video page URLs.
  2. ``get_douyin_url`` loads each page in the browser and extracts the
     direct ``v3-web.douyinvod.com`` media URL from the rendered HTML.
  3. ``dowfile_v3_web_douyinvod_com`` downloads that media URL to disk,
     continuing with HTTP Range requests when the server answers with a
     partial response.
"""
import datetime
import json
import os
import re
import time

import requests

# from pyquery import PyQuery as pq
import commonMethod

# The capture group deliberately spans the rest of the src attribute; it
# includes the closing quote, which get_douyin_url() strips afterwards.
pattern_1 = '<source class="" src="//v3-web.douyinvod.com/(.{486,488}) type="">'
# Alternate hosts seen in the page markup (currently not handled):
# pattern_2 = '<source class="" src="//v26-web.douyinvod.com/(.{486,488}) type="">'
# pattern_3 = '<source class="" src="//www.douyin.com/aweme/v1/play/(.+) type="">'
# Raw string so that "\d" is a regex escape rather than an invalid string escape.
pattern_4 = r'https://www.douyin.com/video/(\d+)'

# Size in bytes of each follow-up HTTP Range request.
_PAGE_SIZE = 1024 * 128
# Seconds before a request without any server response is abandoned.
_REQUEST_TIMEOUT = 30
# How many times one byte range is attempted before the download is aborted.
_MAX_CHUNK_ATTEMPTS = 3
# Pause after a failed range request / between successful range requests.
_RETRY_DELAY_SECONDS = 10
_CHUNK_DELAY_SECONDS = 1

_CONTENT_RANGE_RE = re.compile(r'bytes\s+(\d+)-(\d+)/(\d+)')


def get_info_by_pattern(text, pattern):
    """Return ``re.findall`` results of ``pattern`` applied to ``text``."""
    return re.compile(pattern).findall(text)


def get_headdouyinvod_com():
    """Return a fresh dict of browser-like request headers for the media host."""
    headers = {
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
        "sec-ch-ua": "\"Google Chrome\";v=\"95\", \"Chromium\";v=\"95\", \";Not A Brand\";v=\"99\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\"",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-User": "?1",
        "Sec-Fetch-Dest": "document",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    }
    return headers


def get_douyin_url(driver1, sharl_url):
    """Resolve a Douyin video page to its direct media URL.

    :param driver1: browser driver; it must provide ``get``, ``add_cookie``,
        ``refresh`` and ``execute_script`` (presumably a Selenium WebDriver
        -- confirm against ``commonMethod.getDriver``).
    :param sharl_url: full ``https://www.douyin.com/video/<id>`` URL, or just
        the numeric video id.
    :return: ``(media_url, video_id)`` on success, ``('', '')`` when the page
        could not be loaded or contained no matching ``<source>`` element.
    """
    try:
        if 'https' not in sharl_url:
            sharl_url = 'https://www.douyin.com/video/' + sharl_url

        VID = ''
        id_matches = get_info_by_pattern(sharl_url, pattern_4)
        if id_matches:
            VID = id_matches[0]

        # The site is opened first and only then are the cookies installed.
        driver1.get('https://www.douyin.com')
        # NOTE(review): the published source elided the remaining cookies
        # with "..."; add the rest of a valid session's cookies here. The
        # one cookie shown also carries a fixed 'expiry' timestamp.
        list_cooke = [
            {'domain': '.douyin.com', 'expiry': 1698999663, 'httpOnly': False,
             'name': 'VIDEO_FILTER_MEMO_SELECT', 'path': '/', 'secure': False,
             'value': '%7B%22expireTime%22%3A1698999663897%2C%22type%22%3A1%7D'},
        ]
        for cook in list_cooke:
            driver1.add_cookie(cook)
        time.sleep(5)
        driver1.refresh()

        driver1.get(sharl_url)
        time.sleep(5)  # give the page time to render the <source> element
        txt = driver1.execute_script("return document.documentElement.outerHTML")
        # Turn "&amp;" into "&" so the query string of the media URL is usable.
        txt = txt.replace('amp;', '')

        source_matches = get_info_by_pattern(txt, pattern_1)
        if source_matches:
            # pattern_1 captures the closing quote of the attribute; drop it.
            media_path = source_matches[0].replace('"', '')
            herf1 = 'https://v3-web.douyinvod.com/' + media_path
            print(herf1)
            return herf1, VID
    except Exception as ex:
        print('获取链接发生异常:', sharl_url, ex)
    # Reached on an exception and when no <source> matched: always hand the
    # caller a 2-tuple so that tuple unpacking never fails.
    return '', ''


def _parse_content_range(value):
    """Parse a ``Content-Range: bytes start-end/total`` header value.

    :return: ``(start, end, total)`` as ints, or ``None`` when the header is
        absent or not in that form (for example ``bytes */123``).
    """
    if not value:
        return None
    match = _CONTENT_RANGE_RE.search(str(value))
    if match is None:
        return None
    start, end, total = (int(group) for group in match.groups())
    return start, end, total


def _write_body(response, fileName, mode):
    """Stream ``response``'s body into ``fileName``; return the bytes written."""
    written = 0
    with open(fileName, mode) as f:
        for chunk in response.iter_content(chunk_size=64 * 1024):
            if chunk:
                f.write(chunk)
                written += len(chunk)
    return written


def _fetch_range(file_url, base_headers, first_byte, last_byte):
    """Download the inclusive byte range into memory, retrying on failure.

    The range is buffered (at most ``_PAGE_SIZE`` bytes) instead of being
    streamed to disk so that a failed attempt never leaves partial data in
    the output file.

    :raises RuntimeError: the server did not return the requested range.
    :raises requests.RequestException: the last attempt failed at network level.
    """
    range_headers = dict(base_headers)  # never mutate the caller's headers
    range_headers['Range'] = 'bytes=%d-%d' % (first_byte, last_byte)
    expected = last_byte - first_byte + 1

    for attempt in range(1, _MAX_CHUNK_ATTEMPTS + 1):
        try:
            response = requests.get(file_url, headers=range_headers,
                                    stream=True, timeout=_REQUEST_TIMEOUT)
            try:
                if response.status_code != 206:
                    raise RuntimeError(
                        'unexpected HTTP status %d for range %d-%d'
                        % (response.status_code, first_byte, last_byte))
                data = response.content
            finally:
                response.close()
            if len(data) != expected:
                raise RuntimeError(
                    'short read for range %d-%d: got %d bytes'
                    % (first_byte, last_byte, len(data)))
            return data
        except (requests.RequestException, RuntimeError) as ex:
            print(ex)
            if attempt == _MAX_CHUNK_ATTEMPTS:
                raise
            time.sleep(_RETRY_DELAY_SECONDS)


def dowfile_v3_web_douyinvod_com(file_url, fileName):
    """Download ``file_url`` to ``fileName``.

    The first request is sent without a ``Range`` header. If the server
    answers with a ``Content-Range`` header, the rest of the file is fetched
    in ``_PAGE_SIZE``-byte Range requests, starting at the first byte not yet
    written. An existing ``fileName`` is overwritten.

    Errors are reported on stdout and not raised, so a failed download does
    not stop the caller's loop over further videos.

    :param file_url: direct media URL.
    :param fileName: path of the output file.
    """
    request_headers = get_headdouyinvod_com()
    strstart = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    print('开始下载:', strstart)
    try:
        response = requests.get(file_url, headers=request_headers,
                                stream=True, timeout=_REQUEST_TIMEOUT)
        try:
            if response.status_code not in (200, 206):
                raise RuntimeError(
                    'unexpected HTTP status %d' % response.status_code)
            print(dict(response.headers))
            content_range = _parse_content_range(
                response.headers.get('Content-Range'))
            # 'wb': start from an empty file so a re-run does not append to
            # the result of a previous download.
            written = _write_body(response, fileName, 'wb')
        finally:
            response.close()

        if content_range is not None:
            start, _end, total_length = content_range
            next_byte = start + written
            remaining = max(total_length - next_byte, 0)
            chunk_count = -(-remaining // _PAGE_SIZE)  # ceiling division
            chunk_index = 0
            while next_byte < total_length:
                chunk_index += 1
                last_byte = min(next_byte + _PAGE_SIZE, total_length) - 1
                print(chunk_index, chunk_count)
                data = _fetch_range(file_url, request_headers,
                                    next_byte, last_byte)
                with open(fileName, 'ab') as f:
                    f.write(data)
                next_byte = last_byte + 1
                time.sleep(_CHUNK_DELAY_SECONDS)

        strend = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print('完成下载:', strstart, strend)
    except Exception as ex:
        print('下载文件发生异常:', file_url, ex)


def down_file(sharl_url_list, strDirectory):
    """Download every Douyin video in ``sharl_url_list`` into ``strDirectory``.

    :param sharl_url_list: list of video detail URLs, e.g.
        ``['https://www.douyin.com/video/7294079788010999040',
        'https://www.douyin.com/video/7293552737067928868']``.
    :param strDirectory: directory the ``<video id>.mp4`` files are written
        to, e.g. ``D:/douyin_file_down/202310``; created when missing.
    :return: None. Errors are printed, not raised.
    """
    driver1 = None
    try:
        if strDirectory:
            os.makedirs(strDirectory, exist_ok=True)
        # NOTE(review): the meaning of getDriver's two arguments is defined
        # in commonMethod, which is not visible here.
        driver1 = commonMethod.getDriver('', False)
        for sharl_url in sharl_url_list:
            file_url, VID = get_douyin_url(driver1, sharl_url)
            if not file_url:
                continue  # link extraction failed; already reported
            fileName = os.path.join(strDirectory, VID + '.mp4')
            dowfile_v3_web_douyinvod_com(file_url, fileName)
    except Exception as ex:
        print(ex)
    finally:
        # Release the browser even when one of the steps above failed.
        if driver1 is not None:
            try:
                driver1.close()
            except Exception as ex:
                print(ex)


if __name__ == "__main__":
    # Output directory
    strDirectory = 'D:/douyin/file/202310'
    sharl_url_list = [
        'https://www.douyin.com/video/7293555365818453274',
        'https://www.douyin.com/video/7293552737067928868',
        'https://www.douyin.com/video/7293555206388780324',
    ]
    down_file(sharl_url_list, strDirectory)