Python: download file

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
def unGz(file_name):
    """
    Decompress a gzip file in place, writing the output next to it.

    The output file name is *file_name* with the ".gz" suffix removed.
    Uses the module-level ``gzip`` import.

    :param file_name: path to a gzip-compressed file
    :return: None
    """
    # Derive the output name by stripping the .gz suffix.
    f_name = file_name.replace(".gz", "")
    # Fixed: the original wrote the bytes returned by GzipFile.read()
    # into a file opened in text mode ("w+"), which raises TypeError on
    # Python 3, and never closed the output handle. Open both files in
    # binary mode and use context managers so handles are released even
    # if an exception occurs mid-copy.
    with gzip.GzipFile(file_name) as g_file:
        with open(f_name, "wb") as out:
            out.write(g_file.read())
 
def unZip(file_name):
    """
    Extract every member of a zip archive into "<file_name>_files/".

    The target directory is created if it does not already exist.
    Uses the module-level ``zipfile`` and ``os`` imports.

    :param file_name: path to a .zip file
    :return: None
    """
    target = file_name + "_files"
    # makedirs(exist_ok=True) replaces the original isdir/else/mkdir
    # branch in one call.
    os.makedirs(target, exist_ok=True)
    # Context manager guarantees the archive handle is closed even when
    # extraction raises (the original leaked it on error).
    with zipfile.ZipFile(file_name) as zip_file:
        for names in zip_file.namelist():
            zip_file.extract(names, target + "/")

  

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def getLink(url:str):
    """
    Fetch the WeChat album JSON feed at *url* and return the URL of the
    first (newest) article in its article list.

    :param url: album JSON endpoint (appmsgalbum, f=json)
    :return: URL string of the newest article
    """
    headers = {
        'Accept-Language': 'zh-CN,zh;q=0.9,en-CN;q=0.8,en;q=0.7,zh-TW;q=0.6',
        'Cookie': 'rewardsn=; wxtokenkey=777',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
    }

    # Request the album feed and pull the newest article URL out of the
    # JSON payload.
    payload = json.loads(requests.get(url, headers=headers).text)
    article_list = payload['getalbum_resp']['article_list']
    return article_list[0]['url']
 
def getZipUrl(link:str):
    """
    Download the WeChat article at *link* and return all ".zip" links
    found in its body text.

    :param link: WeChat article URL
    :return: list of matched zip URLs (possibly empty)
    """
    # Fetch the article page and parse the HTML.
    page = requests.get(link)
    parsed = BeautifulSoup(page.text, 'html.parser')

    # The article body lives in the div with id "js_content"; scan its
    # plain text for links starting with https:// and ending in .zip.
    body_text = parsed.find('div', {'id': 'js_content'}).get_text()
    return re.findall(r'https://.*?\.zip', body_text)
 
def requestsDownload(url:str,newfile:str):
    """
    Download *url* and save the raw response bytes to *newfile*.

    :param url: file URL to download
    :param newfile: local path to write (overwritten if it exists)
    :return: None
    """
    resp = requests.get(url)
    with open(newfile, 'wb') as out:
        out.write(resp.content)
 
 
 
 
def print_hi(name):
    """Print a greeting that includes *name* (PyCharm template demo)."""
    # Use a breakpoint in the code line below to debug your script.
    # Fixed: the original print() call was missing its closing
    # parenthesis (the trailing comment swallowed it), which is a
    # SyntaxError.
    print(f'Hi, {name} world,geovindu,涂聚文')  # Press Ctrl+F8 to toggle the breakpoint.
 
 
# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    # print_hi('PyCharm,python language')
    # Get the release article for the latest IP-database drop from the
    # WeChat album JSON feed, find the zip link inside it, download it.
    url = 'https://mp.weixin.qq.com/mp/appmsgalbum?__biz=Mzg3Mzc0NTA3NA==&action=getalbum&album_id=2329805780276838401&f=json'
    downurl = ""
    try:
        link = getLink(url)
        if link:
            zip_url = getZipUrl(link)
            if zip_url:
                # Use the first zip link found; the original loop
                # reassigned zip_url[0] on every iteration anyway.
                downurl = zip_url[0]
                for candidate in zip_url:
                    print(candidate)
            else:
                print("没有找到zip链接")
        else:
            print("没有找到微信推文链接")
    except Exception as e:
        print("出现错误:", e)

    # Fixed: only attempt the download when a zip URL was actually
    # found. The original unconditionally called
    # requestsDownload("", ...) after any failure above, which would
    # raise a second, confusing error.
    if downurl:
        requestsDownload(downurl, "geovindu.zip")
    

  

posted @   ®Geovin Du Dream Park™  阅读(29)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 25岁的心里话
· 闲置电脑爆改个人服务器(超详细) #公网映射 #Vmware虚拟网络编辑器
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· 零经验选手,Compose 一天开发一款小游戏!
· 一起来玩mcp_server_sqlite,让AI帮你做增删改查!!
历史上的今天:
2023-04-29 cpp: Template Mothod Pattern
2023-04-29 mysql: character set in mysql 8.0
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5
点击右上角即可分享
微信分享提示