Python: download file

 

def unGz(file_name):
    """
    Decompress a gzip file and write the result next to it.

    Relies on ``gzip`` being imported at module level.

    :param file_name: path of the gzip-compressed file. When the path ends
        with ".gz" the output path is the same path without that suffix;
        otherwise ".out" is appended so the input file is never overwritten.
    :return: None
    """
    import shutil  # local import: only this function needs it

    # Strip only the trailing suffix; a ".gz" elsewhere in the path
    # (e.g. in a directory name) must be left untouched.
    if file_name.endswith(".gz"):
        f_name = file_name[:-len(".gz")]
    else:
        f_name = file_name + ".out"

    # Decompressed data is bytes, so the target is opened in binary mode.
    # The context managers close both handles even if decompression fails.
    with gzip.GzipFile(file_name) as g_file, open(f_name, "wb") as out_file:
        shutil.copyfileobj(g_file, out_file)

def unZip(file_name):
    """
    Extract every member of a zip archive into "<file_name>_files".

    Relies on ``zipfile`` and ``os`` being imported at module level.

    :param file_name: path of the zip archive
    :return: None
    """
    target_dir = file_name + "_files"
    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile(file_name) as zip_file:
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs(target_dir, exist_ok=True)
        zip_file.extractall(target_dir + "/")

  

 

def getLink(url: str, timeout: float = 30.0):
    """
    Fetch the album JSON at *url* and return the URL of its first article.

    :param url: address that returns the album JSON
    :param timeout: seconds to wait for the server before giving up
    :return: the ``url`` field of the first entry of
        ``getalbum_resp.article_list``, or None when that list is missing
        or empty
    :raises requests.HTTPError: when the server answers with an error status
    """
    headers = {
        'Accept-Language': 'zh-CN,zh;q=0.9,en-CN;q=0.8,en;q=0.7,zh-TW;q=0.6',
        'Cookie': 'rewardsn=; wxtokenkey=777',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
    }

    # Request the page and pull the WeChat article link out of the JSON.
    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    data = json.loads(response.text)

    # Return None instead of raising when no article is listed, so the
    # caller's "no link found" branch can handle it.
    articles = (data.get('getalbum_resp') or {}).get('article_list') or []
    if not articles:
        return None
    return articles[0].get('url')

def getZipUrl(link: str, timeout: float = 30.0):
    """
    Download the page at *link* and collect the zip URLs found in its text.

    :param link: address of the WeChat article page
    :param timeout: seconds to wait for the server before giving up
    :return: list of strings that start with "https://" and end with ".zip",
        taken from the text of the element with id "js_content"; an empty
        list when that element is absent or contains no such link
    :raises requests.HTTPError: when the server answers with an error status
    """
    # Fetch the article page and parse the HTML.
    response = requests.get(link, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # find() returns None when the content container is missing; report
    # "no links" rather than failing with AttributeError.
    content_div = soup.find('div', {'id': 'js_content'})
    if content_div is None:
        return []

    # Non-greedy match: each hit runs from "https://" to the nearest ".zip".
    return re.findall(r'https://.*?\.zip', content_div.get_text())

def requestsDownload(url: str, newfile: str, timeout: float = 30.0):
    """
    Download *url* and save the response body to *newfile*.

    :param url: address of the file to download
    :param newfile: path of the local file to write (overwritten if present)
    :param timeout: seconds to wait for the server before giving up
    :return: None
    :raises requests.HTTPError: when the server answers with an error status
    """
    # stream=True avoids holding the whole payload in memory at once.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check the status before opening the target so an error page is
        # never saved under the download's name.
        response.raise_for_status()
        with open(newfile, 'wb') as file:
            for chunk in response.iter_content(chunk_size=65536):
                file.write(chunk)




def print_hi(name):
    """
    Print a fixed greeting line that embeds *name*.

    :param name: value interpolated into the greeting
    :return: None
    """
    greeting = f'Hi, {name} world,geovindu,涂聚文'
    print(greeting)


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    #print_hi('PyCharm,python language')
    # Get the link of the article announcing the latest IP database release
    # from the WeChat album JSON data.
    url = 'https://mp.weixin.qq.com/mp/appmsgalbum?__biz=Mzg3Mzc0NTA3NA==&action=getalbum&album_id=2329805780276838401&f=json'
    downurl = ""
    try:
        link = getLink(url)
        if link:
            zip_url = getZipUrl(link)
            if zip_url:
                # Every match is printed; only the first one is downloaded.
                downurl = zip_url[0]
                for found_url in zip_url:
                    print(found_url)
            else:
                print("没有找到zip链接")
        else:
            print("没有找到微信推文链接")
    except Exception as e:
        print("出现错误:", e)

    # downurl is still empty when any step above failed (a message has
    # already been printed in that case), so there is nothing to download.
    if downurl:
        requestsDownload(downurl, "geovindu.zip")
    

  

posted @ 2024-04-29 20:52  ®Geovin Du Dream Park™  阅读(23)  评论(0编辑  收藏  举报