Python: download file
def unGz(file_name):
    """
    Decompress a gzip archive into a file next to it.

    The output name is ``file_name`` with its trailing ``.gz`` removed. When
    the name does not end in ``.gz`` the output is ``file_name + ".out"``, so
    the archive is never overwritten while it is still being read.

    Uses the ``gzip`` module, which the enclosing file must import.

    :param file_name: path of the gzip archive to decompress
    :return: path of the decompressed file that was written
    """
    suffix = ".gz"
    if file_name.endswith(suffix):
        # Strip only the trailing suffix, not every ".gz" inside the path.
        f_name = file_name[:-len(suffix)]
    else:
        f_name = file_name + ".out"

    # gzip yields bytes, so the destination must be opened in binary mode.
    # Copy in chunks so large archives are not loaded into memory at once.
    with gzip.open(file_name, "rb") as g_file, open(f_name, "wb") as out_file:
        for chunk in iter(lambda: g_file.read(65536), b""):
            out_file.write(chunk)
    return f_name


def unZip(file_name):
    """
    Extract every member of a zip archive into ``file_name + "_files"``.

    The target directory is created when it does not exist yet and reused
    otherwise. Uses the ``zipfile`` and ``os`` modules, which the enclosing
    file must import.

    :param file_name: path of the zip archive to extract
    :return: path of the directory the members were extracted into
    """
    target_dir = file_name + "_files"
    # Open the archive first so an invalid zip does not leave an empty
    # directory behind; the context manager closes it even on failure.
    with zipfile.ZipFile(file_name) as zip_file:
        os.makedirs(target_dir, exist_ok=True)
        zip_file.extractall(target_dir)
    return target_dir
# Seconds to wait for a server to respond before giving up; without a
# timeout a stalled connection would block the script forever.
_REQUEST_TIMEOUT = 30


def _first_article_url(data):
    """
    Return the URL of the first article in a parsed album response.

    :param data: decoded JSON object of the album endpoint
    :return: the first article's ``url`` value, or None when the response
        holds no ``getalbum_resp`` / ``article_list`` / first entry / ``url``
    """
    try:
        return data['getalbum_resp']['article_list'][0]['url']
    except (KeyError, IndexError):
        return None


def _find_zip_urls(text):
    """
    Return every ``https://`` ... ``.zip`` link found in ``text``.

    :param text: plain text to search
    :return: list of matched links in order of appearance (possibly empty)
    """
    return re.findall(r'https://.*?\.zip', text)


def getLink(url: str):
    """
    Fetch the album JSON and return the link of its first article.

    :param url: URL of the album endpoint that answers with JSON
    :return: the first article URL, or None when the response lists none
    :raises requests.RequestException: on network failure or HTTP error status
    :raises ValueError: when the response body is not valid JSON
    """
    headers = {
        'Accept-Language': 'zh-CN,zh;q=0.9,en-CN;q=0.8,en;q=0.7,zh-TW;q=0.6',
        'Cookie': 'rewardsn=; wxtokenkey=777',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
    }
    # Request the endpoint and pull the article link out of the JSON body.
    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    data = json.loads(response.text)
    return _first_article_url(data)


def getZipUrl(link: str):
    """
    Download an article page and collect the zip links in its content.

    :param link: URL of the article page
    :return: list of ``https://...zip`` links found inside the element with
        id ``js_content``; empty when that element is missing or holds none
    :raises requests.RequestException: on network failure or HTTP error status
    """
    # Fetch the article page and parse the HTML.
    response = requests.get(link, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    container = soup.find('div', {'id': 'js_content'})
    if container is None:
        # No content element on the page: report "nothing found" rather
        # than failing with AttributeError on None.
        return []
    # Match links that start with https:// and end with .zip.
    return _find_zip_urls(container.get_text())


def requestsDownload(url: str, newfile: str):
    """
    Download ``url`` and save the response body to ``newfile``.

    :param url: address of the file to download
    :param newfile: path of the local file to write (overwritten if present)
    :return: None
    :raises requests.RequestException: on network failure or HTTP error status
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    # Fail before touching the disk so an HTTP error page is never saved
    # under the target file name.
    response.raise_for_status()
    with open(newfile, 'wb') as file:
        file.write(response.content)


def print_hi(name):
    """
    Print a greeting that contains ``name``.

    :param name: text inserted into the greeting
    :return: None
    """
    print(f'Hi, {name} world,geovindu,涂聚文')


def main():
    """
    Find the zip links of the first album article and download the first one.

    Every zip link found is printed; the first one is saved as
    ``geovindu.zip``. Nothing is downloaded when no link could be found.

    :return: None
    """
    # Album endpoint whose JSON lists the articles announcing IP database
    # releases.
    url = 'https://mp.weixin.qq.com/mp/appmsgalbum?__biz=Mzg3Mzc0NTA3NA==&action=getalbum&album_id=2329805780276838401&f=json'
    downurl = ""
    try:
        link = getLink(url)
        if link:
            zip_url = getZipUrl(link)
            if zip_url:
                downurl = zip_url[0]
                for found in zip_url:
                    print(found)
            else:
                print("没有找到zip链接")
        else:
            print("没有找到微信推文链接")
    except Exception as e:
        # Top-level boundary of the script: report the failure and stop.
        print("出现错误:", e)

    # Only download when the lookup actually produced a link; an empty URL
    # would make the request itself fail.
    if downurl:
        requestsDownload(downurl, "geovindu.zip")


if __name__ == '__main__':
    main()
哲学管理(学)人生, 文学艺术生活, 自动(计算机学)物理(学)工作, 生物(学)化学逆境, 历史(学)测绘(学)时间, 经济(学)数学金钱(理财), 心理(学)医学情绪, 诗词美容情感, 美学建筑(学)家园, 解构建构(分析)整合学习, 智商情商(IQ、EQ)运筹(学)生存.---Geovin Du(涂聚文)