Python 爬取媒体文件(无防火墙)
# -*- coding: utf-8 -*-
"""Download one .mp3 per (customer id, voice id) row of an Excel sheet.

Each file is saved as ``<root_path>/<cust_id>/<void_id>.mp3``. Voice ids whose
download fails (non-200 response, request error, or file error) are appended
to ``fail_file``, one id per line.
"""
import os
import time

import pandas as pd
import requests

root_path = './根目录/'
input_file = '码表.xlsx'
url = 'http://api.map.baidu.com/geocoder/v2/?id = %s&local=1'
fail_file = root_path + 'fail.csv'


class Auto_down:
    """Reads id pairs from ``input_file`` and downloads the matching audio."""

    def __init__(self):
        print("--start--")

    def read_excel(self):
        """Read the sheet and call ``create_file`` for every row."""
        # converters={column: str} makes pandas read that column as text, so
        # leading zeros are not dropped.
        # NOTE(review): '列名' and '子表名' look like placeholders for the real
        # column / sheet names -- confirm before running.
        sheet = pd.read_excel(
            input_file,
            converters={u'列名': str},
            sheet_name='子表名',  # 'sheetname' is no longer accepted by pandas
        )
        for custid, voiceid in zip(sheet['cust_id'], sheet['void_id']):
            self.create_file(custid, voiceid)

    def download_voice(self, custid_filename, voiceid):
        """Fetch ``url % voiceid`` and save it under ``custid_filename``.

        Args:
            custid_filename: Directory the .mp3 file is written into.
            voiceid: Id substituted into ``url`` and used as the file stem.

        A failed download is recorded in ``fail_file`` instead of raising.
        """
        print(voiceid)
        try:
            # Without a timeout requests can block forever on a stalled server.
            r = requests.get(url % voiceid, timeout=30)
            if r.status_code == 200:
                voice_filename = '%s/%s.mp3' % (custid_filename, voiceid)
                with open(voice_filename, 'wb') as fd:
                    fd.write(r.content)
                return
        except (requests.RequestException, OSError):
            print('request url is fail!!')
        self._record_failure(voiceid)

    def create_file(self, custid, voiceid):
        """Ensure the customer's directory exists, then download the voice.

        Args:
            custid: Customer id; names the directory under ``root_path``.
            voiceid: Voice id passed on to ``download_voice``.
        """
        # str() because pandas may yield a numeric id for an unconverted column.
        custid_filename = root_path + str(custid)
        # The download must happen whether or not the directory already
        # existed; previously a freshly created directory skipped it.
        os.makedirs(custid_filename, exist_ok=True)
        self.download_voice(custid_filename, voiceid)

    def _record_failure(self, voiceid):
        """Append ``voiceid`` to ``fail_file`` as its own line."""
        with open(fail_file, 'a+') as ff:
            ff.write('%s\n' % voiceid)


if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; perf_counter() replaces it.
    tStart = time.perf_counter()
    AD = Auto_down()
    AD.read_excel()
    tEnd = time.perf_counter()
    print("%s s" % (tEnd - tStart))
# -*- coding: utf-8 -*-
"""Minimal demo: fetch ``url`` three times, saving the body or logging a failure.

On a 200 response the body is written to ``<root_path>/dada.fdf``; on any other
status, or on a request/file error, ``voiceid`` is appended to ``fail_file``.
"""
import os

import requests

root_path = "./下载/"
url = ""  # NOTE(review): empty here -- fill in the real address before running
fail_file = root_path + 'fail.csv'
voiceid = '11111'

# Both output files live under root_path, so it has to exist first.
os.makedirs(root_path, exist_ok=True)

# NOTE(review): every pass requests the same url and overwrites the same file;
# if this is meant as a retry loop, stop after the first success -- confirm.
for _ in range(3):
    try:
        # Without a timeout requests can block forever on a stalled server.
        r = requests.get(url, timeout=30)
        if r.status_code == 200:
            voice_filename = root_path + 'dada.fdf'
            with open(voice_filename, 'wb') as fd:
                fd.write(r.content)
        else:
            with open(fail_file, 'a+') as ff:
                ff.write(voiceid + '\n')
    except (requests.RequestException, OSError):
        # Was the misspelt `prin(...)`, which raised NameError inside the handler.
        print("fail")
        with open(fail_file, 'a+') as ff:
            ff.write(voiceid + '\n')
r = requests.get(url)
r.status_code 获取响应状态码
r.text 获取响应内容
r.headers 获取响应头
r.encoding 获取响应编码
r.content 获取二进制响应内容
r.json() 获取JSON响应内容
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 提示词工程——AI应用必不可少的技术
· Open-Sora 2.0 重磅开源!
· 周边上新:园子的第一款马克杯温暖上架