python 爬取媒体文件(无防火墙)
# -*- coding: utf-8 -*-
# Batch downloader: reads (cust_id, void_id) pairs from an Excel sheet and
# saves each response body as <root_path>/<cust_id>/<void_id>.mp3.  Ids whose
# download fails are appended to fail_file, one per line.
import os
import time

import pandas as pd
import requests

root_path = './根目录/'
input_file = '码表.xlsx'
# NOTE(review): the query string has spaces around "=" ("id = %s"); left
# byte-identical here -- confirm against the real endpoint before use.
url = 'http://api.map.baidu.com/geocoder/v2/?id = %s&local=1'
fail_file = root_path + 'fail.csv'


class Auto_down:
    """Download one file per spreadsheet row into per-customer directories."""

    def __init__(self):
        print("--start--")

    def read_excel(self):
        """Read the id columns from ``input_file`` and process every row.

        The ``cust_id`` and ``void_id`` columns are read as ``str`` so that
        leading zeros in the ids are preserved.
        """
        sheet = pd.read_excel(
            input_file,
            # u'列名' is kept from the original (presumably a placeholder
            # column name -- TODO confirm); the two id columns are added so
            # the values actually used below are never parsed as numbers.
            converters={u'列名': str, 'cust_id': str, 'void_id': str},
            sheet_name='子表名',
        )
        for custid, voiceid in zip(sheet['cust_id'], sheet['void_id']):
            self.create_file(custid, voiceid)

    def download_voice(self, custid_filename, voiceid):
        """Fetch ``url % voiceid`` and save it under ``custid_filename``.

        Args:
            custid_filename: Existing directory that receives the file.
            voiceid: Id substituted into ``url``; also the file's base name.

        A non-200 response, a request error, or a file-system error is not
        raised to the caller; the id is appended to ``fail_file`` instead.
        """
        print(voiceid)
        try:
            # The timeout keeps one unresponsive server from stalling the
            # whole batch forever.
            r = requests.get(url % voiceid, timeout=30)
            if r.status_code == 200:
                voice_filename = '%s/%s.mp3' % (custid_filename, voiceid)
                with open(voice_filename, 'wb') as fd:
                    fd.write(r.content)
                return
        except (requests.RequestException, OSError):
            print('request url is fail!!')
        self._record_failure(voiceid)

    def _record_failure(self, voiceid):
        """Append ``voiceid`` to ``fail_file`` on its own line."""
        os.makedirs(os.path.dirname(fail_file) or '.', exist_ok=True)
        with open(fail_file, 'a+') as ff:
            # str() so a numeric id cannot raise TypeError while logging.
            ff.write(str(voiceid) + '\n')

    def create_file(self, custid, voiceid):
        """Ensure the customer's directory exists, then download the file.

        The download runs whether or not the directory already existed, so
        the first row seen for a customer is no longer skipped.
        """
        custid_filename = root_path + str(custid)
        os.makedirs(custid_filename, exist_ok=True)
        self.download_voice(custid_filename, voiceid)


if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    tStart = time.perf_counter()
    AD = Auto_down()
    AD.read_excel()
    tEnd = time.perf_counter()
    print("%s s" % (tEnd - tStart))
# -*- coding: utf-8 -*-
# Minimal download example: request `url` three times; each 200 response is
# written to <root_path>/dada.fdf, and every failed attempt appends `voiceid`
# to fail_file.
import os

import requests

root_path = "./下载/"
url = ""  # left empty in the original; fill in the address to download
fail_file = root_path + 'fail.csv'
voiceid = '11111'

# Both output files live in root_path, so make sure it exists up front.
os.makedirs(root_path, exist_ok=True)

# NOTE(review): the loop does not stop after a successful download; if it is
# meant as a retry loop, a `break` on success is probably wanted -- confirm.
for _ in range(3):
    try:
        # The timeout keeps an unresponsive server from hanging the script.
        r = requests.get(url, timeout=30)
        if r.status_code == 200:
            voice_filename = root_path + 'dada.fdf'
            with open(voice_filename, 'wb') as fd:
                fd.write(r.content)
        else:
            with open(fail_file, 'a+') as ff:
                ff.write(voiceid + '\n')
    except (requests.RequestException, OSError):
        # The original called the undefined name `prin`, which raised
        # NameError from inside this handler.
        print("fail")
        with open(fail_file, 'a+') as ff:
            ff.write(voiceid + '\n')
r = requests.get(url)
r.status_code 获取响应状态码
r.text 获取响应内容
r.headers 获取响应头
r.encoding 获取响应编码
r.content 获取二进制响应内容
r.json() 获取JSON响应内容