下载GLDAS数据链接的脚本
单进程脚本
import os
import sys
import requests
def get_file_once(URL, FILENAME, timeout=300):
    """Download ``URL`` and store the response body in ``FILENAME``.

    Failures are reported on stdout instead of being raised, so a caller
    that loops over many URLs keeps going after a bad one.

    Args:
        URL: Address fetched with an HTTP GET request.
        FILENAME: Path of the local file to create or overwrite.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``
            so that a stalled server cannot block the script forever.

    Returns:
        None.
    """
    # The request is guarded as well: connection problems are raised by
    # requests.get() itself, before any response object exists.
    try:
        result = requests.get(URL, timeout=timeout)
    except requests.RequestException as err:
        print('requests.get() failed for '+URL+': '+str(err))
        return

    try:
        result.raise_for_status()
    except requests.HTTPError:
        print('requests.get() returned an error code '+str(result.status_code))
        return

    # Write errors get their own message so they are not mistaken for an
    # HTTP failure; ``with`` closes the handle even if write() fails.
    try:
        with open(FILENAME, 'wb') as f:
            f.write(result.content)
    except OSError as err:
        print('could not write '+FILENAME+': '+str(err))
        return

    print('contents of URL written to '+FILENAME)
def getnc(dataList, localPath):
    """Download every URL listed in ``dataList`` into ``localPath``.

    The local file name of each download is the last ``/``-separated
    component of the URL with any ``?query`` part removed.

    NOTE: URLs that differ only in their query string therefore map to
    the same local file name and overwrite one another.

    Args:
        dataList: Path of a text file holding one URL per line. Blank
            lines are ignored.
        localPath: Directory that receives the files. It is created,
            including missing parent directories, when it does not exist.
    """
    # makedirs creates missing parents and tolerates an existing directory.
    os.makedirs(localPath, exist_ok=True)

    with open(dataList, 'r') as f:
        # Drop blank lines: an empty URL cannot be requested.
        urls = [line.strip() for line in f if line.strip()]

    for url in urls:
        file_name_1 = url.split('?')[0].split('/')[-1].strip()
        if not file_name_1:
            # A URL whose path ends in '/' has no file-name component.
            print('no file name in URL, skipped: '+url)
            continue
        print(file_name_1)
        file_name_2 = os.path.join(localPath, file_name_1)
        get_file_once(url, file_name_2)
if __name__ == '__main__':
    # Local import: glob is needed only by this script entry point.
    import glob

    localPath = "./data/"  # directory that receives the downloaded files

    # glob replaces the former `ls *.txt` pipe: it needs no Unix shell,
    # keeps file names containing spaces intact and leaves no open pipe.
    # sorted() gives a stable processing order.
    for fl in sorted(glob.glob('*.txt')):
        print(fl)
        getnc(fl, localPath)  # download every URL listed in this file
下面是更新后的多线程脚本
# -*- coding: utf-8 -*-
"""
Created on May 8 13:55:39 2023
增加多线程下载功能
@author: fengxiang
下载链接列表格式样例:
https://hydro1.gesdisc.eosdis.nasa.gov/daac-bin/OTF/HTTP_services.cgi?FILENAME=%2Fdata%2FGLDAS%2FGLDAS_NOAH025_3H.2.1%2F2013%2F001%2FGLDAS_NOAH025_3H.A20130101.1800.021.nc4&VERSION=1.02&LABEL=GLDAS_NOAH025_3H.A20130101.1800.021.nc4.SUB.nc4&DATASET_VERSION=2.1&SHORTNAME=GLDAS_NOAH025_3H&SERVICE=L34RS_LDAS&BBOX=20%2C85%2C50%2C120&VARIABLES=LWdown_f_tavg%2CPsurf_f_inst%2CQair_f_inst%2CRainf_tavg%2CSWdown_f_tavg%2CTair_f_inst%2CWind_f_inst&FORMAT=nc4%2F
"""
import os
import sys
import requests
import re
from queue import Queue
from threading import Thread
from time import time
def get_file_once(URL, timeout=300):
    """Download one file unless it is already present locally.

    The local name is ``./GLDAS_<stamp>``, where ``<stamp>`` is the first
    substring of ``URL`` made of 8 digits, any single character and
    4 digits (for example ``20130101.1800``).

    Problems are reported on stdout instead of being raised.

    Args:
        URL: Address fetched with an HTTP GET request.
        timeout: Value forwarded to ``requests.get`` as its ``timeout``
            so that a stalled server cannot block the caller forever.

    Returns:
        None.
    """
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    stamp = re.search(r'\d{8}.\d{4}', URL)
    if stamp is None:
        # Without a time stamp there is no file name to write to.
        print('no timestamp found in URL, skipped: '+URL)
        return

    localPath = "./"  # download directory
    FILENAME = localPath+'GLDAS'+'_'+stamp.group()
    print('read '+FILENAME)
    if os.path.exists(FILENAME):
        return

    # The request is guarded as well: connection problems are raised by
    # requests.get() itself, before any response object exists.
    try:
        result = requests.get(URL, timeout=timeout)
    except requests.RequestException as err:
        print('requests.get() failed for '+URL+': '+str(err))
        return

    try:
        result.raise_for_status()
    except requests.HTTPError:
        print('requests.get() returned an error code '+str(result.status_code))
        return

    # Write to a side file and rename it afterwards. The exists() check
    # above would otherwise treat a file left incomplete by a failed
    # write as already downloaded. A stale side file is simply
    # overwritten by the next attempt.
    partial = FILENAME+'.part'
    try:
        with open(partial, 'wb') as f:
            f.write(result.content)
        os.replace(partial, FILENAME)
    except OSError as err:
        print('could not write '+FILENAME+': '+str(err))
        return

    print('contents of URL written to '+FILENAME)
class DownloadWorker(Thread):
    """Thread that downloads the URLs it takes from a shared queue."""

    def __init__(self, queue):
        """Store the queue this worker takes its URLs from.

        Args:
            queue: Queue whose items are passed to ``get_file_once``.
        """
        super().__init__()
        self.queue = queue

    def run(self):
        """Process queue items one after another; never returns by itself."""
        while True:
            # Blocks until the next item is available.
            line = self.queue.get()
            try:
                get_file_once(line)
            except Exception as err:
                # Thread-level boundary: report the failure and keep the
                # worker alive for the remaining items.
                print('download failed for '+str(line)+': '+repr(err))
            finally:
                # Always acknowledge the item; a caller blocked in
                # queue.join() would otherwise wait forever.
                self.queue.task_done()
if __name__ == '__main__':
    # Local import: glob is needed only by this script entry point.
    import glob

    ts = time()

    # glob replaces the former `ls *.txt` pipe: it needs no Unix shell,
    # keeps file names containing spaces intact and leaves no open pipe.
    list_files = sorted(glob.glob('*.txt'))
    if not list_files:
        raise SystemExit('no *.txt link list found in the current directory')
    data_list = list_files[0]  # only the first list file is processed

    with open(data_list, 'r') as f:
        # One URL per line; blank lines are not URLs and are dropped.
        url_list = [line.strip() for line in f if line.strip()]

    queue = Queue()
    # Start eight worker threads.
    for _ in range(8):
        worker = DownloadWorker(queue)
        # Daemon threads let the main thread exit even though the workers
        # are still blocked waiting on the queue.
        worker.daemon = True
        worker.start()

    # Hand every URL to the workers.
    for url in url_list:
        queue.put(url)

    # Wait until every queued URL has been processed.
    queue.join()
    print('Took {}'.format(time() - ts))