下载 GLDAS 数据链接的脚本

单进程脚本

import os
import sys
import requests

def get_file_once(URL, FILENAME):
    """Download URL and save the response body to FILENAME.

    Failures are reported on stdout instead of being raised, so a caller
    that loops over many links can carry on with the next one.

    Args:
        URL: Address fetched with an HTTP GET request.
        FILENAME: Path of the local file to write (opened in binary mode).
    """
    try:
        result = requests.get(URL)
    except requests.exceptions.RequestException as err:
        # Connection-level failure: there is no response, hence no status code.
        print('requests.get() failed for '+URL+': '+str(err))
        return

    try:
        result.raise_for_status()
    except requests.exceptions.HTTPError:
        print('requests.get() returned an error code '+str(result.status_code))
        return

    try:
        # The context manager closes the file even when the write fails.
        with open(FILENAME, 'wb') as f:
            f.write(result.content)
    except OSError as err:
        # Report disk problems as such rather than as an HTTP error.
        print('could not write '+FILENAME+': '+str(err))
        return
    print('contents of URL written to '+FILENAME)


def getnc(dataList, localPath):
    """Download every link listed in a text file into a local directory.

    Args:
        dataList: Path of a text file holding one download URL per line.
        localPath: Directory that receives the files; created when missing.
    """
    # makedirs also creates missing parent directories and does not fail
    # when the directory already exists.
    os.makedirs(localPath, exist_ok=True)
    with open(dataList, 'r') as f:
        for line in f:
            url = line.strip()
            if not url:
                # Blank lines (e.g. a trailing newline) are not links.
                continue
            # Local name: last path component of the URL, query string removed.
            file_name = url.split('?')[0].split('/')[-1].strip()
            print(file_name)
            get_file_once(url, os.path.join(localPath, file_name))
            

if __name__ == '__main__':
    # glob is only needed by this entry point, so it is imported here.
    import glob

    localPath = "./data/"  # directory that receives the downloaded files
    # Every *.txt file in the current directory is treated as a list of links.
    # glob replaces the former `ls *.txt` pipe: it works without a POSIX
    # shell and keeps file names containing spaces intact. sorted() gives a
    # stable, alphabetical processing order.
    for fl in sorted(glob.glob('*.txt')):
        print(fl)
        getnc(fl, localPath)  # download every link in this list

下面是更新的多线程脚本

# -*- coding: utf-8 -*-
"""
Created on May 8 13:55:39 2023

增加多线程下载功能
@author: fengxiang
下载链接列表格式样例:
https://hydro1.gesdisc.eosdis.nasa.gov/daac-bin/OTF/HTTP_services.cgi?FILENAME=%2Fdata%2FGLDAS%2FGLDAS_NOAH025_3H.2.1%2F2013%2F001%2FGLDAS_NOAH025_3H.A20130101.1800.021.nc4&VERSION=1.02&LABEL=GLDAS_NOAH025_3H.A20130101.1800.021.nc4.SUB.nc4&DATASET_VERSION=2.1&SHORTNAME=GLDAS_NOAH025_3H&SERVICE=L34RS_LDAS&BBOX=20%2C85%2C50%2C120&VARIABLES=LWdown_f_tavg%2CPsurf_f_inst%2CQair_f_inst%2CRainf_tavg%2CSWdown_f_tavg%2CTair_f_inst%2CWind_f_inst&FORMAT=nc4%2F
"""
import os
import sys
import requests
import re
from queue import Queue
from threading import Thread
from time import time

def get_file_once(URL):
    """Download one GLDAS link into the current directory unless already there.

    The local file is named ``GLDAS_<stamp>``, where ``<stamp>`` is the first
    run of eight digits, one arbitrary character and four digits found in
    URL (``20130101.1800`` for the sample link in the module docstring).
    Failures are reported on stdout instead of being raised, so a worker
    thread calling this function keeps running.

    Args:
        URL: Download link that contains the time stamp described above.
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    stamp = re.search(r'\d{8}.\d{4}', URL)
    if stamp is None:
        # Without a time stamp no local file name can be derived.
        print('no time stamp found in URL, skipped: '+URL)
        return
    str_time = stamp.group()
    localPath = "./"  # directory that receives the downloaded file
    FILENAME = localPath+'GLDAS'+'_'+str_time
    print('read   '+FILENAME)

    if os.path.exists(FILENAME):
        # Already downloaded by an earlier run.
        return

    try:
        result = requests.get(URL)
    except requests.exceptions.RequestException as err:
        # Connection-level failure: there is no response, hence no status code.
        print('requests.get() failed for '+URL+': '+str(err))
        return

    try:
        result.raise_for_status()
    except requests.exceptions.HTTPError:
        print('requests.get() returned an error code '+str(result.status_code))
        return

    try:
        # The context manager closes the file even when the write fails.
        with open(FILENAME, 'wb') as f:
            f.write(result.content)
    except OSError as err:
        print('could not write '+FILENAME+': '+str(err))
        return
    print('contents of URL written to '+FILENAME)



class DownloadWorker(Thread):
    """Worker thread that downloads the URLs it takes from a shared queue."""

    def __init__(self, queue):
        """Store the queue this worker reads from.

        Args:
            queue: Queue whose items are passed one by one to get_file_once.
        """
        Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Loop forever: take one item from the queue and download it."""
        while True:
            # Blocks until an item is available.
            line = self.queue.get()
            try:
                get_file_once(line)
            except Exception as err:
                # One bad link must not kill the worker; report it and go on.
                print('download failed for '+str(line)+': '+str(err))
            finally:
                # Always acknowledge the item, otherwise queue.join() in the
                # main thread would wait forever after a failed download.
                self.queue.task_done()


if __name__ == '__main__':
    # glob is only needed by this entry point, so it is imported here.
    import glob

    ts = time()  # start time, used for the elapsed-time report at the end

    # glob replaces the former `ls *.txt` pipe: it works without a POSIX
    # shell and keeps file names containing spaces intact.
    txt_files = sorted(glob.glob('*.txt'))
    if not txt_files:
        # Indexing an empty list would otherwise raise IndexError.
        raise SystemExit('no *.txt link list found in the current directory')
    data_list = txt_files[0]  # only the first list file is processed

    with open(data_list, 'r') as f:
        # One link per line; blank lines are not links and are dropped.
        url_list = [line.strip() for line in f if line.strip()]

    queue = Queue()

    # Start eight worker threads.
    for x in range(8):
        worker = DownloadWorker(queue)
        # Daemon threads let the main thread exit even though the workers
        # are still blocked waiting on the queue.
        worker.daemon = True
        worker.start()

    # Hand every link to the workers.
    for url in url_list:
        queue.put(url)

    # Wait until every queued link has been processed.
    queue.join()
    print('Took {}'.format(time() - ts))
posted @ 2022-11-02 11:15  xiaofeifeixd  阅读(42)  评论(0)  编辑  收藏  举报