gscloud.cn下载GDEMV3 DEM模型
采用Selenium+Chromedriver方式下载,Selenium和Chromedriver的具体安装方法请参阅:
https://zhuanlan.zhihu.com/p/588679601
(1)Python没有自带selenium库,需要自行下载,只需要用简单的命令即可实现:
pip install selenium
(2)安装Chrome浏览器
(3)接着下载与已安装的Chrome浏览器版本相匹配的Chromedriver,通常可从以下网址下载:
https://npm.taobao.org/mirrors/
# (4) Download script (Python):
"""Download the GDEMV3 30 m DEM tiles from gscloud.cn through Chrome.

The script opens the gscloud.cn login page and then waits until the user has
logged in and navigated to ``demdownloadurl``. From there it jumps to result
page ``page``, clicks the download icon of every row whose ``<name>.zip`` is
not yet present in ``foldersave``, and advances page by page up to
``page_num``. It then returns to page 1 and repeats full passes until one
pass triggers no new download.
"""

import glob
import os
import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait

# Listing page of the "GDEMV3 30M resolution digital elevation model" dataset.
demdownloadurl = r'https://www.gscloud.cn/sources/accessdata/aeab8000652a45b38afbb7ff023ddabb?pid=302'
foldersave = r'F:\GDEMV3 DEM'  # directory the downloaded files are saved to
page_num = 2292  # total number of result pages
page = 1580  # page number to start downloading from
concurrentdownload = 6  # max simultaneous downloads; gscloud.cn allows 6 at once

ROWS_PER_PAGE = 10  # rows shown on one result page
ELEMENT_TIMEOUT = 20  # seconds to wait for a page element to become visible
TAB_CLOSE_TIMEOUT = 50  # seconds to wait for a tab opened by a click to close

# Cell locations relative to one table row.
DOWNLOAD_ICON_CELL = '/td[9]/div/div/p[2]/img'
FILENAME_CELL = '/td[2]/div'

# Pager controls.
PAGE_INPUT_XPATH = '//*[@id="pager"]/div/table/tr/td[7]/input'
NEXT_PAGE_XPATH = '//*[@id="pager"]/div/table/tr/td[10]/a/span/span/span'
FIRST_PAGE_XPATH = '//*[@id="pager"]/div/table/tr/td[3]/a/span/span/span'


def _row_xpath(row, cell):
    """Return the XPath of *cell* in data row *row* (1-based) of the table."""
    # Data row n is addressed as tr[n + 2] in the result table.
    return '//*[@id="datasource"]/div/table/tr[' + str(2 + row) + ']' + cell


def _wait_visible(driver, xpath):
    """Block until the element at *xpath* is visible.

    Elements must be located again after every page change; clicking a stale
    one raises ElementClickInterceptedException.

    Raises:
        TimeoutException: the element did not show up within ELEMENT_TIMEOUT.
    """
    WebDriverWait(driver, ELEMENT_TIMEOUT).until(
        expected_conditions.visibility_of_element_located((By.XPATH, xpath))
    )


def _unfinished_downloads():
    """Return the number of Chrome partial-download files in ``foldersave``."""
    return len(glob.glob(os.path.join(foldersave, "*.crdownload")))


chromedriver = Service(r'C:\Users\Lenovo\AppData\Local\Programs\Python\Python310\chromedriver.exe')
chromeoptions = webdriver.ChromeOptions()
# Point Chrome's download directory at foldersave.
prefs = {
    'profile.default_content_settings.popups': 0,
    'download.default_directory': foldersave,
}
chromeoptions.add_experimental_option('prefs', prefs)
driver = webdriver.Chrome(service=chromedriver, options=chromeoptions)

# Remove leftover ASTGTMV003_*.crdownload partial files from earlier runs.
for file in glob.glob(os.path.join(foldersave, "ASTGTMV003_*.crdownload")):
    if os.path.isfile(file):  # leave directories alone
        os.remove(file)

# Open the login page. The script waits here until the user has logged in and
# reached the dataset page; automatic downloading starts only after that.
# Make sure the browser download directory equals foldersave, then log in and
# browse to the "GDEMV3 30M resolution digital elevation model" listing.
driver.get('http://www.gscloud.cn/accounts/login_user')
while driver.current_url != demdownloadurl:
    time.sleep(1)

# Wait for the last row of the first listing, then jump to the start page.
last_row_icon = _row_xpath(ROWS_PER_PAGE, DOWNLOAD_ICON_CELL)
_wait_visible(driver, last_row_icon)
inputp = driver.find_element(By.XPATH, PAGE_INPUT_XPATH)
inputp.click()
inputp.send_keys(Keys.CONTROL, 'a')  # select the current value so it is replaced
inputp.send_keys(str(page))
inputp.send_keys(Keys.ENTER)
_wait_visible(driver, last_row_icon)

# Start downloading. Each iteration of the outer loop is one full pass.
while True:
    # Count downloads per pass; a pass that starts none means nothing is
    # missing any more and ends the script.
    dlcount = 0
    while page <= page_num:
        print('当前下载第{}页'.format(page))
        # NOTE: the original author left disabled clean-up code here that
        # closed extra tabs whose URL contains 'bjdl.gscloud.cn'
        # (503 Service Temporarily Unavailable pages).
        for tr_num in range(1, ROWS_PER_PAGE + 1):
            driver.switch_to.window(driver.window_handles[0])  # back to the listing tab
            icon_xpath = _row_xpath(tr_num, DOWNLOAD_ICON_CELL)
            name_xpath = _row_xpath(tr_num, FILENAME_CELL)
            try:
                _wait_visible(driver, icon_xpath)
                _wait_visible(driver, name_xpath)
            except TimeoutException:
                if page == page_num:
                    # The final page may hold fewer than ROWS_PER_PAGE rows.
                    break
                raise
            d_filename = driver.find_element(By.XPATH, name_xpath).text
            print(d_filename)
            if os.path.exists(os.path.join(foldersave, d_filename + '.zip')):
                continue  # already downloaded
            tabcount = len(driver.window_handles)
            element = driver.find_element(By.XPATH, icon_xpath)
            # Simulate a click on the download icon.
            webdriver.ActionChains(driver).move_to_element(element).click(element).perform()
            dlcount += 1
            time.sleep(1)  # give the download time to start
            # Wait for any tab opened by the click to close again, but not forever.
            waited = 0
            while len(driver.window_handles) > tabcount:
                time.sleep(1)
                waited += 1
                if waited > TAB_CLOSE_TIMEOUT:
                    break
            # Keep the bandwidth busy: continue as soon as fewer than
            # concurrentdownload files are still being downloaded.
            while _unfinished_downloads() >= concurrentdownload:
                time.sleep(1)
        page += 1
        driver.find_element(By.XPATH, NEXT_PAGE_XPATH).click()  # next page
        time.sleep(1)
    print('下载循环下载完成')
    _wait_visible(driver, FIRST_PAGE_XPATH)
    driver.find_element(By.XPATH, FIRST_PAGE_XPATH).click()  # back to the first page
    page = 1
    if dlcount == 0:
        break
    print('查漏补缺')

print('全部下载完成')
os.system("pause")