import wget
from bs4 import BeautifulSoup as bs
import requests
import random
import requests
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, Future, as_completed, wait
from multiprocessing import cpu_count
# Pool of User-Agent strings; open_url() sends one chosen at random with each
# request it makes.
headers = [
'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.76 Mobile Safari/537.36',
'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
]
def open_url(url, timeout=30):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    A User-Agent is picked at random from the module-level ``headers`` list
    for every call.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The response body as a ``str``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    resp = requests.get(
        url,
        headers={"user-agent": random.choice(headers)},
        timeout=timeout,
    )
    # Fail loudly instead of handing an error page to the HTML parser.
    resp.raise_for_status()
    return resp.content.decode("utf-8")
# Download the resource listing page and parse its HTML.
response = open_url('https://nadc.china-vo.org/res/r101217/')
soup = bs(response, 'html.parser')
pd_files = soup.find_all(id='pd-files')

# Build one record per matched element: the displayed file name, the
# displayed size text, and the download URL formed from the link's href.
file_info_list = []
for file in pd_files:
    name_text = file.find(class_='paperinfo-files-filename').text.strip()
    size_text = file.find(class_='paperinfo-files-filesize').text
    href = file.find(class_='col-sm-1').find('a')['href']
    file_info = dict(
        file_name=name_text,
        file_size=size_text,
        download_link='https://nadc.china-vo.org/' + href,
    )
    file_info_list.append(file_info)
import requests
import time
def download(file_info, timeout=30):
    """Stream one remote file into the local ``download/`` directory.

    A tqdm progress bar labelled with the file name is advanced as chunks
    are written.

    Args:
        file_info: Mapping with a ``'download_link'`` URL and a
            ``'file_name'`` used for the local file.
        timeout: Seconds to wait for the server (connect / between reads)
            before giving up.

    Raises:
        ValueError: If ``file_info['file_name']`` has no usable final path
            component.
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
        OSError: If the target file cannot be created or written.
    """
    import os  # local import: keeps this block self-contained

    target_dir = 'download'
    # open() does not create missing directories; do it up front.
    os.makedirs(target_dir, exist_ok=True)

    # The name comes from a scraped web page, so keep only its last path
    # component to make sure nothing is written outside target_dir.
    safe_name = os.path.basename(file_info['file_name'])
    if not safe_name:
        raise ValueError(f"unusable file name: {file_info['file_name']!r}")
    target_path = os.path.join(target_dir, safe_name)

    # The context manager releases the streamed connection even on errors.
    with requests.get(
        file_info['download_link'], stream=True, timeout=timeout
    ) as resp:
        # Do not save an HTML error page under the data file's name.
        resp.raise_for_status()
        total = int(resp.headers.get('content-length', 0))
        with open(target_path, 'wb') as file, tqdm(
            desc=file_info['file_name'],
            total=total or None,  # None = unknown size when header is absent
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
        ) as bar:
            # 64 KiB chunks: far fewer Python-level iterations than 1 KiB.
            for data in resp.iter_content(chunk_size=64 * 1024):
                bar.update(file.write(data))
# Report the CPU core count returned by multiprocessing.cpu_count(); the same
# value is used as the thread-pool size in test_tqdm(). The message reads
# "Total: <n> cores".
print(f'总共有:{cpu_count()} 个核心')
def test_tqdm():
    """Download every entry of ``file_info_list`` concurrently.

    One ``download`` task per entry is submitted to a thread pool sized to
    the CPU count, and the function returns only after all of them have
    finished. A failed download does not abort the others; each failure is
    reported on stdout once the batch is done.

    Returns:
        None.
    """
    # The ``with`` block shuts the pool down and joins its threads on exit.
    with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
        # Submit everything first so the downloads actually overlap, and
        # remember which future belongs to which file for error reporting.
        tasks = {
            executor.submit(download, file_info): file_info
            for file_info in file_info_list
        }
        wait(tasks)

    # Exceptions raised in worker threads stay hidden inside the futures
    # unless they are inspected explicitly.
    for task, file_info in tasks.items():
        error = task.exception()
        if error is not None:
            print(f"download failed: {file_info['file_name']}: {error!r}")
# Script entry point: start downloading all files collected in file_info_list.
test_tqdm()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
· Docker 太简单,K8s 太复杂?w7panel 让容器管理更轻松!
2022-04-11 pretty_midi和music21对降调号的不同处理方式