# 3GPP specification downloader (uses requests / bs4 / threading)

import os
import threading

import requests
from bs4 import BeautifulSoup

# Semaphore shared by all threads: at most 10 holders at a time, and
# releasing more often than acquiring raises ValueError.
thread_lock = threading.BoundedSemaphore(10)


def get_3gppurl():
    """Return the link targets listed on the 3GPP 38-series archive page.

    Fetches the archive index and collects the ``href`` of every ``<a>``
    element found inside the page's ``<tbody>``.

    Returns:
        A list of ``href`` strings, in page order. Empty when the page
        contains no ``<tbody>``.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'https://www.3gpp.org/ftp/Specs/archive/38_series/'

    # Without a timeout a stalled connection would block forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    html = response.content.decode('utf-8')

    tbody = BeautifulSoup(html, 'html.parser').find('tbody')
    if tbody is None:
        # find() returns None when the element is absent.
        return []

    # href=True skips anchors that carry no href attribute.
    return [link['href'] for link in tbody.find_all('a', href=True)]


def download_zip(u, t):
    """Download ``u`` and save it under ``./3gpp-TS38.xx/`` as ``t``.

    Intended to run on a worker thread whose starter has already acquired
    ``thread_lock``; the semaphore is released here when the download ends,
    whether it succeeded or not.

    Args:
        u: URL of the file to download.
        t: File name to store the download under. Only its final path
            component is used, with surrounding whitespace removed.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: if the output directory or file cannot be written.
    """
    try:
        # Without a timeout a stalled transfer would hold its slot forever.
        res = requests.get(u, timeout=60)
        # Do not save an HTTP error page as if it were the archive.
        res.raise_for_status()

        out_dir = './3gpp-TS38.xx/'
        # The directory is not created anywhere else in this script.
        os.makedirs(out_dir, exist_ok=True)

        # The name comes from remote HTML: keep only the last component so
        # it cannot point outside out_dir.
        file_name = os.path.basename(str(t).strip())
        path = out_dir + file_name
        with open(path, 'wb') as file:
            file.write(res.content)
    finally:
        # Always give the slot back; otherwise each failed download would
        # permanently reduce the number of threads that can be started.
        thread_lock.release()


def get_zip(url1):
    """Start one download thread per link listed on a directory page.

    Fetches ``url1`` and, for every ``<a href=...>`` inside the page's
    ``<tbody>``, prints the link target and link text and passes both to
    ``download_zip`` on a new thread. The function returns once all threads
    have been started; it does not wait for them to finish.

    Args:
        url1: URL of the directory listing page to scan.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout a stalled connection would block forever.
    response = requests.get(url1, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    html = response.content.decode('utf-8')

    tbody = BeautifulSoup(html, 'html.parser').find('tbody')
    if tbody is None:
        # find() returns None when the element is absent; nothing to fetch.
        return

    # href=True skips anchors that carry no href attribute.
    for link in tbody.find_all('a', href=True):
        file_url = link['href']
        file_name = link.text
        print(file_url, file_name)
        # Blocks while no semaphore slot is free; download_zip releases the
        # slot when its download ends.
        thread_lock.acquire()
        worker = threading.Thread(
            target=download_zip, args=(file_url, file_name)
        )
        worker.start()


if __name__ == '__main__':
    # Visit each link from the archive index in turn, echoing it before
    # handing it to get_zip.
    for series_url in get_3gppurl():
        print(series_url)
        get_zip(series_url)

# 打印结果

 

# 下载结果

 

# posted @ 2021-02-07 11:57  gala  阅读(88)  评论(0)  编辑  收藏  举报