下载ElementUI组件(Python)

代码:

# encoding:utf-8
from bs4 import BeautifulSoup
import requests, re, os, socket
from urllib import request

# Version of element-ui to download
element_ui_version = "2.13.0"
# Base directory under which the downloaded files are stored
element_ui_dir = "D:/tmp"

save_ui_dir = os.path.join(element_ui_dir, "element-ui")

# Make sure the target directory (and any missing parents) exists
os.makedirs(save_ui_dir, exist_ok=True)

# Root of the unpkg "browse" listing for the chosen version
element_ui_url = "https://unpkg.com/browse/element-ui@{}/".format(element_ui_version)
# Request headers sent with every listing-page request
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
}


def get_page(url, save_dir):
    """Recursively mirror one directory-listing page into ``save_dir``.

    The page at ``url`` is fetched and every link found inside its
    ``<tbody>`` is processed: links containing ``/`` are treated as
    sub-directories (created locally, then visited recursively after all
    files of the current page), every other link is treated as a file and
    handed to ``get_file``.

    Args:
        url: URL of the listing page; must end with ``/`` because link
            targets are appended to it verbatim.
        save_dir: Local directory that mirrors ``url``.

    Raises:
        requests.RequestException: If the listing page cannot be fetched
            (connection error or timeout).
    """
    print("Current Page:    ", url)
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, headers=headers, timeout=30)
    if not response.ok:
        # Do not parse an error page as if it were a listing.
        print("<Error>  cannot open page (HTTP %d): %s" % (response.status_code, url))
        return

    # Pass the raw bytes: str(bytes) would yield the "b'...'" repr with
    # escaped newlines / non-ASCII bytes instead of the actual HTML.
    soup = BeautifulSoup(response.content, "lxml")
    tbody = soup.find("tbody")
    if tbody is None:
        return

    # Read href attributes through the parser so HTML entities are decoded;
    # empty hrefs are ignored.
    hrefs = [tag["href"] for tag in tbody.find_all(href=True) if tag["href"]]

    dir_list = []
    for href in hrefs:
        if href == "../":
            # Link back to the parent listing.
            continue
        if href.startswith("/") or "://" in href or ".." in href.split("/"):
            # Absolute or parent-relative links would resolve outside
            # save_dir (and outside url), so they are never followed.
            continue
        href_path = os.path.join(save_dir, href)
        if "/" in href:
            # exist_ok lets an interrupted download be re-run.
            os.makedirs(href_path, exist_ok=True)
            print("Makedir:    ", href_path.replace(save_ui_dir, ""))
            dir_list.append(href)
        else:
            file_url = url + href
            abs_name = file_url.replace(element_ui_url, "")
            print("Download:    ", abs_name)
            get_file(file_url, href_path)

    # Descend only after the current directory's files have been handled.
    for sub_dir in dir_list:
        get_page(url + sub_dir, os.path.join(save_dir, sub_dir))


def get_file(url, filename, retries=5, timeout=30):
    """Download a single file, retrying on failure.

    The first ``browse/`` in ``url`` is removed before downloading, turning
    a listing-page URL into the corresponding raw-file URL.

    Args:
        url: Listing-style URL of the file.
        filename: Local path the file is written to.
        retries: Maximum number of download attempts.
        timeout: Default socket timeout in seconds (set process-wide via
            ``socket.setdefaulttimeout``).

    Returns:
        None. Failures are reported on stdout instead of being raised.
    """
    opener = request.build_opener()
    opener.addheaders = [('User-agent',
                          'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0')]
    request.install_opener(opener)
    socket.setdefaulttimeout(timeout)
    # Strip only the first "browse/": a package directory that is itself
    # named "browse/" must stay part of the file path.
    url = url.replace("browse/", "", 1)
    for attempt in range(1, retries + 1):
        try:
            request.urlretrieve(url, filename)
            return
        except socket.timeout:
            print('<Timeout>   Reloading for %d time' % attempt)
        except Exception as e:
            # Best-effort download: report the error and try again.
            print('<' + str(e) + '>   Reloading for %d time' % attempt)
    print("<Error>  download job failed!")
# Script entry point: start mirroring from the root listing URL into save_ui_dir.
get_page(element_ui_url, save_ui_dir)

 

posted @ 2020-06-03 20:37  alittlecomputer  阅读(868)  评论(0)  编辑  收藏  举报