Python抓取目前全国大学名单

#!/usr/bin/env python
# -*- coding:utf-8 -*-
#@Time  : 2020/4/23 8:57
#@Author: ltl
#@File  : daxueDownload.py


import urllib.request
import threading
from time import ctime
from bs4 import BeautifulSoup


def downloadDxmd():
    """Download the university list page and print its table rows.

    Fetches the hard-coded chinadegrees.cn page, parses it with
    BeautifulSoup and prints every ``<tr>`` that has exactly five ``<td>``
    cells as one tab-separated line. Each cell is centred in a 20-character
    field padded with the full-width space (U+3000) so that CJK text lines
    up. The first ``<tr>`` of the document is skipped (presumably the
    table header -- confirm against the live page).

    Returns:
        None. The rows are written to standard output.

    Raises:
        urllib.error.URLError: if the page cannot be fetched (this includes
            the connection timing out).
    """
    site = 'http://www.chinadegrees.cn/xwyyjsjyxx/xwsytjxx/qgptgxmd/qgptgxmd.html'

    # Parse inside the ``with`` block so the HTTP response is closed as soon
    # as BeautifulSoup has consumed it; the timeout keeps a stalled server
    # from hanging the script indefinitely.
    with urllib.request.urlopen(site, timeout=30) as response:
        soup = BeautifulSoup(response, 'html.parser')

    # Loop-invariant values: the padding character and the row template.
    fill = chr(12288)  # U+3000 IDEOGRAPHIC SPACE, same width as a CJK glyph
    tplt = "{0:{5}^20}\t{1:{5}^20}\t{2:{5}^20}\t{3:{5}^20}\t{4:{5}^20}"

    # Skip the first row of the document, as the original index check did.
    for tr in soup.find_all('tr')[1:]:
        tds = tr.find_all('td')
        if len(tds) != 5:
            continue
        # get_text() always yields a plain str: an empty cell becomes ''
        # instead of raising IndexError, and a cell whose first child is a
        # nested tag no longer breaks the ``^20`` format spec.
        cells = [td.get_text(strip=True) for td in tds]
        print(tplt.format(*cells, fill))


# Run the scraper only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    downloadDxmd()

运行结果为:

 

posted @ 2020-04-23 09:43  R小哥  阅读(414)  评论(0)  编辑  收藏  举报