Python抓取目前全国大学名单
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @Time : 2020/4/23 8:57
# @Author: ltl
# @File : daxueDownload.py
"""Download an HTML page listing universities and print its table rows."""
import urllib.request
import threading
from time import ctime

from bs4 import BeautifulSoup

# NOTE(review): `threading` and `ctime` are not used by the code shown here;
# they are kept so nothing that may rely on them elsewhere breaks.

# Page that is scraped when the caller does not supply another URL.
_DEFAULT_SITE = 'http://www.chinadegrees.cn/xwyyjsjyxx/xwsytjxx/qgptgxmd/qgptgxmd.html'

# Only rows with exactly this many <td> cells are printed.
_COLUMN_COUNT = 5

# Five tab-separated columns, each centred in a 20-character field; the fill
# character is passed as positional argument 5.
_ROW_TEMPLATE = "{0:{5}^20}\t{1:{5}^20}\t{2:{5}^20}\t{3:{5}^20}\t{4:{5}^20}"

# chr(12288) is U+3000 (ideographic space); padding with a full-width space
# keeps columns of CJK text aligned better than an ASCII space would.
_FILL_CHAR = chr(12288)


def downloadDxmd(site=_DEFAULT_SITE, timeout=30):
    """Fetch *site* and print every five-cell table row as one aligned line.

    The first ``<tr>`` in the document is skipped, and any row that does not
    contain exactly five ``<td>`` cells is ignored.

    Args:
        site: URL of the page to download. Defaults to the page this script
            was originally written for.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
    """
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(site, timeout=timeout) as response:
        html = response.read()

    soup = BeautifulSoup(html, 'html.parser')

    # Slice off the first <tr> instead of testing the index inside the loop.
    for row in soup.find_all('tr')[1:]:
        cells = row.find_all('td')
        if len(cells) != _COLUMN_COUNT:
            continue
        # get_text() copes with empty cells and with cells whose first child
        # is a nested tag; both cases broke the former `.contents[0]` lookup.
        texts = [cell.get_text(strip=True) for cell in cells]
        print(_ROW_TEMPLATE.format(*texts, _FILL_CHAR))


if __name__ == '__main__':
    downloadDxmd()
运行结果为:
愿我们漂泊半生,
归来仍少年!