一、访问网站20次
import requests
from bs4 import BeautifulSoup

# Visit the site 20 times. The request is issued INSIDE the loop so that each
# iteration performs a real HTTP fetch; fetching once and printing the same
# parsed document 20 times would not actually revisit the site.
for i in range(20):
    # A timeout keeps one stalled connection from hanging the whole run.
    r = requests.get("http://www.baidu.com", timeout=30)  # target site
    r.encoding = "utf-8"
    # Name the parser explicitly so the output does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(r.text, "html.parser")
    print(soup.head)
    print("\n")
    print(i + 1)  # 1-based count of visits made so far
    print("\n")
通过 for 循环来控制访问次数,效果如下:
二、爬虫
import requests
from bs4 import BeautifulSoup

# Module-level accumulator: one list of cell values per scraped table row.
# fillunivlist() appends to it; printunivlist() reads from it.
alluniv = []


def getHTMLText(url):
    """Fetch ``url`` and return the response body decoded as UTF-8.

    Returns the sentinel string ``"error"`` when the request fails
    (connection problem, timeout, or an HTTP error status raised by
    ``raise_for_status``).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = 'utf-8'
        return r.text
    except requests.RequestException:
        # Only request failures are turned into the sentinel; KeyboardInterrupt,
        # SystemExit and programming errors are allowed to propagate.
        return "error"


def fillunivlist(soup):
    """Append the cells of every data row found in ``soup`` to ``alluniv``.

    Rows that contain no ``<td>`` cells are skipped. Each cell is stored as
    its ``.string`` value, which bs4 documents as ``None`` when the tag does
    not wrap a single string.
    """
    for tr in soup.find_all('tr'):
        cells = tr.find_all('td')
        if not cells:
            continue
        alluniv.append([td.string for td in cells])


def _cell(row, index):
    """Return ``row[index]`` for display, or ``""`` if it is missing or None."""
    if index < len(row) and row[index] is not None:
        return row[index]
    return ""


def printunivlist(num):
    """Print a header line followed by at most ``num`` rows from ``alluniv``.

    Printed columns are row indexes 0, 1, 2, 3 and 6. If fewer than ``num``
    rows were scraped, only the available rows are printed; cells that are
    missing or ``None`` are rendered as blanks instead of raising.
    """
    row_format = "{:^4}{:^10}{:^5}{:^8}{:^10}"
    print(row_format.format("排名", "学校名字", "省份", "总分", "培养规模"))
    # max(num, 0) keeps a negative num printing nothing (as range(num) did)
    # rather than being interpreted as a from-the-end slice bound.
    for u in alluniv[:max(num, 0)]:
        print(row_format.format(*(_cell(u, i) for i in (0, 1, 2, 3, 6))))


def main(num):
    """Download the ranking page, collect its table rows and print ``num`` of them."""
    url = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html"
    html = getHTMLText(url)
    # When the download failed, html is the "error" sentinel, which parses to
    # a document with no rows, so only the header line is printed.
    soup = BeautifulSoup(html, "html.parser")
    fillunivlist(soup)
    printunivlist(num)


if __name__ == "__main__":
    main(100)
效果如下: