(2)请用requests库的get()函数访问必应主页20次,打印返回状态,text属性内容,计算text属性和content属性所返回网页内容的长度
import requests

# Request the Bing home page 20 times. For every response, print the HTTP
# status code, the decoded body, and the lengths of the `text` and `content`
# attributes.
for _ in range(20):
    r = requests.get("https://cn.bing.com/?intlf=&mkt=zh-cn", timeout=30)
    # Raise requests.HTTPError (and stop the script) on a 4xx/5xx response.
    r.raise_for_status()
    # Decode r.text as UTF-8 instead of the encoding requests guessed.
    r.encoding = 'utf-8'
    print('状态={}'.format(r.status_code))
    print(r.text)
    # r.text is a decoded str while r.content is the raw bytes, so the two
    # lengths generally differ for non-ASCII pages.
    print('text属性长度{},content属性长度{}'.format(len(r.text), len(r.content)))
(3)这是一个简单的html页面,请保持为字符串,完成后面的计算要求。(良好)
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>菜鸟教程(runoob.com)</title>
</head>
<body>
<h1>我的第一个标题</h1>
<p id="first">我的第一个段落。</p>
</body>
<table border="1">
<tr>
<td>row 1, cell 1</td>
<td>row 1, cell 2</td>
</tr>
<tr>
<td>row 2, cell 1</td>
<td>row 2, cell 2</td>
</tr>
</table>
</html>
from bs4 import BeautifulSoup

# The sample page kept as a single string. It is assembled from adjacent
# literals purely for readability; the resulting text is unchanged.
html_doc = (
    ' <!DOCTYPE html> <html> <head>9号的作业 <meta charset="utf-8">'
    ' <title>菜鸟教程(runoob.com) </title> </head>'
    ' <body> <h1>我的第一个标题</h1> <p id="first">我的第一个段落。</p> </body>'
    ' <table border="1"> <tr> <td>row 1, cell 1</td> <td>row 1, cell 2</td> </tr> </table>'
    ' </html> '
)

soup = BeautifulSoup(html_doc, "html.parser")

# Print, in order: the <head> element, the <title> element, the <body>
# element, the first <p> element, and the `.string` of the whole document.
# NOTE(review): `.string` is None when an object has more than one child, so
# the last line presumably prints "None" — confirm this is what the task wants.
for part in (soup.head, soup.title, soup.body, soup.p, soup.string):
    print(part)
(4) 爬中国大学排名网站内容
import csv
import os

import requests
from bs4 import BeautifulSoup

# Rows collected by fillUni(); each entry is the list of cell texts of one <tr>.
ALL = []


def getHTMLtext(url):
    """Download *url* and return the response body decoded as UTF-8.

    Returns an empty string when the request fails (connection problem,
    timeout, or an HTTP error status), so callers can treat "" as
    "nothing fetched".
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: only request/HTTP failures are swallowed, so
        # programming errors and KeyboardInterrupt still propagate.
        return ""
    r.encoding = 'utf-8'
    return r.text


def fillUni(soup):
    """Append the cell texts of every table row found in *soup* to ``ALL``.

    Rows that contain no <td> cell are skipped.
    """
    for tr in soup.find_all('tr'):
        cells = tr.find_all('td')
        if not cells:
            continue
        # get_text() is used instead of .string because .string is None for a
        # cell whose text sits inside nested tags; strip=True also drops the
        # surrounding whitespace so the CSV fields are clean.
        ALL.append([td.get_text(" ", strip=True) for td in cells])


def writercsv(save_road, num, title):
    """Write at most *num* rows of ``ALL`` to the CSV file *save_road*.

    When the file does not exist yet it is created and *title* is written as
    the header row first; an existing file is appended to without repeating
    the header.
    """
    is_new_file = not os.path.isfile(save_road)
    with open(save_road, 'w' if is_new_file else 'a', newline='') as f:
        csv_write = csv.writer(f, dialect='excel')
        if is_new_file:
            csv_write.writerow(title)
        # Slicing stops at len(ALL), so asking for more rows than were scraped
        # (e.g. after a failed download) no longer raises IndexError.
        csv_write.writerows(ALL[:max(num, 0)])


title = ["排名", "学校名称", "省市", "总分", "生源质量", "培养结果", "科研规模",
         "科研质量", "顶尖成果", "顶尖人才", "科技服务", "产学研究合作", "成果转化"]
save_road = "D:\\html.csv"


def main(num):
    """Scrape the ranking page at the hard-coded URL and save *num* rows to ``save_road``."""
    url = "https://www.shanghairanking.cn/rankings/bcur/201911"
    html = getHTMLtext(url)
    soup = BeautifulSoup(html, "html.parser")
    fillUni(soup)
    writercsv(save_road, num, title)


main(10)