Python网络爬虫爬取贴吧话题热议榜单(可自定义条数)
以上为页面结构
import pandas as pd
import requests as rq
from bs4 import BeautifulSoup

url = "http://tieba.baidu.com/hottopic/browse/topicList?res_type=1"

# Number of rows shown when the user just presses Enter, and the upper bound
# advertised by the input prompt.
DEFAULT_COUNT = 10
MAX_COUNT = 30


def res_caputure():
    """Download the page at ``url`` and return its decoded HTML text.

    Returns:
        The response body as ``str``, decoded with the encoding that
        ``requests`` detects from the content.

    Raises:
        RuntimeError: if the request cannot be completed or the server
            answers with an HTTP error status. The message carries the
            status code, or ``None`` when no response was received at all.
    """
    res = None
    try:
        res = rq.get(url, timeout=30)
        res.raise_for_status()
    except rq.RequestException as exc:
        # ``res`` is still None when rq.get() itself failed (timeout, DNS,
        # refused connection), so it must not be dereferenced blindly.
        status = res.status_code if res is not None else None
        raise RuntimeError("发生异常,响应码为{}".format(status)) from exc
    res.encoding = res.apparent_encoding
    return res.text


def _parse_count(raw, default=DEFAULT_COUNT, maximum=MAX_COUNT):
    """Turn the user's raw input into a row count between 1 and ``maximum``.

    Blank input yields ``default``; values above ``maximum`` are clamped.

    Raises:
        ValueError: if the input is not an integer or is smaller than 1.
    """
    text = raw.strip()
    if not text:
        return default
    count = int(text)
    if count < 1:
        raise ValueError("count must be at least 1")
    return min(count, maximum)


def _extract_topics(html, count):
    """Return ``(titles, volumes)`` for at most ``count`` topics in ``html``.

    Titles are the strings of the ``a[target]`` elements in document order;
    volumes are the strings of every second ``<span>`` starting at index 3.
    Both lists are truncated to the same length, so a page holding fewer
    entries than requested yields a shorter result instead of an IndexError.
    """
    # Name the parser explicitly: without it bs4 warns and picks whichever
    # parser happens to be installed, which may differ between machines.
    soup = BeautifulSoup(html, "html.parser")
    links = soup.select('a[target]')
    spans = soup.select('span')
    titles = [link.string for link in links[:count]]
    # NOTE(review): the span offsets (3, 5, 7, ...) are taken from the page
    # layout at the time of writing — re-check if the page changes.
    volumes = [span.string for span in spans[3::2][:count]]
    rows = min(len(titles), len(volumes))
    return titles[:rows], volumes[:rows]


def main():
    """Fetch the hot-topic list, ask how many rows to show, and print them."""
    try:
        html = res_caputure()
    except RuntimeError as exc:
        # Stop here: an error message is not HTML and must not be parsed.
        raise SystemExit(str(exc))

    raw = input("输入要查看百度贴吧话题热议榜单的条数(直接回车默认为10条,最高为30):")
    try:
        count = _parse_count(raw)
    except ValueError:
        raise SystemExit("输入无效,请输入1到30之间的整数")

    titles, volumes = _extract_topics(html, count)
    dt = {'排名': range(1, len(titles) + 1), '标题': titles, '内容数': volumes}
    print(pd.DataFrame(dt))


if __name__ == "__main__":
    main()
可自定义查看条数(最多30条),输入后按回车确认;不输入直接回车则默认显示10条
演示如下: