Python 爬虫小练习
import re

import requests
from lxml import etree
from bs4 import BeautifulSoup

# Seconds to wait for a server response. requests applies no timeout unless
# one is passed explicitly, so a stalled server would hang the script.
REQUEST_TIMEOUT = 10


def novel():
    """Download one novel chapter page and print its title and body text.

    The title is the first ``<h1>...</h1>`` match in the raw HTML; the body
    is the concatenation of the stripped text nodes directly under
    ``<div id="content">``, excluding the last two nodes.

    Raises:
        requests.RequestException: on timeout, connection failure, or an
            HTTP error status.
        IndexError: if the page contains no ``<h1>`` element.
    """
    url = 'https://www.bqkan8.com/50_50096/18520412.html'
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail fast on 4xx/5xx instead of scraping an error page.
    response.raise_for_status()
    page = response.text

    headings = re.findall("<h1>(.*?)</h1>", page)
    if not headings:
        raise IndexError(f'no <h1> title found in {url}')
    print('标题:', headings[0])

    html = etree.HTML(page)
    content = html.xpath('//div[@id="content"]/text()')
    # The last two text nodes are deliberately dropped, as in the original.
    # NOTE(review): '&1t;/p>' (digit one) looks like a mistyped '&lt;/p>';
    # kept unchanged — confirm against the actual page markup.
    res = ''.join(
        piece.strip().replace('&1t;/p>', '') for piece in content[:-2]
    )
    print('正文:', res)


def house():
    """Download the Beijing new-house listing and print the first house name.

    The name is the text of the first element matching the CSS selector
    ``.house-name``.

    Raises:
        requests.RequestException: on timeout, connection failure, or an
            HTTP error status.
        IndexError: if no element matches ``.house-name``.
    """
    url = 'https://beijing.qfang.com/newhouse'
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'lxml')
    # select_one returns the first match, or None when nothing matches.
    first_house = soup.select_one('.house-name')
    if first_house is None:
        raise IndexError(f'no ".house-name" element found in {url}')
    first_house_title = first_house.text
    print(first_house_title)


if __name__ == '__main__':
    novel()
    house()
import requests
from lxml import etree


def _first_text(node, path, strip=False):
    """Return the first result of ``node.xpath(path)``, or '' if none match.

    A missing node yields an empty string, so one incomplete table row does
    not abort the whole listing with an IndexError.
    When ``strip`` is true, surrounding whitespace is removed from the result.
    """
    found = node.xpath(path)
    if not found:
        return ''
    return found[0].strip() if strip else found[0]


# Scrape the 2021 ranking table and print one dict per table row.
url = 'https://www.shanghairanking.cn/rankings/bcur/2021'
# requests applies no timeout by default; bound the wait explicitly.
r = requests.get(url, timeout=10)
# Fail fast on 4xx/5xx instead of parsing an error page.
r.raise_for_status()
html = etree.HTML(r.content.decode())
tr_list = html.xpath('//*[@class="rk-table"]/tbody/tr')
for tr in tr_list:
    item = {
        'rank': _first_text(tr, './/td[1]/div/text()', strip=True),  # rank
        'name_cn': _first_text(tr, './/*[@class="name-cn"]/text()'),  # Chinese school name
        'name_en': _first_text(tr, './/*[@class="name-en"]/text()'),  # English school name
        'tags': _first_text(tr, './/*[@class="tags"]/text()'),  # school tags
        'province': _first_text(tr, './/td[3]/text()', strip=True),  # province
        'category': _first_text(tr, './/td[4]/text()', strip=True),  # category
        'total_score': _first_text(tr, './/td[5]/text()', strip=True),  # total score
    }
    print(item)

【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?