XPath 实战之全国城市名字爬取
import requests
from lxml import etree


def _link_texts(tree, li_xpath):
    """Return the first ``./a/text()`` result of every node matched by *li_xpath*.

    Args:
        tree: Parsed lxml element tree to query.
        li_xpath: XPath expression selecting the list-item nodes.

    Returns:
        A list with one text entry per matched node that has anchor text.
        Nodes without any anchor text are skipped instead of raising
        ``IndexError``.
    """
    names = []
    for li in tree.xpath(li_xpath):
        texts = li.xpath('./a/text()')
        if texts:
            names.append(texts[0])
    return names


if __name__ == "__main__":
    # Send a browser-like User-Agent with the request (UA spoofing).
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    url = 'https://www.aqistudy.cn/historydata/'

    # Fetch the full page source. The timeout keeps the script from
    # blocking forever when the server stops responding.
    page_text = requests.get(url=url, headers=headers, timeout=10).text

    # Parse the downloaded HTML.
    tree = etree.HTML(page_text)

    # Per the original author's notes: the first query targets the
    # "hot cities" list, the second targets the full city list.
    all_city_name = _link_texts(tree, '//div[@class = "bottom"]/ul/li')
    all_city_name.extend(
        _link_texts(tree, '//div[@class = "bottom"]/ul/div[2]/li')
    )

    print(all_city_name, len(all_city_name))
作者:华王
博客:https://www.cnblogs.com/huahuawang/
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步