使用 XPath 抓取链家二手房信息并保存到 Excel 中
import os.path

import requests
from lxml import etree
import xlwt
import xlrd

# Output workbook path shared by create_excel() and the script entry point.
_EXCEL_PATH = './lianjia_excel.xls'

# Header row; the order must match the records built in _parse_listing().
_COLUMNS = ('房源名称', '所在小区', '所在地区', '房源信息',
            '发布时间', '房源标签', '房源价格', '房源单价')

# NOTE(review): the listing URL is fixed to the cd.lianjia.com subdomain, so
# the city typed by the user only labels the sheet; it does not change which
# listings are fetched. Confirm whether that is intended.
_LISTING_URL = 'https://cd.lianjia.com/ershoufang/'

_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
}

# Seconds to wait for the server before giving up instead of hanging forever.
_REQUEST_TIMEOUT = 10


def _new_workbook(city_name):
    """Return ``(book, sheet)`` for a fresh workbook whose row 0 is the header.

    The sheet is named ``'<city_name>-链家二手房'`` and allows cell overwrite.
    """
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet(f'{city_name}-链家二手房', cell_overwrite_ok=True)
    for col_idx, title in enumerate(_COLUMNS):
        sheet.write(0, col_idx, title)
    return book, sheet


def create_excel(city_name=None):
    """Create the workbook file if it is missing, otherwise open the existing one.

    Args:
        city_name: Label used in the sheet name when a new file is created.
            When omitted, the module-level ``city`` variable is used, which
            is only defined when this file runs as a script.

    Returns:
        The newly saved ``xlwt.Workbook`` when the file did not exist, or the
        ``xlrd`` book object for the existing file. The two types differ, so
        callers must handle both.

    Raises:
        NameError: If a new file must be created, ``city_name`` is omitted and
            no module-level ``city`` exists.
    """
    if os.path.exists(_EXCEL_PATH):
        return xlrd.open_workbook(_EXCEL_PATH)

    if city_name is None:
        # Backward-compatible fallback to the global set by the script body.
        city_name = city
    book, _ = _new_workbook(city_name)
    book.save(_EXCEL_PATH)
    return book


def _first(node, path, default=''):
    """Return the first XPath match under ``node``, or ``default`` if none."""
    matches = node.xpath(path)
    return matches[0] if matches else default


def _parse_listing(info):
    """Turn one ``div.info.clear`` element into a row matching ``_COLUMNS``."""
    title = _first(info, './div[@class="title"]/a/text()')
    community = _first(info, './div[@class="flood"]/div/a[1]/text()')
    district = _first(info, './div[@class="flood"]/div/a[2]/text()')
    house_info = _first(info, './div[@class="address"]/div/text()')
    # These two queries return every text fragment below the element; join
    # them so each cell receives a single plain string.
    follow_info = ''.join(info.xpath('./div[@class="followInfo"]//text()'))
    tags = ''.join(info.xpath('./div[@class="tag"]//text()'))
    total_price = _first(info, './div[@class="priceInfo"]/div[1]/span/text()')
    if total_price:
        # The page gives the bare number; append the unit as the original did.
        total_price = total_price + '万'
    unit_price = _first(info, './div[@class="priceInfo"]/div[2]/span/text()')
    return [title, community, district, house_info,
            follow_info, tags, total_price, unit_price]


def _scrape_listings(url):
    """Download the listing page at ``url`` and return one row per listing.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status.
    """
    page = requests.get(url=url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
    page.raise_for_status()
    page.encoding = 'utf-8'
    tree = etree.HTML(page.text)
    items = tree.xpath(
        '//div[@class="content "]/div/ul[@class="sellListContent"]/li')

    rows = []
    for item in items:
        info_nodes = item.xpath('./div[@class="info clear"]')
        if not info_nodes:
            # List entries without an info block carry no listing data.
            continue
        rows.append(_parse_listing(info_nodes[0]))
    return rows


def _write_rows(sheet, rows):
    """Write ``rows`` below the header row, each row exactly once."""
    for row_idx, row in enumerate(rows, start=1):
        for col_idx, value in enumerate(row):
            sheet.write(row_idx, col_idx, value)


if __name__ == '__main__':
    # Kept at module level so create_excel() can still fall back to ``city``.
    city = input('请输入需要查询的城市数据:')
    book, sheet = _new_workbook(city)
    _write_rows(sheet, _scrape_listings(_LISTING_URL))
    book.save(_EXCEL_PATH)
excel保存效果如图:
分类:
Python
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· 阿里巴巴 QwQ-32B真的超越了 DeepSeek R-1吗?
· 【译】Visual Studio 中新的强大生产力特性
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 【设计模式】告别冗长if-else语句:使用策略模式优化代码结构