# Scrape second-hand housing (二手房) listings from 58.com (58同城) using XPath.
import requests
from lxml import etree
# Listing page that is fetched and parsed below.
url = 'https://bj.58.com/ershoufang/?utm_source=market&spm=u-2d2yxv86y3v43nkddh1.BDPCPZ_BT&PGTID=0d100000-0000-12a9-da48-d35d2fb95114&ClickID=2'
# Send a browser-like User-Agent header with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:85.0) Gecko/20100101 Firefox/85.0'
}


def _first_text(node, path):
    """Return the first result of evaluating XPath *path* on *node*.

    Returns None when the query matches nothing, so callers can skip the
    entry instead of failing with IndexError on an empty result list.
    """
    matches = node.xpath(path)
    return matches[0] if matches else None


# Fetch the page source. The timeout keeps the script from hanging forever
# on an unresponsive server, and raise_for_status() stops early on an HTTP
# error instead of parsing an error page.
response = requests.get(url=url, headers=headers, timeout=10)
response.raise_for_status()
page_text = response.text

# Parse the HTML and select the <div> children of the "list" section;
# each of them is processed as one listing.
tree = etree.HTML(page_text)
div_list = tree.xpath('//section[@class="list"][1]/div')

# The context manager guarantees the output file is flushed and closed,
# even if an exception is raised while writing.
with open('./58.txt', 'w', encoding='utf-8') as fp:
    for div in div_list:
        title = _first_text(div, './a/div[2]//h3/text()')
        price = _first_text(div, './a/div[2]/div[2]/p[1]/span/text()')
        # NOTE(review): presumably the per-area unit price -- confirm against the page.
        average = _first_text(div, './a/div[2]/div[2]/p[2]/text()')
        # Skip children that lack any of the three fields rather than
        # aborting the whole run.
        if title is None or price is None or average is None:
            continue
        # One line per listing: "<title> :<price>万 <average>".
        fp.write(title + ' :' + price + '万 ' + average + '\n')
# Life is short, I use Python.