利用xpath爬取58同城二手房信息

import requests

from lxml import etree
# 58.com second-hand housing ("ershoufang") listing page to scrape.
url = 'https://bj.58.com/ershoufang/?utm_source=market&spm=u-2d2yxv86y3v43nkddh1.BDPCPZ_BT&PGTID=0d100000-0000-12a9-da48-d35d2fb95114&ClickID=2'
# Request headers: send a Firefox User-Agent string with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:85.0) Gecko/20100101 Firefox/85.0',
}

# Fetch the page source. The timeout keeps the script from blocking forever
# when the server never answers (requests has no default timeout).
page_text = requests.get(url=url, headers=headers, timeout=10).text

# Parse the HTML; each child <div> of the matched <section class="list">
# is treated as one listing entry.
tree = etree.HTML(page_text)
div_list = tree.xpath('//section[@class="list"][1]/div')

# `with` guarantees the output file is flushed and closed even if a query or
# write raises part-way through the loop.
with open('./58.txt', 'w', encoding='utf-8') as fp:
    for div in div_list:
        titles = div.xpath('./a/div[2]//h3/text()')
        prices = div.xpath('./a/div[2]/div[2]/p[1]/span/text()')
        averages = div.xpath('./a/div[2]/div[2]/p[2]/text()')

        # xpath() returns a list; an entry whose markup lacks any of the
        # three nodes yields an empty list, and indexing [0] would raise
        # IndexError and abort the whole run. Skip such entries instead.
        if not (titles and prices and averages):
            continue

        title, price, average = titles[0], prices[0], averages[0]
        fp.write(f'{title} :{price}万 {average}\n')
posted @ 2021-02-17 23:31  未来全栈攻城狮  阅读(276)  评论(0)  编辑  收藏  举报