# Scrape second-hand housing listing data from 58.com

from lxml import etree
import requests
# Scrape 58.com second-hand housing listings
if __name__ == '__main__':
    # Fetch the HTML source of the listing page.
    url = 'https://m.58.com/bj/ershoufang/?reform=pcfront&PGTID=0d000001-000f-1085-0e36-5f17322b35a6&ClickID=1'
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36'
    }
    # requests has no default timeout, so set one to avoid hanging forever on
    # a stalled connection; raise_for_status() aborts on an HTTP error status
    # instead of parsing an error page as if it were listings.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()
    page_text = response.text

    # Parse the page into an element tree.
    tree = etree.HTML(page_text)
    # Every <li> that sits directly under a <ul> anywhere in the document.
    li_list = tree.xpath('//ul/li')

    # The context manager guarantees the file is flushed and closed, even if
    # an exception is raised part-way through the loop.
    with open('58.txt', 'w', encoding='utf-8') as fp:
        for li in li_list:
            # Text nodes at the position the title is expected to occupy
            # inside a listing item (presumably the listing title; verify
            # against the live page markup).
            titles = li.xpath('.//div[2]/div[1]/span/text()')
            if not titles:
                # '//ul/li' also matches list items that do not have this
                # structure; skip them rather than crash with IndexError.
                continue
            title = titles[0]
            print(title)
            fp.write(title + '\n')
# posted @ 2024-02-26 18:03  会秃头的小白  阅读(23)  评论(0)  编辑  收藏  举报