Python爬虫案例03,爬取58二手房信息

import requests
from lxml import etree
# Fetch one 58.com second-hand housing ("ershoufang") listing page and print
# the <h3> text found in each listing entry.
url = "https://fy.58.com/ershoufang/?PGTID=0d100000-0091-53ca-4993-576198ca62e3"
headers = {
    # Browser-like user agent sent with the request.
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"
}
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on an HTTP error status instead of silently parsing an error page.
response.raise_for_status()
etree_txt = etree.HTML(response.text)  # parse the page source into an lxml tree
result = []  # collected text values, in page order

# Select the <div> elements that hold the individual listing entries.
divs = etree_txt.xpath('//*[@id="esfMain"]/section/section[3]/section[1]/section[2]/div')
for div in divs:
    # The XPath is relative to the current <div>; it yields a list of text nodes.
    texts = div.xpath('./a/div[2]/div[1]/div[1]/h3/text()')
    if not texts:
        # Entries without a matching <h3> are skipped rather than raising
        # IndexError on the [0] lookup.
        continue
    address = texts[0].strip()  # strip() removes surrounding whitespace
    result.append(address)

for i in result:  # print the collected values, one per line
    print(i)

 

posted @ 2023-03-13 11:44  shuxi_520  阅读(125)  评论(0)  编辑  收藏  举报