python爬虫 xpath

#!/usr/bin/env python
# -*- coding: utf-8 -*-


import requests
from lxml import etree

# Target address fetched by the requests.get() call further down.
url = "http://www.spbeen.com/tool/request_info/"
# Alternative target (a direct .jpg link); uncomment to fetch it instead.
#url = "http://image.angelimg.spbeen.com/00000mx00000/wrUDra2sDelythSuP8OD305841/LJpdoiu5dxErQDYpnL65305841-kjdZYh.jpg"

def requests_view(response):
    """Save a response body to ``tmp.html`` and open it in a new browser tab.

    A ``<base href="...">`` element pointing at the response URL is injected
    right after the first literal ``<head>`` tag, so that relative links in
    the saved copy resolve against the original site instead of the local
    file system. If the body contains no literal ``<head>`` tag it is written
    out unchanged.

    Args:
        response: Object exposing ``url`` (text) and ``content`` (bytes),
            such as the value returned by ``requests.get``.
    """
    import html
    import os
    import pathlib
    import webbrowser

    # Escape the URL so a quote or ampersand in it cannot break the attribute.
    base_tag = '<head><base href="%s">' % html.escape(str(response.url), quote=True)
    # Only the first "<head>" is the document head; later literal matches
    # (e.g. inside inline scripts) must not receive a second <base>.
    content = response.content.replace(b"<head>", base_tag.encode('utf-8'), 1)

    tmp_path = os.path.abspath('tmp.html')
    # The context manager closes the file even when the write fails.
    with open(tmp_path, 'wb') as tmp_html:
        tmp_html.write(content)

    # webbrowser only documents support for URLs; a bare relative filename
    # is not portable, so hand it an absolute file:// URI.
    webbrowser.open_new_tab(pathlib.Path(tmp_path).as_uri())

# Fetch `url`, preview the raw response in a browser, then print the text of
# one node selected by XPath. Any failure is reported on stdout.
try:
    # Referer header sent with the request.
    # NOTE(review): presumably the server checks it (hotlink protection) — confirm.
    headers = {"Referer":"http://angelimg.spbeen.com/ang/403"}
    # Without a timeout requests waits indefinitely on an unresponsive server.
    r = requests.get(url, headers=headers, timeout=10)
    print(r.status_code)
    # Preview happens before the status check, so error pages are shown too.
    requests_view(r)
    if r.status_code != 200:
        raise Exception("请求失败" , r.status_code)
    # bytes.decode() defaults to UTF-8.
    content = r.content.decode()

    html = etree.HTML(content)
    # NOTE(review): in XPath 1.0, normalize-space() applied to a node-set uses
    # only the first node, so this yields the first text node under the
    # selected div, not all of its text — confirm that is intended.
    res = html.xpath("normalize-space(/html/body/div[2]/div[3]/div/div[3]/div[2]//text())")

    print(res)
    # Variant for the image URL: save the raw response bytes to disk.
    # with open("a.jpg","wb") as f :
    #     f.write(r.content)
except Exception as e:
    # Top-level boundary of the script: report the failure instead of a traceback.
    print(str(e))

posted @ 2021-01-15 17:40  brady-wang  阅读(76)  评论(0)  编辑  收藏  举报