python之xpath

 

 

 1 #!/usr/bin/env python3
 2 # -*- coding: utf-8 -*-
 3 # author:Momo time:2018/6/29
 4 
 5 import urllib.request
 6 import urllib
 7 
 8 from lxml import etree
 9 
10 
def get_html(url, encoding='utf-8'):
    """Fetch *url* and return the response body decoded as text.

    Args:
        url: Address to open; anything ``urllib.request.urlopen`` accepts.
        encoding: Codec used to decode the raw response bytes. Defaults to
            ``'utf-8'``, matching the previous hard-coded behaviour.

    Returns:
        The complete response body as a ``str``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        UnicodeDecodeError: If the body is not valid in *encoding*.
    """
    # The context manager closes the response even when read() or decode()
    # raises, so the underlying connection is never leaked.
    with urllib.request.urlopen(url) as response:
        return response.read().decode(encoding)
15 
# Download the page and parse the markup into an lxml element tree.
html = get_html("http://www.runoob.com/python3/python3-reg-expressions.html")

selector = etree.HTML(html)

# XPath quick reference:
#   //        match nodes at any depth, searching from the document root
#   /         step down one level to a child node
#   /text()   extract the text content of a node
#   /@XXXX    extract the value of the attribute named XXXX

# Example: extract text
# content = selector.xpath('//*[@id="content"]/p/text()')  # /text()
# for each in content:
#     print(each)

# Example: extract attribute values
# link = selector.xpath('/html/body/link/@href')
# for each in link:
#     print(each)

# Print the class attribute of every <table> that is a direct child of the
# element whose id is "content".
table = selector.xpath('//*[@id="content"]/table/@class')
for each in table:
    print(each)

 

posted @ 2018-09-05 21:03  肖邦、维也纳  阅读(106)  评论(0)  编辑  收藏  举报