Python 之 XPath
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# author:Momo time:2018/6/29

import urllib.request
import urllib

from lxml import etree


def get_html(url, encoding='utf-8'):
    """Download *url* and return the response body decoded as text.

    Args:
        url: Address of the page to fetch.
        encoding: Codec used to decode the raw response bytes.
            Defaults to ``'utf-8'``.

    Returns:
        The decoded page source as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid in *encoding*.
    """
    # The context manager closes the HTTP response even when read() or
    # decode() raises, so the connection is never left open.
    with urllib.request.urlopen(url) as html_page:
        return html_page.read().decode(encoding)


html = get_html("http://www.runoob.com/python3/python3-reg-expressions.html")

# Parse the downloaded markup into an element tree that supports XPath.
selector = etree.HTML(html)

# XPath quick reference:
#   //        select matching nodes anywhere in the document
#   /         step down one level
#   /text()   extract text content
#   /@XXXX    extract the value of attribute XXXX

# # Extract text
# content = selector.xpath('//*[@id="content"]/p/text()')  # /text()
# for each in content:
#     print(each)

# # Extract attributes
# link = selector.xpath('/html/body/link/@href')
# for each in link:
#     print(each)

# Print the class attribute of every <table> directly under #content.
table = selector.xpath('//*[@id="content"]/table/@class')
for each in table:
    print(each)