from lxml import html
# Parse the raw page source into an lxml element tree so that it can be
# queried with XPath. The parser is created with an explicit UTF-8 encoding.
# NOTE: despite its name, `htmlStr` holds the parsed root element, not a string.
htmlStr = html.etree.HTML(
    pagehtml,
    parser=html.etree.HTMLParser(encoding="utf-8"),
)

# Serialize `nr` back to markup as UTF-8 bytes, then decode those bytes to str.
# NOTE(review): `nr` is not assigned in the visible lines; presumably it is a
# node selected from the parsed tree elsewhere -- confirm.
nr = html.tostring(nr, encoding="utf-8").decode("utf-8")