yyyyyyyyyyyyyyyyyyyy

博客园 首页 新随笔 联系 订阅 管理
from lxml import etree

import codecs
import sys
from lxml import etree

def parser(p):
    """Print the text nodes of every table row found in page ``<p>.html``.

    The file named ``str(p) + '.html'`` (a relative path) is read and parsed
    as HTML. For each ``<tr>`` located under a ``<div class="BlueTable">``,
    the row is serialized back to HTML, re-parsed as a standalone fragment,
    and the list of its text nodes is printed.

    Args:
        p: Page identifier; converted with ``str`` to build the file name.

    Raises:
        IOError/OSError: propagated from ``open`` if the file cannot be read.
    """
    # Use a context manager so the handle is closed right away; this function
    # is called once per page, and unclosed handles would otherwise pile up.
    with open(str(p) + '.html', 'r') as page_file:
        tree = etree.HTML(page_file.read())

    for row in tree.xpath("//div[@class='BlueTable']//tr"):
        # Serialize the row and parse it again so the absolute "//text()"
        # query below only sees text belonging to this row.
        row_html = etree.tostring(row, method="html")
        print(etree.HTML(row_html).xpath("//text()"))

# Walk the numbered pages 1.html .. 210.html in order and dump each one.
for page_number in range(1, 211):
    parser(page_number)

 

posted on 2018-06-20 16:38  xxxxxxxx1x2xxxxxxx  阅读(431)  评论(0)  编辑  收藏  举报