spider

from lxml import etree
import requests
def getHtml(html):
novelcontent = requests.get(html).content
return etree.HTML(novelcontent)

source = getHtml("http://www.cabintu.com")

listclassify = source.xpath('//ul[@class="sg_menu"]/li/a')
listtype = source.xpath('//div[@class="mainleft"]/ul[@class="sg_menu"]/li[@class="section"]//ul[@class="subnav_a"]/li[@class="airline"]/a')

for i in range(0,len(listclassify)-1):
fname = source.xpath('//div[@class="mainleft"]/ul[@class="sg_menu"]/li[@class="section"]/a/text()')[i]
print fname



for n in range(0,len(listtype)-1):
typelist = source.xpath('//div[@class="mainleft"]/ul[@class="sg_menu"]/li[@class="section"]//ul[@class="subnav_a"]/li[@class="airline"]/a/text()')[n]
print typelist



# for n in range(0,)


# ftypelist = source.xpath('//div[@class="mainleft"]/ul[@class="sg_menu"]/li[@class="section"]/ul[@class="subnav_a"]/li[@class="airline"]/a/text()')[i]

posted on 2016-11-25 19:53  chinxfin  阅读(130)  评论(0编辑  收藏  举报

导航