Python XPath抓取小说《三国演义》（一）
from lxml import etree
import requests

# Default pages scraped when the functions are called without arguments.
_CONTENTS_URL = "https://www.kanunu8.com/files/old/2011/2447.html"
_CHAPTER_URL = "https://www.kanunu8.com/files/old/2011/2447/71775.html"

# Encoding forced onto every response before its text is decoded.
# NOTE(review): if decoded text shows replacement characters, the pages may
# actually need a superset encoding such as "gbk" - confirm against the site.
_PAGE_ENCODING = "gb2312"

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 10

# Location of the chapter links inside the table-of-contents page.
_CHAPTER_LINK_XPATH = (
    '//table[9]//tr[1]//td[2]//table[4]//tr[1]//td[1]//table[1]//a'
)
# Location of the chapter body text inside a chapter page.
_CHAPTER_TEXT_XPATH = '//table[5]//tr[1]//td[2]//text()'


def _fetch_tree(url):
    """Download *url* and return the page parsed as an lxml HTML tree.

    Raises:
        requests.exceptions.RequestException: if the request times out,
            fails, or the server answers with an HTTP error status.
    """
    response = requests.get(url=url, timeout=_REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    # Must be set before accessing .text, which decodes using this value.
    response.encoding = _PAGE_ENCODING
    return etree.HTML(response.text)


def getContents(url=_CONTENTS_URL):
    """Get the chapter list and the chapter addresses.

    Prints every chapter title, then every chapter link, one per line.

    Args:
        url: Address of the table-of-contents page.

    Returns:
        A ``(titles, links)`` tuple holding the two XPath result lists:
        the text found under the chapter anchors and their ``href`` values.
    """
    tree = _fetch_tree(url)
    titles = tree.xpath(_CHAPTER_LINK_XPATH + '//text()')
    links = tree.xpath(_CHAPTER_LINK_XPATH + '//@href')
    for title in titles:
        print(title)
    for link in links:
        print(link)
    return titles, links


def getContent(url=_CHAPTER_URL):
    """Get the content of one chapter of the novel.

    Prints the list of text fragments matched on the chapter page.

    Args:
        url: Address of the chapter page.

    Returns:
        The XPath result list of text fragments from the chapter cell.
    """
    tree = _fetch_tree(url)
    fragments = tree.xpath(_CHAPTER_TEXT_XPATH)
    print(fragments)
    return fragments


if __name__ == '__main__':
    getContents()