python--selenium--爬取笔趣阁小说网站

# -*- coding: utf-8 -*-
from selenium import webdriver
import requests,re,pprint,time

# Scrape every chapter of one novel from sbiquge.com and append it to a
# local text file.
#
# Flow:
#   1. Download the novel's index page with requests and pull the numeric
#      chapter ids (and titles) out of the chapter links with a regex.
#   2. Open each chapter page in Chrome through Selenium, read the heading
#      and body text, print them, and append them to the output file.
#
# A failure on a single chapter is reported and skipped, so one bad page
# does not abort the whole run.

# Index page of the novel; chapter pages live at <url><chapter_id>.html.
url = 'https://www.sbiquge.com/5_5374/'

# Upper bound on the number of chapters processed in one run.
MAX_CHAPTERS = 10000

# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
html = response.text

# Raw strings so that \w, \s and \d reach the regex engine untouched instead
# of being treated as (invalid) Python string escapes.
result = re.findall(r'\w\shref\s="/5_5374/(\d+)', html, re.S)  # chapter ids
esult = re.findall(r'\w\shref\s="/5_5374/\d+\.html">(.*?)</a>', html, re.S)  # chapter titles
print(result)
#print(esult)

# Raw string: the Windows path contains backslashes that must stay literal.
browser = webdriver.Chrome(r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe')

novel_name = '诛仙.txt'
try:
    # Iterate over the ids that were actually found rather than counting to
    # a fixed number; this cannot run off the end of the list and always
    # advances to the next chapter, even when the current one fails.
    for chapter_id in result[:MAX_CHAPTERS]:
        try:
            browser.get(url + "{}.html".format(chapter_id))
            time.sleep(0.1)
            cont = browser.find_element_by_xpath('// *[ @ id = "book"] / div[2] / h1')
            con = browser.find_element_by_class_name('showtxt')
            # Read the text once, before touching the file, so a lookup
            # failure cannot leave a half-written chapter behind.
            title = cont.text
            body = con.text
            print(title)
            print(body)
            with open(novel_name, 'a', encoding='utf-8') as f:
                # Only text is written; appending the integer loop counter
                # here raised TypeError and prevented any output.
                f.write('\n\n' + title + '\n\n\t' + body)
        except Exception as e:
            # Best-effort scraping: report the failing chapter and move on.
            print(e)
            print(chapter_id + "这章加载太慢了====================================================================================!")
finally:
    # Always end the WebDriver session, even if the run is interrupted.
    browser.quit()

  

posted @ 2019-10-07 21:53  传道授业  阅读(294)  评论(0)  编辑  收藏  举报