Spider小说模型

import requests,re

class Spider:
    """Minimal regex-based page scraper.

    Downloads a page with ``requests`` and extracts fields from the HTML
    using regular expressions supplied by the caller.
    """

    # Seconds to wait for the server. Without an explicit timeout
    # ``requests.get`` may block indefinitely on an unresponsive host.
    REQUEST_TIMEOUT = 10

    # Maximum number of matches kept for each pattern.
    MAX_MATCHES = 20

    def take_html(self, url):
        """Fetch *url* and return the response body decoded as text.

        The declared response encoding is replaced by the one detected
        from the body (``apparent_encoding``) before decoding.

        Raises:
            requests.exceptions.RequestException: if the request fails
                or does not complete within ``REQUEST_TIMEOUT`` seconds.
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.encoding = response.apparent_encoding
        return response.text

    def take_info(self, url, **regex):
        """Extract fields from the page at *url*.

        Args:
            url: Address of the page to download.
            **regex: Mapping of field name to regular-expression pattern.

        Returns:
            A dict with one entry per keyword argument. Each value is the
            list returned by ``re.findall`` for that pattern, truncated to
            at most ``MAX_MATCHES`` items (an empty list when nothing
            matches).
        """
        html = self.take_html(url)
        return {
            name: re.findall(pattern, html)[:self.MAX_MATCHES]
            for name, pattern in regex.items()
        }

if __name__ == '__main__':
    # Demo run: scrape one book page and print everything that was extracted.
    url = 'https://www.x23us.com/html/69/69937/'

    # Each pattern is applied to the downloaded HTML with re.findall.
    # The chapter pattern has two groups, so its matches are (href, text) tuples.
    chapter_regex = '<td class="L"><a href="(.*?)">(.*?)</a></td>'
    title_regex = '<title>(.*?)</title>'
    content_regex = '<dd id="contents">(.*?)</dd>'
    # NOTE: this pattern embeds the title of one specific book, so it only
    # matches that book's description meta tag.
    author_regex = '<meta name="description" content="冰与火之凛冬已至最新章节及全集列表免费在线订阅，本小说作者：(.*?)，由顶点小说会员整理上传。" />'

    # Keyword names become the keys of the returned dict.
    info = Spider().take_info(
        url,
        book_title=title_regex,
        book_author=author_regex,
        book_chapter=chapter_regex,
        book_content=content_regex,
    )
    print(info)

posted @ 2018-12-29 10:03 陈文鑫 阅读(177) 评论(0) 编辑 收藏 举报

会员力量，点亮园子希望

刷新页面 返回顶部

陈文鑫

Spider小说模型

公告