# Spider小说模型 (Spider: a small novel-scraping model)

import requests,re

class Spider:
    """Minimal regex-based page scraper.

    Downloads a page with ``requests`` and extracts fields from the
    returned HTML using caller-supplied regular expressions.
    """

    # Seconds to wait on the server before giving up. Without a timeout,
    # ``requests.get`` may block indefinitely on an unresponsive host.
    timeout = 10

    # Maximum number of matches kept for each pattern in ``take_info``.
    max_matches = 20

    def take_html(self, url: str) -> str:
        """Fetch *url* and return the response body as text.

        The response encoding is overridden with the value of
        ``apparent_encoding`` before the text is decoded.

        Raises:
            requests.exceptions.RequestException: on connection failure or
                when the server does not answer within ``self.timeout``.
        """
        response = requests.get(url, timeout=self.timeout)
        response.encoding = response.apparent_encoding
        return response.text

    def take_info(self, url: str, **regex: str) -> dict:
        """Fetch *url* and apply every named pattern to its HTML.

        Args:
            url: Address of the page to download.
            **regex: Mapping of result key to regular-expression pattern.

        Returns:
            A dict with one entry per keyword argument, in the order given.
            Each value is the ``re.findall`` result for that pattern,
            truncated to at most ``self.max_matches`` items (an empty list
            when nothing matches).
        """
        html = self.take_html(url)
        return {
            key: re.findall(pattern, html)[:self.max_matches]
            for key, pattern in regex.items()
        }

if __name__ == '__main__':
    # Demo run: scrape one page and print everything the patterns capture.
    index_url = 'https://www.x23us.com/html/69/69937/'

    # Result key -> pattern. Insertion order is the order of the keys in the
    # printed dict. The author pattern matches a meta description made of
    # fixed literal text around a single capture group.
    patterns = {
        'book_title': '<title>(.*?)</title>',
        'book_author': '<meta name="description" content="冰与火之凛冬已至最新章节及全集列表免费在线订阅,本小说作者:(.*?),由顶点小说会员整理上传。" />',
        'book_chapter': '<td class="L"><a href="(.*?)">(.*?)</a></td>',
        'book_content': '<dd id="contents">(.*?)</dd>',
    }

    scraper = Spider()
    print(scraper.take_info(index_url, **patterns))

# posted @ 2018-12-29 10:03  陈文鑫  阅读(177)  评论(0)  编辑  收藏  举报