增补博客 第十三篇 Python大作业小说阅读器(2)爬取
import os
import random
import re

import requests
from lxml import etree

from 可视化查询书籍.查询书籍信息 import searchbook

# Proxy IP pool.
proxy_list = [
    "192.168.72.241"
]

# Root of the novel site; catalogue and chapter paths are appended to it.
_BASE_URL = 'https://www.bqg88.cc/'

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the downloader forever.
_REQUEST_TIMEOUT = 10

# One catalogue entry: captures (chapter href, chapter title).
_CHAPTER_LINK_RE = re.compile(r'<dd><a href\s*=\s*"(.*?)">(.*?)</a></dd>')

# Characters that are not allowed in Windows file names.
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')

# Site advertisement stripped from every chapter body.
_SITE_AD = '请收藏本站:https://www.bqg88.cc。笔趣阁手机版:https://m.bqg88.cc'


def get_random_proxy():
    """Return one entry picked at random from ``proxy_list``."""
    return random.choice(proxy_list)


def _safe_filename(title):
    """Return *title* with characters illegal in file names replaced by ``_``."""
    return _ILLEGAL_FILENAME_CHARS.sub('_', title)


def search(user_leixing, user_title, user_author):
    """Download every chapter of the matching book into its own text file.

    The book is looked up with ``searchbook``. If the book's directory
    already exists, a message is printed and nothing is downloaded.
    Otherwise the catalogue page is fetched, the directory is created and
    each chapter listed in the catalogue is saved as ``<chapter title>.txt``
    (UTF-8) inside it.

    Args:
        user_leixing: Passed through to ``searchbook`` unchanged.
        user_title: Passed through to ``searchbook`` unchanged.
        user_author: Passed through to ``searchbook`` unchanged.

    Raises:
        requests.exceptions.HTTPError: If the catalogue or a chapter page
            answers with an error status.
        requests.exceptions.Timeout: If the server does not answer within
            ``_REQUEST_TIMEOUT`` seconds.
    """
    # Look the book up once; index 0 is used as the directory name and
    # index 2 as the book's path on the site.
    # NOTE(review): presumably index 0 is the title and index 2 the relative
    # catalogue URL - confirm against searchbook.
    book = searchbook(user_leixing, user_title, user_author)

    directory = "..\\小说\\" + book[0]
    if os.path.exists(directory):
        print("目录已存在")
        return

    print("目录不存在")

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    # NOTE(review): only the "http" scheme is mapped here, while every URL
    # requested below is https - requests selects proxies by URL scheme, so
    # this proxy is likely never used. Confirm the intent before adding an
    # "https" entry.
    proxy = {"http": "http://" + get_random_proxy()}

    index_url = _BASE_URL + book[2]
    response = requests.get(
        index_url, headers=headers, proxies=proxy, timeout=_REQUEST_TIMEOUT
    )
    response.raise_for_status()
    info_list = _CHAPTER_LINK_RE.findall(response.text)

    # Create the directory only after the catalogue was fetched successfully.
    os.makedirs(directory, exist_ok=True)

    for chapter_href, chapter_title in info_list:
        response = requests.get(
            _BASE_URL + chapter_href,
            headers=headers,
            proxies=proxy,
            timeout=_REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        print("正在下载" + chapter_title)

        html_data = etree.HTML(response.text)
        text_list = html_data.xpath('//div[@id="chaptercontent"]/text()')
        text = ''.join(text_list)
        book_text = '\n\n' + chapter_title + '\n\n' + text.replace(_SITE_AD, '')

        # Save each chapter to a separate text file; the title is sanitised
        # so that characters such as "?" or ":" cannot make open() fail.
        chapter_filename = os.path.join(
            directory, _safe_filename(chapter_title) + '.txt'
        )
        with open(chapter_filename, "w", encoding='utf-8') as chapter_file:
            chapter_file.write(book_text)

    print("下载完成!")
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步