# 不用协程
"""
import os
import asyncio
import requests
import aiofiles as aiofiles
from lxml import etree
import aiohttp
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.57'
}
# chapter_url = 'https://dushu.baidu.com/api/pc/getChapterContent?data={"book_id":'+b_id+',"cid":"'+b_id+'|1569782244","need_bookinfo":1}'
def getnovel(url):
# 如果文件夹不存在则创建文件夹,用来放小说
if not os.path.exists("other/novel"):
os.mkdir("other/novel")
resp = requests.get(url=url, headers=headers)
# 取得第一个标题和cid
# print(resp.json()['data']['novel']['items'][0]['title'])
# print(resp.json()['data']['novel']['items'][0]['cid'])
all_chapter = resp.json()['data']['novel']['items']
for chapter in all_chapter:
# 每个标题跟cid
# print(chapter['title'])
# print(chapter['cid'])
title = chapter['title']
cid = chapter['cid']
# 拼接每个章节的 url
chapter_url = 'https://dushu.baidu.com/api/pc/getChapterContent?data={"book_id":' + b_id + ',"cid":"' + b_id + '|' + cid + '","need_bookinfo":1}'
# print(chapter_url)
chapter_resp = requests.get(url=chapter_url, headers=headers).json()
# 获得每个章节的内容
# print(chapter_resp['data']['novel']['content'])
# 存储文件
chapter_text = chapter_resp['data']['novel']['content']
with open('other/novel/' + title, mode='w', encoding='utf-8') as f:
f.write(chapter_text)
# 测试用
break
# 章节内容
# print(resp.json()['data']['novel']['content'])
resp.close()
if __name__ == '__main__':
b_id = "4306063500"
# 不能用f'' 因为{"book_id":4306063500}中原本就存在{}
url = 'https://dushu.baidu.com/api/pc/getCatalog?data={"book_id":' + b_id + '}'
getnovel(url)
"""
# 协程操作
import os
import asyncio
import requests
import aiofiles
import aiohttp
# Shared request headers: spoof a desktop Edge/Chromium User-Agent so the
# Baidu novel API responds as it would to a normal browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.57'
}
# chapter_url = 'https://dushu.baidu.com/api/pc/getChapterContent?data={"book_id":'+b_id+',"cid":"'+b_id+'|1569782244","need_bookinfo":1}'
async def aiodownload(chapter_url, title):
    """Download one chapter and save it as other/novel/<title>.txt.

    Fetches the JSON payload at *chapter_url* with aiohttp, extracts the
    chapter body, and writes it to disk with aiofiles so neither the
    network read nor the file write blocks the event loop.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url=chapter_url, headers=headers) as resp:
            # Parsing the response body off the wire is itself awaitable.
            payload = await resp.json()
            # Chapter text sits under data -> novel -> content in the payload.
            text = payload['data']['novel']['content']
            # NOTE: `text` is already a plain str — only the write is awaited
            # (a bare str cannot be used in an `await` expression).
            async with aiofiles.open('other/novel/' + title + '.txt', mode='w', encoding='utf-8') as f:
                await f.write(text)
    print(title, "下载成功!!!")
async def getnovel(url, book_id=None):
    """Fetch a book's catalog and download every chapter concurrently.

    Parameters
    ----------
    url : str
        The getCatalog endpoint URL returning the chapter list as JSON.
    book_id : str, optional
        Book identifier used when building each chapter URL. Defaults to
        the module-level ``b_id`` for backward compatibility with the
        original single-book script.
    """
    if book_id is None:
        book_id = b_id

    # makedirs(..., exist_ok=True): unlike os.mkdir, this also creates the
    # missing "other" parent and does not raise if the directory exists.
    os.makedirs("other/novel", exist_ok=True)

    # The catalog is a single small request, so a blocking call is tolerable
    # here; the context manager guarantees the connection is released even
    # if JSON decoding fails (the original resp.close() could be skipped).
    with requests.get(url=url, headers=headers) as resp:
        all_chapter = resp.json()['data']['novel']['items']

    # One download task per chapter, all scheduled up front.
    tasks = []
    for chapter in all_chapter:
        title = chapter['title']
        cid = chapter['cid']
        # The `data` query param must be literal JSON; f-strings would need
        # doubled braces, so plain concatenation is used instead.
        chapter_url = ('https://dushu.baidu.com/api/pc/getChapterContent?data={"book_id":'
                       + book_id + ',"cid":"' + book_id + '|' + cid + '","need_bookinfo":1}')
        tasks.append(asyncio.create_task(aiodownload(chapter_url, title)))

    # gather (unlike asyncio.wait) accepts an empty task list without raising
    # ValueError, and propagates any download exception instead of silently
    # leaving it on a done task.
    await asyncio.gather(*tasks)
if __name__ == '__main__':
    b_id = "4306063500"
    # Built with concatenation rather than an f-string: the payload already
    # contains literal braces ({"book_id":...}), which an f-string would
    # force us to escape.
    catalog_url = 'https://dushu.baidu.com/api/pc/getCatalog?data={"book_id":' + b_id + '}'
    asyncio.run(getnovel(catalog_url))