Goodbye, Luogu Blog! Downloading Luogu blog articles

posted on 2022-11-06 10:58:17 | under Academic | source

Prerequisites

Single query

Open one of your blog posts in the browser, press F12 to open the devtools console, and run the snippet below; it prints the post's markdown source.

// print the markdown source of the current post (run in the devtools console on a Luogu blog page)
fetch('/api/blog/detail/' + BlogGlobals.blogID).then(res => res.json()).then(res => console.log(res.data.Content))
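The same request also works outside the browser, which is handy if you want to script it. A minimal Python sketch, assuming you have copied your _uid and __client_id cookies from devtools and know the post's numeric id (all values below are placeholders):

# single_fetch.py -- minimal sketch of one /api/blog/detail request
import requests

uid="123456"      # placeholder: your _uid cookie value
client="abcdef"   # placeholder: your __client_id cookie value
bid=114514        # placeholder: the numeric blog id (BlogGlobals.blogID on the page)

headers={
    "User-Agent": "Mozilla/5.0",
    "cookie": "_uid="+uid+";__client_id="+client
}
response=requests.get("https://www.luogu.com.cn/api/blog/detail/%d"%bid,headers=headers)
print(response.json()["data"]["Content"])  # the markdown source of the post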

Batch download to markdown

The script below authenticates with your _uid and __client_id cookies, lists all of your posts through the /api/blog/userBlogs API one page at a time, and saves each post as a markdown file.

# fetcher.py
import os
import json
import time
import requests

url="https://www.luogu.com.cn"
print("Warning: When the program is running, don't log out your luogu account...")
uid=input("Please input your user id...")
client=input("Please input your client id...")
savePath=input("Where do you want to save? (e.g. ./blogs/)")
os.makedirs(savePath,exist_ok=True) # create the output directory if it does not already exist

userAgent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0"
headers={
    "User-Agent": userAgent,
    "cookie": "_uid="+uid+";__client_id="+client
}

restCount=0

def fetchBlogContent(bid):
    global restCount
    restCount+=1
    if restCount%10==0:
        # crude rate limiting: pause after every 10 requests so we don't hammer the API
        print("Please wait for 10 seconds...")
        time.sleep(10)
    response=requests.get(url+"/api/blog/detail/"+str(bid),headers=headers)
    response.encoding="utf-8"
    result=json.loads(response.text)
    # print(result)
    # print(json.dumps(result,sort_keys=True, indent=4, separators=(',', ': ')))
    if result["status"] == 200:
        result=result["data"]
        return {
            "source": url+"/blog/_post/"+str(bid),
            "content": result["Content"],
            "identifier": result["Identifier"],
            "postTime": result["PostTime"],
            "title": result["Title"],
            "type": result["Type"]
        }
    else:
        print("Error: Cannot catch blog {%d}, result:"%bid)
        print(result)
        return {}
    
def saveBlogContent(bid):
    result=fetchBlogContent(bid)
    if result == {}:
        return 
    postTime=time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(result["postTime"]))
    with open(os.path.join(savePath,"%s.md"%result["identifier"]),mode="w",encoding="utf-8") as file:
        file.write("# %s\n"%result["title"])
        file.write("posted on %s | under %s | [source](%s)\n\n"%(postTime,result["type"],result["source"]))
        file.write(result["content"])
        file.write("\n")

def getBlogLists(uid):
    response=requests.get(url+"/api/blog/userBlogs",headers=headers)
    response.encoding="utf-8"
    result=json.loads(response.text)["blogs"]
    # print(json.dumps(result,sort_keys=True, indent=4, separators=(',', ': ')))
    lists=[]
    pageNumber=(result["count"]+result["perPage"]-1)//result["perPage"] # ceil(count/perPage)
    for i in range(1,pageNumber+1):
        response=requests.get(url+"/api/blog/userBlogs?page="+str(i),headers=headers)
        response.encoding="utf-8"
        result=json.loads(response.text)["blogs"]
        for blog in result["result"]:
            lists.append(blog["id"])
    return lists

print("Now start fetching blogs of user %s..."%uid)
for bid in getBlogLists(uid):
    saveBlogContent(bid)
    print("Save blog %d successfully..."%bid)
print("Blogs of user %s have already saved..."%uid)
print("Thank you for using my tool. Author: luogu@yukimianyan.")