用go把博客园博客下载到本地Hexo目录下
找到cookie#
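在浏览器中登录博客园后台(i.cnblogs.com),按 F12 打开开发者工具,在 Network(网络)面板中任选一个发往 i.cnblogs.com 的请求,把请求头里的 Cookie 值复制出来,填到下面代码的 cookie 常量中。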
直接上代码#
用 Hexo 搭建静态博客时,把这个 go 文件放在 \source\_posts 目录下运行。运行后会在该目录下生成 cnblogs 文件夹,每篇博客保存为以博客 id 命名的 md 文件;同时会在 \source\ 下生成一个 cnblogs 目录,按博客 id 分子目录存放博客里面的图片文件。图片链接我使用 https://img.*.png 这一模式来做匹配(即以 https://img 开头、以 .png 结尾的地址)。
// Command main exports the published posts of a cnblogs.com account as Hexo
// markdown files.
//
// It is meant to be run from Hexo's source/_posts directory: every post is
// written to ./cnblogs/<post id>.md and every PNG image referenced by a post
// is saved to ../cnblogs/<post id>/<file name>, with the link in the post
// rewritten to the site-absolute path /cnblogs/<post id>/<file name>.
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"os"
	"path"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"time"
)

// cookie is the Cookie header of a logged-in i.cnblogs.com browser session.
// Replace the placeholder with a real value before running.
const cookie = " xxxxxxxxxxxxxxxxxx"

// httpClient is shared by all requests so connections are reused and a
// stalled server cannot hang the export forever.
var httpClient = &http.Client{Timeout: 30 * time.Second}

// imgURLPattern matches a single PNG image URL starting with "https://img".
// The match is lazy and stops at whitespace, quotes, brackets and parentheses
// so that several images on one line are found as separate URLs rather than
// one span from the first "https://img" to the last ".png".
var imgURLPattern = regexp.MustCompile(`https://img[^\s"'()<>]*?\.png`)

func main() {
	fmt.Printf("开始执行")
	getBlogList(1)
}

// geturl returns the post-list API URL for the given 1-based page number.
func geturl(pageno int) string {
	return fmt.Sprintf("https://i.cnblogs.com/api/posts/list?p=%d&cid=&tid=&t=1&cfg=0&search=&orderBy=&s=&scid=", pageno)
}

// getBlogList exports every published post, starting at page pageindex and
// following the paging information returned by the API until the last page.
// Failures on a single post are reported and do not stop the export; a page
// that cannot be fetched or decoded ends it.
func getBlogList(pageindex int) {
	page := pageindex
	for {
		recordbody := getData(geturl(page))
		if len(recordbody) == 0 {
			break
		}
		fmt.Printf("\r\n recordbody:%s \n", recordbody)

		var conf blogList
		if err := json.Unmarshal(recordbody, &conf); err != nil {
			fmt.Println("error:", err)
			break
		}
		fmt.Printf("\r\n PageIndex:%d,PageSize:%d,PostsCount:%d \n", conf.PageIndex, conf.PageSize, conf.PostsCount)

		for _, item := range conf.PostList {
			if !item.IsPublished {
				continue
			}
			if err := exportPost(item.Id); err != nil {
				fmt.Println("error:", err)
			}
		}

		// Stop when the API reports no further page, or when the page number
		// it reports would not move us forward (guards against looping).
		next := conf.PageIndex + 1
		if !hasNextPage(conf) || next <= page {
			break
		}
		page = next
	}
	fmt.Println("执行完毕")
}

// hasNextPage reports whether posts remain after the page described by conf.
func hasNextPage(conf blogList) bool {
	return conf.PageIndex > 0 && conf.PageSize > 0 && conf.PageIndex*conf.PageSize < conf.PostsCount
}

// exportPost fetches the post with the given id, saves the PNG images it
// references and writes the post as a Hexo markdown file.
func exportPost(id int) error {
	childbody := getData(fmt.Sprintf("https://i.cnblogs.com/api/posts/%d", id))
	if len(childbody) == 0 {
		return fmt.Errorf("post %d: empty response", id)
	}
	fmt.Printf("childbody:%s \n", childbody)

	var jsconf blogbodyConf
	if err := json.Unmarshal(childbody, &jsconf); err != nil {
		return fmt.Errorf("post %d: decoding response: %w", id, err)
	}
	post := jsconf.BlogPost
	if post.Id == 0 {
		// Without this check a response lacking the post would be saved as 0.md.
		return fmt.Errorf("post %d: response contains no post", id)
	}

	postID := strconv.Itoa(post.Id)
	articleBody := buildArticle(post)
	for _, imgurl := range findImageURLs(articleBody) {
		fileName := path.Base(imgurl)
		if err := downloadImage(imgurl, postID, fileName); err != nil {
			// Keep the remote link: pointing at a file that was never saved
			// would leave a broken image in the generated site.
			fmt.Println("error:", err)
			continue
		}
		articleBody = strings.Replace(articleBody, imgurl, fmt.Sprintf("/cnblogs/%s/%s", postID, fileName), -1)
	}
	fmt.Printf("articleBody:%s \n", articleBody)

	return downloadFile(strings.NewReader(articleBody), postID, postID+".md")
}

// buildArticle renders post as a Hexo markdown document: a front-matter
// header (title, date, author, tags) followed by the post body.
//
// Title, author and tags are emitted as double-quoted strings so that
// characters such as ':' or a leading '[' in them do not break the YAML
// front matter.
func buildArticle(post blogPostEntity) string {
	tags := make([]string, 0, len(post.Tags))
	for _, tag := range post.Tags {
		tags = append(tags, strconv.Quote(tag))
	}
	return fmt.Sprintf("---\r\ntitle: %s\r\ndate: %s\r\nauthor: %s\r\ntags: [%s]\r\n---\r\n%s",
		strconv.Quote(post.Title),
		post.DatePublished,
		strconv.Quote(post.Author),
		strings.Join(tags, ","),
		post.PostBody)
}

// findImageURLs returns the distinct image URLs in text that match
// imgURLPattern, in order of first appearance.
func findImageURLs(text string) []string {
	matches := imgURLPattern.FindAllString(text, -1)
	seen := make(map[string]bool, len(matches))
	var urls []string
	for _, m := range matches {
		if seen[m] {
			continue
		}
		seen[m] = true
		urls = append(urls, m)
	}
	return urls
}

// getData performs an authenticated GET request against urlstr and returns
// the response body. On any failure (request error, non-200 status, read
// error) the problem is reported on stdout and nil is returned.
func getData(urlstr string) []byte {
	fmt.Printf("\r\n urlstr:%s \n", urlstr)

	req, err := http.NewRequest(http.MethodGet, urlstr, nil)
	if err != nil {
		fmt.Println("error:", err)
		return nil
	}
	req.Header.Add("cookie", cookie)

	resp, err := httpClient.Do(req)
	if err != nil {
		fmt.Println("error:", err)
		return nil
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		fmt.Println("error:", fmt.Errorf("requesting %s: unexpected status %s", urlstr, resp.Status))
		return nil
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("error:", err)
		return nil
	}
	return body
}

// downloadImage fetches imgurl and stores it as ../cnblogs/<rootpath>/<fileName>,
// creating the directory when needed. It returns an error when the image
// cannot be fetched or saved; a partially written file is removed.
func downloadImage(imgurl string, rootpath string, fileName string) error {
	res, err := httpClient.Get(imgurl)
	if err != nil {
		return fmt.Errorf("downloading %s: %w", imgurl, err)
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		// Do not save an error page under an image file name.
		return fmt.Errorf("downloading %s: unexpected status %s", imgurl, res.Status)
	}

	dir := filepath.Join("..", "cnblogs", rootpath)
	if err := os.MkdirAll(dir, 0755); err != nil {
		return fmt.Errorf("creating %s: %w", dir, err)
	}
	target := filepath.Join(dir, fileName)
	file, err := os.Create(target)
	if err != nil {
		return fmt.Errorf("creating %s: %w", target, err)
	}

	writer := bufio.NewWriter(file)
	written, copyErr := io.Copy(writer, res.Body)
	if copyErr == nil {
		// Without the flush the buffered tail of the image never reaches disk.
		copyErr = writer.Flush()
	}
	closeErr := file.Close()
	if copyErr == nil {
		copyErr = closeErr
	}
	if copyErr != nil {
		os.Remove(target) // best effort: do not leave a truncated image behind
		return fmt.Errorf("saving %s: %w", target, copyErr)
	}
	fmt.Printf("Total length: %d", written)
	return nil
}

// downloadFile writes everything read from body to ./cnblogs/<name>, creating
// the cnblogs directory when needed. rootpath does not affect the target
// path; it is kept so existing calls keep compiling.
func downloadFile(body io.Reader, rootpath string, name string) error {
	dir := filepath.Join(".", "cnblogs")
	if err := os.MkdirAll(dir, 0755); err != nil {
		return fmt.Errorf("creating %s: %w", dir, err)
	}
	target := filepath.Join(dir, name)
	out, err := os.Create(target)
	if err != nil {
		return fmt.Errorf("creating %s: %w", target, err)
	}

	_, copyErr := io.Copy(out, body)
	closeErr := out.Close()
	if copyErr == nil {
		copyErr = closeErr
	}
	if copyErr != nil {
		return fmt.Errorf("writing %s: %w", target, copyErr)
	}
	return nil
}

// blogList is one page of the post-list API response.
type blogList struct {
	PageIndex  int           `json:"pageIndex"`
	PageSize   int           `json:"pageSize"`
	PostsCount int           `json:"postsCount"`
	PostList   []blogbodymsg `json:"postList"`
}

// blogbodymsg is the summary of a single post inside a blogList page.
type blogbodymsg struct {
	Id            int    `json:"id"`
	DatePublished string `json:"datePublished"`
	DateUpdated   string `json:"dateUpdated"`
	Title         string `json:"title"`
	IsPublished   bool   `json:"isPublished"`
}

// blogbodyConf is the envelope of the single-post API response.
type blogbodyConf struct {
	BlogPost blogPostEntity `json:"blogPost"`
}

// blogPostEntity holds the fields of a post that are used for the export.
type blogPostEntity struct {
	Id            int      `json:"id"`
	AutoDesc      string   `json:"autoDesc"`
	DatePublished string   `json:"datePublished"`
	PostBody      string   `json:"postBody"`
	Title         string   `json:"title"`
	Url           string   `json:"url"`
	Author        string   `json:"author"`
	Tags          []string `json:"tags"`
}
作者: spatxos
出处:https://www.cnblogs.com/spatxos/p/16463506.html
版权:本站使用「spatxos」创作共享协议,未经作者同意,请勿转载;若经同意转载,请在文章明显位置注明作者和出处。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· ollama系列1:轻松3步本地部署deepseek,普通电脑可用
· 按钮权限的设计及实现
· 【杂谈】分布式事务——高大上的无用知识?