用 Go 把博客园的博客下载到本地 Hexo 目录下

找到cookie

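在浏览器中登录博客园后台(i.cnblogs.com),按 F12 打开开发者工具,在 Network 面板里任选一个发往 i.cnblogs.com 的请求,把请求头中 Cookie 的值复制出来,填到下面代码的 cookie 常量里。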

直接上代码

如果用 Hexo 搭建静态博客,把这个 Go 文件放在 \source\_posts 目录下。运行之后,会在该目录下生成 cnblogs 文件夹,里面的 md 文件以博客 id 作为文件名;同时会在 \source\ 下生成一个 cnblogs 目录(按博客 id 分子目录),用来存放博客里的图片文件。图片地址按 https://img.*.png 这一模式(以 https://img 开头、以 .png 结尾)来匹配。

package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"os"
	"strconv"
	"strings"
	"regexp"
	"path"
)
// cookie is the raw value sent in the "cookie" request header by getData on
// every request it makes. The value below is a placeholder; replace it with
// the Cookie header copied from a logged-in browser session.
const cookie = " xxxxxxxxxxxxxxxxxx"

// main is the program entry point: it prints a start banner and then hands
// control to getBlogList, starting at page 1.
func main() {
	const firstPage = 1

	fmt.Print("开始执行")
	getBlogList(firstPage)
}
// geturl returns the post-list API URL for the given page number. Only the
// "p" query parameter varies; all other parameters are sent with the fixed
// values embedded in the format string.
func geturl(pageno int) string {
	const listURLFormat = "https://i.cnblogs.com/api/posts/list?p=%s&cid=&tid=&t=1&cfg=0&search=&orderBy=&s=&scid="

	page := strconv.Itoa(pageno)
	return fmt.Sprintf(listURLFormat, page)
}
func getBlogList(pageindex int){
	var urlstr = geturl(pageindex)

	recordbody := getData(urlstr)
	fmt.Printf("\r\n recordbody:%s \n", recordbody)

	var conf blogList
	err := json.Unmarshal(recordbody, &conf)
	if err != nil {
		fmt.Println("error:", err)
	}

	fmt.Printf("\r\n PageIndex:%s,PageSize:%s,PostsCount:%s \n", strconv.Itoa(conf.PageIndex), strconv.Itoa(conf.PageSize), strconv.Itoa(conf.PostsCount))
	for _, childval := range conf.PostList {
             if(childval.IsPublished){
			childbody := getData(fmt.Sprintf("https://i.cnblogs.com/api/posts/%s", strconv.Itoa(childval.Id)))
			fmt.Printf("childbody:%s \n", childbody)
			var jsconf blogbodyConf
			err := json.Unmarshal(childbody, &jsconf)
			if err != nil {
				fmt.Println("error:", err)
			}
			var tagbody = ""
			for _, tag := range jsconf.BlogPost.Tags {
				if(tagbody!=""){
					tagbody = fmt.Sprintf("%s,\"%s\"",tagbody,tag)
				}else{
					tagbody = fmt.Sprintf("\"%s\"",tag)
				}
			}
			var tagstr = fmt.Sprintf("[%s]",tagbody)
			var articleBody = fmt.Sprintf("---\r\ntitle: %s\r\ndate: %s\r\nauthor: %s\r\ntags: %s\r\n---\r\n%s",
			    jsconf.BlogPost.Title,
				jsconf.BlogPost.DatePublished,
				jsconf.BlogPost.Author,
				tagstr,
			    string(jsconf.BlogPost.PostBody))
			//添加文章信息

			reg, _ := regexp.Compile(`https://img.*.png`)
			imgurls := reg.FindAllString(articleBody, -1)
			for _, imgurl := range imgurls {
				fileName := path.Base(imgurl)
				downloadImage(imgurl,strconv.Itoa(jsconf.BlogPost.Id),fileName)
				articleBody = strings.Replace(articleBody, imgurl, fmt.Sprintf("/cnblogs/%s/%s",strconv.Itoa(jsconf.BlogPost.Id),fileName), -1)
			}
			fmt.Printf("articleBody:%s \n", articleBody)

			downloadFile(strings.NewReader(articleBody), strconv.Itoa(jsconf.BlogPost.Id), fmt.Sprintf("%s.md",  strconv.Itoa(jsconf.BlogPost.Id)))
		}
	}
        if(conf.PageIndex>0 && conf.PageIndex*conf.PageSize<=conf.PostsCount){
	    getBlogList(conf.PageIndex+1)
	}
	fmt.Println("执行完毕")
}
// getData sends a GET request to urlstr with the package-level cookie attached
// and returns the raw response body.
//
// It returns nil when the request cannot be built or sent, when the server
// answers with a non-2xx status, or when the body cannot be read. The reason
// is printed to stdout in each case. Callers that pass the result to
// json.Unmarshal will then see a decode error instead of the program crashing
// on a nil response.
//
// NOTE(review): the default HTTP client has no timeout, so a stalled server
// blocks the program; consider a client with a Timeout.
func getData(urlstr string) []byte {
	fmt.Printf("\r\n urlstr:%s \n", urlstr)

	req, err := http.NewRequest("GET", urlstr, nil)
	if err != nil {
		fmt.Println("error:", err)
		return nil
	}
	req.Header.Add("cookie", cookie)

	// The shared default client is reused instead of allocating a new
	// zero-value client per call; both use the same default transport.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("error:", err)
		return nil
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		fmt.Printf("error: GET %s: unexpected status %s\n", urlstr, resp.Status)
		return nil
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("error:", err)
		return nil
	}
	return body
}
// downloadImage fetches imgurl and stores it as
// "../cnblogs/<rootpath>/<fileName>", creating the directory when needed.
//
// The download is best-effort: any failure (network error, non-200 status,
// directory or file creation error, write error) is printed to stdout and the
// function returns without panicking, so one broken image does not abort the
// whole export.
func downloadImage(imgurl string, rootpath string, fileName string) {
	dir := fmt.Sprintf("../cnblogs/%s", rootpath)
	target := fmt.Sprintf("../cnblogs/%s/%s", rootpath, fileName)

	res, err := http.Get(imgurl)
	if err != nil {
		fmt.Println("A error occurred!")
		return
	}
	defer res.Body.Close()

	// Do not save an error page under an image file name.
	if res.StatusCode != http.StatusOK {
		fmt.Printf("skip image %s: unexpected status %s\n", imgurl, res.Status)
		return
	}

	if _, statErr := os.Stat(dir); os.IsNotExist(statErr) {
		// Two steps on purpose: create the directory first, then set its
		// permissions explicitly (0777 is the same as os.ModePerm).
		if err := os.MkdirAll(dir, 0777); err != nil {
			fmt.Printf("skip image %s: %v\n", imgurl, err)
			return
		}
		if err := os.Chmod(dir, 0777); err != nil {
			fmt.Printf("chmod %s: %v\n", dir, err)
		}
	}

	file, err := os.Create(target)
	if err != nil {
		fmt.Printf("skip image %s: %v\n", imgurl, err)
		return
	}

	// Buffered reader over the response body and buffered writer over the file.
	reader := bufio.NewReaderSize(res.Body, 32*1024)
	writer := bufio.NewWriter(file)

	written, err := io.Copy(writer, reader)
	if err == nil {
		// Without Flush the tail of the image may stay in the buffer and
		// never reach the file.
		err = writer.Flush()
	}
	if closeErr := file.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		fmt.Printf("save image %s: %v\n", imgurl, err)
		return
	}
	fmt.Printf("Total length: %d", written)
}
// downloadFile streams body into the file "./cnblogs/<name>", truncating any
// existing file of that name.
//
// When rootpath is non-empty and "./cnblogs" does not exist yet, the directory
// is created first; with an empty rootpath no directory is created. The
// function panics if the file cannot be created or the copy fails.
func downloadFile(body io.Reader, rootpath string, name string) {
	const outDir = "./cnblogs"

	wantsDir := rootpath != ""
	if wantsDir {
		_, statErr := os.Stat(outDir)
		if os.IsNotExist(statErr) {
			// Must be done in two steps: create the directory first, then
			// change its permissions (0777 could also be os.ModePerm).
			os.MkdirAll(outDir, 0777)
			os.Chmod(outDir, 0777)
		}
	}

	// Open the destination file.
	dst, createErr := os.Create(fmt.Sprintf("./cnblogs/%s", name))
	if createErr != nil {
		panic(createErr)
	}
	defer dst.Close()

	// Stream the content into it.
	if _, copyErr := io.Copy(dst, body); copyErr != nil {
		panic(copyErr)
	}
}

// blogList is the structure getBlogList decodes the post-list response into:
// paging information plus the posts of the current page.
type blogList struct {
	// PageIndex is the page number reported in the response; getBlogList
	// requests PageIndex+1 next.
	PageIndex int `json:"pageIndex"`
	// PageSize and PostsCount feed the "is there another page" check in
	// getBlogList. Presumably posts per page and total post count — confirm
	// against the API.
	PageSize int `json:"pageSize"`
	PostsCount int `json:"postsCount"`

	// PostList holds the post summaries of this page.
	PostList []blogbodymsg `json:"postList"`
}
// blogbodymsg is one entry of blogList.PostList: the summary of a single post
// as returned by the list request.
type blogbodymsg struct {
	// Id is the post id; getBlogList uses it to request the full post.
	Id int `json:"id"`

	// DatePublished is decoded from "datePublished" and kept as the raw string.
	DatePublished string `json:"datePublished"`

	// DateUpdated is decoded from "dateUpdated" and kept as the raw string.
	DateUpdated string `json:"dateUpdated"`

	// Title is decoded from "title".
	Title string `json:"title"`
	// IsPublished gates the download: getBlogList skips posts where it is false.
        IsPublished bool `json:"isPublished"`
}

// blogbodyConf is the envelope of the single-post response; the post itself
// is nested under the "blogPost" key.
type blogbodyConf struct {
	BlogPost blogPostEntity `json:"blogPost"`
}
// blogPostEntity is the full post decoded from the single-post response.
// getBlogList reads Id, Title, DatePublished, Author, Tags and PostBody to
// build the Markdown file; AutoDesc and Url are decoded but not used there.
type blogPostEntity struct {
	// Id is the post id; it names the Markdown file and the image directory.
	Id int `json:"id"`
	AutoDesc string `json:"autoDesc"`
	// DatePublished is written verbatim into the "date" front-matter field.
	DatePublished string `json:"datePublished"`
	// PostBody is the post content appended after the front matter; image
	// URLs are searched for in the assembled document that contains it.
	PostBody string `json:"postBody"`
	Title string `json:"title"`
	Url string `json:"url"`
	Author string `json:"author"`
	// Tags are rendered as a bracketed, quoted list in the front matter.
	Tags []string `json:"tags"` 
}

posted @ 2022-07-10 17:08  spatxos  阅读(217)  评论(0)  编辑  收藏  举报