用go把博客园博客下载到本地Hexo目录下
找到cookie
在浏览器中登录博客园后台,按F12打开开发者工具,在任意一个发往 i.cnblogs.com 的请求的请求头里复制 cookie 的值,填入下面代码中的 cookie 常量。
直接上代码
用Hexo搭建静态博客时,把这个go文件放在 \source\_posts 目录下。运行之后,会在该目录下生成 cnblogs 文件夹,并以博客id作为md文件名保存每篇文章;同时会在 \source\ 下生成一个 cnblogs 目录,用来存放博客里面的图片文件。图片链接我使用形如 https://img….png 的正则(以 https://img 开头、以 .png 结尾)来做匹配。
package main
import (
"bufio"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"net/http"
"os"
"strconv"
"strings"
"regexp"
"path"
)
// cookie is the raw value getData sends in the "cookie" request header.
// The value here is a placeholder: replace it with the cookie copied from a
// logged-in browser session before running the program.
const cookie = " xxxxxxxxxxxxxxxxxx"
// main announces the start of the run and then downloads the blog, beginning
// with the first page of the post list.
func main() {
	const firstPage = 1

	fmt.Print("开始执行")
	getBlogList(firstPage)
}
// geturl returns the post-list API URL for the given page number. Only the
// "p" query parameter varies; every other parameter is fixed.
func geturl(pageno int) string {
	const listURLFormat = "https://i.cnblogs.com/api/posts/list?p=%s&cid=&tid=&t=1&cfg=0&search=&orderBy=&s=&scid="

	page := strconv.Itoa(pageno)
	return fmt.Sprintf(listURLFormat, page)
}
func getBlogList(pageindex int){
var urlstr = geturl(pageindex)
recordbody := getData(urlstr)
fmt.Printf("\r\n recordbody:%s \n", recordbody)
var conf blogList
err := json.Unmarshal(recordbody, &conf)
if err != nil {
fmt.Println("error:", err)
}
fmt.Printf("\r\n PageIndex:%s,PageSize:%s,PostsCount:%s \n", strconv.Itoa(conf.PageIndex), strconv.Itoa(conf.PageSize), strconv.Itoa(conf.PostsCount))
for _, childval := range conf.PostList {
if(childval.IsPublished){
childbody := getData(fmt.Sprintf("https://i.cnblogs.com/api/posts/%s", strconv.Itoa(childval.Id)))
fmt.Printf("childbody:%s \n", childbody)
var jsconf blogbodyConf
err := json.Unmarshal(childbody, &jsconf)
if err != nil {
fmt.Println("error:", err)
}
var tagbody = ""
for _, tag := range jsconf.BlogPost.Tags {
if(tagbody!=""){
tagbody = fmt.Sprintf("%s,\"%s\"",tagbody,tag)
}else{
tagbody = fmt.Sprintf("\"%s\"",tag)
}
}
var tagstr = fmt.Sprintf("[%s]",tagbody)
var articleBody = fmt.Sprintf("---\r\ntitle: %s\r\ndate: %s\r\nauthor: %s\r\ntags: %s\r\n---\r\n%s",
jsconf.BlogPost.Title,
jsconf.BlogPost.DatePublished,
jsconf.BlogPost.Author,
tagstr,
string(jsconf.BlogPost.PostBody))
//添加文章信息
reg, _ := regexp.Compile(`https://img.*.png`)
imgurls := reg.FindAllString(articleBody, -1)
for _, imgurl := range imgurls {
fileName := path.Base(imgurl)
downloadImage(imgurl,strconv.Itoa(jsconf.BlogPost.Id),fileName)
articleBody = strings.Replace(articleBody, imgurl, fmt.Sprintf("/cnblogs/%s/%s",strconv.Itoa(jsconf.BlogPost.Id),fileName), -1)
}
fmt.Printf("articleBody:%s \n", articleBody)
downloadFile(strings.NewReader(articleBody), strconv.Itoa(jsconf.BlogPost.Id), fmt.Sprintf("%s.md", strconv.Itoa(jsconf.BlogPost.Id)))
}
}
if(conf.PageIndex>0 && conf.PageIndex*conf.PageSize<=conf.PostsCount){
getBlogList(conf.PageIndex+1)
}
fmt.Println("执行完毕")
}
// getData issues a GET request to urlstr with the package-level cookie
// attached and returns the raw response body.
//
// It returns nil when the request cannot be built or sent, or when the body
// cannot be read; the reason is printed to stdout. A non-200 status is
// reported but the body is still returned so the caller can inspect it.
func getData(urlstr string) []byte {
	fmt.Printf("\r\n urlstr:%s \n", urlstr)

	req, err := http.NewRequest("GET", urlstr, nil)
	if err != nil {
		fmt.Println("error:", err)
		return nil
	}
	req.Header.Add("cookie", cookie)

	// Use the shared default client instead of allocating a new one per call.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// resp is nil here, so it must not be touched.
		fmt.Println("error:", err)
		return nil
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		fmt.Printf("warning: GET %s returned status %s\n", urlstr, resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("error:", err)
		return nil
	}
	return body
}
// downloadImage fetches imgurl and stores it as
// "../cnblogs/<rootpath>/<fileName>", creating the directory when it does not
// exist yet.
//
// Download problems (request failure, non-200 status, interrupted copy) are
// reported on stdout and the function returns. Failing to create the
// directory or the file panics, as the original code did for file creation.
func downloadImage(imgurl string, rootpath string, fileName string) {
	dir := fmt.Sprintf("../cnblogs/%s", rootpath)
	target := fmt.Sprintf("%s/%s", dir, fileName)

	res, err := http.Get(imgurl)
	if err != nil {
		fmt.Println("A error occurred!", err)
		return
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		// Do not save an error page under a .png name.
		fmt.Printf("skip image %s: unexpected status %s\n", imgurl, res.Status)
		return
	}

	if _, err := os.Stat(dir); os.IsNotExist(err) {
		// Two steps: create the directory, then set its mode explicitly.
		if err := os.MkdirAll(dir, 0777); err != nil {
			panic(err)
		}
		if err := os.Chmod(dir, 0777); err != nil {
			fmt.Printf("chmod %s: %v\n", dir, err)
		}
	}

	file, err := os.Create(target)
	if err != nil {
		panic(err)
	}

	reader := bufio.NewReaderSize(res.Body, 32*1024)
	writer := bufio.NewWriter(file)
	written, err := io.Copy(writer, reader)
	if err == nil {
		// Push any bytes still held in the buffer to the file.
		err = writer.Flush()
	}
	if cerr := file.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		fmt.Printf("save image %s: %v\n", target, err)
		return
	}
	fmt.Printf("Total length: %d", written)
}
// downloadFile writes everything read from body to "./cnblogs/<name>",
// creating the "./cnblogs" directory first when it does not exist.
//
// rootpath does not influence the output path; it is kept so existing callers
// compile unchanged. The directory is now ensured regardless of its value.
//
// Any failure to create the directory, create the file, copy the data or
// close the file panics, because the function has no error result.
func downloadFile(body io.Reader, rootpath string, name string) {
	const dir = "./cnblogs"

	if _, err := os.Stat(dir); os.IsNotExist(err) {
		// Two steps: create the directory, then set its mode explicitly.
		if err := os.MkdirAll(dir, 0777); err != nil {
			panic(err)
		}
		if err := os.Chmod(dir, 0777); err != nil {
			fmt.Printf("chmod %s: %v\n", dir, err)
		}
	}

	out, err := os.Create(fmt.Sprintf("%s/%s", dir, name))
	if err != nil {
		panic(err)
	}

	// Stream the content into the file.
	if _, err := io.Copy(out, body); err != nil {
		out.Close()
		panic(err)
	}
	// A Close error on a written file can mean lost data, so surface it.
	if err := out.Close(); err != nil {
		panic(err)
	}
}
// blogList is the decoded response of the post-list API requested through
// geturl: the paging counters plus the posts contained in that page.
type blogList struct {
	// Paging information as reported by the API.
	PageIndex  int `json:"pageIndex"`
	PageSize   int `json:"pageSize"`
	PostsCount int `json:"postsCount"`

	// PostList holds the summary entries of the posts on this page.
	PostList []blogbodymsg `json:"postList"`
}
// blogbodymsg is one summary entry of blogList.PostList. getBlogList uses Id
// to request the full post and IsPublished to decide whether to download it.
type blogbodymsg struct {
	Id          int    `json:"id"`
	Title       string `json:"title"`
	IsPublished bool   `json:"isPublished"`

	// Dates are kept as the raw strings found in the JSON.
	DatePublished string `json:"datePublished"`
	DateUpdated   string `json:"dateUpdated"`
}
// blogbodyConf is the decoded response of the single-post API; the post
// itself sits under the "blogPost" key.
type blogbodyConf struct {
	BlogPost blogPostEntity `json:"blogPost"`
}
// blogPostEntity is the full post carried by blogbodyConf. getBlogList reads
// Id, Title, DatePublished, Author, Tags and PostBody when building the
// Markdown file; the remaining fields are decoded but not used there.
type blogPostEntity struct {
	Id    int    `json:"id"`
	Title string `json:"title"`
	Url   string `json:"url"`

	Author        string   `json:"author"`
	DatePublished string   `json:"datePublished"`
	Tags          []string `json:"tags"`

	AutoDesc string `json:"autoDesc"`
	PostBody string `json:"postBody"`
}
本文来自博客园,作者:spatxos,转载请注明原文链接:https://www.cnblogs.com/spatxos/p/16463506.html