golang 简单爬虫
爬取 BILIBILI 专栏图片的程序
利用 Go 语言获取哔哩哔哩专栏的图片,lsp 再也不用手动一个个下载了……
用到的都是 Go 语言自带的包(标准库),放心食用。
完整代码
package main
import (
"fmt"
"io/ioutil"
"net/http"
"os"
"regexp"
"strings"
)
// rootPath is the base directory that downloaded images are written under.
// It defaults to "./imgs"; main may replace it with a command-line argument
// and then appends the per-article folder name before any download starts.
var rootPath = "./imgs"
// SaveImg downloads the image at url and stores it in the rootPath directory,
// using the last path segment of url as the file name.
//
// A url that does not already start with "http" (for example the
// protocol-relative form "//host/path") gets an "http:" prefix before the
// request is made. The file name is sent on page only after the image has been
// fetched, written and closed without error; on any failure the error is
// printed and nothing is sent on page.
func SaveImg(url string, page chan string) {
	// Everything after the final "/" is the file name. When url has no "/",
	// LastIndex returns -1 and the whole url is used.
	name := url[strings.LastIndex(url, "/")+1:]

	// HasPrefix is safe for urls shorter than four bytes, where slicing
	// url[:4] would panic.
	if !strings.HasPrefix(url, "http") {
		url = "http:" + url
	}

	content, err := HttpGet(url)
	if err != nil {
		fmt.Println("img get err:", err)
		return
	}

	// Do not rely on the caller having left a trailing slash on rootPath.
	dir := rootPath
	if dir != "" && !strings.HasSuffix(dir, "/") {
		dir += "/"
	}
	path := dir + name

	f, err := os.Create(path)
	if err != nil {
		fmt.Println(path+" err:", err)
		return
	}
	if _, err := f.Write(content); err != nil {
		f.Close() // best effort; the write error is the one worth reporting
		fmt.Println(path+" err:", err)
		return
	}
	// Close can surface a delayed write error, so check it before reporting
	// the download as complete.
	if err := f.Close(); err != nil {
		fmt.Println(path+" err:", err)
		return
	}

	page <- name
}
// HttpGet issues a GET request for url with a desktop-browser User-Agent and
// returns the full response body.
//
// It returns a non-nil error when the request cannot be built or sent, when
// the response status code is anything other than 200, or when reading the
// body fails. result is nil whenever err is non-nil. The error is returned to
// the caller rather than printed here, so each failure is reported once.
func HttpGet(url string) (result []byte, err error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		// A malformed url yields a nil request; bail out before touching it.
		return nil, fmt.Errorf("building request for %q: %w", url, err)
	}
	req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36 Edg/89.0.774.50")

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("request err: %w", err)
	}
	// Registered before the status check so the body is released on every
	// path, including non-200 responses.
	defer resp.Body.Close()

	// Compare the numeric code; the reason phrase in resp.Status is free text.
	if resp.StatusCode != http.StatusOK {
		// url is passed as an argument, never as the format string, so a
		// percent-encoded url is reported verbatim.
		return nil, fmt.Errorf("%s >>>> %s", url, resp.Status)
	}

	result, err = ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading body of %q: %w", url, err)
	}
	return result, nil
}
// imgURLPattern matches protocol-relative image URLs under
// //i0.hdslb.com/bfs/article/ that end in .jpg, .png or .gif. The dots are
// escaped and the match may not cross quotes, whitespace or angle brackets, so
// one match cannot swallow the markup that follows a URL.
var imgURLPattern = regexp.MustCompile(`//i0\.hdslb\.com/bfs/article/[^"'\s<>]*?\.(?:jpg|png|gif)`)

// extractImgURLs returns the distinct image URLs found in html, in order of
// first appearance.
func extractImgURLs(html string) []string {
	var urls []string
	seen := make(map[string]bool)
	for _, u := range imgURLPattern.FindAllString(html, -1) {
		if seen[u] {
			continue
		}
		seen[u] = true
		urls = append(urls, u)
	}
	return urls
}

// main downloads every article image of one article page into a local folder.
//
// Arguments (all optional): the cv id, the root directory, and the folder name
// to create under that root. With no arguments the cv id is read from standard
// input. The folder name defaults to the cv id. More than three arguments is
// rejected.
func main() {
	// cv := "cv6597100"
	var cv, fn string
	switch len(os.Args) {
	case 1:
		fmt.Println("输入cv号:")
		if _, err := fmt.Scan(&cv); err != nil {
			fmt.Println("err:", err)
			return
		}
		fn = cv
	case 2:
		cv = os.Args[1]
		fn = cv
	case 3:
		cv, rootPath = os.Args[1], os.Args[2]
		fn = cv
	case 4:
		cv, rootPath, fn = os.Args[1], os.Args[2], os.Args[3]
	default:
		fmt.Println("err")
		return
	}

	cont, err := HttpGet("https://www.bilibili.com/read/" + cv)
	if err != nil {
		fmt.Println("err:", err)
		return
	}
	imgURLs := extractImgURLs(string(cont))

	// Create the target folder. The trailing slash is kept because SaveImg
	// builds file paths from rootPath. The mode is octal: a decimal 777 would
	// produce a directory its owner cannot write to.
	rootPath = rootPath + "/" + fn + "/"
	if err := os.MkdirAll(rootPath, 0755); err != nil {
		fmt.Println("err:", err)
		return
	}

	page := make(chan string)
	done := make(chan struct{})
	for _, imgURL := range imgURLs {
		go func(u string) {
			SaveImg(u, page)
			// SaveImg sends on page only when it succeeds, so signal
			// completion separately to keep a failed download from
			// blocking the loop below forever.
			done <- struct{}{}
		}(imgURL)
	}

	// page is unbuffered, so a successful download's name is always received
	// before that worker's done signal.
	for finished := 0; finished < len(imgURLs); {
		select {
		case name := <-page:
			fmt.Printf("%s 下载完成\n", name)
		case <-done:
			finished++
		}
	}
}
有什么建议意见请留言讨论
谢谢观看
浪费了你人生中的几秒真的很抱歉(≧ ﹏ ≦)
结局早已注定,命运无法违抗