pu369com

纯golang爬虫实战-(六)-关于cookiejar的理解 (2020-02-14 13:50)

针对上一篇遗留的cookie问题,我在 https://studygolang.com/articles/5228 找到了一段几年前的代码。原作者golang_yh发表的原文已经找不到了,我修复了代码中的一处小错误。

感觉这段代码有助于理解cookiejar 

package main

import (
    "fmt"
    "io/ioutil"
    "net/http"
    "net/http/cookiejar"
)

// Package-level cookie state shared by the functions in this file.
var (
    // gCurCookies is the most recently saved snapshot of cookies.
    gCurCookies []*http.Cookie
    // gCurCookieJar is the cookie jar handed to the HTTP client.
    gCurCookieJar *cookiejar.Jar
)

func initAll() {
    gCurCookies = nil
    //var err error;
    gCurCookieJar, _ = cookiejar.New(nil)

}

// getUrlRespHtml sends a GET request for url through an HTTP client that
// uses the shared gCurCookieJar and returns the response body as a string.
//
// Once a response has been received, the cookies the jar holds for the
// request URL are saved into gCurCookies. If the request cannot be built or
// sent, the error is printed, gCurCookies is left unchanged and "" is
// returned. If reading the body fails, the error is printed and whatever was
// read so far is returned.
func getUrlRespHtml(url string) string {
    fmt.Printf("getUrlRespHtml, url=%s", url)

    httpClient := &http.Client{
        Jar: gCurCookieJar,
    }

    httpReq, err := http.NewRequest("GET", url, nil)
    if err != nil {
        // A nil request must never reach httpClient.Do: it would panic.
        fmt.Printf("http new request url=%s error=%s\n", url, err.Error())
        return ""
    }

    httpResp, err := httpClient.Do(httpReq)
    if err != nil {
        // httpResp is not usable when Do returns an error, so stop here
        // instead of dereferencing it below.
        fmt.Printf("http get url=%s response error=%s\n", url, err.Error())
        return ""
    }
    defer httpResp.Body.Close()

    fmt.Printf("httpResp.Header=%s", httpResp.Header)
    fmt.Printf("httpResp.Status=%s", httpResp.Status)

    body, errReadAll := ioutil.ReadAll(httpResp.Body)
    if errReadAll != nil {
        fmt.Printf("get response for url=%s got error=%s\n", url, errReadAll.Error())
    }

    // Save the jar's cookies for this URL in the package-level snapshot.
    gCurCookies = gCurCookieJar.Cookies(httpReq.URL)

    return string(body)
}

// dbgPrintCurCookies prints the number of cookies in gCurCookies followed by
// the fields of each cookie, one tab-separated entry per field.
func dbgPrintCurCookies() {
    fmt.Printf("cookieNum=%d", len(gCurCookies))
    for idx, ck := range gCurCookies {
        fmt.Printf("\n------ Cookie [%d]------", idx)
        fmt.Printf("\tName=%s", ck.Name)
        fmt.Printf("\tValue=%s", ck.Value)
        fmt.Printf("\tPath=%s", ck.Path)
        fmt.Printf("\tDomain=%s", ck.Domain)
        fmt.Printf("\tExpires=%s", ck.Expires)
        fmt.Printf("\tRawExpires=%s", ck.RawExpires)
        fmt.Printf("\tMaxAge=%d", ck.MaxAge)
        fmt.Printf("\tSecure=%t", ck.Secure)
        fmt.Printf("\tHttpOnly=%t", ck.HttpOnly)
        fmt.Printf("\tRaw=%s", ck.Raw)
        fmt.Printf("\tUnparsed=%s", ck.Unparsed)
    }
}

// main runs the two-step demo: it resets the cookie state, fetches the first
// URL and prints the saved cookies, then fetches the second URL through the
// same cookie jar and prints the saved cookies again.
func main() {
    const (
        baiduMainUrl = "http://www.baidu.com/"
        getapiUrl    = "https://passport.baidu.com/v2/api/?getapi&class=login&tpl=mn&tangram=true"
    )

    initAll()

    // Step 1: the first request gives the jar a chance to collect cookies.
    fmt.Printf("====== step 1:get Cookie ======")
    fmt.Printf("baiduMainUrl=%s", baiduMainUrl)
    getUrlRespHtml(baiduMainUrl)
    dbgPrintCurCookies()

    // Step 2: the second request goes through the same jar.
    fmt.Printf("\n====== step 2:use the Cookie ======")
    getUrlRespHtml(getapiUrl)
    dbgPrintCurCookies()
}

 还有这里 https://segmentfault.com/q/1010000010339661 的实践有助于深入理解cookiejar

posted on 2020-02-16 20:18  pu369com  阅读(3111)  评论(0)  编辑  收藏  举报

导航