go xpath

 

 

 

https://github.com/antchfx/htmlquery

 

https://www.icode9.com/content-4-730338.html

 

 

 

 

 

package main

import (
    "fmt"
    "github.com/antchfx/htmlquery"
    "log"
    "net/http"
    "time"
)

// main runs the scraper and terminates the process with a logged error if
// any step fails.
func main() {
	if err := run(); err != nil {
		log.Fatalln(err)
	}
}

// run downloads the quotes.toscrape.com front page, parses the HTML and
// prints every quote to stdout as "author-text", one per line.
//
// It returns an error instead of calling log.Fatal itself so that the
// deferred response-body close always runs before the process exits.
func run() error {
	const (
		pageURL   = "http://quotes.toscrape.com/"
		userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3776.0 Safari/537.36"
	)

	req, err := http.NewRequest(http.MethodGet, pageURL, nil)
	if err != nil {
		return fmt.Errorf("building request for %s: %w", pageURL, err)
	}
	req.Header.Set("User-Agent", userAgent)

	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return fmt.Errorf("fetching %s: %w", pageURL, err)
	}
	defer resp.Body.Close()

	// A non-200 response would otherwise be parsed as if it were the quote
	// list and silently yield no output.
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("fetching %s: unexpected status %s", pageURL, resp.Status)
	}

	doc, err := htmlquery.Parse(resp.Body)
	if err != nil {
		return fmt.Errorf("parsing %s: %w", pageURL, err)
	}

	for _, quote := range htmlquery.Find(doc, `//div[@class="quote"]`) {
		// Both paths are explicitly relative ("./", ".//") so the lookups
		// are scoped to the current quote node.
		content := htmlquery.FindOne(quote, ".//span[1]")
		author := htmlquery.FindOne(quote, "./span[2]//small")

		// FindOne returns nil when nothing matches; skip the entry rather
		// than hand a nil node to InnerText.
		if content == nil || author == nil {
			log.Printf("skipping quote block with missing text or author")
			continue
		}

		fmt.Printf("%s-%s\n", htmlquery.InnerText(author), htmlquery.InnerText(content))
	}

	return nil
}

 

posted @ 2022-03-04 11:59  AngDH  阅读(65)  评论(0)  编辑  收藏  举报