苏宁易购价格爬取(golang)
如果商品地址为:http://product.suning.com/0070230548/10608983060.html
则价格地址:
http://pas.suning.com/nspcsale_0_000000010608983060_000000010608983060_0070230548_20_021_0210101_500353_1000267_9264_12113_Z001___R9006849_3.3_1___000278188__.html?callback=pcData&_=1558663936729
如果商品地址为:http://product.suning.com/0000000000/144016246.html
则价格地址:
http://pas.suning.com/nspcsale_0_000000000144016246_000000000144016246_0000000000_20_021_0210101_500353_1000267_9264_12113_Z001___R9006850_2.86_0___000278188__.html?callback=pcData&_=1558664442552
分别用Python和Go爬取了相同的数据(135个商品的数据):Go用时19.457s,Python(未使用任何爬虫框架)用时178.672s
go:
1 package main 2 3 import ( 4 "fmt" 5 "io/ioutil" 6 "net/http" 7 "regexp" 8 "strings" 9 ) 10 11 func GetGoodPrice(url string) string { 12 re := regexp.MustCompile(`com/(.*?).html`) 13 keynum := re.FindAllStringSubmatch(url, -1) 14 keynum0 := keynum[0][1] 15 key0 := strings.Split(keynum0, "/")[0] 16 key1 := strings.Split(keynum0, "/")[1] 17 priceurl := "http://pas.suning.com/nspcsale_0_000000000" + key1 + "_000000000" + key1 + "_" + key0 + "_20_021_0210101_500353_1000267_9264_12113_Z001___R9006849_3.3_1___000278188__.html?callback=pcData&_=1558663936729" 18 if len(key1) == 11 { 19 priceurl = "http://pas.suning.com/nspcsale_0_0000000" + key1 + "_0000000" + key1 + "_" + key0 + "_20_021_0210101_500353_1000267_9264_12113_Z001___R9006849_3.3_1___000278188__.html?callback=pcData&_=1558663936729" 20 } 21 22 resp, err := http.Get(priceurl) 23 if err != nil { 24 panic(err) 25 } 26 if resp.StatusCode != 200 { 27 fmt.Println("err") 28 } 29 s, _ := ioutil.ReadAll(resp.Body) 30 resp.Body.Close() 31 32 re0 := regexp.MustCompile(`"netPrice":"(.*?)","warrantyList`) 33 price := re0.FindAllStringSubmatch(string(s), -1) 34 // fmt.Println(price) 35 // fmt.Println(priceurl) 36 return price[0][1] 37 } 38 func main() { 39 url := `http://product.suning.com/0000000000/144016246.html` 40 price := GetGoodPrice(url) 41 fmt.Println(price) 42 }