Go性能分析
程序 optimization.go
package optimization
import (
"encoding/json"
"strconv"
)
// Request is the JSON message that createRequest produces and
// processRequest decodes.
type Request struct {
	// TransactionID identifies the request. It is encoded under the
	// "section_id" JSON key.
	TransactionID string `json:"section_id"`
	// PayLoad holds the integers carried by the request, encoded under the
	// "payload" JSON key.
	PayLoad []int `json:"payload"`
}
// Response is the JSON message that processRequest emits for each request.
type Response struct {
	// TransactionID is copied from the request being answered. It is encoded
	// under the "transaction_id" JSON key.
	TransactionID string `json:"transaction_id"`
	// Expression is the text built from the request payload, encoded under
	// the "exp" JSON key.
	Expression string `json:"exp"`
}
// createRequest builds a sample Request whose transaction ID is
// "demo_transaction" and whose payload is the integers 0 through 99, and
// returns it encoded as a JSON string. It panics if encoding fails.
func createRequest() string {
	const payloadSize = 100

	// Fill the payload with its own indices: 0, 1, ..., payloadSize-1.
	values := make([]int, payloadSize)
	for idx := range values {
		values[idx] = idx
	}

	sample := Request{TransactionID: "demo_transaction", PayLoad: values}
	encoded, err := json.Marshal(&sample)
	if err != nil {
		panic(err)
	}
	return string(encoded)
}
// processRequest decodes each JSON-encoded Request in reqs, joins its payload
// integers into a string in which every number is followed by a comma, and
// returns one JSON-encoded Response per request, in input order.
// It panics if a Response cannot be encoded.
func processRequest(reqs []string) []string {
reps := []string{}
for _, req := range reqs {
reqObj := &Request{}
// NOTE(review): the error returned by json.Unmarshal is discarded, so a
// request that fails to decode is still processed with whatever reqObj
// holds at that point.
json.Unmarshal([]byte(req), reqObj)
ret := ""
for _, e := range reqObj.PayLoad {
// Each iteration builds a new string by concatenation.
ret += strconv.Itoa(e) + ","
}
repObj := &Response{reqObj.TransactionID, ret}
// repObj is already a pointer, so &repObj hands json.Marshal a **Response.
repJson, err := json.Marshal(&repObj)
if err != nil {
panic(err)
}
reps = append(reps, string(repJson))
}
return reps
}
测试程序 optimization_test.go
package optimization
import "testing"
// TestCreateRequest logs the JSON string returned by createRequest so it can
// be inspected in verbose test output.
func TestCreateRequest(t *testing.T) {
	t.Log(createRequest())
}
// TestProcessRequest runs a single generated request through processRequest
// and logs the first response.
func TestProcessRequest(t *testing.T) {
	input := []string{createRequest()}
	output := processRequest(input)
	t.Log(output[0])
}
// BenchmarkProcessRequest measures processRequest on a single generated
// request. The request is built before the timer is reset so that only the
// processRequest calls are timed.
func BenchmarkProcessRequest(b *testing.B) {
	input := []string{createRequest()}

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		_ = processRequest(input)
	}
	b.StopTimer()
}
-
第一步 先做简单性能测试,在命令行进入当前包目录下,运行命令:
go test -bench=.
,运行结果如下:
goos: windows
goarch: amd64
pkg: go_learn/ch33/optimization
BenchmarkProcessRequest-4          22869             52071 ns/op
PASS
ok      go_learn/ch33/optimization      1.775s
-
第二步 做cpu性能测试,运行命令:
go test -bench=. -cpuprofile=cpu.prof
,在包目录下会出现 cpu.prof 文件 -
第三步 运行
go tool pprof cpu.prof
,就可以进行如下操作了:
Type: cpu
Time: Jan 15, 2020 at 5:12pm (CST)
Duration: 1.92s, Total samples = 2s (104.12%)
Entering interactive mode (type "help" for commands, "o" for options)
(pprof) top
Showing nodes accounting for 830ms, 41.50% of 2000ms total
Showing top 10 nodes out of 191
      flat  flat%   sum%        cum   cum%
     150ms  7.50%  7.50%      430ms 21.50%  runtime.concatstrings
     140ms  7.00% 14.50%      140ms  7.00%  runtime.memmove
      90ms  4.50% 19.00%      330ms 16.50%  encoding/json.indirect
      90ms  4.50% 23.50%      290ms 14.50%  runtime.mallocgc
      90ms  4.50% 28.00%      130ms  6.50%  strconv.ParseUint
      80ms  4.00% 32.00%       80ms  4.00%  runtime.stdcall3
      50ms  2.50% 34.50%      620ms 31.00%  encoding/json.(*decodeState).literalStore
      50ms  2.50% 37.00%       60ms  3.00%  encoding/json.(*decodeState).rescanLiteral
      50ms  2.50% 39.50%      180ms  9.00%  strconv.ParseInt
      40ms  2.00% 41.50%       70ms  3.50%  encoding/json.stateBeginValue
(pprof) top -cum
Showing nodes accounting for 0.09s, 4.50% of 2s total
Showing top 10 nodes out of 191
      flat  flat%   sum%        cum   cum%
         0     0%     0%      1.72s 86.00%  go_learn/ch33/optimization.BenchmarkProcessRequest
         0     0%     0%      1.72s 86.00%  go_learn/ch33/optimization.processRequest
         0     0%     0%      1.72s 86.00%  testing.(*B).launch
         0     0%     0%      1.72s 86.00%  testing.(*B).runN
         0     0%     0%      1.13s 56.50%  encoding/json.Unmarshal
         0     0%     0%         1s 50.00%  encoding/json.(*decodeState).object
         0     0%     0%         1s 50.00%  encoding/json.(*decodeState).unmarshal
     0.01s   0.5%   0.5%         1s 50.00%  encoding/json.(*decodeState).value
     0.03s  1.50%  2.00%      0.95s 47.50%  encoding/json.(*decodeState).array
     0.05s  2.50%  4.50%      0.62s 31.00%  encoding/json.(*decodeState).literalStore
(pprof) list processRequest
Total: 2s
ROUTINE ======================== go_learn/ch33/optimization.processRequest in D:\go_code\src\go_learn\ch33\optimization\optimization.go
         0      1.72s (flat, cum) 86.00% of Total
         .          .     30:
         .          .     31:func processRequest(reqs []string) []string {
         .          .     32:    reps := []string{}
         .          .     33:    for _, req := range reqs {
         .          .     34:        reqObj := &Request{}
         .      1.13s     35:        json.Unmarshal([]byte(req), reqObj)
         .          .     36:        ret := ""
         .          .     37:        for _, e := range reqObj.PayLoad {
         .      460ms     38:            ret += strconv.Itoa(e) + ","
         .          .     39:        }
         .       30ms     40:        repObj := &Response{reqObj.TransactionID, ret}
         .       90ms     41:        repJson, err := json.Marshal(&repObj)
         .          .     42:        if err != nil {
         .          .     43:            panic(err)
         .          .     44:        }
         .       10ms     45:        reps = append(reps, string(repJson))
         .          .     46:    }
         .          .     47:    return reps
         .          .     48:}
(pprof) exit
通过上面分析得出,
json.Unmarshal
函数耗时最长,可以针对这里做优化 -
第四步 针对
json.Unmarshal
进行优化。这里的 JSON 序列化/反序列化用的是 Go 标准库内置的 encoding/json(通过反射实现,性能比较差),我们可以使用 easyjson 进行替换。