004_gc监控

一、解释

(1)

<1>
S0U:Survivor space 0 utilization (kB).  S0U年轻代中第一个survivor(幸存区)目前已使用空间
<2>
S0: Survivor space 0 utilization as a percentage of the space's current capacity.  S0年轻代中第一个survivor(幸存区)已使用的占当前容量百分比
<3>
S0C:Current survivor space 0 capacity (kB)  S0C年轻代中第一个survivor(幸存区)的容量 
<4>
S1U:Survivor space 1 utilization (kB)   S1U年轻代中第二个survivor(幸存区)目前已使用空间
<5>
S1C:Current survivor space 1 capacity (kB)   S1C年轻代中第二个survivor(幸存区)的容量
<6>
S1年轻代中第二个survivor(幸存区)已使用的占当前容量百分比
S1: Survivor space 1 utilization as a percentage of the space's current capacity.
<7>
OU_Old代目前已使用空间
OU: Old space utilization (kB)
<8>
OC_Old代的容量
OC: Current old space capacity (kB)
<9>
O_old代已使用的占当前容量百分比
O:Old space utilization as a percentage of the space's current capacity.
<10>
EU年轻代中Eden(伊甸园)目前已使用空间
EU:Eden space utilization (kB)
<11>
EC年轻代中Eden(伊甸园)的容量
EC: Current eden space capacity (kB)
<12>
年轻代中Eden(伊甸园)已使用的占当前容量百分比
E:Eden space utilization as a percentage of the space's current capacity.
<13>
metaspace(元空间)目前已使用空间
MU: Metaspace utilization (kB)
<14>
metaspace(元空间)的容量
MC: Metaspace capacity (kB).
<15>
元数据区使用比例
M: Metaspace utilization as a percentage of the space's current capacity

<16>Heap已经使用空间百分比
jms.Heap_used / jms.Heap_max

<17>
YGC单event平均回收耗时
YGCT: Young generation garbage collection time/YGC: Number of young generation garbage collection events.

<18>FGCT单event平均耗时
FGCT: Full garbage collection time/FGC: Number of full GC events.

<19>zkjvm_YGC events数量
YGC: Number of young generation GC events.

<20>
Young GC垃圾回收耗时
YGCT: Young generation garbage collection time.

<21>full GC events数量
FGC: Number of full GC events.

<22>Full gc耗时
FGCT: Full garbage collection time

<23>总的gc回收耗时
GCT:Total garbage collection time.

(2)Heap内存使用空间等于下面几个空间之和

S0U: Survivor space 0 utilization (kB)
S1U: Survivor space 1 utilization (kB)
EU: Eden space utilization (kB)
OU: Old space utilization (kB)

(3)Heap_max内存最大使用空间

EC:Current eden space capacity (kB) + S0C:Current survivor space 0 capacity (kB) + S1C:Current survivor space 1 capacity (kB) + OC:Current old space capacity (kB)

即: EC年轻代中Eden(伊甸园)的容量 + S0C年轻代中第一个survivor(幸存区)的容量 + S1C年轻代中第二个survivor(幸存区)的容量 + OC_Old代的容量

(4)YGC单event平均回收耗时

YGCT: Young generation garbage collection time/YGC: Number of young generation garbage collection events.

(5)FGC单event平均耗时

FGCT: Full garbage collection time/FGC: Number of full GC events.

或者(按所有GC事件,即YGC与FGC合计,统计的单event平均耗时):

GCT: Total garbage collection time/(YGC: Number of young generation garbage collection events + FGC: Number of full GC events)

二、下面为golang实现代码

package jstat

import (
	"errors"
	"fmt"
	"io/ioutil"
	"log"
	"metrics"
	"os/exec"
	"reflect"
	"runtime/debug"
	"sources"
	"strconv"
	"strings"
)

// JvmMonitorStatistics holds one sample of JVM memory and garbage-collection
// figures. The fields are filled by compute_jstats from a map of jstat column
// names to values. Each JSON tag mirrors the Go field name.
type JvmMonitorStatistics struct {
	// Survivor space 0, derived from the jstat columns S0U, S0C and S0.
	S0_used    float64 `json:"S0_used"`
	S0_max     float64 `json:"S0_max"`
	S0_ratio   float64 `json:"S0_ratio"`
	// Survivor space 1, derived from the jstat columns S1U, S1C and S1.
	S1_used    float64 `json:"S1_used"`
	S1_max     float64 `json:"S1_max"`
	S1_ratio   float64 `json:"S1_ratio"`
	// Old generation, derived from the jstat columns OU, OC and O.
	Old_used   float64 `json:"Old_used"`
	Old_max    float64 `json:"Old_max"`
	Old_ratio  float64 `json:"Old_ratio"`
	// Eden space, derived from the jstat columns EU, EC and E.
	Eden_used  float64 `json:"Eden_used"`
	Eden_max   float64 `json:"Eden_max"`
	Eden_ratio float64 `json:"Eden_ratio"`
	// Despite the "Perm" name these are derived from the Metaspace columns
	// MU, MC and M.
	Perm_used  float64 `json:"Perm_used"`
	Perm_max   float64 `json:"Perm_max"`
	Perm_ratio float64 `json:"Perm_ratio"`
	// Whole heap: Eden + both survivor spaces + old generation. Heap_ratio is
	// Heap_used / Heap_max expressed as a percentage.
	Heap_used  float64 `json:"Heap_used"`
	Heap_max   float64 `json:"Heap_max"`
	Heap_ratio float64 `json:"Heap_ratio"`
	// Average time per GC event: young (YGCT/YGC), full (FGCT/FGC) and,
	// as intended by the field name, all collections (GCT/(YGC+FGC)).
	YGCT_avg   float64 `json:"YGCT_avg"`
	FGCT_avg   float64 `json:"FGCT_avg"`
	GCT_avg    float64 `json:"GCT_avg"`
	// Raw jstat counters and accumulated times, copied unchanged.
	YGC        float64 `json:"YGC"`
	YGCT       float64 `json:"YGCT"`
	FGC        float64 `json:"FGC"`
	FGCT       float64 `json:"FGCT"`
	GCT        float64 `json:"GCT"`
}

// Decimal rounds value to two decimal places by formatting it with "%.2f"
// and parsing the text back into a float64.
func Decimal(value float64) float64 {
	text := fmt.Sprintf("%.2f", value)
	// The text was produced by Sprintf, so the parse error is deliberately
	// ignored; whatever ParseFloat yields is returned as-is.
	rounded, _ := strconv.ParseFloat(text, 64)
	return rounded
}

// DecimalTh rounds value to three decimal places by formatting it with
// "%.3f" and parsing the text back into a float64.
func DecimalTh(value float64) float64 {
	text := fmt.Sprintf("%.3f", value)
	// The text was produced by Sprintf, so the parse error is deliberately
	// ignored; whatever ParseFloat yields is returned as-is.
	rounded, _ := strconv.ParseFloat(text, 64)
	return rounded
}

// Contain reports whether obj is present in target.
//
// target may be a slice or an array, in which case obj is compared with each
// element using ==, or a map, in which case obj is looked up as a key. Any
// other kind of target, including nil, never contains obj.
//
// The returned error is nil exactly when the result is true. When obj is not
// found, Contain returns false together with a "not in array" error.
func Contain(obj interface{}, target interface{}) (bool, error) {
	errNotFound := errors.New("not in array")

	// reflect.TypeOf(nil) is nil, so a nil target must be rejected before any
	// reflection call is made on it.
	if target == nil {
		return false, errNotFound
	}

	targetValue := reflect.ValueOf(target)
	switch targetValue.Kind() {
	case reflect.Slice, reflect.Array:
		// Comparing two interface values that share a non-comparable dynamic
		// type (slice, map, func) panics at run time, so such an obj is
		// reported as absent instead.
		if obj != nil && !reflect.TypeOf(obj).Comparable() {
			return false, errNotFound
		}
		for i := 0; i < targetValue.Len(); i++ {
			if targetValue.Index(i).Interface() == obj {
				return true, nil
			}
		}
	case reflect.Map:
		keyType := targetValue.Type().Key()
		key := reflect.ValueOf(obj)
		if !key.IsValid() {
			// A nil obj can only be a key of an interface-keyed map.
			if keyType.Kind() != reflect.Interface {
				return false, errNotFound
			}
			key = reflect.Zero(keyType)
		}
		// MapIndex panics when the key is not assignable to the map's key
		// type or cannot be hashed; neither can be a key of this map.
		if !key.Type().AssignableTo(keyType) || !key.Type().Comparable() {
			return false, errNotFound
		}
		if targetValue.MapIndex(key).IsValid() {
			return true, nil
		}
	}

	return false, errNotFound
}

// cusSplit splits str into the non-empty fields found between occurrences of
// the byte sep. Consecutive separators count as one, and leading or trailing
// separators produce no empty fields.
//
// An empty string, or one made up only of separators, yields an empty result.
// Fields are returned as sub-strings of str, so their bytes are never altered.
func cusSplit(str string, sep byte) []string {
	var fields []string

	// start is the index at which the current field began, or -1 while the
	// scan is sitting on separators.
	start := -1
	for i := 0; i < len(str); i++ {
		if str[i] == sep {
			if start >= 0 {
				fields = append(fields, str[start:i])
				start = -1
			}
			continue
		}
		if start < 0 {
			start = i
		}
	}

	// Flush the last field when str does not end with a separator.
	if start >= 0 {
		fields = append(fields, str[start:])
	}
	return fields
}

// ShCommand runs commandname with args and parses its standard output as a
// two-row table: a header row of column names followed by a row of values,
// both whitespace-separated. This is the layout MergeData expects from jstat.
//
// Each column whose value parses as a float is returned under its header
// name. Columns with a non-numeric value (for example "-") are skipped.
// Header and value are matched by position, so a skipped column never shifts
// the ones that follow it.
//
// An error is returned, and the map is nil, when the command cannot be
// started, exits with a non-zero status, or prints fewer than two non-empty
// lines.
func ShCommand(commandname string, args ...string) (map[string]float64, error) {
	cmd := exec.Command(commandname, args...)
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, fmt.Errorf("opening stdout of %q: %v", commandname, err)
	}
	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("starting %q: %v", commandname, err)
	}

	// The pipe has to be drained before Wait is called, because Wait closes
	// it once the command has exited.
	opBytes, readErr := ioutil.ReadAll(stdout)
	waitErr := cmd.Wait()
	if readErr != nil {
		return nil, fmt.Errorf("reading output of %q: %v", commandname, readErr)
	}
	if waitErr != nil {
		return nil, fmt.Errorf("running %q: %v", commandname, waitErr)
	}

	return parseStatTable(string(opBytes))
}

// parseStatTable turns header-row/value-row text into a name-to-value map,
// ignoring blank lines and any column whose value is not a number.
func parseStatTable(output string) (map[string]float64, error) {
	var rows [][]string
	for _, line := range strings.Split(output, "\n") {
		if fields := strings.Fields(line); len(fields) > 0 {
			rows = append(rows, fields)
		}
	}
	if len(rows) < 2 {
		return nil, fmt.Errorf("expected a header row and a value row, got %d non-empty line(s)", len(rows))
	}

	names, values := rows[0], rows[1]
	stats := make(map[string]float64, len(names))
	for i := 0; i < len(names) && i < len(values); i++ {
		v, err := strconv.ParseFloat(values[i], 64)
		if err != nil {
			// Not a number: leave this column out.
			continue
		}
		stats[names[i]] = v
	}
	return stats, nil
}

// getPidBySearchString returns the PID (second column of `ps -ef`) of the
// first process whose `ps -ef` line matches searchName, which is used as a
// grep pattern. Lines containing "grep" are excluded.
//
// It returns "" when searchName is empty, when nothing matches, or when the
// lookup itself cannot be run; in the last case the failure is logged.
func getPidBySearchString(searchName string) string {
	if searchName == "" {
		// An empty pattern would match every process.
		return ""
	}

	// searchName is passed to the shell as the positional parameter $1
	// rather than being spliced into the script text, so spaces and shell
	// metacharacters in it are part of the pattern and are never executed.
	const script = `ps -ef | grep -- "$1" | grep -v grep | awk '{print $2}'`

	out, err := exec.Command("bash", "-c", script, "bash", searchName).Output()
	if err != nil {
		// Nothing useful can be returned as a PID here; callers treat the
		// result as a process id, so an error sentence must not leak into it.
		log.Printf("looking up pid for %q: %v", searchName, err)
		return ""
	}

	firstLine := strings.Split(string(out), "\n")[0]
	return strings.TrimSpace(firstLine)
}

// MergeData runs the jstat binary at jstatpath against pid three times, with
// the options -gc, -gccapacity and -gcutil, and merges the parsed columns
// into one map. When the same column name appears in more than one report,
// the value from the later option wins.
//
// It returns a nil map and an error when pid is empty or when any of the
// jstat invocations fails.
func MergeData(pid string, jstatpath string) (map[string]float64, error) {
	if pid == "" {
		return nil, errors.New("jstat: empty pid, no target process")
	}

	mergedata := make(map[string]float64)
	for _, option := range []string{"-gc", "-gccapacity", "-gcutil"} {
		out, err := ShCommand(jstatpath, option, pid)
		if err != nil {
			// Dump the stack to stderr so the failing call path is visible.
			debug.PrintStack()
			return nil, fmt.Errorf("jstat %s %s: %v", option, pid, err)
		}
		for k, v := range out {
			mergedata[k] = v
		}
	}

	return mergedata, nil
}

// compute_jstats fills a copy of the receiver from pdict, a map of jstat
// column names to values, and returns that copy.
//
//   - *_used and *_max: the jstat size columns multiplied by 1024, rounded to
//     two decimals.
//   - *_ratio for S0, S1, Old, Eden and Perm: the jstat percentage columns
//     S0, S1, O, E and M unchanged, rounded to two decimals.
//   - Heap_used and Heap_max: Eden + both survivor spaces + old generation.
//     Heap_ratio is used/max*100, or 0 when Heap_max is 0.
//   - YGC, YGCT, FGC, FGCT, GCT: copied unchanged.
//   - YGCT_avg, FGCT_avg, GCT_avg: time per event rounded to three decimals,
//     or 0 when the corresponding event count is 0.
//
// Columns missing from pdict read as 0. The returned error is always nil.
func (jms JvmMonitorStatistics) compute_jstats(pdict map[string]float64) (JvmMonitorStatistics, error) {
	// Factor applied to every jstat size column.
	const sizeFactor = 1024

	jms.S0_used = Decimal(pdict["S0U"] * sizeFactor)
	jms.S0_max = Decimal(pdict["S0C"] * sizeFactor)
	// S0 is a percentage like S1, O, E and M, so it is not scaled.
	jms.S0_ratio = Decimal(pdict["S0"])
	jms.S1_used = Decimal(pdict["S1U"] * sizeFactor)
	jms.S1_max = Decimal(pdict["S1C"] * sizeFactor)
	jms.S1_ratio = Decimal(pdict["S1"])
	jms.Old_used = Decimal(pdict["OU"] * sizeFactor)
	jms.Old_max = Decimal(pdict["OC"] * sizeFactor)
	jms.Old_ratio = Decimal(pdict["O"])
	jms.Eden_used = Decimal(pdict["EU"] * sizeFactor)
	jms.Eden_max = Decimal(pdict["EC"] * sizeFactor)
	jms.Eden_ratio = Decimal(pdict["E"])
	jms.Perm_used = Decimal(pdict["MU"] * sizeFactor)
	jms.Perm_max = Decimal(pdict["MC"] * sizeFactor)
	jms.Perm_ratio = Decimal(pdict["M"])

	jms.Heap_used = Decimal((pdict["EU"] + pdict["S0U"] + pdict["S1U"] + pdict["OU"]) * sizeFactor)
	jms.Heap_max = Decimal((pdict["EC"] + pdict["S0C"] + pdict["S1C"] + pdict["OC"]) * sizeFactor)
	// Guard the division: with no capacity columns the ratio would be NaN.
	jms.Heap_ratio = 0
	if jms.Heap_max != 0 {
		jms.Heap_ratio = Decimal((jms.Heap_used / jms.Heap_max) * 100)
	}

	jms.YGC = pdict["YGC"]
	jms.YGCT = pdict["YGCT"]
	jms.FGC = pdict["FGC"]
	jms.FGCT = pdict["FGCT"]
	jms.GCT = pdict["GCT"]

	// Averages are reset first so a value carried in by the receiver never
	// survives when there are no events to divide by.
	jms.YGCT_avg = 0
	if jms.YGC != 0 {
		jms.YGCT_avg = DecimalTh(jms.YGCT / jms.YGC)
	}

	jms.FGCT_avg = 0
	if jms.FGC != 0 {
		jms.FGCT_avg = DecimalTh(jms.FGCT / jms.FGC)
	}

	// The overall average only needs at least one event of either kind; a
	// JVM that has never run a full GC still has a meaningful GCT_avg.
	jms.GCT_avg = 0
	if events := jms.YGC + jms.FGC; events != 0 {
		jms.GCT_avg = DecimalTh(jms.GCT / events)
	}

	return jms, nil
}

// Sender takes one JVM sample and reports it through metrics.Client.
//
// It looks up the PID of the process matching cfg.Sources.FilterName, collects
// the jstat columns with MergeData using cfg.Sources.JstatPath, derives the
// statistics with compute_jstats, and then emits each statistic under the
// metric name "zkjvm" with a "monitor" tag naming the statistic and a "host"
// tag set to cfg.Metrics.Hostname. Values are truncated to int before being
// sent. The hostname, the raw column map and the computed statistics are also
// printed to standard output.
//
// Any error from MergeData or compute_jstats terminates the process via
// log.Fatal. The computed statistics are returned.
func Sender(jms *JvmMonitorStatistics, cfg *sources.Config) JvmMonitorStatistics {
	host := cfg.Metrics.Hostname
	fmt.Println(host)

	pid := getPidBySearchString(cfg.Sources.FilterName)
	mergedata, err := MergeData(pid, cfg.Sources.JstatPath)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(mergedata)

	jmsResult, err := jms.compute_jstats(mergedata)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(jmsResult)

	// One entry per emitted metric, in emission order. timer selects
	// metrics.Client.Timer; everything else goes through Gauge.
	type sample struct {
		monitor string
		value   float64
		timer   bool
	}
	samples := []sample{
		{"zkjvm_S0_used", jmsResult.S0_used, false},
		{"zkjvm_S0_max", jmsResult.S0_max, false},
		{"zkjvm_S0_ratio", jmsResult.S0_ratio, false},
		{"zkjvm_S1_used", jmsResult.S1_used, false},
		{"zkjvm_S1_max", jmsResult.S1_max, false},
		{"zkjvm_S1_ratio", jmsResult.S1_ratio, false},
		{"zkjvm_Old_used", jmsResult.Old_used, false},
		{"zkjvm_Old_max", jmsResult.Old_max, false},
		{"zkjvm_Old_ratio", jmsResult.Old_ratio, false},
		{"zkjvm_Eden_used", jmsResult.Eden_used, false},
		{"zkjvm_Eden_max", jmsResult.Eden_max, false},
		{"zkjvm_Eden_ratio", jmsResult.Eden_ratio, false},
		{"zkjvm_Perm_used", jmsResult.Perm_used, false},
		{"zkjvm_Perm_max", jmsResult.Perm_max, false},
		{"zkjvm_Perm_ratio", jmsResult.Perm_ratio, false},
		{"zkjvm_Heap_used", jmsResult.Heap_used, false},
		{"zkjvm_Heap_max", jmsResult.Heap_max, false},
		{"zkjvm_Heap_ratio", jmsResult.Heap_ratio, false},
		{"zkjvm_YGCT_avg", jmsResult.YGCT_avg, true},
		{"zkjvm_FGCT_avg", jmsResult.FGCT_avg, true},
		{"zkjvm_GCT_avg", jmsResult.GCT_avg, true},
		{"zkjvm_YGC", jmsResult.YGC, false},
		{"zkjvm_YGCT", jmsResult.YGCT, true},
		{"zkjvm_FGC", jmsResult.FGC, false},
		{"zkjvm_FGCT", jmsResult.FGCT, true},
		{"zkjvm_GCT", jmsResult.GCT, true},
	}

	for _, s := range samples {
		// A fresh tag map per call, so no two emissions share one.
		tags := map[string]string{"monitor": s.monitor, "host": host}
		if s.timer {
			metrics.Client.Timer("zkjvm", int(s.value), tags)
		} else {
			metrics.Client.Gauge("zkjvm", int(s.value), tags)
		}
	}

	return jmsResult
}

参考:https://www.hollischuang.com/archives/481

需要仔细研究的: https://www.bilibili.com/video/av29502877/?p=51 

posted @ 2019-03-15 15:45  arun_python  阅读(207)  评论(0)    收藏  举报