5432

夜莺初探四·mtail插件采集日志指标

夜莺初探四·mtail插件采集日志指标

前言

上一篇介绍了Categraf的配置,这篇我们尝试通过使用google开源的mtail工具来作为Categraf的插件,从应用日志中提取指标数据。

mtail项目介绍和配置文件说明

通过 mtail -h 可以很方便地查看各参数的详细说明,也推荐阅读乔克的《从日志中提取指标的瑞士军刀》或者 Dream运维梦工厂的《categraf-mtail日志收集插件详解》来了解更多,我就不再班门弄斧了。
当然也可以通过官方文档来了解详情:新手村介绍、高手入门。

Categraf采集插件

categraf-mtail插件地址

https://github.com/flashcatcloud/categraf/tree/main/inputs/mtail

源码解读

package mtail
...

// Plugin-level constants.
const (
	// inputName is the name this input plugin is registered and reported under.
	inputName = `mtail`
	// description is the one-sentence summary returned by MTail.Description.
	description = ` extract internal monitoring data from application logs`
)

// MTail holds the configuration for the plugin: the embedded plugin-level
// settings plus one Instance per entry of the `instances` key in the config
// file.
type MTail struct {
    config.PluginConfig
    Instances []*Instance `toml:"instances"`
}
// Instance is the set of parameters one entry of `instances` in the config
// file can carry, together with the runtime state of the mtail server that is
// built from them.
type Instance struct {
	config.InstanceConfig


	/**
		For reference, the embedded config.InstanceConfig is quoted here as:

		type InternalConfig struct {
			// append labels
			Labels map[string]string `toml:"labels"`

			// metrics drop and pass filter
			MetricsDrop       []string `toml:"metrics_drop"`
			MetricsPass       []string `toml:"metrics_pass"`
			MetricsDropFilter filter.Filter
			MetricsPassFilter filter.Filter

			// metric name prefix
			MetricsNamePrefix string `toml:"metrics_name_prefix"`

			// mapping value
			ProcessorEnum []*ProcessorEnum `toml:"processor_enum"`

			// whether instance initial success
			inited bool `toml:"-"`
		}
		type InstanceConfig struct {
			InternalConfig
			IntervalTimes int64 `toml:"interval_times"`
		}
	**/


	// NOTE(review): NamePrefix is not referenced by the code shown here.
	NamePrefix           string        `toml:"name_prefix"`
	Progs                string        `toml:"progs"` 							// directory holding the rule files (xxx.mtail); required
	Logs                 []string      `toml:"logs"` 							// log files to watch; required
	// IgnoreFileRegPattern is handed to mtail.IgnoreRegexPattern in Init.
	IgnoreFileRegPattern string        `toml:"ignore_filename_regex_pattern"`
	OverrideTimeZone     string        `toml:"override_timezone"`				// time zone name, resolved with time.LoadLocation in Init
	EmitProgLabel        string        `toml:"emit_prog_label"`					// whether to export the prog label; a boolean written as a string
	// emitProgLabel is the parsed form of EmitProgLabel (true only for "true").
	emitProgLabel        bool          `toml:"-"`
	EmitMetricTimestamp  string        `toml:"emit_metric_timestamp"`			// whether metrics carry a timestamp; a boolean written as a string
	// emitMetricTimestamp is the parsed form of EmitMetricTimestamp (true only for "true").
	emitMetricTimestamp  bool          `toml:"-"`
	// PollInterval drives the log stream poll waker; Init defaults it to 250ms.
	PollInterval         time.Duration `toml:"poll_interval"`
	// PollLogInterval drives the log pattern poll waker; Init defaults it to 250ms.
	PollLogInterval      time.Duration `toml:"poll_log_interval"`
	// MetricPushInterval is handed to mtail.MetricPushInterval; Init defaults it to 1 minute.
	MetricPushInterval   time.Duration `toml:"metric_push_interval"`
	// MaxRegexpLen is handed to mtail.MaxRegexpLength; Init defaults it to 1024.
	MaxRegexpLen         int           `toml:"max_regexp_length"`
	// MaxRecursionDepth is handed to mtail.MaxRecursionDepth; Init defaults it to 100.
	MaxRecursionDepth    int           `toml:"max_recursion_depth"`

	SyslogUseCurrentYear string `toml:"syslog_use_current_year"` // true
	// sysLogUseCurrentYear is the parsed form of SyslogUseCurrentYear (true only for "true").
	sysLogUseCurrentYear bool   `toml:"-"`
	LogRuntimeErrors     string `toml:"vm_logs_runtime_errors"` // true
	// logRuntimeErrors is the parsed form of LogRuntimeErrors (true only for "true").
	logRuntimeErrors     bool   `toml:"-"`
	// Runtime state: ctx/cancel bound the lifetime of everything Init starts,
	// and m is the mtail server that Gather reads its registry from.
	ctx    context.Context    `toml:"-"`
	cancel context.CancelFunc `toml:"-"`
	m      *mtail.Server
}
// Init validates the instance configuration, fills in defaults for options
// that were left unset, and builds the mtail server for this instance.
//
// It returns types.ErrInstancesEmpty when progs or logs is not configured,
// the time.LoadLocation error when override_timezone cannot be resolved, and
// the error from mtail.New when the server cannot be created.
func (ins *Instance) Init() error {
	// Both a rule directory and at least one log path are required.
	if len(ins.Progs) == 0 || len(ins.Logs) == 0 {
		return types.ErrInstancesEmpty
	}

	// The boolean switches arrive as strings; only the literal "true" enables them.
	ins.sysLogUseCurrentYear = ins.SyslogUseCurrentYear == "true"
	ins.logRuntimeErrors = ins.LogRuntimeErrors == "true"
	ins.emitProgLabel = ins.EmitProgLabel == "true"
	ins.emitMetricTimestamp = ins.EmitMetricTimestamp == "true"

	// Defaults for options that were left at their zero value.
	if ins.PollLogInterval == 0 {
		ins.PollLogInterval = 250 * time.Millisecond
	}
	if ins.PollInterval == 0 {
		ins.PollInterval = 250 * time.Millisecond
	}
	if ins.MetricPushInterval == 0 {
		ins.MetricPushInterval = 1 * time.Minute
	}
	if ins.MaxRegexpLen == 0 {
		ins.MaxRegexpLen = 1024
	}
	if ins.MaxRecursionDepth == 0 {
		ins.MaxRecursionDepth = 100
	}
	buildInfo := mtail.BuildInfo{
		Version: config.Version,
	}

	// Resolve the configured time zone name.
	loc, err := time.LoadLocation(ins.OverrideTimeZone)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Couldn't parse timezone %q: %s", ins.OverrideTimeZone, err)
		return err
	}

	// Options handed to mtail.
	// NOTE(review): mtail.LogRuntimeErrors is passed unconditionally, so the
	// parsed ins.logRuntimeErrors flag currently has no effect — confirm whether
	// that is intended before making it conditional.
	opts := []mtail.Option{
		mtail.ProgramPath(ins.Progs),
		mtail.LogPathPatterns(ins.Logs...),
		mtail.IgnoreRegexPattern(ins.IgnoreFileRegPattern),
		mtail.SetBuildInfo(buildInfo),
		mtail.OverrideLocation(loc),
		mtail.MetricPushInterval(ins.MetricPushInterval), // keep it here ?
		mtail.MaxRegexpLength(ins.MaxRegexpLen),
		mtail.MaxRecursionDepth(ins.MaxRecursionDepth),
		mtail.LogRuntimeErrors,
	}

	// Stop whatever a previous Init started, then always derive a fresh
	// context. Reusing the cancelled one would hand an already-done context to
	// the wakers, the store GC loop and mtail.New below.
	if ins.cancel != nil {
		ins.cancel()
	}
	ins.ctx, ins.cancel = context.WithCancel(context.Background())

	// Timed waker (1h period) passed to mtail for stale-log garbage collection.
	staleLogGcWaker := waker.NewTimed(ins.ctx, time.Hour)
	opts = append(opts, mtail.StaleLogGcWaker(staleLogGcWaker))

	if ins.PollInterval > 0 {
		logStreamPollWaker := waker.NewTimed(ins.ctx, ins.PollInterval)
		logPatternPollWaker := waker.NewTimed(ins.ctx, ins.PollLogInterval)
		opts = append(opts, mtail.LogPatternPollWaker(logPatternPollWaker), mtail.LogstreamPollWaker(logStreamPollWaker))
	}
	if ins.sysLogUseCurrentYear {
		opts = append(opts, mtail.SyslogUseCurrentYear)
	}
	if !ins.emitProgLabel {
		opts = append(opts, mtail.OmitProgLabel)
	}
	if ins.emitMetricTimestamp {
		opts = append(opts, mtail.EmitMetricTimestamp)
	}

	// Store that holds the extracted metrics; its GC loop runs with a 1h period.
	store := metrics.NewStore()
	store.StartGcLoop(ins.ctx, time.Hour)

	m, err := mtail.New(ins.ctx, store, opts...)
	if err != nil {
		log.Println(err)
		// Release the wakers and the GC loop started above.
		ins.cancel()
		return err
	}
	ins.m = m

	return nil
}
// Drop cancels the instance context, which stops everything Init started on
// it. It is a no-op when no context was ever created (Init was not called, or
// it returned before reaching that point), instead of panicking on a nil
// cancel function.
func (ins *Instance) Drop() {
	if ins.cancel != nil {
		ins.cancel()
	}
}
// init registers the mtail input under inputName. The registered factory
// hands out a fresh, empty MTail each time it is called.
func init() {
	factory := func() inputs.Input { return new(MTail) }
	inputs.Add(inputName, factory)
}
// Clone returns a newly allocated, empty MTail. No field of the receiver is
// copied into it.
func (s *MTail) Clone() inputs.Input {
	return new(MTail)
}

// Name returns inputName, the same name the input is registered under in init.
func (s *MTail) Name() string {
	return inputName
}
// GetInstances returns every configured instance as an inputs.Instance, in
// the same order as s.Instances.
func (s *MTail) GetInstances() []inputs.Instance {
	out := make([]inputs.Instance, 0, len(s.Instances))
	for _, inst := range s.Instances {
		out = append(out, inst)
	}
	return out
}

// Description returns the one-sentence description of the input, i.e. the
// package-level description constant.
func (s *MTail) Description() string {
	return description
}
// Gather reads every metric family from the mtail server's Prometheus
// registry and appends the resulting samples to slist.
//
// Nothing is returned: a gather error is logged and the round is skipped, and
// an instance without a server (Init has not succeeded) is skipped silently.
func (ins *Instance) Gather(slist *types.SampleList) {
	if ins.m == nil {
		return
	}

	// Registry the mtail server exposes its metrics through.
	reg := ins.m.GetRegistry()
	mfs, done, err := prometheus.ToTransactionalGatherer(reg).Gather()
	// The TransactionalGatherer contract requires done to be called even when
	// Gather fails, so it is deferred before the error check.
	if done != nil {
		defer done()
	}
	if err != nil {
		log.Println(err)
		return
	}

	for _, mf := range mfs {
		metricName := mf.GetName()
		for _, m := range mf.Metric {
			// Combine the metric's own labels with the labels configured on the instance.
			tags := util.MakeLabels(m, ins.GetLabels())
			// Summaries and histograms have dedicated handlers; every other
			// metric type goes through the gauge/counter handler.
			switch mf.GetType() {
			case dto.MetricType_SUMMARY:
				util.HandleSummary(inputName, m, tags, metricName, ins.GetLogMetricTime, slist)
			case dto.MetricType_HISTOGRAM:
				util.HandleHistogram(inputName, m, tags, metricName, ins.GetLogMetricTime, slist)
			default:
				util.HandleGaugeCounter(inputName, m, tags, metricName, ins.GetLogMetricTime, slist)
			}
		}
	}
}
// GetLogMetricTime converts ts, read as milliseconds since the Unix epoch,
// into a time.Time. It returns the zero time.Time when ts is not positive or
// when emitMetricTimestamp is false.
func (ins *Instance) GetLogMetricTime(ts int64) time.Time {
	if !ins.emitMetricTimestamp || ts <= 0 {
		return time.Time{}
	}
	const msPerSecond = 1000
	seconds, millis := ts/msPerSecond, ts%msPerSecond
	// time.Unix takes whole seconds plus nanoseconds.
	return time.Unix(seconds, millis*int64(time.Millisecond))
}

整体理解下来,Categraf 通过统一的配置文件,有效地完成了多个目录、多条规则的日志采集,简化了许多操作。

最后感谢看完。由于作者水平有限,对很多工具的使用并不熟悉,如有错误和遗漏欢迎指出,感谢谅解。
以上内容来源于官方推出的夜莺黄埔营的免费培训活动,加入 QQ 群查看直播视频,还可以在官方答疑站点获得更多支持 https://answer.flashcat.cloud/

posted on 2023-05-13 03:47  5432  阅读(346)  评论(0)  编辑  收藏  举报

导航