[Golang] 消费Kafka的日志提交到ElasticSearch

0x0 需求

  消费Kafka的日志并写入ElasticSearch供查询

 

0x1 依赖库

golang版Kafka客户端 https://github.com/Shopify/sarama

golang版ElasticSearch客户端  https://github.com/elastic/go-elasticsearch

 

0x2 实现

总共分3部分

1、Kafka消费者

// LogJson is the expected JSON shape of one Kafka log message. Time maps to
// the Logstash-style "@timestamp" field and is used by ConsumeClaim to build
// the daily ElasticSearch index name.
type LogJson struct {
    Tag     string    `json:"tag"`
    Level   string    `json:"level"`
    File    string    `json:"file"`
    Time    time.Time `json:"@timestamp"`
    Message string    `json:"message"`
}

// taskProcessor receives decoded log documents for asynchronous indexing.
// key is the target index name, val the raw JSON document body.
type taskProcessor interface {
    AddTask(key string, val []byte)
}

// MyConsumer is a sarama consumer-group handler that decodes log messages
// and forwards them to a taskProcessor. Cancelling ctx makes ConsumeClaim
// return.
// NOTE(review): storing a context in a struct is discouraged by Go
// convention; consider passing it per call — confirm before changing.
type MyConsumer struct {
    processor taskProcessor
    ctx       context.Context // cancellation signal observed by ConsumeClaim
}

// NewMyConsumer builds a consumer that forwards every decoded message to p
// and stops consuming once ctx is cancelled.
func NewMyConsumer(p taskProcessor, ctx context.Context) *MyConsumer {
    return &MyConsumer{
        processor: p,
        ctx:       ctx,
    }
}

// Setup runs at the start of a new consumer group session, before any
// ConsumeClaim goroutine is started.
func (consumer *MyConsumer) Setup(s sarama.ConsumerGroupSession) error {
    memberID := s.MemberID()
    log.Printf("[main] consumer.Setup memberID=[%s]", memberID)
    return nil
}

// Cleanup runs at the end of a consumer group session, after all
// ConsumeClaim goroutines have exited.
func (consumer *MyConsumer) Cleanup(s sarama.ConsumerGroupSession) error {
    memberID := s.MemberID()
    log.Printf("[main] consumer.Cleanup memberID=[%s]", memberID)
    return nil
}

// ConsumeClaim consumes log messages from one claimed partition, decodes the
// timestamp to build a daily index name (<topic>-YYYY.MM.DD), and hands the
// raw document to the task processor. It returns when the claim's message
// channel closes (rebalance/shutdown) or the consumer's context is cancelled.
func (consumer *MyConsumer) ConsumeClaim(session sarama.ConsumerGroupSession, claim sarama.ConsumerGroupClaim) error {
    for {
        select {
        case message, ok := <-claim.Messages():
            if !ok {
                // Channel closed by sarama: session is ending.
                return nil
            }
            js := &LogJson{}
            if err := json.Unmarshal(message.Value, js); nil != err {
                fmt.Fprintf(os.Stderr, "[MyConsumer] ConsumeClaim json.Unmarshal err=[%s] topic=[%s] key=[%s] val=[%s]\n", err.Error(), message.Topic, message.Key, string(message.Value))
                // BUGFIX: mark malformed messages too; otherwise a poison
                // message is never committed and is redelivered after every
                // rebalance.
                session.MarkMessage(message, "")
            } else {
                index := fmt.Sprintf("%s-%s", message.Topic, js.Time.Format("2006.01.02"))
                consumer.processor.AddTask(index, message.Value)
                session.MarkMessage(message, "")
            }

        case <-consumer.ctx.Done():
            return nil
        }
    }
    // (previous trailing "return nil" was unreachable and has been removed)
}

 

2、插入ElasticSearch的Worker

package elastic_worker

import (
    "context"
    "encoding/json"
    "fmt"
    "log"
    "runtime"
    "sync"
    "time"

    "github.com/olivere/elastic"
)

// Config controls the worker pool; loaded from the <elastic_worker> XML block.
type Config struct {
    MaxMessage int `xml:"max_msg"`          // max tasks buffered in the queue; AddTask blocks when full
    WorkerNum  int `xml:"worker_number"`    // worker goroutines; <=0 falls back to runtime.NumCPU()
    BatchSize  int `xml:"batch_size"`       // max documents per bulk request
    TickTime   int `xml:"tick_millisecond"` // flush interval in milliseconds; <=0 defaults to 100ms
}

// task is one queued indexing job: key is the target index name,
// val the raw JSON document body.
type task struct {
    key string
    val []byte
}

// Worker batches queued documents and bulk-writes them to ElasticSearch.
// Construct with NewWorker, start with Run, enqueue via AddTask, and call
// Close (after cancelling Run's context) to wait for the drain.
type Worker struct {
    msgQ   chan *task      // bounded task queue (capacity config.MaxMessage)
    client *elastic.Client // shared ES client; each worker builds its own BulkService from it
    wg     sync.WaitGroup  // tracks worker goroutines for Close
    config *Config
}

// NewWorker builds a Worker that bulk-writes queued documents through
// client. The internal queue holds at most cfg.MaxMessage pending tasks.
func NewWorker(client *elastic.Client, cfg *Config) *Worker {
    queue := make(chan *task, cfg.MaxMessage)
    return &Worker{
        client: client,
        config: cfg,
        msgQ:   queue,
    }
}

// Run starts the worker goroutines and returns immediately. Each worker owns
// one reusable BulkService and flushes a batch on every tick; after ctx is
// cancelled it keeps flushing until the queue drains, then exits. Use Close
// to wait for the workers to finish.
func (w *Worker) Run(ctx context.Context) {

    // Worker count: fall back to the CPU count when not configured.
    thread := w.config.WorkerNum
    if thread <= 0 {
        thread = runtime.NumCPU()
    }

    // Flush interval: defaults to 100ms when not configured.
    tickTime := time.Duration(w.config.TickTime) * time.Millisecond
    if tickTime <= 0 {
        tickTime = time.Duration(100) * time.Millisecond
    }

    // Start the workers. The sleep staggers start times so the per-worker
    // tickers do not all fire at the same instant.
    for i := 0; i < thread; i++ {
        w.wg.Add(1)
        time.Sleep(tickTime / time.Duration(thread))
        go func(idx int) {

            // One BulkService per worker, reused across batches.
            // Refresh("wait_for") makes each bulk call wait until the docs
            // are searchable.
            service := w.client.Bulk()
            service.Refresh("wait_for")
            defer service.Reset()

            log.Printf("[elastic_worker] worker[%d] start", idx)
            defer w.wg.Done()

            // Periodic flush trigger.
            ticker := time.NewTicker(tickTime)
            defer ticker.Stop()

        LOOP:
            for {
                select {
                case <-ctx.Done():
                    log.Printf("[elastic_worker] worker[%d] is quiting", idx)
                    // Drain: keep flushing batches until the queue is empty;
                    // only then leave the outer loop.
                    for {
                        if num := w.process(service); num > 0 {
                            log.Printf("[elastic_worker] worker[%d] process batch [%d] when quiting", idx, num)
                        } else {
                            break LOOP
                        }
                        time.Sleep(tickTime)
                    }

                case <-ticker.C:
                    if num := w.process(service); num > 0 {
                        log.Printf("[elastic_worker] worker[%d] process batch [%d] ", idx, num)
                    }
                }
            }

            log.Printf("[elastic_worker] worker[%d] stop", idx)
        }(i)
    }
}

// AddTask enqueues one document for indexing (key = target index, val = raw
// JSON body). Safe for concurrent use; blocks while the queue is full.
func (w *Worker) AddTask(key string, val []byte) {
    w.msgQ <- &task{
        key: key,
        val: val,
    }
}

// process drains up to BatchSize tasks from the queue into one bulk request
// and executes it. Returns the number of documents sent (0 when the queue
// was empty). Any ES transport error or per-document failure panics, so
// indexing failures are loud rather than silently dropping log data.
func (w *Worker) process(service *elastic.BulkService) int {
    //service.Reset()

    // Collect at most w.config.BatchSize tasks without blocking; stop as
    // soon as the queue is empty.
LOOP:
    for i := 0; i < w.config.BatchSize; i++ {
        select {
        case m := <-w.msgQ:
            // m.key is the target index; the raw JSON is indexed unchanged.
            req := elastic.NewBulkIndexRequest().Index(m.key).Type("doc").Doc(json.RawMessage(m.val))
            service.Add(req)
        default:
            break LOOP
        }
    }

    total := service.NumberOfActions()
    if total > 0 {
        if resp, err := service.Do(context.Background()); nil != err {
            panic(err)
        } else {
            // Bulk can succeed overall while individual documents fail;
            // dump the failures before panicking.
            if resp.Errors {
                for _, v := range resp.Failed() {
                    fmt.Println("service.Do failed", v)
                }
                panic("resp.Errors")
            }
        }
    }

    return total
}

// Close blocks until every worker goroutine has drained the queue and
// exited. Cancel the context passed to Run first, or this never returns.
// Logs a warning if any tasks somehow remain queued afterwards.
func (w *Worker) Close() {
    w.wg.Wait()
    if remain := len(w.msgQ); remain > 0 {
        log.Printf("[elastic_worker] worker Close remain msg[%d]", remain)
    }
}

 

3、main.go

package main

import (
    "context"
    "encoding/xml"
    "flag"
    "fmt"
    "io/ioutil"
    "log"
    "os"
    "os/signal"
    "runtime"
    "strings"
    "syscall"
    "time"

    "consumer"
    "elastic_worker"
    
    "github.com/Shopify/sarama"
    "github.com/olivere/elastic"
)

// ConsumerConfig holds the Kafka side of the configuration,
// loaded from the <consumer> XML block.
type ConsumerConfig struct {
    Topic       []string `xml:"topic"`       // topics to consume; repeatable element
    Broker      string   `xml:"broker"`      // comma-separated broker list
    Partition   int32    `xml:"partition"`   // partitions per topic when creating/growing
    Replication int16    `xml:"replication"` // replication factor for created topics
    Group       string   `xml:"group"`       // consumer group id
    Version     string   `xml:"version"`     // Kafka protocol version, e.g. "2.2.0"
}

// Config is the root of the XML configuration file.
type Config struct {
    Consumer   ConsumerConfig        `xml:"consumer"`       // Kafka settings
    ElasticURL string                `xml:"elastic_url"`    // comma-separated ES endpoints
    Filters    []string              `xml:"filter"`         // NOTE(review): not referenced in this file — confirm before removing
    Worker     elastic_worker.Config `xml:"elastic_worker"` // worker pool settings
}

var (
    configFile = "" // config file path (-config flag)
    initTopic  = false // -init: create/grow topics before consuming
    listTopic  = false // -list: list topics and exit
    delTopic   = "" // -del: name of a topic to delete
    cfg        = &Config{}
    web        = "" // NOTE(review): appears unused in this file — confirm before removing
)

// init registers the command-line flags.
func init() {
    flag.StringVar(&configFile, "config", "cfg.xml", "config file ")
    flag.StringVar(&delTopic, "del", delTopic, "delete topic")
    flag.BoolVar(&initTopic, "init", initTopic, "create topic")
    flag.BoolVar(&listTopic, "list", listTopic, "list topic")
}

var (
    // elasticClient is the shared ES client, initialized in main.
    elasticClient *elastic.Client
)

// main wires the pipeline together — Kafka consumer group -> MyConsumer ->
// elastic_worker.Worker -> ElasticSearch — and shuts it down cleanly on
// SIGINT/SIGTERM.
func main() {
    runtime.GOMAXPROCS(runtime.NumCPU())

    // BUGFIX: the flags registered in init() were never parsed, so
    // -config/-init/-list/-del had no effect and configFile stayed "cfg.xml".
    flag.Parse()

    // Give pending log output a moment to flush before the process exits.
    defer time.Sleep(time.Second)

    // Host name doubles as the Kafka ClientID and the sarama log prefix.
    hostName, err := os.Hostname()
    if nil != err {
        hostName = "[beats]"
    }

    // Load the XML configuration.
    if contents, err := ioutil.ReadFile(configFile); err != nil {
        panic(err)
    } else {
        if err = xml.Unmarshal(contents, cfg); err != nil {
            panic(err)
        }
    }

    // Route sarama's internal logging to stdout.
    sarama.Logger = log.New(os.Stdout, fmt.Sprintf("[%s]", hostName), log.LstdFlags)

    // The Kafka protocol version must be set explicitly for consumer
    // groups to work against a cluster.
    version, err := sarama.ParseKafkaVersion(cfg.Consumer.Version)
    if err != nil {
        panic(err)
    }
    config := sarama.NewConfig()
    config.Version = version
    config.Consumer.Offsets.Initial = sarama.OffsetOldest
    config.ClientID = hostName

    // Admin tooling: if an admin flag was given, do that work and exit.
    // Otherwise force topic initialization so topics exist before consuming.
    if tool(cfg, config) {
        return
    } else {
        initTopic = true
        tool(cfg, config)
    }

    // Start the elastic client and verify connectivity with a ping.
    urls := strings.Split(cfg.ElasticURL, ",")
    if cli, err := elastic.NewClient(elastic.SetURL(urls...)); err != nil {
        panic(err)
    } else {
        elasticClient = cli
        if ret, _, err := elasticClient.Ping(urls[0]).Do(context.Background()); nil != err {
            panic(err)
        } else {
            log.Printf("elasticClient.Ping %+v", ret)
        }

        defer elasticClient.Stop()
    }

    // Cancelling ctx stops both the consume loop and the workers.
    ctx, cancel := context.WithCancel(context.Background())

    // Worker pool writing to ElasticSearch; Close waits for the drain.
    worker := elastic_worker.NewWorker(elasticClient, &cfg.Worker)
    worker.Run(ctx)
    defer worker.Close()

    // Kafka consumer group client.
    kafkaClient, err := sarama.NewConsumerGroup(strings.Split(cfg.Consumer.Broker, ","), cfg.Consumer.Group, config)
    if err != nil {
        panic(err)
    }

    // Consume returns on every rebalance and must be called again in a
    // loop; back off one second on errors.
    consumer := consumer.NewMyConsumer(worker, ctx)
    go func() {
        for {
            select {
            case <-ctx.Done():
                return
            default:
                err := kafkaClient.Consume(ctx, cfg.Consumer.Topic, consumer)
                if err != nil {
                    log.Printf("[main] client.Consume error=[%s]", err.Error())
                    time.Sleep(time.Second)
                }
            }
        }
    }()

    // Block until SIGINT/SIGTERM, then tear everything down.
    sigterm := make(chan os.Signal, 1)
    signal.Notify(sigterm, syscall.SIGINT, syscall.SIGTERM)

    sig := <-sigterm
    log.Printf("[main] os sig=[%v]", sig)

    cancel()
    log.Printf("[main] cancel")
    if err := kafkaClient.Close(); nil != err {
        log.Printf("[main] kafkaClient close error=[%s]", err.Error())
    }

    log.Printf("[main] beats quit")
}

// tool runs the Kafka admin subcommands selected by the -init/-list/-del
// flags: delete a topic, create or grow the configured topics, and list all
// topics. Returns true when any admin flag was set (so the caller knows
// admin work ran); false when there was nothing to do.
func tool(cfg *Config, config *sarama.Config) bool {
    if initTopic || listTopic || len(delTopic) > 0 {
        ca, err := sarama.NewClusterAdmin(strings.Split(cfg.Consumer.Broker, ","), config)
        if nil != err {
            panic(err)
        }

        if len(delTopic) > 0 { // delete one topic
            if err := ca.DeleteTopic(delTopic); nil != err {
                panic(err)
            }
            log.Printf("delete ok topic=[%s]\n", delTopic)
        } else if initTopic { // create or grow the configured topics
            if detail, err := ca.ListTopics(); nil != err {
                panic(err)
            } else {
                for _, v := range cfg.Consumer.Topic {
                    if d, ok := detail[v]; ok {
                        // Topic exists: only grow the partition count
                        // (Kafka cannot shrink partitions).
                        if cfg.Consumer.Partition > d.NumPartitions {
                            if err := ca.CreatePartitions(v, cfg.Consumer.Partition, nil, false); nil != err {
                                panic(err)
                            }
                            log.Println("alter topic ok", v, cfg.Consumer.Partition)
                        }

                    } else {
                        if err := ca.CreateTopic(v, &sarama.TopicDetail{NumPartitions: cfg.Consumer.Partition, ReplicationFactor: cfg.Consumer.Replication}, false); nil != err {
                            panic(err)
                        }
                        log.Println("create topic ok", v)
                    }
                }
            }
        }

        // Always list topics afterwards for visibility.
        if detail, err := ca.ListTopics(); nil != err {
            log.Println("ListTopics error", err)
        } else {
            for k := range detail {
                log.Printf("[%s] %+v", k, detail[k])
            }
        }

        if err := ca.Close(); nil != err {
            panic(err)
        }

        return true
    }
    return false
}

 

0x3 配置文件

<?xml version="1.0" encoding="utf-8"?>
<config>

  <consumer>
    <!-- Kafka cluster -->
    <broker>127.0.0.1:9092</broker>

    <!-- topic 可以配多个-->
    <topic>top1</topic>
    <topic>top2</topic>
    

    <!-- Kafka 分组 -->
    <group>test-group</group>

    <!-- Kafka 版本 -->
    <version>2.2.0</version>

    <!-- partition 个数,开consumer个数不能超过这个 -->
    <partition>16</partition>

    <!-- 副本因子 -->
    <replication>2</replication>
  </consumer>

  <elastic_url>http://127.0.0.1:9200</elastic_url>

  <elastic_worker>
    <!-- 最大缓冲 这个小点可以防止崩溃导致丢失太多-->
    <max_msg>2048</max_msg>

    <!-- 线程个数 -->
    <worker_number>1</worker_number>

    <!-- 每个批次最大数量 -->
    <batch_size>1024</batch_size>

    <!-- 处理频率(毫秒) -->
    <tick_millisecond>5000</tick_millisecond>
  </elastic_worker>


</config>

 

0x4 注意

1、如果你的ElasticSearch集群的配置足够高,你可以修改配置文件里的<worker_number>1</worker_number>给Worker开多协程,否则还是单协程性能更高一些。

2、可以适当调整<batch_size>1024</batch_size>每个批次的数量来提升写入性能。

3、如果报这个错误 EsRejectedExecutionException,说明ES性能扛不住了,需要提升配置,降低写入量。

posted @ 2019-07-26 17:00  MrBlue  阅读(2227)  评论(0编辑  收藏  举报