Flume integration setup

Flume topology:
1. On the slave nodes, a source defines the data and how it is collected, events pass through a channel, and a sink forwards them to the master node.
2. On the master node, the source receives what the slave nodes' sinks send; events pass through channels and are sunk to HBase and Kafka.
This walkthrough uses the apache-flume-1.7.0-bin distribution.
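Put together, the flow looks like this (host name, port, log path, and table name are the ones used in the configurations below):

agent2 / agent3:  exec source (tail -F /opt/datas/weblogs.log) -> memory channel -> avro sink (sr128:5555)
agent1:           avro source (sr128:5555) -> hbaseC channel -> asynchbase sink (HBase table weblogs)
                                           -> kafkaC channel -> Kafka sink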
Slave node configuration (the other slave node is agent2):
# aliases for the source, channel, and sink
agent3.sources = r1
agent3.channels = c1
agent3.sinks = k1

# this source runs a shell command
agent3.sources.r1.type = exec
# the data comes from the output of this command
agent3.sources.r1.command = tail -F /opt/datas/weblogs.log
# the source writes its events to this channel
agent3.sources.r1.channels = c1

agent3.channels.c1.type = memory
# channel capacity: the maximum number of events held in the channel
agent3.channels.c1.capacity = 10000
# maximum number of events per transaction
agent3.channels.c1.transactionCapacity = 10000
# how long (seconds) a source waits to write when the channel is full; likewise a sink when the channel is empty
agent3.channels.c1.keep-alive = 10000


agent3.sinks.k1.type = avro
agent3.sinks.k1.channel = c1
# this sink does not write to disk; it forwards events over Avro to the next Flume agent
agent3.sinks.k1.hostname = sr128
agent3.sinks.k1.port = 5555
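The other slave node, agent2, is structured the same way. A sketch, assuming it tails the same log file and forwards to the same address:

agent2.sources = r1
agent2.channels = c1
agent2.sinks = k1
# r1 (exec / tail -F), c1 (memory), and k1 (avro -> sr128:5555) are configured exactly as for agent3 above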
Master node configuration:
# aliases for the source, channels, and sinks
agent1.sources = r1
agent1.channels = kafkaC hbaseC
agent1.sinks = kafkaSink hbaseSink

# the receiving (Avro) source
agent1.sources.r1.type = avro
agent1.sources.r1.channels = hbaseC
agent1.sources.r1.bind = sr128
agent1.sources.r1.port = 5555
agent1.sources.r1.threads = 5

# channel feeding the HBase sink
agent1.channels.hbaseC.type = memory
agent1.channels.hbaseC.capacity = 100000
agent1.channels.hbaseC.transactionCapacity = 100000
agent1.channels.hbaseC.keep-alive = 20

# HBase sink (uses the custom Java serializer class defined below)
agent1.sinks.hbaseSink.type = asynchbase
agent1.sinks.hbaseSink.table = weblogs
agent1.sinks.hbaseSink.columnFamily = info
agent1.sinks.hbaseSink.serializer = org.apache.flume.sink.hbase.KfkAsyncHbaseEventSerializer
agent1.sinks.hbaseSink.channel = hbaseC
# column names that the comma-separated event body is split into
agent1.sinks.hbaseSink.serializer.payloadColumn = datetime,userid,searchname,retorder,cliorder,cliurl
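The configuration above only wires up the HBase path; the kafkaC channel and kafkaSink declared at the top still need their own settings, and the source has to fan out to both channels. A minimal sketch, assuming Flume 1.7's bundled KafkaSink, a broker on sr128:9092, and a topic named weblogs (broker address and topic name are assumptions, not part of the original setup):

# the source must write to both channels for data to reach Kafka as well as HBase
agent1.sources.r1.channels = hbaseC kafkaC

# channel feeding the Kafka sink
agent1.channels.kafkaC.type = memory
agent1.channels.kafkaC.capacity = 100000
agent1.channels.kafkaC.transactionCapacity = 100000
agent1.channels.kafkaC.keep-alive = 20

# Kafka sink (broker address and topic name are assumptions)
agent1.sinks.kafkaSink.type = org.apache.flume.sink.kafka.KafkaSink
agent1.sinks.kafkaSink.channel = kafkaC
agent1.sinks.kafkaSink.kafka.bootstrap.servers = sr128:9092
agent1.sinks.kafkaSink.kafka.topic = weblogs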
Add the custom serializer class KfkAsyncHbaseEventSerializer under flume-ng-hbase-sink\src\main\java\org\apache\flume\sink\hbase (after editing, rebuild the flume-ng-hbase-sink module and replace the corresponding jar in Flume's lib directory):
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.flume.sink.hbase;

import com.google.common.base.Charsets;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.FlumeException;
import org.apache.flume.conf.ComponentConfiguration;
import org.apache.flume.sink.hbase.SimpleHbaseEventSerializer.KeyType;
import org.hbase.async.AtomicIncrementRequest;
import org.hbase.async.PutRequest;

import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

/**
 * A serializer for the AsyncHBaseSink that splits the comma-separated event
 * body into one HBase column per name listed in <tt>payloadColumn</tt>, and
 * writes them all under a row key built from the record's userid and datetime
 * fields. Event headers are discarded. It can also update a row in HBase
 * which acts as an event counter.
 *
 * Mandatory parameters: <p>
 * <tt>cf:</tt> Column family.<p>
 * Optional parameters: <p>
 * <tt>payloadColumn:</tt> Comma-separated column names; if absent, event data
 * will not be written.<p>
 * <tt>incrementColumn:</tt> Which column to increment. If absent, no column
 * is incremented.<p>
 * <tt>incrementRow:</tt> The row to increment. Default: <i>incRow</i><p>
 */
public class KfkAsyncHbaseEventSerializer implements AsyncHbaseEventSerializer {
    private byte[] table;
    private byte[] cf;
    private byte[] payload;        // event body: comma-separated values
    private byte[] payloadColumn;  // comma-separated column names from the config
    private byte[] incrementColumn;
    private String rowPrefix;
    private byte[] incrementRow;
    private KeyType keyType;

    @Override
    public void initialize(byte[] table, byte[] cf) {
        this.table = table;
        this.cf = cf;
    }

    @Override
    public List<PutRequest> getActions() {
        List<PutRequest> actions = new ArrayList<PutRequest>();
        if (payloadColumn != null) {
            byte[] rowKey;
            try {
                // payloadColumn holds the comma-separated column names from the config;
                // the event body holds the matching comma-separated values
                String[] columns = new String(payloadColumn, Charsets.UTF_8).split(",");
                String[] values = new String(this.payload, Charsets.UTF_8).split(",");
                if (columns.length == values.length) { // skip malformed records
                    // all columns of one record share a single row key built from userid + datetime
                    String datetime = values[0];
                    String userid = values[1];
                    rowKey = SimpleRowKeyGenerator.getKfkRowKey(userid, datetime);
                    for (int i = 0; i < columns.length; i++) {
                        byte[] colColumn = columns[i].getBytes(Charsets.UTF_8);
                        byte[] colValue = values[i].getBytes(Charsets.UTF_8);
                        actions.add(new PutRequest(table, rowKey, cf, colColumn, colValue));
                    }
                }
            } catch (Exception e) {
                throw new FlumeException("Could not get row key!", e);
            }
        }
        return actions;
    }

    public List<AtomicIncrementRequest> getIncrements() {
        List<AtomicIncrementRequest> actions = new ArrayList<AtomicIncrementRequest>();
        if (incrementColumn != null) {
            AtomicIncrementRequest inc = new AtomicIncrementRequest(table,
                    incrementRow, cf, incrementColumn);
            actions.add(inc);
        }
        return actions;
    }

    @Override
    public void cleanUp() {
        // TODO Auto-generated method stub

    }

    @Override
    public void configure(Context context) {
        String pCol = context.getString("payloadColumn", "pCol");
        String iCol = context.getString("incrementColumn", "iCol");
        rowPrefix = context.getString("rowPrefix", "default");
        String suffix = context.getString("suffix", "uuid");
        if (pCol != null && !pCol.isEmpty()) {
            if (suffix.equals("timestamp")) {
                keyType = KeyType.TS;
            } else if (suffix.equals("random")) {
                keyType = KeyType.RANDOM;
            } else if (suffix.equals("nano")) {
                keyType = KeyType.TSNANO;
            } else {
                keyType = KeyType.UUID;
            }
            payloadColumn = pCol.getBytes(Charsets.UTF_8);
        }
        if (iCol != null && !iCol.isEmpty()) {
            incrementColumn = iCol.getBytes(Charsets.UTF_8);
        }
        incrementRow = context.getString("incrementRow", "incRow").getBytes(Charsets.UTF_8);
    }

    @Override
    public void setEvent(Event event) {
        this.payload = event.getBody();
    }

    @Override
    public void configure(ComponentConfiguration conf) {
        // TODO Auto-generated method stub
    }

}
View Code
Add a row-key helper to SimpleRowKeyGenerator (this is the SimpleRowKeyGenerator.getKfkRowKey method that the serializer above calls):
  public static byte[] getKfkRowKey(String userid, String datetime) throws UnsupportedEncodingException {
    return (userid + datetime + String.valueOf(System.currentTimeMillis())).getBytes("UTF8");
  }
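For a quick sanity check of what this helper produces, a throwaway class like the one below could be used (the class name and the userid/datetime values are made up, purely for illustration):

package org.apache.flume.sink.hbase;

// hypothetical helper class, not part of the project; it just prints one generated row key
public class RowKeyCheck {
  public static void main(String[] args) throws Exception {
    // made-up inputs; the key is userid + datetime + the current epoch millis
    byte[] rowKey = SimpleRowKeyGenerator.getKfkRowKey("12345", "2019-05-28 23:50:01");
    System.out.println(new String(rowKey, "UTF8"));
    // prints something like: 123452019-05-28 23:50:011559058601234
  }
}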

 
