Flume usage and integration
Flume pipeline definition:
1. On the slave (collector) nodes, a source defines where the data comes from and how it is collected, a channel buffers the events, and a sink forwards them to the master node.
2. On the master node, the source receives what the slave sinks send, channels buffer the events, and the sinks write them to HBase and Kafka.
This walkthrough uses the apache-flume-1.7.0-bin distribution.
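Assuming the distribution is fetched from the Apache archive and unpacked under /opt/modules (both assumptions about your environment), installation looks roughly like:

wget https://archive.apache.org/dist/flume/1.7.0/apache-flume-1.7.0-bin.tar.gz
tar -zxf apache-flume-1.7.0-bin.tar.gz -C /opt/modules/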
Slave node configuration (the other slave node, agent2, is configured the same way):
# Aliases for this agent's source, channel and sink
agent3.sources = r1
agent3.channels = c1
agent3.sinks = k1

# The source runs a shell command and ingests its output line by line
agent3.sources.r1.type = exec
agent3.sources.r1.command = tail -F /opt/datas/weblogs.log
# Bind the source to the channel
agent3.sources.r1.channels = c1

agent3.channels.c1.type = memory
# Maximum number of events held in the channel
agent3.channels.c1.capacity = 10000
# Maximum number of events per transaction
agent3.channels.c1.transactionCapacity = 10000
# How long (in seconds) a source waits to put when the channel is full,
# and likewise how long a sink waits to take when it is empty
agent3.channels.c1.keep-alive = 10000

# The sink does not write to local storage; it forwards events over Avro
# to the Flume agent running on the master node
agent3.sinks.k1.type = avro
agent3.sinks.k1.channel = c1
agent3.sinks.k1.hostname = sr128
agent3.sinks.k1.port = 5555
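Assuming this configuration is saved as conf/flume-agent3.conf (the file name is an assumption), the slave agent can be started with the standard flume-ng launcher:

bin/flume-ng agent --conf conf --conf-file conf/flume-agent3.conf --name agent3 -Dflume.root.logger=INFO,console

agent2 is started the same way, with its own configuration file and --name agent2.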
Master node configuration:
# Aliases: one Avro source, two channels and two sinks (Kafka and HBase);
# only the HBase path is configured in this snippet
agent1.sources = r1
agent1.channels = kafkaC hbaseC
agent1.sinks = kafkaSink hbaseSink

# Avro source that receives the events sent by the slave agents
agent1.sources.r1.type = avro
agent1.sources.r1.channels = hbaseC
agent1.sources.r1.bind = sr128
agent1.sources.r1.port = 5555
agent1.sources.r1.threads = 5

# Channel for the HBase path
agent1.channels.hbaseC.type = memory
agent1.channels.hbaseC.capacity = 100000
agent1.channels.hbaseC.transactionCapacity = 100000
agent1.channels.hbaseC.keep-alive = 20

# HBase sink, backed by the custom serializer class edited below
agent1.sinks.hbaseSink.type = asynchbase
agent1.sinks.hbaseSink.table = weblogs
agent1.sinks.hbaseSink.columnFamily = info
agent1.sinks.hbaseSink.serializer = org.apache.flume.sink.hbase.KfkAsyncHbaseEventSerializer
agent1.sinks.hbaseSink.channel = hbaseC
# Column names matching the comma-separated fields of each log line
agent1.sinks.hbaseSink.serializer.payloadColumn = datetime,userid,searchname,retorder,cliorder,cliurl
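The asynchbase sink expects the target table to exist. A minimal setup in the HBase shell, matching the table and column family configured above, would be:

create 'weblogs', 'info'

The master agent is then launched the same way as the slave agents, with its own configuration file and --name agent1.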
Add the custom serializer class KfkAsyncHbaseEventSerializer under flume-ng-hbase-sink\src\main\java\org\apache\flume\sink\hbase:
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.flume.sink.hbase;

import com.google.common.base.Charsets;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.FlumeException;
import org.apache.flume.conf.ComponentConfiguration;
import org.apache.flume.sink.hbase.SimpleHbaseEventSerializer.KeyType;
import org.hbase.async.AtomicIncrementRequest;
import org.hbase.async.PutRequest;

import java.util.ArrayList;
import java.util.List;

/**
 * A simple serializer to be used with the AsyncHBaseSink that returns puts
 * from an event by writing the event body into it. The headers are discarded.
 * It also updates a row in hbase which acts as an event counter.
 *
 * Takes optional parameters:<p>
 * <tt>rowPrefix:</tt> The prefix to be used. Default: <i>default</i><p>
 * <tt>incrementRow</tt> The row to increment. Default: <i>incRow</i><p>
 * <tt>suffix:</tt> <i>uuid/random/timestamp.</i> Default: <i>uuid</i><p>
 *
 * Mandatory parameters: <p>
 * <tt>cf:</tt> Column family.<p>
 * Components that have no defaults and will not be used if absent:
 * <tt>payloadColumn:</tt> Which column to put payload in. If it is not present,
 * event data will not be written.<p>
 * <tt>incrementColumn:</tt> Which column to increment. If this is absent, it
 * means no column is incremented.
 */
public class KfkAsyncHbaseEventSerializer implements AsyncHbaseEventSerializer {
  private byte[] table;
  private byte[] cf;
  private byte[] payload;        // value: the raw event body (one log line)
  private byte[] payloadColumn;  // key: comma-separated list of column names
  private byte[] incrementColumn;
  private String rowPrefix;
  private byte[] incrementRow;
  private KeyType keyType;

  @Override
  public void initialize(byte[] table, byte[] cf) {
    this.table = table;
    this.cf = cf;
  }

  @Override
  public List<PutRequest> getActions() {
    List<PutRequest> actions = new ArrayList<PutRequest>();
    if (payloadColumn != null) {
      byte[] rowKey;
      try {
        // payloadColumn holds the column names and the event body holds the
        // values, both as comma-separated strings.
        String[] columns = new String(payloadColumn, Charsets.UTF_8).split(",");
        String[] values = new String(payload, Charsets.UTF_8).split(",");
        // Skip malformed lines whose field count does not match the column list.
        if (columns.length != values.length) {
          return actions;
        }
        // Data cleaning: the first two fields (datetime, userid) build the row key.
        String datetime = values[0];
        String userid = values[1];
        rowKey = SimpleRowKeyGenerator.getKfkRowKey(userid, datetime);
        for (int i = 0; i < columns.length; i++) {
          byte[] colColumn = columns[i].getBytes(Charsets.UTF_8);
          byte[] colValue = values[i].getBytes(Charsets.UTF_8);
          actions.add(new PutRequest(table, rowKey, cf, colColumn, colValue));
        }
      } catch (Exception e) {
        throw new FlumeException("Could not get row key!", e);
      }
    }
    return actions;
  }

  @Override
  public List<AtomicIncrementRequest> getIncrements() {
    List<AtomicIncrementRequest> actions = new ArrayList<AtomicIncrementRequest>();
    if (incrementColumn != null) {
      actions.add(new AtomicIncrementRequest(table, incrementRow, cf, incrementColumn));
    }
    return actions;
  }

  @Override
  public void cleanUp() {
    // nothing to clean up
  }

  @Override
  public void configure(Context context) {
    String pCol = context.getString("payloadColumn", "pCol");
    String iCol = context.getString("incrementColumn", "iCol");
    rowPrefix = context.getString("rowPrefix", "default");
    String suffix = context.getString("suffix", "uuid");
    if (pCol != null && !pCol.isEmpty()) {
      if (suffix.equals("timestamp")) {
        keyType = KeyType.TS;
      } else if (suffix.equals("random")) {
        keyType = KeyType.RANDOM;
      } else if (suffix.equals("nano")) {
        keyType = KeyType.TSNANO;
      } else {
        keyType = KeyType.UUID;
      }
      payloadColumn = pCol.getBytes(Charsets.UTF_8);
    }
    if (iCol != null && !iCol.isEmpty()) {
      incrementColumn = iCol.getBytes(Charsets.UTF_8);
    }
    incrementRow = context.getString("incrementRow", "incRow").getBytes(Charsets.UTF_8);
  }

  @Override
  public void setEvent(Event event) {
    this.payload = event.getBody();
  }

  @Override
  public void configure(ComponentConfiguration conf) {
    // not used
  }
}
Add the row-key helper method below to SimpleRowKeyGenerator (the class the serializer above calls):
public static byte[] getKfkRowKey(String userid, String datetime) throws UnsupportedEncodingException {
  // Row key = userid + datetime + current epoch milliseconds, which keeps keys unique per event
  return (userid + datetime + String.valueOf(System.currentTimeMillis())).getBytes("UTF8");
}
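Because both classes live inside the Flume source tree, the flume-ng-hbase-sink module has to be rebuilt and the resulting jar placed into the running Flume installation before agent1 can load the new serializer. A rough sketch, where the module path, jar name and install directory are assumptions about your layout:

cd flume-ng-hbase-sink
mvn package -DskipTests
cp target/flume-ng-hbase-sink-1.7.0.jar /opt/modules/apache-flume-1.7.0-bin/lib/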