自定义flume的hbase sink 的序列化程序

package com.hello.hbase;

import java.nio.charset.Charset;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;

import org.apache.commons.lang.RandomStringUtils;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.FlumeException;
import org.apache.flume.conf.ComponentConfiguration;
import org.apache.flume.sink.hbase.HbaseEventSerializer;
import org.apache.hadoop.hbase.client.Increment;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Row;
import com.google.common.base.Charsets;
import com.google.common.collect.Lists;


public class FlumeHbaseEventSerializer implements HbaseEventSerializer {
  
    // Config vars  
    /** Regular expression used to parse groups from event data. */  
    public static final String REGEX_CONFIG = "regex";  
    public static final String REGEX_DEFAULT = " ";  
    /** Whether to ignore case when performing regex matches. */  
    public static final String IGNORE_CASE_CONFIG = "regexIgnoreCase";  
    public static final boolean INGORE_CASE_DEFAULT = false;  
    /** Comma separated list of column names to place matching groups in. */  
    public static final String COL_NAME_CONFIG = "colNames";  
    public static final String COLUMN_NAME_DEFAULT = "ip";  
    /** Index of the row key in matched regex groups */  
    public static final String ROW_KEY_INDEX_CONFIG = "rowKeyIndex";  
    /** Placeholder in colNames for row key */  
    public static final String ROW_KEY_NAME = "ROW_KEY";  
    /** Whether to deposit event headers into corresponding column qualifiers */  
    public static final String DEPOSIT_HEADERS_CONFIG = "depositHeaders";  
    public static final boolean DEPOSIT_HEADERS_DEFAULT = false;  
    /** What charset to use when serializing into HBase's byte arrays */  
    public static final String CHARSET_CONFIG = "charset";  
    public static final String CHARSET_DEFAULT = "UTF-8";  
    /* 
     * This is a nonce used in HBase row-keys, such that the same row-key never 
     * gets written more than once from within this JVM. 
     */  
    protected static final AtomicInteger nonce = new AtomicInteger(0);  
    protected static String randomKey = RandomStringUtils.randomAlphanumeric(10);  
    protected byte[] cf;  
    private byte[] payload;  
    private List<byte[]> colNames = Lists.newArrayList();  
    private boolean regexIgnoreCase;  
    private Charset charset;  
    @Override  
    public void configure(Context context) {  
        String regex = context.getString(REGEX_CONFIG, REGEX_DEFAULT);  
        regexIgnoreCase = context.getBoolean(IGNORE_CASE_CONFIG, INGORE_CASE_DEFAULT);  
        context.getBoolean(DEPOSIT_HEADERS_CONFIG, DEPOSIT_HEADERS_DEFAULT);  
        Pattern.compile(regex, Pattern.DOTALL + (regexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0));  
        charset = Charset.forName(context.getString(CHARSET_CONFIG, CHARSET_DEFAULT));  
  
        String cols = new String(context.getString("columns"));  
        String colNameStr;  
        if (cols != null && !"".equals(cols)) {  
            colNameStr = cols;  
        } else {  
            colNameStr = context.getString(COL_NAME_CONFIG, COLUMN_NAME_DEFAULT);  
        }  
  
        String[] columnNames = colNameStr.split(",");  
        for (String s : columnNames) {  
            colNames.add(s.getBytes(charset));  
        }  
    }  
    
    @Override  
    public void configure(ComponentConfiguration conf) {}  
  
    @Override  
    public void initialize(Event event, byte[] columnFamily) {  
        event.getHeaders();  
        this.payload = event.getBody();  
        this.cf = columnFamily;  
    }  
    
    protected byte[] getRowKey(Calendar cal) {  
        String str = new String(payload, charset);  
        String tmp = str.replace("\"", "");  
        String[] arr = tmp.split(" ");  
        String log_data = arr[4];
        String[] param_arr = log_data.split("&");
        String userid = param_arr[0];
        String itemid = param_arr[1];
        String type = param_arr[2];
        String ip_str = param_arr[3];
        
//        String dataStr = arr[3].replace("[", "");  
//        String rowKey = getDate2Str(dataStr) + "-" + clientIp + "-" + nonce.getAndIncrement();
        String rowKey = ip_str + "-" + nonce.getAndIncrement();
        
        return rowKey.getBytes(charset);  
    }  
  
    protected byte[] getRowKey() {  
        return getRowKey(Calendar.getInstance());  
    }  

    @Override  
    public List<Row> getActions() throws FlumeException {  
        List<Row> actions = Lists.newArrayList();  
        byte[] rowKey;  
  
        String body = new String(payload, charset);  
        String tmp = body.replace("\"", "");  
//        String[] arr = tmp.split(REGEX_DEFAULT); 
        String[] arr = tmp.split(" ");
        
        String log_data = arr[4];
        String[] param_arr = log_data.split("&");
        
        String userid = param_arr[0].split("=")[1];
        String itemid = param_arr[1].split("=")[1];
        String type = param_arr[2].split("=")[1];
        String ip_str = param_arr[3].split("=")[1];
                      
        System.out.println("===========");
        System.out.println("===========");
        System.out.println("===========");
        System.out.println("===========");
        System.out.println(userid);
        System.out.println(itemid);
        System.out.println(type);
        System.out.println(ip_str);
        System.out.println("===========");
        System.out.println("===========");
        System.out.println("===========");
        System.out.println("===========");
         
        try {  
            rowKey = getRowKey();
            Put put = new Put(rowKey);  
            put.add(cf, colNames.get(0), userid.getBytes(Charsets.UTF_8));  
            put.add(cf, colNames.get(1), itemid.getBytes(Charsets.UTF_8));  
            put.add(cf, colNames.get(2), type.getBytes(Charsets.UTF_8));
            put.add(cf, colNames.get(3), ip_str.getBytes(Charsets.UTF_8));
            actions.add(put);  
        } catch (Exception e) {  
            throw new FlumeException("Could not get row key!", e);  
        }  
        return actions;  
    }  
  
    @Override  
    public List<Increment> getIncrements() {  
        return Lists.newArrayList();  
    }  
  
    @Override  
    public void close() {}  
  
    public static String getDate2Str(String dataStr) {  
        SimpleDateFormat formatter = null;  
        SimpleDateFormat format = null;  
        Date date = null;  
        try {  
            formatter = new SimpleDateFormat("dd/MMM/yyyy:hh:mm:ss", Locale.ENGLISH);  
            date = formatter.parse(dataStr);  
            format = new SimpleDateFormat("yyyy-MM-dd-HH:mm:ss");  
        } catch (Exception e) {  
            e.printStackTrace();  
        }  
  
        return format.format(date);  
    }  
}

 

posted on 2018-07-05 11:09  NothingLZ  阅读(381)  评论(0编辑  收藏  举报

导航