08 友盟项目--拆分日志为五个表---UDTF自定义函数

UDTF处理json
添加依赖
        <!-- Hive execution library. The UDTF code in this article imports
             org.apache.hadoop.hive.ql.* and org.apache.hadoop.hive.serde2.* classes
             (presumably supplied by this artifact - confirm against your build). -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>2.1.0</version>
        </dependency>
  
        <!-- fastjson: the UDTF uses com.alibaba.fastjson.JSONObject to parse the JSON log string. -->
        <!-- NOTE(review): 1.2.24 is an old release; check it against the published fastjson
             security advisories before reusing this snippet. -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.24</version>
        </dependency>

 

 
 
 
仿照 GenericUDTFExplode 函数的源码来编写自定义 UDTF,
因为自定义函数继承的类为 GenericUDTF
 
 
 
自定义的UDTF代码

 自定义叉分函数

表生成函数

  1 package com.oldboy.umeng.hive.udtf;
  2 
  3 import com.alibaba.fastjson.JSONObject;
  4 import com.oldboy.umeng.common.domain.AppLogAggEntity;
  5 import com.oldboy.umeng.common.domain.AppStartupLog;
  6 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
  7 import org.apache.hadoop.hive.ql.metadata.HiveException;
  8 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
  9 import org.apache.hadoop.hive.serde2.objectinspector.*;
 10 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 11 
 12 import java.beans.BeanInfo;
 13 import java.beans.Introspector;
 14 import java.beans.PropertyDescriptor;
 15 import java.lang.reflect.Method;
 16 import java.lang.reflect.ParameterizedType;
 17 import java.util.ArrayList;
 18 import java.util.List;
 19 
 20 /**
 21  * 自定义叉分函数
 22  * 表生成函数
 23  * fork(servertimestr , clienttimems , clientip ,json)
 24  */
 25 public abstract class BaseForkUDTF<T> extends GenericUDTF {
 26 
 27     private Class<T> clazz;
 28 
 29     private ObjectInspectorConverters.Converter[] converters ;
 30     //字段名称列表
 31     List<String> fieldNames = null ;
 32     //检查器列表
 33     List<ObjectInspector> ois = null ;
 34 
 35     //通过构造函数抽取子类的泛型化超类部分
 36     public BaseForkUDTF(){
 37         ParameterizedType type = (ParameterizedType) this.getClass().getGenericSuperclass();
 38         clazz = (Class) type.getActualTypeArguments()[0];
 39     }
 40 
 41     /**
 42      * 校验参数合规性
 43      */
 44     public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
 45 
 46         //字段名称集合
 47         fieldNames = new ArrayList<String>() ;
 48 
 49         //对象检查器集合
 50         List<ObjectInspector> ois = new ArrayList<ObjectInspector>() ;
 51 
 52         if (args.length != 4) {
 53             throw new UDFArgumentException("fork()需要4个参数!!!");
 54         }
 55         //判断参数的类型
 56         //1.string
 57         if(args[0].getCategory()!= ObjectInspector.Category.PRIMITIVE
 58                    || ((PrimitiveObjectInspector)args[0]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING){
 59             throw new UDFArgumentException("参数{1}不是string类型!!!");
 60         }
 61         //2.bigint
 62         if (args[1].getCategory() != ObjectInspector.Category.PRIMITIVE || ((PrimitiveObjectInspector) args[1]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.LONG) {
 63             throw new UDFArgumentException("参数{2}不是bigint类型!!!");
 64         }
 65         //3.string
 66         if (args[2].getCategory() != ObjectInspector.Category.PRIMITIVE || ((PrimitiveObjectInspector) args[2]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
 67             throw new UDFArgumentException("参数{3}不是string类型!!!");
 68         }
 69         //4.string
 70         if (args[3].getCategory() != ObjectInspector.Category.PRIMITIVE || ((PrimitiveObjectInspector) args[3]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
 71             throw new UDFArgumentException("参数{4}不是string类型!!!");
 72         }
 73 
 74         //类型转换器
 75         converters = new ObjectInspectorConverters.Converter[args.length];
 76         //保持每个参数对应的转换器
 77         converters[0] = ObjectInspectorConverters.getConverter(args[0] , PrimitiveObjectInspectorFactory.javaStringObjectInspector) ;
 78         converters[1] = ObjectInspectorConverters.getConverter(args[1] , PrimitiveObjectInspectorFactory.javaLongObjectInspector) ;
 79         converters[2] = ObjectInspectorConverters.getConverter(args[2] , PrimitiveObjectInspectorFactory.javaStringObjectInspector) ;
 80         converters[3] = ObjectInspectorConverters.getConverter(args[3] , PrimitiveObjectInspectorFactory.javaStringObjectInspector) ;
 81 
 82         try {
 83             popOIS(fieldNames , ois) ;
 84         } catch (Exception e) {
 85             e.printStackTrace();
 86         }
 87         //返回结构体对象检查器
 88         return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames , ois) ;
 89     }
 90 
 91     /**
 92      * 组装对象检查器
 93      * 将字段名称和对象检查器集合同步组装完成
 94      * 每个字段都对应各自的对象检查器(ObjectInspector)
 95      */
 96     private void popOIS(List<String> fieldNames, List<ObjectInspector> ois) throws Exception {
 97         //获取clazz类的bean信息
 98         BeanInfo bi = Introspector.getBeanInfo(clazz) ;
 99 
100         //得到所有属性
101         PropertyDescriptor[] pps = bi.getPropertyDescriptors();
102 
103         for(PropertyDescriptor pp :pps){
104             String name = pp.getName() ;
105             Class type = pp.getPropertyType() ;
106             Method get = pp.getReadMethod() ;
107             Method set = pp.getWriteMethod() ;
108             //
109             if(get != null && set != null){
110                 if(type == Long.class || type == long.class){
111                     fieldNames.add(name) ;
112                     ois.add(PrimitiveObjectInspectorFactory.javaLongObjectInspector) ;
113                 }
114                 else if(type == int.class || type ==Integer.class){
115                     fieldNames.add(name);
116                     ois.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
117                 }
118                 else if(type == String.class){
119                     fieldNames.add(name);
120                     ois.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
121                 }
122             }
123         }
124     }
125 
126     public void process(Object[] args) throws HiveException {
127         //检查一下参数的个数有效性
128         if (args.length != 4) {
129             throw new UDFArgumentException("fork()需要4个参数!!!");
130         }
131 
132         String servertimestr = (String) converters[0].convert(args[0]);
133         Long clienttimems = (Long) converters[1].convert(args[1]);
134         String clientip = (String) converters[2].convert(args[2]);
135         String json = (String) converters[3].convert(args[3]);
136 
137         //替换\"为"
138         json = json.replace("\\\"" , "\"") ;
139 
140         //解析json,返回日志聚合体对象
141         AppLogAggEntity agg = JSONObject.parseObject(json , AppLogAggEntity.class) ;
142         //TODO 时间对齐
143         List<T> logs = getLogs(agg) ;
144 
145         //外层for循环决定行数
146         for(Object log : logs){
147             Object[] arr = new Object[fieldNames.size()] ;
148             int i = 0 ;
149             //内层for循环决定列数,顺序和filedname顺序相同
150             for(String fname : fieldNames){
151                 try {
152                     PropertyDescriptor pp = new PropertyDescriptor(fname , clazz) ;
153                     Method get = pp.getReadMethod() ;
154                     if(get != null){
155                         Object retValue = get.invoke(log) ;
156                         arr[i] = retValue ;
157                     }
158                     i ++ ;
159                 } catch (Exception e) {
160                     e.printStackTrace();
161                 }
162             }
163             //转发对象,就是输出一行
164             forward(arr);
165         }
166     }
167 
168     //抽象方法,子类必须重写
169     public abstract List<T> getLogs(AppLogAggEntity agg) ;
170 
171     public void close() throws HiveException {
172 
173     }
174 }
BaseForkUDTF

错误日志Errorlogs生成函数  继承了  BaseForkUDTF

/**
 * 叉分错误日志
 */
public class ForkErrorlogsUDTF extends BaseForkUDTF<AppErrorLog>{
    public List<AppErrorLog> getLogs(AppLogAggEntity agg) {
        return agg.getErrorLogs();
    }
}

事件日志EventLogs生成函数  继承了  BaseForkUDTF

/**
 * 叉分事件日志
 */
public class ForkEventlogsUDTF extends BaseForkUDTF<AppEventLog>{
    public List<AppEventLog> getLogs(AppLogAggEntity agg) {
        return agg.getEventLogs();
    }
}

 

页面日志PageLogs生成函数  继承了  BaseForkUDTF

/**
 * 叉分页面日志
 */
public class ForkPagelogsUDTF extends BaseForkUDTF<AppPageLog>{
    public List<AppPageLog> getLogs(AppLogAggEntity agg) {
        return agg.getPageLogs();
    }
}

 

 

启动日志StartupLogs生成函数  继承了  BaseForkUDTF

/**
 * 叉分启动日志
 */
public class ForkStartuplogsUDTF extends BaseForkUDTF<AppStartupLog>{
    public List<AppStartupLog> getLogs(AppLogAggEntity agg) {
        return agg.getStartupLogs();
    }
}

 

页面使用日志UsageLogs生成函数  继承了  BaseForkUDTF

 
/**
 * 叉分页面使用日志
 */
public class ForkUsagelogsUDTF extends BaseForkUDTF<AppUsageLog>{
    public List<AppUsageLog> getLogs(AppLogAggEntity agg) {
        return agg.getUsageLogs();
    }
}

 

 
 
 
 
 
posted @ 2018-10-31 00:19  star521  阅读(358)  评论(0编辑  收藏  举报