Hive自定义函数

  • 依赖
    <!-- 相关依赖 -->
     <dependency>
          <groupId>org.apache.hive</groupId>
          <artifactId>hive-exec</artifactId>
          <version>2.1.1-cdh6.2.1</version>
     </dependency>
     <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-common</artifactId>
          <version>3.0.0-cdh6.2.1</version>
     </dependency>
     <dependency>
          <groupId>com.github.jarod</groupId>
          <artifactId>qqwry-java</artifactId>
          <version>0.9.0</version>
     </dependency>

     

  • UDF
    这里以IP地址转换为例,具体的代码逻辑如下(注意:构造函数中初始化 QQWry 纯真IP库,evaluate 对非法IP返回 null):
    import com.github.jarod.qqwry.IPZone;
    import com.github.jarod.qqwry.QQWry;
    import org.apache.commons.lang3.StringUtils;
    import org.apache.hadoop.hive.ql.exec.UDF;
    
    import java.io.IOException;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    
    /**
     * @author Shydow
     * @date 2021-08-18
     */
    public class Ip2Region extends UDF {
    
        /**
         * Full dotted-quad IPv4 pattern (first octet 1-255, rest 0-255).
         * Compiled once as a constant instead of on every call.
         */
        private static final Pattern IP_PATTERN = Pattern.compile(
                "([1-9]|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])(\\.(\\d|[1-9]\\d|1\\d{2}|2[0-4]\\d|25[0-5])){3}");
    
        // QQWry pure-IP geolocation database, loaded once per UDF instance.
        private final QQWry qqWry;
    
        public Ip2Region() throws IOException {
            qqWry = new QQWry();
        }
    
        /**
         * Resolves an IPv4 address string to its region (main info of the QQWry record).
         *
         * @param ip dotted-quad IPv4 string; may be null or blank
         * @return the region name, or null when the input is not a valid IPv4 address
         */
        public String evaluate(String ip) {
            if (isIp(ip)) {
                IPZone ipZone = qqWry.findIP(ip);
                return ipZone.getMainInfo();
            }
            return null;
        }
    
        /**
         * Validates that the WHOLE string is a dotted-quad IPv4 address.
         * Fixes two defects of the original version: the null/blank guard now runs
         * before length() (avoiding a NullPointerException on null input), and
         * matches() replaces find() so an IP merely embedded in a longer string
         * is rejected.
         */
        private static boolean isIp(String ip) {
            if (StringUtils.isBlank(ip) || ip.length() < 7 || ip.length() > 15) {
                return false;
            }
            return IP_PATTERN.matcher(ip).matches();
        }
    }

     

  • UDAF
    package org.shydow.hadoop.hive;
    
    
    import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.parse.SemanticException;
    import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.io.LongWritable;
    
    /**
     * @author Rainbow
     * @date 2021/11/23 21:20
     * @desc 统计两列中第一列的值大于第二列的值的行数(原注释“求某个字段长度大于30的个数”与代码逻辑不符,已更正)
     */
    public class DefaultUDAFDemo extends AbstractGenericUDAFResolver {
    
        /**
         * Argument check: exactly two arguments are required.
         *
         * @param params declared argument types of the call site
         * @return the evaluator implementing the aggregation logic
         * @throws SemanticException when the argument count is wrong
         */
        @Override
        public GenericUDAFEvaluator getEvaluator(TypeInfo[] params) throws SemanticException {
            if (params.length != 2) {
                // Fixed grammar of the original message ("two argument is expected").
                throw new UDFArgumentTypeException(params.length - 1, "Exactly two arguments are expected");
            }
            return new GenericUDAFBigThanEvaluator(); // evaluator carrying the actual logic
        }
    
        /**
         * Counts rows where the first column's value is greater than the second's.
         */
        public static class GenericUDAFBigThanEvaluator extends GenericUDAFEvaluator {
    
            private LongWritable result;
            // Inspector for parameters[0]: original column 0 on the map side,
            // or the partial LongWritable count on the reduce side.
            private PrimitiveObjectInspector input01;
            // Inspector for original column 1; only set in PARTIAL1/COMPLETE modes.
            private PrimitiveObjectInspector input02;
    
            /**
             * Runs on both map and reduce side. In PARTIAL1/COMPLETE the parameters
             * are the two original columns; in PARTIAL2/FINAL there is exactly ONE
             * parameter (the partial count), so the original unconditional read of
             * parameters[1] threw ArrayIndexOutOfBoundsException on the reduce side
             * — fixed by only reading it when the mode provides it.
             */
            @Override
            public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
                super.init(m, parameters); // required by the GenericUDAFEvaluator contract
                result = new LongWritable(0);
                input01 = (PrimitiveObjectInspector) parameters[0];
                if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
                    input02 = (PrimitiveObjectInspector) parameters[1];
                }
                return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
            }
    
            // Allocates a fresh buffer holding the running count.
            @Override
            public AggregationBuffer getNewAggregationBuffer() throws HiveException {
                CountAgg agg = new CountAgg();
                reset(agg);
                return agg;
            }
    
            // Resets the buffer for reuse.
            @Override
            public void reset(AggregationBuffer agg) throws HiveException {
                ((CountAgg) agg).count = 0;
            }
    
            /**
             * Map-side logic: increment the count when column0 > column1.
             * Fixed: the null guard now runs before objects is dereferenced
             * (the original asserted objects.length before the null check),
             * and the length check is an explicit guard instead of an assert.
             */
            @Override
            public void iterate(AggregationBuffer aggregationBuffer, Object[] objects) throws HiveException {
                if (objects == null || objects.length != 2 || objects[0] == null || objects[1] == null) {
                    return;
                }
                double base = PrimitiveObjectInspectorUtils.getDouble(objects[0], input01);
                double tmp = PrimitiveObjectInspectorUtils.getDouble(objects[1], input02);
                if (base > tmp) {
                    ((CountAgg) aggregationBuffer).count++;
                }
            }
    
            // Emits the partial count for the shuffle.
            @Override
            public Object terminatePartial(AggregationBuffer aggregationBuffer) throws HiveException {
                result.set(((CountAgg) aggregationBuffer).count);
                return result;
            }
    
            // Folds a partial count (inspected via input01 in PARTIAL2/FINAL) into the buffer.
            @Override
            public void merge(AggregationBuffer aggregationBuffer, Object o) throws HiveException {
                if (o != null) {
                    ((CountAgg) aggregationBuffer).count += PrimitiveObjectInspectorUtils.getLong(o, input01);
                }
            }
    
            // Emits the final count.
            @Override
            public Object terminate(AggregationBuffer aggregationBuffer) throws HiveException {
                result.set(((CountAgg) aggregationBuffer).count);
                return result;
            }
    
            /** Running-count buffer; static so it holds no hidden evaluator reference. */
            public static class CountAgg implements AggregationBuffer {
                long count;
            }
        }
    
    }

     

posted @ 2021-11-22 15:47  Shydow  阅读(44)  评论(0编辑  收藏  举报