1. 添加maven依赖
一、pom.xml
| <?xml version="1.0" encoding="UTF-8"?> |
| <!-- Maven build descriptor for the hive_function module that holds the custom Hive UDF. --> |
| <project xmlns="http://maven.apache.org/POM/4.0.0" |
| xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
| xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
| <modelVersion>4.0.0</modelVersion> |
| |
| <!-- Coordinates of this module; the built artifact is hive_function-1.0-SNAPSHOT.jar. --> |
| <groupId>org.example</groupId> |
| <artifactId>hive_function</artifactId> |
| <version>1.0-SNAPSHOT</version> |
| |
| <dependencies> |
| <!-- hive-exec supplies the org.apache.hadoop.hive.ql.* and serde2 classes the UDF compiles against. --> |
| <dependency> |
| <groupId>org.apache.hive</groupId> |
| <artifactId>hive-exec</artifactId> |
| <version>2.3.8</version> |
| |
| <!-- NOTE(review): presumably excluded because this transitive artifact cannot be resolved |
|      from the default repositories; confirm before removing the exclusion. --> |
| <exclusions> |
| <exclusion> |
| <groupId>org.pentaho</groupId> |
| <artifactId>pentaho-aggdesigner-algorithm</artifactId> |
| </exclusion> |
| </exclusions> |
| </dependency> |
| </dependencies> |
| </project> |
二、自定义一个length()函数
| package udf; |
| |
| import org.apache.hadoop.hive.ql.exec.UDFArgumentException; |
| import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; |
| import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; |
| import org.apache.hadoop.hive.ql.metadata.HiveException; |
| import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; |
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; |
| import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; |
| import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; |
| import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; |
| |
| |
| |
| |
| |
| |
| /** |
|  * Hive generic UDF that returns the length of its single string argument. |
|  * |
|  * <p>The result is {@link String#length()} of the argument, i.e. the number of |
|  * UTF-16 code units. A SQL NULL argument yields NULL instead of failing. |
|  */ |
| public class MyLength extends GenericUDF { |
| |
|     /** Name used when rendering this call in plan output. */ |
|     private static final String FUNCTION_NAME = "mylength"; |
| |
|     /** Inspector of the string argument; assigned in initialize and read in evaluate. */ |
|     private transient StringObjectInspector inputInspector; |
| |
|     /** |
|      * Validates the call signature and declares the return type. |
|      * |
|      * @param arguments inspectors describing the arguments of the call |
|      * @return the Java int object inspector, since evaluate returns an Integer |
|      * @throws UDFArgumentLengthException if the call does not have exactly one argument |
|      * @throws UDFArgumentTypeException if the argument is not a string |
|      */ |
|     @Override |
|     public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { |
|         // The check rejects zero arguments as well as two or more, so say "exactly one". |
|         if (arguments.length != 1) { |
|             throw new UDFArgumentLengthException( |
|                     FUNCTION_NAME + "() requires exactly one parameter, but got " + arguments.length); |
|         } |
| |
|         ObjectInspector inspector = arguments[0]; |
|         if (!(inspector instanceof StringObjectInspector)) { |
|             throw new UDFArgumentTypeException(0, "parameter type is not string"); |
|         } |
| |
|         // Keep the inspector so evaluate can decode the value whatever its runtime representation. |
|         inputInspector = (StringObjectInspector) inspector; |
|         return PrimitiveObjectInspectorFactory.javaIntObjectInspector; |
|     } |
| |
|     /** |
|      * Computes the length of the argument for one row. |
|      * |
|      * @param arguments deferred argument values; only the first one is read |
|      * @return the string length as an Integer, or null when the argument is NULL |
|      * @throws HiveException if the deferred value cannot be obtained |
|      */ |
|     @Override |
|     public Object evaluate(DeferredObject[] arguments) throws HiveException { |
|         Object value = arguments[0].get(); |
|         // NULL in, NULL out: calling a method on the missing value would throw. |
|         if (value == null) { |
|             return null; |
|         } |
| |
|         String str = inputInspector.getPrimitiveJavaObject(value); |
|         if (str == null) { |
|             return null; |
|         } |
|         return str.length(); |
|     } |
| |
|     /** |
|      * Renders the call for plan output. |
|      * |
|      * @param strings display strings of the argument expressions |
|      * @return text of the form mylength(arg) |
|      */ |
|     @Override |
|     public String getDisplayString(String[] strings) { |
|         return getStandardDisplayString(FUNCTION_NAME, strings); |
|     } |
| } |
三、打jar包


四、创建临时函数
| hive (test)> create temporary function mylength as "udf.MyLength" using jar "hdfs://node1:9000/hive_function-1.0-SNAPSHOT.jar"; |
| Added [/tmp/ce2bebf9-ddf7-443f-badd-2d4f147960bf_resources/hive_function-1.0-SNAPSHOT.jar] to class path |
| Added resources: [hdfs://node1:9000/hive_function-1.0-SNAPSHOT.jar] |
| OK |
| Time taken: 0.995 seconds |
| |
| |
| hive (test)> show functions; |
| OK |
| tab_name |
| |
| |
| hive (test)> desc function extended mylength; |
| OK |
| tab_name |
| There is no documentation for function 'mylength' |
| Function class:udf.MyLength |
| Function type:TEMPORARY |
| Resource:hdfs://node1:9000/hive_function-1.0-SNAPSHOT.jar |
| Time taken: 0.008 seconds, Fetched: 4 row(s) |
| |
| |
| hive (test)> show create table page_ad; |
| OK |
| createtab_stmt |
| CREATE TABLE `page_ad`( |
| `page` string, |
| `aids` string) |
| ROW FORMAT SERDE |
| 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' |
| WITH SERDEPROPERTIES ( |
| 'field.delim'=',', |
| 'line.delim'='\n', |
| 'serialization.format'=',') |
| STORED AS INPUTFORMAT |
| 'org.apache.hadoop.mapred.TextInputFormat' |
| OUTPUTFORMAT |
| 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' |
| LOCATION |
| 'hdfs://node1:9000/user/hive/warehouse/test.db/page_ad' |
| TBLPROPERTIES ( |
| 'transient_lastDdlTime'='1659523482') |
| Time taken: 0.108 seconds, Fetched: 17 row(s) |
| |
| |
| hive (test)> select mylength(aids) from page_ad; |
| OK |
| _c0 |
| 13 |
| 16 |
| 19 |
| Time taken: 0.38 seconds, Fetched: 3 row(s) |
五、创建永久函数
| hive (default)> create function mylength as "udf.MyLength" using jar "hdfs://node1:9000/hive_function-1.0-SNAPSHOT.jar"; |
| Added [/tmp/ac2a0a66-3121-4e8e-8faf-039a09419d7c_resources/hive_function-1.0-SNAPSHOT.jar] to class path |
| Added resources: [hdfs://node1:9000/hive_function-1.0-SNAPSHOT.jar] |
| OK |
| Time taken: 0.396 seconds |
注意
- 临时函数:只有当前连接会话可以使用,重建一个hive的连接会话,临时函数失效
- 永久函数:不管连接多少次,函数都可以使用。临时函数可以使用show functions查询,永久函数无法使用该命令查询,
但是可以在hive的元数据库的FUNCS表中找到我们自己新建的永久函数
- 临时函数和永久函数都是和数据库挂钩的,如果在别的数据库中使用这个自定义函数,必须带上数据库的名字
| # 在default数据库中创建的永久函数 |
| hive (default)> create function mylength as "udf.MyLength" using jar "hdfs://node1:9000/hive_function-1.0-SNAPSHOT.jar"; |
| |
| |
| hive (default)> select mylength(name) from student_partition; |
| OK |
| _c0 |
| Time taken: 1.421 seconds |
| |
| |
| hive (default)> use test; |
| OK |
| Time taken: 0.02 seconds |
| |
| |
| hive (test)> select mylength(aids) from page_ad; |
| FAILED: SemanticException [Error 10011]: Invalid function mylength |
| |
| |
| # 必须带上数据库的名字 |
| hive (test)> select default.mylength(aids) from page_ad; |
| OK |
| _c0 |
| 13 |
| 16 |
| 19 |
| Time taken: 0.154 seconds, Fetched: 3 row(s) |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?