udtf一进多出案例2

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>hive_function</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>2.3.8</version>
<!--排除依赖中的某个jar包-->
<exclusions>
<exclusion>
<groupId>org.pentaho</groupId>
<artifactId>pentaho-aggdesigner-algorithm</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.62</version>
</dependency>
</dependencies>
</project>

java编写

package udtf;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
import java.util.List;
public class JSONDataParseUDTF extends GenericUDTF {
@Override
public StructObjectInspector initialize(StructObjectInspector argOIs) throws UDFArgumentException {
List<String> columnNames = new ArrayList<String>();
columnNames.add("name");
columnNames.add("age");
columnNames.add("sex");
columnNames.add("phone");
List<ObjectInspector> objectInspectors = new ArrayList<ObjectInspector>();
objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
objectInspectors.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, objectInspectors);
}
public void process(Object[] args) throws HiveException {
String jsonStr = args[0].toString();
JSONObject jsonObject = JSON.parseObject(jsonStr);
List<Object> line = new ArrayList<Object>();
line.add(jsonObject.get("name"));
line.add(jsonObject.getInteger("age"));
line.add(jsonObject.get("sex"));
line.add(jsonObject.get("phone"));
forward(line);
}
public void close() throws HiveException {
}
}

打jar包

因为这里有fastjson这个第三方工具包,所以不能用maven的方式导包







在shell中操作

数据
[root@node1 data]# cat aa.txt
{"name":"zs","age":20,"sex":"man","phone":"13888888888"}
{"name":"ls","age":21,"sex":"woman","phone":"13123148888"}
{"name":"ww","age":22,"sex":"man","phone":"1388883456"}
{"name":"ml","age":23,"sex":"woman","phone":"1388883456"}
{"name":"zb","age":24,"sex":"man","phone":"1388885678"}
{"name":"wb","age":25,"sex":"woman","phone":"13888343488"}
{"name":"lb","age":26,"sex":"man","phone":"1388881188"}
hive (default)> create temporary function parse_json_data as "udtf.JSONDataParseUDTF" using jar "hdfs://node1:9000/hive_function-1.0-SNAPSHOT.jar";
Added [/tmp/d0389ef2-b8cf-4739-b438-d9c595057ef5_resources/hive_function-1.0-SNAPSHOT.jar] to class path
Added resources: [hdfs://node1:9000/hive_function-1.0-SNAPSHOT.jar]
FAILED: Class udtf.JSONDataParseUDTF not found
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.FunctionTask
hive (default)> create table user_info(userjson string) row format delimited fields terminated by '\n';
OK
Time taken: 4.959 seconds
hive (default)> load data local inpath "/opt/data/aa.txt" into table user_info;
Loading data to table default.user_info
OK
Time taken: 2.66 seconds
hive (default)> select * from user_info;
OK
user_info.userjson
{"name":"zs","age":20,"sex":"man","phone":"13888888888"}
{"name":"ls","age":21,"sex":"woman","phone":"13123148888"}
{"name":"ww","age":22,"sex":"man","phone":"1388883456"}
{"name":"ml","age":23,"sex":"woman","phone":"1388883456"}
{"name":"zb","age":24,"sex":"man","phone":"1388885678"}
{"name":"wb","age":25,"sex":"woman","phone":"13888343488"}
{"name":"lb","age":26,"sex":"man","phone":"1388881188"}
Time taken: 4.626 seconds, Fetched: 7 row(s)
hive (default)> create temporary function parse_json_data as "udtf.JSONDataParseUDTF" using jar "hdfs://node1:9000/hive_function.jar";
Added [/tmp/d0389ef2-b8cf-4739-b438-d9c595057ef5_resources/hive_function.jar] to class path
Added resources: [hdfs://node1:9000/hive_function.jar]
OK
Time taken: 0.273 seconds
hive (default)> select parse_json_data(userjson) from user_info;
OK
name age sex phone
zs 20 man 13888888888
ls 21 woman 13123148888
ww 22 man 1388883456
ml 23 woman 1388883456
zb 24 man 1388885678
wb 25 woman 13888343488
lb 26 man 1388881188
Time taken: 1.129 seconds, Fetched: 7 row(s)
posted @   jsqup  阅读(24)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?
点击右上角即可分享
微信分享提示