Writing Hive query results directly into a MySQL database with a UDF

There are plenty of articles online about this problem, but they can be unclear in places, so I only want to spell out one point. First, the steps as other blogs give them:

hive> add jar /usr/lib/hive/lib/hive-contrib-0.9.0-cdh4.1.2.jar;
Added /usr/lib/hive/lib/hive-contrib-0.9.0-cdh4.1.2.jar to class path
Added resource: /usr/lib/hive/lib/hive-contrib-0.9.0-cdh4.1.2.jar

hive> add jar /usr/share/java/mysql-connector-java-5.1.17.jar;
Added /usr/share/java/mysql-connector-java-5.1.17.jar to class path
Added resource: /usr/share/java/mysql-connector-java-5.1.17.jar
 

hive> CREATE TEMPORARY FUNCTION dboutput AS 'org.apache.Hadoop.hive.contrib.genericudf.example.GenericUDFDBOutput';

hive> select dboutput('jdbc:mysql://localhost/result','root','123456','INSERT INTO dc(code,size) VALUES (?,?)',code,size) from accesslog limit 10;
 
Note: result is the MySQL database name and dc is a table in that database; the columns in dc(code,size) are the MySQL table's columns, and VALUES (?,?) is filled with the Hive result values; the trailing code,size are columns of the Hive table, and accesslog is the Hive table name.
 
With the steps above, Hive query results can be written straight into MySQL.
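For reference, the target table on the MySQL side could look roughly like this (a hypothetical schema; the column types are my assumptions, and the unique key follows the advice in the UDF's javadoc further below, since retried map/reduce tasks can insert the same row twice):

CREATE TABLE dc (
  code VARCHAR(64) NOT NULL,  -- bound to the first ? in the INSERT
  size INT,                   -- bound to the second ?
  PRIMARY KEY (code)          -- unique key guards against duplicates from retried tasks
);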

The Hive steps quoted earlier were contributed by other bloggers. When I tried them, I kept getting an error saying org.apache.Hadoop.hive.contrib.genericudf.example.GenericUDFDBOutput could not be found.

After some digging I figured out that the GenericUDFDBOutput part has to be written yourself, packaged into a jar, and registered with add jar. (In hindsight, the capital "H" in "Hadoop" is the likely culprit: hive-contrib ships this class under the all-lowercase package org.apache.hadoop.hive.contrib.genericudf.example, so the class name as copied above would never resolve.)

I compiled the class into a jar named udf.jar, with GenericUDFDBOutput written directly in the default package, that is, without all those nested package levels.
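A rough sketch of how such a jar can be built and dropped next to the other Hive jars (the paths, jar names, and versions here are my assumptions for a Hive 0.9 install and will vary by distribution):

javac -cp /home/hadoop/hive-0.9.0/lib/hive-exec-0.9.0.jar:/home/hadoop/hive-0.9.0/lib/hive-serde-0.9.0.jar:/home/hadoop/hadoop/hadoop-core-1.0.0.jar:/home/hadoop/hive-0.9.0/lib/commons-logging-1.0.4.jar GenericUDFDBOutput.java
jar cf udf.jar GenericUDFDBOutput.class
cp udf.jar /home/hadoop/hive-0.9.0/lib/

With the jar in place, my own steps were: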

add jar /home/hadoop/hive-0.9.0/lib/hive-contrib-0.9.0.jar;
add jar /home/hadoop/hive-0.9.0/lib/mysql-connector-java-5.1.10.jar;
add jar /home/hadoop/hive-0.9.0/lib/hive_contrib.jar;
add jar /home/hadoop/hive-0.9.0/lib/hive-exec-0.9.0.jar;
add jar /home/hadoop/hive-0.9.0/lib/udf.jar;

CREATE TEMPORARY FUNCTION dboutput AS 'GenericUDFDBOutput';


select dboutput('jdbc:mysql://192.168.239.100/hive','hadoop','hadoop','INSERT INTO testroom(Name,CtfId) VALUES (?,?)',Name,CtfId) from hive_hbase limit 10;

 

The second set of commands above is my own code, and it ran successfully. Note that each row the select returns is the UDF's return code: per the source below, 0 means the insert succeeded, 1 a SQL error, and 2 a connection failure, so ten rows of 0 mean ten successful inserts.
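To double-check from the MySQL side, a quick query against the target table should show the landed rows (a sketch, using the testroom table from the statement above):

mysql> SELECT COUNT(*) FROM testroom;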

The GenericUDFDBOutput source, which I borrowed from someone else, is as follows:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.IntWritable;

 


/**
 * GenericUDFDBOutput is designed to output data directly from Hive to a JDBC
 * datastore. This UDF is useful for exporting small to medium summaries that
 * have a unique key.
 *
 * Due to the nature of hadoop, individual mappers, reducers or entire jobs can
 * fail. If a failure occurs, a mapper or reducer may be retried. This UDF has
 * no way of detecting failures or rolling back a transaction. Consequently,
 * you should only use this to export to a table with a unique key. The unique
 * key should safeguard against duplicate data.
 *
 * Use hive's ADD JAR feature to add your JDBC Driver to the distributed cache,
 * otherwise GenericUDFDBOutput will fail.
 */
@Description(name = "dboutput",
    value = "_FUNC_(jdbcstring,username,password,preparedstatement,[arguments])"
        + " - sends data to a jdbc driver",
    extended = "argument 0 is the JDBC connection string\n"
        + "argument 1 is the user name\n"
        + "argument 2 is the password\n"
        + "argument 3 is an SQL query to be used in the PreparedStatement\n"
        + "argument (4-n) The remaining arguments must be primitive and are "
        + "passed to the PreparedStatement object\n")
@UDFType(deterministic = false)
// Note: no package declaration, so the function is registered with the bare
// class name: CREATE TEMPORARY FUNCTION dboutput AS 'GenericUDFDBOutput';
public class GenericUDFDBOutput extends GenericUDF {
  private static final Log LOG = LogFactory
      .getLog(GenericUDFDBOutput.class.getName());

  private transient ObjectInspector[] argumentOI;
  private transient Connection connection = null;
  private String url;
  private String user;
  private String pass;
  private final IntWritable result = new IntWritable(-1);

  /**
   * @param arguments
   *          argument 0 is the JDBC connection string, argument 1 is the user
   *          name, argument 2 is the password, argument 3 is an SQL query to
   *          be used in the PreparedStatement; the remaining arguments (4-n)
   *          must be primitive and are passed to the PreparedStatement object
   */
  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments)
      throws UDFArgumentTypeException {
    argumentOI = arguments;

    // this should be connection url,username,password,query,column1[,columnn]*
    for (int i = 0; i < 4; i++) {
      if (arguments[i].getCategory() == ObjectInspector.Category.PRIMITIVE) {
        PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) arguments[i]);

        if (!(poi.getPrimitiveCategory() == PrimitiveObjectInspector.PrimitiveCategory.STRING)) {
          throw new UDFArgumentTypeException(i,
              "The argument of function should be \""
                  + Constants.STRING_TYPE_NAME + "\", but \""
                  + arguments[i].getTypeName() + "\" is found");
        }
      }
    }
    for (int i = 4; i < arguments.length; i++) {
      if (arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {
        throw new UDFArgumentTypeException(i,
            "The argument of function should be primitive" + ", but \""
                + arguments[i].getTypeName() + "\" is found");
      }
    }

    return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
  }

  /**
   * @return 0 on success, 1 when the SQL statement fails, 2 when the
   *         connection cannot be established
   */
  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {

    url = ((StringObjectInspector) argumentOI[0])
        .getPrimitiveJavaObject(arguments[0].get());
    user = ((StringObjectInspector) argumentOI[1])
        .getPrimitiveJavaObject(arguments[1].get());
    pass = ((StringObjectInspector) argumentOI[2])
        .getPrimitiveJavaObject(arguments[2].get());

    try {
      connection = DriverManager.getConnection(url, user, pass);
    } catch (SQLException ex) {
      LOG.error("Driver loading or connection issue", ex);
      result.set(2);
    }

    if (connection != null) {
      try {
        PreparedStatement ps = connection
            .prepareStatement(((StringObjectInspector) argumentOI[3])
                .getPrimitiveJavaObject(arguments[3].get()));
        // bind arguments 4..n to the ? placeholders (JDBC indexes are 1-based)
        for (int i = 4; i < arguments.length; ++i) {
          PrimitiveObjectInspector poi = ((PrimitiveObjectInspector) argumentOI[i]);
          ps.setObject(i - 3, poi.getPrimitiveJavaObject(arguments[i].get()));
        }
        ps.execute();
        ps.close();
        result.set(0);
      } catch (SQLException e) {
        LOG.error("Underlying SQL exception", e);
        result.set(1);
      } finally {
        try {
          connection.close();
        } catch (Exception ex) {
          LOG.error("Underlying SQL exception during close", ex);
        }
      }
    }

    return result;
  }

  @Override
  public String getDisplayString(String[] children) {
    StringBuilder sb = new StringBuilder();
    sb.append("dboutput(");
    if (children.length > 0) {
      sb.append(children[0]);
      for (int i = 1; i < children.length; i++) {
        sb.append(",");
        sb.append(children[i]);
      }
    }
    sb.append(")");
    return sb.toString();
  }

}
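A typical use of the function is to push an aggregate down to MySQL one row at a time. A sketch (the ts column on accesslog and the MySQL table daily_pv(dt,pv) are made up for illustration; both bound columns are primitive, as initialize() requires):

select dboutput('jdbc:mysql://192.168.239.100/hive','hadoop','hadoop',
       'INSERT INTO daily_pv(dt,pv) VALUES (?,?)', dt, pv)
from (select to_date(ts) as dt, count(1) as pv
      from accesslog group by to_date(ts)) t;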

 

 
