java调用kettle的job和transfer工具类
package com.woaiyitiaocai.util; import java.util.Map; import java.util.UUID; import org.apache.log4j.Logger; import org.pentaho.di.core.KettleEnvironment; import org.pentaho.di.core.util.EnvUtil; import org.pentaho.di.job.Job; import org.pentaho.di.job.JobMeta; import org.pentaho.di.trans.Trans; import org.pentaho.di.trans.TransMeta; /** * @功能描述: 执行kettle * @项目版本: 1.0.0 * @相对路径: com.woaiyitiaocai.util.ExecKettleUtil.java * @创建作者: woaiyitiaocai * @问题反馈: zhup@woaiyitiaocai.com * @创建日期: 2017年2月27日 下午3:36:10 */ public class ExecKettleUtil { private static Logger logger_info = Logger.getLogger("api-info"); private static Logger logger_error = Logger.getLogger("api-error"); /** * @功能描述: 执行job * @使用对象: woaiyitiaocai * @创建作者: woaiyitiaocai * @创建日期: 2017年2月27日 下午3:51:33 * @param initKettleParam job参数 * @param kjbFilePath job路径 * @return */ public static boolean runKettleJob(Map<String,String> initKettleParam, String kjbFilePath) { String uuid = UUID.randomUUID().toString(); logger_info.info("ExecKettleUtil@runKettleJob:"+uuid+" {kjbFilePath:"+kjbFilePath+"}"); try { KettleEnvironment.init(); //初始化job路径 JobMeta jobMeta = new JobMeta(kjbFilePath, null); Job job = new Job(null, jobMeta); //初始化job参数,脚本中获取参数值:${variableName} for (String variableName : initKettleParam.keySet()) { job.setVariable(variableName, initKettleParam.get(variableName)); } job.start(); job.waitUntilFinished(); if (job.getErrors() > 0) { logger_info.info("ExecKettleUtil@runKettleJob:"+uuid+" 执行失败"); }else{ logger_info.info("ExecKettleUtil@runKettleJob:"+uuid+" 执行成功"); } return true; } catch (Exception e) { logger_error.error("ExecKettleUtil@runKettleJob:"+uuid, e); return false; } } /** * @功能描述: 执行Transfer * @使用对象: woaiyitiaocai * @创建作者: woaiyitiaocai * @创建日期: 2017年2月27日 下午3:51:33 * @param initKettleParam Transfer参数 * @param ktrFilePath Transfer路径 * @return */ public static boolean runKettleTransfer(Map<String,String> initKettleParam, String ktrFilePath) { Trans trans = null; String uuid = 
UUID.randomUUID().toString(); logger_info.info("ExecKettleUtil@runKettleTransfer:"+uuid+" {ktrFilePath:"+ktrFilePath+"}"); try { //初始化 KettleEnvironment.init(); EnvUtil.environmentInit(); TransMeta transMeta = new TransMeta(ktrFilePath); //转换 trans = new Trans(transMeta); //初始化trans参数,脚本中获取参数值:${variableName} for (String variableName : initKettleParam.keySet()) { trans.setVariable(variableName, initKettleParam.get(variableName)); } //执行转换 trans.execute(null); //等待转换执行结束 trans.waitUntilFinished(); if (trans.getErrors() > 0) { logger_info.info("ExecKettleUtil@runKettleTransfer:"+uuid+" 执行失败"); }else{ logger_info.info("ExecKettleUtil@runKettleTransfer:"+uuid+" 执行成功"); } return true; } catch (Exception e) { logger_error.error("ExecKettleUtil@runKettleTransfer:"+uuid, e); return false; } } }
今天要做的项目中用到了在系统中执行kettle脚本,于是自己写了一个,java调用kettle的job和transfer工具类。相关的jar包从etl的开发工具中复制出来就ok了,也可以去官网上面下载。此示例是由程序中触发执行kettle程序,如果不需要由程序触发最简单的就是直接写到linux的定时器去执行了。那就不需要这个类了。
kettle在这里是做数据的抽取,清洗,用kettle开发更有效率,程序也更健壮。这里就不给出kettle的脚本了。java工具类代码如上面所示。
网上也有很多各种各样现成的示例,我的仅供参考,欢迎吐槽。
以下是相关maven依赖
<dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-vfs2</artifactId> <version>2.0</version> </dependency> <dependency> <groupId>org.scannotation</groupId> <artifactId>scannotation</artifactId> <version>1.0.3</version> </dependency> <dependency> <groupId>dom4j</groupId> <artifactId>dom4j</artifactId> <version>1.6.1</version> </dependency> <dependency> <groupId>pentaho-kettle</groupId> <artifactId>kettle-vfs</artifactId> <version>5.2.0.0</version> <classifier>pentaho</classifier> </dependency> <dependency> <groupId>pentaho-kettle</groupId> <artifactId>kettle-engine</artifactId> <version>5.2.0.0</version> </dependency> <dependency> <groupId>pentaho-kettle</groupId> <artifactId>kettle-core</artifactId> <version>5.2.0.0</version> </dependency>