HDFS目标大文件夹下文件索引建立
一、索引命令
1、非分布式,单机
hadoop jar /usr/lib/hadoop/lib/hadoop-lzo.jar com.hadoop.compression.lzo.LzoIndexer ${path}
2、分布式:任务提交到yarn,需要使用集群资源
hadoop jar /usr/lib/hadoop/lib/hadoop-lzo.jar com.hadoop.compression.lzo.DistributedLzoIndexer ${path}
注:在需要指定队列或启用了 Kerberos 认证的机器上,DistributedLzoIndexer 可能无法直接正常运行,需要额外添加队列参数或 Kerberos 相关参数。
二、执行 Linux shell 命令的 Java 参考模板
1 public class ExecCMD { 2 3 private static final Logger log = LoggerFactory.getLogger(ExecCMD.class); 4 5 public static String exec(String cmd) { 6 try { 7 String[] cmdA = { "/bin/sh", "-c", cmd }; 8 Process process = Runtime.getRuntime().exec(cmdA); 9 LineNumberReader br = new LineNumberReader(new InputStreamReader(process.getInputStream())); 10 StringBuffer buff = new StringBuffer(); 11 String line; 12 while ((line = br.readLine()) != null) { 13 buff.append(line).append("\n"); 14 } 15 String result = buff.toString(); 16 if (!cmd.isEmpty()) 17 log.info("cmd executed, result: " + result); 18 return result; 19 } catch (Exception e) { 20 log.error("Failed to exec cmd: " + cmd, e); 21 } 22 return null; 23 } 24 }