WordCount: Specifying Parameters on the Command Line at Runtime
1. Modify the earlier WordCountApp.java code as follows:
package cmd;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCountApp extends Configured implements Tool {
    static String INPUT_PATH = "";
    static String OUT_PATH = "";

    public int run(String[] arg0) throws Exception {
        // The input and output paths are no longer hard-coded; they come from the command line
        INPUT_PATH = arg0[0];
        OUT_PATH = arg0[1];

        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
        Path outPath = new Path(OUT_PATH);
        // Delete the output directory if it already exists
        if (fileSystem.exists(outPath)) {
            fileSystem.delete(outPath, true);
        }

        Job job = new Job(conf, WordCountApp.class.getSimpleName());

        // Required when running from a packaged jar
        job.setJarByClass(WordCountApp.class);

        // 1.1 Specify where the input files are read from
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        // Specify how the input files are parsed into key/value pairs, one pair per line
        //job.setInputFormatClass(TextInputFormat.class);

        // 1.2 Specify the custom map class
        job.setMapperClass(MyMapper.class);
        // The map output <k,v> types; can be omitted when <k3,v3> has the same types as <k2,v2>
        //job.setMapOutputKeyClass(Text.class);
        //job.setMapOutputValueClass(LongWritable.class);

        // 1.3 Partitioning
        //job.setPartitionerClass(org.apache.hadoop.mapreduce.lib.partition.HashPartitioner.class);
        // Run with a single reduce task
        //job.setNumReduceTasks(1);

        // 1.4 Sorting and grouping

        // 1.5 Combining

        // 2.2 Specify the custom reduce class
        job.setReducerClass(MyReducer.class);
        // Specify the reduce output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // 2.3 Specify where the output is written
        FileOutputFormat.setOutputPath(job, outPath);
        // Specify the output format class
        //job.setOutputFormatClass(TextOutputFormat.class);

        // Submit the job to the JobTracker and wait for it to finish
        job.waitForCompletion(true);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        ToolRunner.run(new WordCountApp(), args);
    }

    /**
     * KEYIN    i.e. K1: the byte offset of the line
     * VALUEIN  i.e. V1: the text content of the line
     * KEYOUT   i.e. K2: a word appearing in the line
     * VALUEOUT i.e. V2: the count for that word, always 1
     */
    static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        protected void map(LongWritable k1, Text v1, Context context)
                throws java.io.IOException, InterruptedException {
            String[] splited = v1.toString().split("\t");
            for (String word : splited) {
                context.write(new Text(word), new LongWritable(1));
            }
        }
    }

    /**
     * KEYIN    i.e. K2: a word appearing in a line
     * VALUEIN  i.e. V2: the count for that word
     * KEYOUT   i.e. K3: a distinct word
     * VALUEOUT i.e. V3: the total number of occurrences of that word
     */
    static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        protected void reduce(Text k2, java.lang.Iterable<LongWritable> v2s,
                Context ctx) throws java.io.IOException, InterruptedException {
            long times = 0L;
            for (LongWritable count : v2s) {
                times += count.get();
            }
            ctx.write(k2, new LongWritable(times));
        }
    }
}
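A note on the Configuration: one reason to go through ToolRunner is that it parses Hadoop's generic options (for example -D key=value), but those land in the Configuration that ToolRunner injects via Configured.setConf(), not in a freshly constructed one. The following is a minimal sketch of a run() method that keeps those options; it is a variant offered here as an assumption, not the code from the original post, and it relies on the same imports and class as above:

    public int run(String[] args) throws Exception {
        // Variant (not from the original post): reuse the Configuration that
        // ToolRunner populated, so -D options from the command line are kept.
        Configuration conf = getConf();
        FileSystem fileSystem = FileSystem.get(new URI(args[0]), conf);
        Path outPath = new Path(args[1]);
        if (fileSystem.exists(outPath)) {
            fileSystem.delete(outPath, true);
        }
        // ... the rest of the job setup is the same as in the code above
        return 0;
    }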
2. After making the change, do not run the program from Eclipse. Instead, export it as a jar and copy it to the /usr/local directory on the Linux machine using WinSCP.
3. Run it from the Linux command line; a sketch of the commands follows below. Once the job completes successfully, check the output.
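For reference, a run might look like the following. The jar name, HDFS address, and paths are placeholders for illustration rather than values from the original setup, and they assume the exported jar has no Main-Class entry in its manifest, so the class name is passed explicitly:

    cd /usr/local
    hadoop jar wordcount.jar cmd.WordCountApp hdfs://hadoop0:9000/input hdfs://hadoop0:9000/output
    hadoop fs -cat hdfs://hadoop0:9000/output/part-r-00000

The two paths after the class name become arg0[0] and arg0[1] in run(), which is exactly how the input and output locations are now specified at runtime instead of being hard-coded.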
Learn together, improve together.