WordCount: Specifying Parameters on the Command Line at Runtime
1. Modify the earlier WordCountApp.java code as follows:
package cmd;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCountApp extends Configured implements Tool {
    static String INPUT_PATH = "";
    static String OUT_PATH = "";

    public int run(String[] arg0) throws Exception {
        // The input and output paths are no longer hard-coded; they come from the command line
        INPUT_PATH = arg0[0];
        OUT_PATH = arg0[1];

        Configuration conf = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
        Path outPath = new Path(OUT_PATH);
        // Delete the output directory if it already exists
        if (fileSystem.exists(outPath)) {
            fileSystem.delete(outPath, true);
        }

        Job job = new Job(conf, WordCountApp.class.getSimpleName());

        // Required when running from a packaged jar
        job.setJarByClass(WordCountApp.class);

        // 1.1 Specify where the input files are read from
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        // Specify how the input files are parsed into key/value pairs, one pair per line
        //job.setInputFormatClass(TextInputFormat.class);

        // 1.2 Specify the custom map class
        job.setMapperClass(MyMapper.class);
        // The map output <k,v> types; can be omitted when <k3,v3> has the same types as <k2,v2>
        //job.setMapOutputKeyClass(Text.class);
        //job.setMapOutputValueClass(LongWritable.class);

        // 1.3 Partitioning
        //job.setPartitionerClass(org.apache.hadoop.mapreduce.lib.partition.HashPartitioner.class);
        // Run with a single reduce task
        //job.setNumReduceTasks(1);

        // 1.4 Sorting and grouping

        // 1.5 Combining

        // 2.2 Specify the custom reduce class
        job.setReducerClass(MyReducer.class);
        // Specify the reduce output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // 2.3 Specify where the output is written
        FileOutputFormat.setOutputPath(job, outPath);
        // Specify the output format class
        //job.setOutputFormatClass(TextOutputFormat.class);

        // Submit the job to the JobTracker and wait for it to finish
        job.waitForCompletion(true);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        ToolRunner.run(new WordCountApp(), args);
    }

    /**
     * KEYIN    i.e. K1: the byte offset of the line
     * VALUEIN  i.e. V1: the text content of the line
     * KEYOUT   i.e. K2: a word appearing in the line
     * VALUEOUT i.e. V2: the count for that word, always 1
     */
    static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        protected void map(LongWritable k1, Text v1, Context context)
                throws java.io.IOException, InterruptedException {
            String[] splited = v1.toString().split("\t");
            for (String word : splited) {
                context.write(new Text(word), new LongWritable(1));
            }
        }
    }

    /**
     * KEYIN    i.e. K2: a word appearing in a line
     * VALUEIN  i.e. V2: the count for that word
     * KEYOUT   i.e. K3: a distinct word
     * VALUEOUT i.e. V3: the total number of occurrences of that word
     */
    static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
        protected void reduce(Text k2, java.lang.Iterable<LongWritable> v2s,
                Context ctx) throws java.io.IOException, InterruptedException {
            long times = 0L;
            for (LongWritable count : v2s) {
                times += count.get();
            }
            ctx.write(k2, new LongWritable(times));
        }
    }
}
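A note on the Configuration: one reason to go through ToolRunner is that it parses Hadoop's generic options (for example -D key=value), but those land in the Configuration that ToolRunner injects via Configured.setConf(), not in a freshly constructed one. The following is a minimal sketch of a run() method that keeps those options; it is a variant offered here as an assumption, not the code from the original post, and it relies on the same imports and class as above:

    public int run(String[] args) throws Exception {
        // Variant (not from the original post): reuse the Configuration that
        // ToolRunner populated, so -D options from the command line are kept.
        Configuration conf = getConf();
        FileSystem fileSystem = FileSystem.get(new URI(args[0]), conf);
        Path outPath = new Path(args[1]);
        if (fileSystem.exists(outPath)) {
            fileSystem.delete(outPath, true);
        }
        // ... the rest of the job setup is the same as in the code above
        return 0;
    }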
2. After making the change, do not run the program from Eclipse. Instead, export it as a jar and copy it to the /usr/local directory on the Linux machine using WinSCP.
3. Run it from the Linux command line; a sketch of the commands follows below. Once the job completes successfully, check the output.
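For reference, a run might look like the following. The jar name, HDFS address, and paths are placeholders for illustration rather than values from the original setup, and they assume the exported jar has no Main-Class entry in its manifest, so the class name is passed explicitly:

    cd /usr/local
    hadoop jar wordcount.jar cmd.WordCountApp hdfs://hadoop0:9000/input hdfs://hadoop0:9000/output
    hadoop fs -cat hdfs://hadoop0:9000/output/part-r-00000

The two paths after the class name become arg0[0] and arg0[1] in run(), which is exactly how the input and output locations are now specified at runtime instead of being hard-coded.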
Learn together, improve together.