026_The Default MapReduce Driver (the minimal-driver question)
1. A minimally configured MapReduce Driver
The driver reads the input files and writes their contents straight to the output file in the specified output directory. In that output file:
Key --- the byte offset at which each input line starts.
Value --- the original content of that line.
Each output record is therefore key + \t + value (a hypothetical sample of the output appears after the code below).
package org.dragon.hadoop.mapreduce.app.minDriver;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @author ZhuXY
 * @time 2016-3-13 9:24:49 PM
 */

/**
 * function: a minimally configured MapReduce Driver.
 *
 * Reads the input files and writes their contents to the output directory.
 * Key   --- byte offset at which each input line starts.
 * Value --- original content of each input line.
 * Each output record is key + \t + value.
 *
 * @author ZhuXY
 */
public class MinimalDriverMapReduce {

    /*
     * Mapper Class
     */

    /*
     * Reducer Class
     */

    /*
     * Driver Code
     */
    public static void main(String[] args) throws IOException,
            ClassNotFoundException, InterruptedException {
        // hard-coded HDFS paths for testing; they override any command-line arguments
        args = new String[] {
                "hdfs://hadoop-master.dragon.org:9000/wc/mininput/",
                "hdfs://hadoop-master.dragon.org:9000/wc/minoutput" };

        // get conf
        Configuration conf = new Configuration();

        // create job
        Job job = new Job(conf, MinimalDriverMapReduce.class.getSimpleName());

        // set job
        job.setJarByClass(MinimalDriverMapReduce.class);
        // 1) set input
        FileInputFormat.addInputPath(job, new Path(args[0]));

        // 2) set map

        // 3) set reduce

        // 4) set output
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // submit job
        boolean isSuccess = job.waitForCompletion(true);

        // return status
        System.exit(isSuccess ? 0 : 1);
    }
}
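With no mapper, reducer, or formats configured, the job simply copies every record through. The contents of /wc/mininput are not given in the post, so the following sample is hypothetical; it only illustrates the offset + tab + line format described above. For an input file containing

hadoop mapreduce
hello hadoop

the part-r-00000 file in /wc/minoutput would look like

0	hadoop mapreduce
17	hello hadoop

where 0 and 17 are the byte offsets at which the two lines start.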
2. Looking at the default configuration
The defaults are resolved mainly in one class of the MapReduce API; the original post shows a screenshot of that class at this point, which is not reproduced in the text.
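Since that screenshot is unavailable, a quick way to inspect the same defaults is to create a Job without configuring anything and ask it which classes it will fall back to. The sketch below is not from the original post, and the class name PrintJobDefaults is made up for illustration:

package org.dragon.hadoop.mapreduce.app.minDriver;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

/**
 * Hypothetical helper (not part of the original post): prints the classes an
 * unconfigured Job falls back to when the driver sets nothing explicitly.
 */
public class PrintJobDefaults {
    public static void main(String[] args) throws IOException, ClassNotFoundException {
        Job job = new Job(new Configuration(), "defaults");
        System.out.println("InputFormat : " + job.getInputFormatClass().getName());
        System.out.println("Mapper      : " + job.getMapperClass().getName());
        System.out.println("Partitioner : " + job.getPartitionerClass().getName());
        System.out.println("Reducer     : " + job.getReducerClass().getName());
        System.out.println("Reduce tasks: " + job.getNumReduceTasks());
        System.out.println("OutputFormat: " + job.getOutputFormatClass().getName());
    }
}

On a stock installation this should report TextInputFormat, Mapper, HashPartitioner, Reducer, one reduce task and TextOutputFormat, matching the values spelled out explicitly in section 4 below.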
3. The default input/output types of map and reduce.
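The original post illustrates the types with an image that is not included in the text. In short, and consistent with the driver in section 4: with TextInputFormat the map input key is a LongWritable (the byte offset of the line) and the value is a Text (the line itself), and because the base Mapper and Reducer classes pass every record through unchanged, the output types remain LongWritable and Text. The following is a paraphrased sketch of that identity behaviour, with made-up class names, not the verbatim Hadoop source:

import java.io.IOException;

import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

// Sketch of what the base classes effectively do: output types equal input types.
class IdentityLikeMapper<K, V> extends Mapper<K, V, K, V> {
    @Override
    protected void map(K key, V value, Context context)
            throws IOException, InterruptedException {
        context.write(key, value); // emit every record unchanged
    }
}

class IdentityLikeReducer<K, V> extends Reducer<K, V, K, V> {
    @Override
    protected void reduce(K key, Iterable<V> values, Context context)
            throws IOException, InterruptedException {
        for (V value : values) {
            context.write(key, value); // emit each grouped value as-is
        }
    }
}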
4. Writing the minimal driver with every default spelled out
Imports:
package org.dragon.hadoop.mapreduce.app.minDriver;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
The actual code (every setter below merely restates a default, so this job behaves exactly like the minimal driver in section 1):
/**
 * @author ZhuXY
 * @time 2016-3-13 9:45:02 PM
 */

/**
 * MapReduce Minimal Driver with the default configuration written out explicitly.
 *
 * @author ZhuXY
 */
public class TotalDefaultMinimalDriverMP {
    /*
     * Mapper Class
     */

    /*
     * Reducer Class
     */

    /*
     * Driver Code
     */
    public static void main(String[] args) throws IOException,
            ClassNotFoundException, InterruptedException {
        // hard-coded HDFS paths for testing; they override any command-line arguments
        args = new String[] {
                "hdfs://hadoop-master.dragon.org:9000/wc/mininput/",
                "hdfs://hadoop-master.dragon.org:9000/wc/minoutput" };

        // step 1: get conf
        Configuration conf = new Configuration();

        // step 2: create job
        // (the author reuses MinimalDriverMapReduce for the job name and the jar
        // lookup; both classes live in the same package and jar)
        Job job = new Job(conf, MinimalDriverMapReduce.class.getSimpleName());

        // step 3: set job
        // 1) set run jar class
        job.setJarByClass(MinimalDriverMapReduce.class);

        // 2) set input format
        job.setInputFormatClass(TextInputFormat.class);              // can be omitted (this is the default)

        // 3) set input path
        FileInputFormat.addInputPath(job, new Path(args[0]));

        // 4) set mapper class
        job.setMapperClass(Mapper.class);                            // can be omitted (this is the default)

        // 5) set map output key/value class
        job.setMapOutputKeyClass(LongWritable.class);                // can be omitted (this is the default)
        job.setMapOutputValueClass(Text.class);                      // can be omitted (this is the default)

        // 6) set partitioner class
        job.setPartitionerClass(HashPartitioner.class);              // can be omitted (this is the default)

        // 7) set reducer number
        job.setNumReduceTasks(1);                                    // can be omitted (default is 1)

        // 8) set sort comparator class
        job.setSortComparatorClass(LongWritable.Comparator.class);   // can be omitted

        // 9) set group comparator class
        job.setGroupingComparatorClass(LongWritable.Comparator.class); // can be omitted

        // 10) set combiner class
        // job.setCombinerClass(null); the default is no combiner, but null cannot be
        // passed explicitly, so the call is simply left out             // can be omitted

        // 11) set reducer class
        job.setReducerClass(Reducer.class);                          // can be omitted (this is the default)

        // 12) set output format
        job.setOutputFormatClass(TextOutputFormat.class);            // can be omitted (this is the default)

        // 13) set job output key/value class
        job.setOutputKeyClass(LongWritable.class);                   // can be omitted (this is the default)
        job.setOutputValueClass(Text.class);                         // can be omitted (this is the default)

        // 14) set job output path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // step 4: submit job
        boolean isSuccess = job.waitForCompletion(true);

        // step 5: return status
        System.exit(isSuccess ? 0 : 1);
    }
}
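A note on running either driver: because main() immediately overwrites args with the two HDFS paths, no command-line arguments are needed, e.g. hadoop jar minDriver.jar org.dragon.hadoop.mapreduce.app.minDriver.TotalDefaultMinimalDriverMP (the jar name minDriver.jar is hypothetical; the package and class names are from the code above). Also note that the output directory /wc/minoutput must not already exist, or FileOutputFormat will reject the job.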
If you repost this article, please be sure to keep the original source: http://www.cnblogs.com/xiangyangzhu/