MapReduce_partition
package MapReduce;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Custom-Partitioner demo: each input line is tagged by its tab-separated
 * field count ("long" / "short" / "right"), and a custom {@link Partitioner}
 * routes each tag to its own reducer so the job produces three separate
 * output files (part-r-00000 .. part-r-00002).
 */
public class MyPartitioner {
    private static final String INPUT_PATH = "hdfs://h201:9000/user/hadoop/input_par";
    private static final String OUTPUT_PATH = "hdfs://h201:9000/user/hadoop/output";

    /** Tags each input line by the number of tab-separated fields it contains. */
    public static class MyPartitionerMap extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws java.io.IOException, InterruptedException {
            String[] fields = value.toString().split("\t");
            if (fields.length > 3) {
                context.write(new Text("long"), value);   // more than 3 fields
            } else if (fields.length < 3) {
                context.write(new Text("short"), value);  // fewer than 3 fields
            } else {
                context.write(new Text("right"), value);  // exactly 3 fields
            }
        }
    }

    /**
     * The partitioner's input is the map output.
     *
     * NOTE: the key MUST be compared with key.toString().equals("long").
     * Comparing without toString() initially put every record into one
     * partition, so all results landed in a single reduce output file.
     */
    public static class MyPartitionerPar extends Partitioner<Text, Text> {

        @Override
        public int getPartition(Text key, Text value, int numPartitions) {
            // Route each tag to a fixed partition; the modulo keeps the result
            // in range even if the job runs with fewer than 3 reducers.
            switch (key.toString()) {
                case "long":
                    return 0 % numPartitions;
                case "short":
                    return 1 % numPartitions;
                case "right":
                    return 2 % numPartitions;
                default:
                    return 0;
            }
        }
    }

    /** Identity reducer: writes every (tag, line) pair straight through. */
    public static class MyPartitionerReduce extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws java.io.IOException, InterruptedException {
            for (Text val : values) {
                context.write(key, val);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        /*
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: MyPartitioner <in> <out>");
            System.exit(2);
        }
        */
        conf.set("mapred.jar", "mp1.jar");

        // Remove a stale output directory, but only if it actually exists.
        final FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH), conf);
        Path outputDir = new Path(OUTPUT_PATH);
        if (fileSystem.exists(outputDir)) {
            fileSystem.delete(outputDir, true);
        }

        // Job.getInstance replaces the deprecated new Job(conf, name) constructor.
        Job job = Job.getInstance(conf, "MyPartitioner");
        job.setNumReduceTasks(3); // one reducer per tag: long / short / right

        job.setJarByClass(MyPartitioner.class);

        job.setMapperClass(MyPartitionerMap.class);
        job.setCombinerClass(MyPartitionerReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setPartitionerClass(MyPartitionerPar.class);
        job.setReducerClass(MyPartitionerReduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, INPUT_PATH);
        FileOutputFormat.setOutputPath(job, outputDir);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
vim partition
aa 1 2
bb 2 22
cc 11
dd 1
ee 99 99 999
ff 12 23 123
[hadoop@h201 ~]$ hadoop fs -cat /user/hadoop/output/part-r-00001
18/06/10 17:55:02 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
short dd 1
short cc 11
[hadoop@h201 ~]$ hadoop fs -cat /user/hadoop/output/part-r-00000
18/06/10 17:55:16 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
long ff 12 23 123
long ee 99 99 999
[hadoop@h201 ~]$ hadoop fs -cat /user/hadoop/output/part-r-00002
18/06/10 18:01:37 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
right bb 2 22
right aa 1 2