MapReduce Examples
Data deduplication: emit each record as a key and let the framework do the work — identical keys are merged during the shuffle, so the reducer sees every distinct record exactly once and simply writes it out.
    import java.io.IOException;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class Dedup {

        // Emit each input line as the key; duplicate lines collapse into one group in the shuffle.
        public static class Map extends Mapper<Object, Text, Text, Text> {
            @Override
            public void map(Object key, Text value, Context context)
                    throws IOException, InterruptedException {
                context.write(value, new Text(""));
            }
        }

        // Each distinct line arrives exactly once as a key; write it out and ignore the values.
        public static class Reduce extends Reducer<Text, Text, Text, Text> {
            @Override
            public void reduce(Text key, Iterable<Text> values, Context context)
                    throws IOException, InterruptedException {
                context.write(key, new Text(""));
            }
        }

        public static void main(String[] args) throws Exception {
            if (args.length != 2) {
                System.err.println("Usage: Dedup <input path> <output path>");
                System.exit(-1);
            }
            Job job = Job.getInstance();
            job.setJobName("Dedup");
            job.setJarByClass(Dedup.class);
            job.setMapperClass(Map.class);
            // The reducer is idempotent, so it can double as a combiner to cut shuffle traffic.
            job.setCombinerClass(Reduce.class);
            job.setReducerClass(Reduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
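A quick sanity check (file contents and paths below are invented for illustration): given input files containing duplicate lines, the job emits each distinct line once.

    input (across file1, file2):    output:
    2012-3-1 a                      2012-3-1 a
    2012-3-1 a                      2012-3-2 b
    2012-3-2 b

Assuming the class is packaged into a jar named dedup.jar, it can be launched with the standard runner:

    hadoop jar dedup.jar Dedup <input path> <output path>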
Sorting: the mapper emits each number as an IntWritable key, and the framework's shuffle sorts the keys in ascending order before they reach the reducer; the reducer only needs to write each number out with a running rank.
    import java.io.IOException;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class Sort {

        // Parse each line as an integer and emit it as the key; the shuffle sorts the keys.
        public static class Map extends Mapper<Object, Text, IntWritable, IntWritable> {
            private final IntWritable data = new IntWritable();

            @Override
            public void map(Object key, Text value, Context context)
                    throws IOException, InterruptedException {
                data.set(Integer.parseInt(value.toString().trim()));
                context.write(data, new IntWritable(1));
            }
        }

        // Keys arrive in ascending order; prefix each value with its rank.
        // A duplicate number is written once per occurrence, each with its own rank.
        public static class Reduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
            private final IntWritable linenum = new IntWritable(1);

            @Override
            public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                    throws IOException, InterruptedException {
                for (IntWritable value : values) {
                    context.write(linenum, key);
                    linenum.set(linenum.get() + 1);
                }
            }
        }

        public static void main(String[] args) throws Exception {
            if (args.length != 2) {
                System.err.println("Usage: Sort <input path> <output path>");
                System.exit(-1);
            }
            Job job = Job.getInstance();
            job.setJobName("Sort");
            job.setJarByClass(Sort.class);

            job.setMapperClass(Map.class);
            job.setReducerClass(Reduce.class);
            // A single reducer is required for a globally sorted result.
            job.setNumReduceTasks(1);

            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(IntWritable.class);

            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
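A hypothetical run (numbers invented for illustration) shows the rank-number output format:

    input:    output:
    32        1    2
    7         2    7
    2         3    7
    7         4    32

Note the design constraint here: the rank counter lives inside a single reducer instance, so the job must run with exactly one reduce task (set explicitly above via setNumReduceTasks(1)). With multiple reducers, each partition would be sorted only locally and every reducer would restart its ranks at 1.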
Computing an average: the mapper parses each "name score" line and emits (name, score) pairs; the reducer sums the scores grouped under each name and divides by their count.
    import java.io.IOException;
    import java.util.StringTokenizer;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class Score {

        // Each input line has the form "name score"; emit (name, score).
        // TextInputFormat delivers one line per map() call, so no extra line-splitting is needed.
        public static class Map extends Mapper<Object, Text, Text, IntWritable> {
            @Override
            public void map(Object key, Text value, Context context)
                    throws IOException, InterruptedException {
                StringTokenizer tokenizer = new StringTokenizer(value.toString());
                String name = tokenizer.nextToken();
                int score = Integer.parseInt(tokenizer.nextToken());
                context.write(new Text(name), new IntWritable(score));
            }
        }

        // Sum all scores for one name and divide by the count (integer average).
        public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
            @Override
            public void reduce(Text key, Iterable<IntWritable> values, Context context)
                    throws IOException, InterruptedException {
                int sum = 0;
                int count = 0;
                for (IntWritable value : values) {
                    sum += value.get();
                    count++;
                }
                context.write(key, new IntWritable(sum / count));
            }
        }

        public static void main(String[] args) throws Exception {
            if (args.length != 2) {
                System.err.println("Usage: Score <input path> <output path>");
                System.exit(-1);
            }
            Job job = Job.getInstance();
            job.setJobName("Score");
            job.setJarByClass(Score.class);

            job.setMapperClass(Map.class);
            job.setReducerClass(Reduce.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
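A hypothetical input/output pair (names and scores invented for illustration):

    input:          output:
    zhang 88        li    90
    zhang 92        zhang 90
    li    90

One design point worth noting: unlike Dedup, this reducer cannot be reused as a combiner, because an average of per-split averages is not in general the overall average, and the counts would be lost. If a combiner were wanted, map and combiner would have to pass (sum, count) pairs through to the reducer instead of raw scores.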