MapReduce应用实例--排序
1 实例描述
输入文件中每行内容为一个数字,输出文件中每行2个数字,第一个数字代表序号,第二个数字代表原始数据
输入:
file1: 2 32 654 32 15 756 65223 file2: 5956 22 650 92 输出: 1 2 2 15 3 22 4 32 5 32 6 92 7 650 8 654 9 756 10 5956 11 65223
2.设计思路
利用MapReduce过程本身的排序。当key为数字类型时按照大小排序,为文本时按照字典排序
3.代码
<!--maven依赖--> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-core</artifactId> <version>1.2.1</version> </dependency>
public class Sort { public static class SortMapper extends MapReduceBase implements Mapper<Object, Text, IntWritable, IntWritable> { private static IntWritable data = new IntWritable(); // map的输出 private final static IntWritable one = new IntWritable(1); // 代表数字在map过程中出现一次 public void map(Object key, Text value, OutputCollector<IntWritable, IntWritable> output, Reporter report) throws IOException { String line = value.toString(); data.set(Integer.parseInt(line)); output.collect(data, one); } } public static class SortReduce extends MapReduceBase implements Reducer<IntWritable, IntWritable, IntWritable, IntWritable> { private static IntWritable lineum = new IntWritable(1); public void reduce(IntWritable key, Iterator<IntWritable> values, OutputCollector<IntWritable, IntWritable> output, Reporter report) throws IOException { while (values.hasNext()) { // 利用MapReduce在shuffle排序聚集分发功能进行排序(以linenum的顺序) values.next(); output.collect(lineum, key); lineum = new IntWritable(lineum.get()+1); } } } public static void main(String[] args) throws IOException { // 将输入文件存入hdfs // bin/hadoop fs -put ~/input/sort/file1 /test/sort/input/file1 // bin/hadoop fs -put ~/input/sort/file2 /test/sort/input/file2 String input = "hdfs://192.168.75.128:9000/test/sort/input"; String output = "hdfs://192.168.75.128:9000/test/sort/output"; JobConf conf = new JobConf(WordCount.class); conf.setJobName("Sort"); conf.addResource("classpath:/hadoop/core-site.xml"); conf.addResource("classpath:/hadoop/hdfs-site.xml"); conf.addResource("classpath:/hadoop/mapred-site.xml"); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(SortMapper.class); //conf.setCombinerClass(WordCountReducer.class); // 由于在reduce过程中就完成了排序,所以不需要进行combiner conf.setReducerClass(SortReduce.class); conf.setInputFormat(TextInputFormat.class);//每行记录单独作为map的输入 conf.setOutputFormat(TextOutputFormat.class);//每条记录以一行的形式存入文本 FileInputFormat.setInputPaths(conf, new Path(input)); 
FileOutputFormat.setOutputPath(conf, new Path(output)); JobClient.runJob(conf); System.exit(0); }
4.查看结果
bin/hadoop fs -cat /test/sort/output/part-00000 1 2 2 15 3 22 4 32 5 32 6 92 7 650 8 654 9 756 10 5956 11 65223
posted on 2015-09-03 17:31 huifukejian 阅读(317) 评论(0) 编辑 收藏 举报