Using DistributedCache
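
The example below takes the classic WordCount skeleton and uses DistributedCache to ship a side file to every task: the file is loaded into an in-memory map in setup(), and map() joins each input token against it. The code expects the cached directory to contain a comma-separated file named 456.txt with keys "1" through "5"; a minimal sketch of such a file (these values are placeholders, not from the original post):

    1,valueA
    2,valueB
    3,valueC
    4,valueD
    5,valueE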

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    public static class TokenizerMapper extends
            Mapper<Object, Text, Text, Text> {
        // In-memory lookup table built from the cached side file in setup()
        Map<String, String> map = new HashMap<String, String>();

        private Text word = new Text();
        // Local paths of the files distributed via DistributedCache
        Path[] words;

        @Override
        protected void setup(Context context) throws IOException,
                InterruptedException {
            // Resolve the local, task-side copies of the cached files
            words = DistributedCache.getLocalCacheFiles(context.getConfiguration());
            String path = words[0].toString();
            String line;
            String[] tokens;
            // The cached path is expected to be a directory containing
            // 456.txt, a file of comma-separated key,value pairs that is
            // loaded into the in-memory lookup map
            BufferedReader joinReader = new BufferedReader(new FileReader(path + "/456.txt"));
            while ((line = joinReader.readLine()) != null) {
                tokens = line.split(",");
                map.put(tokens[0], tokens[1]);
            }
            joinReader.close();
        }

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                // Emit each token together with the concatenated values for
                // keys "1".."5" from the cached lookup file
                context.write(word, new Text(map.get("1") + map.get("2")
                        + map.get("3") + map.get("4") + map.get("5")));
            }
        }
    }

    // The class name is kept from the WordCount skeleton, but this reducer
    // simply concatenates the string values emitted for each key
    public static class IntSumReducer extends Reducer<Text, Text, Text, Text> {

        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            String url = "";
            for (Text val : values) {
                url = url + val.toString();
            }
            context.write(key, new Text(url));
        }
    }

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();

        // Register the HDFS path with DistributedCache before creating the
        // Job, since the Job takes its own copy of the Configuration
        DistributedCache.addCacheFile(new URI("/test/output"), conf);
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);

        job.setMapperClass(TokenizerMapper.class);
        // Reusing the reducer as a combiner is harmless here only because
        // the reduce step is plain string concatenation
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}
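
A note on API versions: the DistributedCache class used above is deprecated in Hadoop 2.x. A minimal sketch of the non-deprecated equivalents, assuming the same /test/output path (the cached files come back as URIs rather than Paths):

    // Driver side: create the Job first, then register the cache file on it
    Job job = Job.getInstance(conf, "word count");
    job.addCacheFile(new URI("/test/output"));

    // Mapper.setup() side: lists the registered cache files as URIs
    URI[] cacheFiles = context.getCacheFiles();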
