WordCount

A minimal Hadoop MapReduce word-count example: the mapper tokenizes each input line and emits (word, 1) pairs, and the reducer sums the counts for each word.

package com.ibifeng.hadoop.senior.mapreduce;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    //step 1: Map class
    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
        private Text mapOutputKey = new Text();
        private final static IntWritable mapOutputValue = new IntWritable(1);
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            //line value
            String lineValue = value.toString();
            //split the line into words
            StringTokenizer stringTokenizer = new StringTokenizer(lineValue);
            //iterate over the words
            while(stringTokenizer.hasMoreTokens()){
                //get the current word
                String word = stringTokenizer.nextToken();
                //set the output key
                mapOutputKey.set(word);
                //output
                context.write(mapOutputKey, mapOutputValue);
            }
            
        }
        
    }
    //step 2: Reduce class
    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
        private IntWritable outputValue = new IntWritable();
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            //running total
            int sum = 0;
            //iterate over the counts for this word
            for(IntWritable value: values){
                sum += value.get();
            }
            //set value
            outputValue.set(sum);
            //output
            context.write(key, outputValue);
        }
        
    }
    //step 3: Driver, assemble and submit the job
    public int run(String[] args) throws Exception {
        //1.get configuration
        Configuration configuration = new Configuration();
        //2.create Job
        Job job = Job.getInstance(configuration,this.getClass().getSimpleName());
        //run jar
        job.setJarByClass(this.getClass());
        //3.set input and mapper
        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        //4.set reducer
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        //output
        Path outPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outPath);
        //submit job
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }
    //step 4: run the program
    public static void main(String[] args) throws Exception {
        int status = new WordCount().run(args);
        System.exit(status);
    }
}
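
To try it out, package the class into a jar and submit it with the hadoop CLI. A minimal sketch of the invocation, where the jar name and the HDFS input/output paths are placeholders to replace with your own:

hadoop jar wordcount.jar com.ibifeng.hadoop.senior.mapreduce.WordCount /user/hadoop/wordcount/input /user/hadoop/wordcount/output

Note that the output directory must not already exist; FileOutputFormat refuses to write into an existing path.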
