WordCount
package com.ibifeng.hadoop.senior.mapreduce;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
    // step 1: Mapper class
    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        private Text mapOutputKey = new Text();
        private final static IntWritable mapOutputValue = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // the raw line of input
            String lineValue = value.toString();
            // split the line into tokens (words)
            StringTokenizer stringTokenizer = new StringTokenizer(lineValue);
            // emit each word with a count of 1
            while (stringTokenizer.hasMoreTokens()) {
                String word = stringTokenizer.nextToken();
                mapOutputKey.set(word);
                context.write(mapOutputKey, mapOutputValue);
            }
        }
    }
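    /*
     * Between map and reduce, the framework shuffles and sorts the map
     * output, so the reducer receives each distinct word together with
     * all of the 1s emitted for it, e.g. ("hadoop", [1, 1, 1]).
     */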
    // step 2: Reducer class
    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable outputValue = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // running total for this word
            int sum = 0;
            // add up all the counts emitted for this key
            for (IntWritable value : values) {
                sum += value.get();
            }
            // emit the word and its total count
            outputValue.set(sum);
            context.write(key, outputValue);
        }
    }
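    /*
     * Optional optimization (not in the original listing): because this
     * reduce is associative and commutative, the same class can be reused
     * as a combiner to pre-aggregate counts on the map side, by calling
     * job.setCombinerClass(WordCountReducer.class) in the driver below.
     */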
    // step 3: Driver, assembles and submits the job
    public int run(String[] args) throws Exception {
        // 1. get configuration
        Configuration configuration = new Configuration();
        // 2. create the job
        Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
        // the jar to ship to the cluster
        job.setJarByClass(this.getClass());
        // 3. set the input path
        Path inPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inPath);
        // map phase
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // reduce phase
        job.setReducerClass(WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // output path (must not exist yet, or the job will fail)
        Path outPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outPath);
        // submit the job and wait for it to finish
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }
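    /*
     * Note (an assumption, not in the original listing): if the output
     * directory may already exist from an earlier run, it can be removed
     * before submission, e.g.:
     *   FileSystem fs = FileSystem.get(configuration);
     *   if (fs.exists(outPath)) { fs.delete(outPath, true); }
     */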
    // step 4: run the program
    public static void main(String[] args) throws Exception {
        int status = new WordCount().run(args);
        System.exit(status);
    }
}
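The run(String[]) method above follows the shape of Hadoop's Tool interface without actually implementing it. A minimal sketch of the same driver wired through Tool/ToolRunner, which adds standard parsing of generic Hadoop options (-D, -files, and so on), might look like this; the class name WordCountTool is just an illustration, and it reuses the mapper and reducer defined above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// A sketch, not part of the original listing: the same job driven via Tool/ToolRunner.
public class WordCountTool extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // getConf() returns the Configuration already populated by ToolRunner
        Job job = Job.getInstance(getConf(), this.getClass().getSimpleName());
        job.setJarByClass(this.getClass());

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(WordCount.WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(WordCount.WordCountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // ToolRunner strips the generic options before handing args to run()
        int status = ToolRunner.run(new Configuration(), new WordCountTool(), args);
        System.exit(status);
    }
}

Packaged into a jar, either driver would be launched with something like hadoop jar wordcount.jar com.ibifeng.hadoop.senior.mapreduce.WordCount <input path> <output path>, where the paths are placeholders for HDFS directories.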