BigInsights创建一个WordCount程序

前提条件：需要一台已经部署好的BigInsights服务器。

一、安装插件（注意Eclipse的版本，这里是Juno-4.2）

Eclipse -> Help -> Install New Software -> Add

填写插件名与插件下载地址，下载地址可以根据服务器引导获取

在浏览器中访问 http://<服务器IP>:8080 进入服务器控制台，找到引导页面，如下图：

二、新建工程

Eclipse -> New -> BigInsights -> BigInsights Project

Eclipse -> New -> BigInsights -> Java MapReduce Program

假如创建了一个工程aaaaaa，想把WordCount程序放在这个项目中，就如下图：

1. Mapper类

后面四个选项依次表示输入键、输入值、输出键、输出值的类型，即输入与输出的<K, V>类型。WordCount程序的Mapper通常以<Object, Text>为输入，以<Text, IntWritable>为输出

2. Reducer类

3. Driver类

三、编码（注意包名）

1. Mapper

package znufe.wordcount;

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper of the word-count job.
 *
 * <p>Splits every input value into tokens (using the default delimiters of
 * {@link StringTokenizer}) and emits one {@code (token, 1)} pair per token.
 */
public class WordMapper extends Mapper<Object, Text, Text, IntWritable> {

    /** The count written for every single token occurrence. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key holder; the same instance is refilled for each token. */
    private final Text word = new Text();

    /**
     * Tokenizes {@code value} and writes each token together with a count of one.
     *
     * @param key     input key; not read by this method
     * @param value   the text to tokenize
     * @param context receives the emitted {@code (token, 1)} pairs
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        final String line = value.toString();
        for (StringTokenizer tokens = new StringTokenizer(line); tokens.hasMoreTokens();) {
            final String token = tokens.nextToken();
            word.set(token);
            context.write(word, ONE);
        }
    }

}

2. Reducer

package znufe.wordcount;

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;

public class WordReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private IntWritable result = new IntWritable();
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        result.set(sum);
        context.write(key, result);
    }

}

3. Driver

package znufe.wordcount;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Driver of the word-count job: wires the mapper, combiner and reducer
 * together, sets the input/output paths and runs the job.
 */
public class WordMain {

    /**
     * Configures and runs the word-count job, then terminates the JVM.
     *
     * <p>Exit status: {@code 0} when the job completes successfully, {@code 1}
     * when it does not, {@code 2} when the argument count is wrong.
     *
     * @param args generic Hadoop options followed by exactly two program
     *             arguments: the input path and the output path
     * @throws Exception if configuring or running the job fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Parse the command line against conf; the remaining arguments are the program's own.
        String[] programArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();
        // Exactly one input path and one output path are required.
        if (programArgs.length != 2) {
            // Usage errors belong on stderr so they are not mixed into regular output.
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        // NOTE(review): newer Hadoop releases deprecate this constructor in favor of
        // Job.getInstance(conf, name); kept as-is for older clusters — confirm target version.
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordMain.class);              // main class used to locate the job jar
        job.setMapperClass(WordMapper.class);           // mapper
        job.setCombinerClass(WordReducer.class);        // combiner (same class as the reducer)
        job.setReducerClass(WordReducer.class);         // reducer

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // First program argument: input location of the job.
        FileInputFormat.addInputPath(job, new Path(programArgs[0]));
        // Second program argument: output directory of the job.
        FileOutputFormat.setOutputPath(job, new Path(programArgs[1]));

        // Submit the job, wait for it to finish, and map the outcome to the exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}