The First Hadoop Program: WordCount
The complete program consists of three parts: a Mapper that emits a (word, 1) pair for every token, a Reducer that sums the counts per word, and a driver in main that wires the job together.

package test;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/*
 * Author: 翟超科
 * Date: 2019.9.3
 * Task: word counting with MapReduce
 */
public class WordCount {

    // The map class extends Mapper and implements the map phase
    public static class doMapper extends Mapper<Object, Text, Text, IntWritable> {
        // Constant 1, emitted as the value for every word
        private static final IntWritable one = new IntWritable(1);
        // Reusable key object holding the current word
        private Text word = new Text();

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the current line (one HDFS record) into tokens
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            // Emit a (word, 1) pair for every token on the line
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, one);
            }
        }
    }

    // The reduce class aggregates the (word, 1) pairs grouped by key
    public static class doReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        // Reusable value object holding the total count for the current key
        private IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0; // running total for this word
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance();
        job.setJobName("WordCount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(doMapper.class);
        job.setReducerClass(doReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        Path in = new Path("hdfs://192.168.13.101:9000/data");    // input location on HDFS
        Path out = new Path("hdfs://192.168.13.101:9000/output"); // output location; must not exist yet
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
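To see what the job actually does, it helps to trace a small hypothetical input through the two phases. Suppose the /data directory contains a file with these two lines:

hello world
hello hadoop

Each map call splits one line into tokens and emits one (word, 1) pair per token: (hello, 1), (world, 1), (hello, 1), (hadoop, 1). The shuffle phase then groups the pairs by key, so each reduce call receives one word together with all of its 1s, e.g. hello -> [1, 1]. The reducer sums each list and writes one tab-separated line per word, with keys in sorted order:

hadoop  1
hello   2
world   1

To run the job, package the class into a jar (the name wordcount.jar below is only an example) and submit it with hadoop jar wordcount.jar test.WordCount. When the job finishes, the result can be inspected with hdfs dfs -cat /output/part-r-00000, where part-r-00000 is the default name of the first reducer's output file. Note that the /output directory must not exist before the run; FileOutputFormat refuses to start a job whose output directory already exists.

Because this reducer just sums integers, it could also be registered as a combiner with job.setCombinerClass(doReduce.class); so that partial sums are computed on the map side before the shuffle. The WordCount example that ships with Hadoop does exactly this; it is omitted from the listing above to keep the first program minimal.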