Hadoop 单词计数（WordCount）示例
package com.yw.hadoop273;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Word-count mapper: for every input line, emits one {@code (word, 1)} pair
 * per whitespace-separated word.
 *
 * <p>Created 2019/9/18 20:58.
 *
 * @author YW
 */
public class WCWordCount extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** The count written alongside every word. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Output key, refilled for each word instead of allocating a new object. */
    private final Text keyOut = new Text();

    /**
     * Splits one line of input into words and writes {@code (word, 1)} for each.
     *
     * @param key     input key supplied by the input format; not used here
     * @param value   the text of the current input line
     * @param context sink for the emitted {@code (word, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split on runs of whitespace. Splitting on the empty string "" would
        // produce one token per character, i.e. a character count, not a word count.
        String[] words = value.toString().split("\\s+");
        for (String word : words) {
            // A blank line or leading whitespace yields an empty first token.
            if (word.isEmpty()) {
                continue;
            }
            keyOut.set(word);
            context.write(keyOut, ONE);
        }
    }
}
package com.yw.hadoop273;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Word-count reducer: adds up the counts received for a key and writes the
 * key together with that total.
 *
 * <p>Created 2019/9/18 21:20.
 *
 * @author YW
 */
public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Aggregates the counts of a single key.
     *
     * @param key     the key whose counts are being combined
     * @param values  the counts received for {@code key}
     * @param context sink for the resulting {@code (key, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        IntWritable total = new IntWritable(sum(values));
        context.write(key, total);
    }

    /** Returns the sum of all the given counts (0 when there are none). */
    private static int sum(Iterable<IntWritable> counts) {
        int running = 0;
        for (IntWritable current : counts) {
            running += current.get();
        }
        return running;
    }
}
package com.yw.hadoop273; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; /** * @Auther: YW * @Date: 2019/9/16 21:20 * @Description: */ public class WCApp { public static void main(String[] args) throws IOException { Configuration conf = new Configuration(); // 删除已有的目录 if (args.length>1){ FileSystem.get(conf).delete(new Path(args[1])); } Job job = Job.getInstance(conf); // 设置job属性 job.setJobName("WCApp"); // 设置作业名称 job.setJarByClass(WCApp.class); // 设置搜索类 job.setInputFormatClass(TextInputFormat.class);// 设置输入格式 FileInputFormat.addInputPath(job,new Path(args[0])); // 输入路径 FileOutputFormat.setOutputPath(job,new Path(args[1]));// 输出路径 job.setMapperClass(WCWordCount.class); // 设置mapper 类 job.setReducerClass(WCReducer.class); // 设置reducer类 job.setNumReduceTasks(1); // reducer个数 job.setMapOutputKeyClass(Text.class); job.setMapOutputKeyClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputKeyClass(IntWritable.class); } }