Hadoop--map/reduce实现单词计数
1 import org.apache.hadoop.fs.Path; 2 import org.apache.hadoop.io.*; 3 import org.apache.hadoop.mapred.*; 4 5 import java.io.IOException; 6 import java.util.*; 7 8 public class WordCount { 9 10 /* 11 * 实现输入内容单词的计数功能 12 * 一、mapper方法将输入内容处理为<key1,value1>形式 13 * 二、reduce方法接收mapper的结果,将相同key1的value值相加得到单词的个数 14 * 三、输出得到的结果到hdfs中 15 * 16 * */ 17 18 //main函数 19 public static void main(String[] args) throws Exception{ 20 JobConf conf=new JobConf(WordCount.class); 21 conf.setJobName("WordCount"); 22 conf.setOutputKeyClass(Text.class); 23 conf.setOutputValueClass(IntWritable.class); 24 25 conf.setMapperClass(Map.class); 26 conf.setReducerClass(Reduce.class); 27 28 conf.setInputFormat(TextInputFormat.class); 29 conf.setOutputFormat(TextOutputFormat.class); 30 31 FileInputFormat.setInputPaths(conf,new Path(args[0])); 32 FileOutputFormat.setOutputPath(conf, new Path(args[1])); 33 34 JobClient.runJob(conf); 35 36 } 37 38 //map函数 39 public static class Map extends MapReduceBase implements Mapper<LongWritable,Text, 40 Text,IntWritable>{ 41 private final static IntWritable one=new IntWritable(1); 42 private Text word=new Text(); 43 44 public void map(LongWritable key,Text value, 45 OutputCollector<Text,IntWritable>output,Reporter reporter)throws IOException{ 46 String line=value.toString(); 47 StringTokenizer tokenizer=new StringTokenizer(line); 48 while(tokenizer.hasMoreTokens()){ 49 word.set(tokenizer.nextToken()); 50 output.collect(word, one); 51 52 } 53 54 } 55 56 } 57 58 //reduce函数 59 public static class Reduce extends MapReduceBase implements Reducer<Text,IntWritable, 60 Text,IntWritable>{ 61 public void reduce(Text key,Iterator<IntWritable>values,OutputCollector<Text, 62 IntWritable>output,Reporter repoter) throws IOException{ 63 int sum=0; 64 while(values.hasNext()){ 65 sum+=values.next().get(); 66 } 67 output.collect(key,new IntWritable(sum)); 68 } 69 } 70 71 72 73 }
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步