Below is the WordCount.java class:
  package com.ll;

  import java.io.IOException;
  import java.util.Iterator;
  import java.util.StringTokenizer;

  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.IntWritable;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapred.FileInputFormat;
  import org.apache.hadoop.mapred.FileOutputFormat;
  import org.apache.hadoop.mapred.JobClient;
  import org.apache.hadoop.mapred.JobConf;
  import org.apache.hadoop.mapred.MapReduceBase;
  import org.apache.hadoop.mapred.Mapper;
  import org.apache.hadoop.mapred.OutputCollector;
  import org.apache.hadoop.mapred.Reducer;
  import org.apache.hadoop.mapred.Reporter;
  import org.apache.hadoop.mapred.TextInputFormat;
  import org.apache.hadoop.mapred.TextOutputFormat;

  public class WordCount {

      // Mapper: splits each input line into whitespace-separated tokens
      // and emits a (word, 1) pair for every token.
      public static class Map extends MapReduceBase implements
              Mapper<LongWritable, Text, Text, IntWritable> {
          private final static IntWritable one = new IntWritable(1);
          private Text word = new Text();

          public void map(LongWritable key, Text value,
                  OutputCollector<Text, IntWritable> output, Reporter reporter)
                  throws IOException {
              String line = value.toString();
              StringTokenizer tokenizer = new StringTokenizer(line);
              while (tokenizer.hasMoreTokens()) {
                  word.set(tokenizer.nextToken());
                  output.collect(word, one);
              }
          }
      }

      // Reducer: sums all the counts collected for a word and emits
      // (word, total).
      public static class Reduce extends MapReduceBase implements
              Reducer<Text, IntWritable, Text, IntWritable> {
          public void reduce(Text key, Iterator<IntWritable> values,
                  OutputCollector<Text, IntWritable> output, Reporter reporter)
                  throws IOException {
              int sum = 0;
              while (values.hasNext()) {
                  sum += values.next().get();
              }
              output.collect(key, new IntWritable(sum));
          }
      }

      public static void main(String[] args) throws Exception {
          JobConf conf = new JobConf(WordCount.class);
          conf.setJobName("wordcount");

          conf.setOutputKeyClass(Text.class);
          conf.setOutputValueClass(IntWritable.class);

          conf.setMapperClass(Map.class);
          // The reducer doubles as a combiner: summing is associative and
          // commutative, so partial sums can safely be computed map-side.
          conf.setCombinerClass(Reduce.class);
          conf.setReducerClass(Reduce.class);

          conf.setInputFormat(TextInputFormat.class);
          conf.setOutputFormat(TextOutputFormat.class);

          // args[0] = input path, args[1] = output path
          FileInputFormat.setInputPaths(conf, new Path(args[0]));
          FileOutputFormat.setOutputPath(conf, new Path(args[1]));

          JobClient.runJob(conf);
      }
  }
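For intuition about what the job computes, here is a small worked example. The sample text is illustrative (not from the original post); given an input file with the two lines below, the job emits each word with its total count, key and value separated by a tab (TextOutputFormat's default):

  input:
    Hello World Bye World
    Hello Hadoop

  output:
    Bye     1
    Hadoop  1
    Hello   2
    World   2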

Compile the Java code from the Linux command line and build the jar:
  mkdir wordcount_classes
  javac -classpath ${HADOOP_HOME}/hadoop-${HADOOP_VERSION}-core.jar -d wordcount_classes/ WordCount.java
  jar -cvf /usr/joe/wordcount121.jar -C wordcount_classes/ .
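As an optional sanity check, you can list the jar's contents. Because Map and Reduce are nested classes, javac emits separate WordCount$Map.class and WordCount$Reduce.class files, and the listing should include entries like:

  jar -tf /usr/joe/wordcount121.jar
  META-INF/MANIFEST.MF
  com/ll/WordCount.class
  com/ll/WordCount$Map.class
  com/ll/WordCount$Reduce.class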
Notes:
  • javac: compiles Java source files into .class files;
  • -classpath: points at the jar(s) the code depends on — here, the Hadoop core jar;
  • -d: sets the output directory for the compiled classes;
  • -C wordcount_classes/ . (in the jar command): adds the contents of wordcount_classes/ relative to that directory, so the com/ll/ package layout is preserved inside the jar.
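With the jar built, a typical run looks like the following (the HDFS paths are placeholders for illustration, not from the original post). Since the jar was created without a Main-Class manifest entry, the driver class com.ll.WordCount must be named explicitly, and the output directory must not exist before the job starts:

  hadoop fs -put local_input.txt /usr/joe/wordcount/input/
  hadoop jar /usr/joe/wordcount121.jar com.ll.WordCount /usr/joe/wordcount/input /usr/joe/wordcount/output
  hadoop fs -cat /usr/joe/wordcount/output/part-00000

With the default single reducer, the results land in part-00000 under the output directory.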
posted on 2016-10-09 16:19 by ssslinppp