MapReduce实践

Posted on 2019-10-30 23:01  咳咳你  阅读(150)  评论(0)  编辑  收藏  举报

按教程配置好,按实验输入文本

 1 买家id 商品id 收藏日期
 2 10181 1000481 2010-04-04 16:54:31
 3 20001 1001597 2010-04-07 15:07:52
 4 20001 1001560 2010-04-07 15:08:27
 5 20042 1001368 2010-04-08 08:20:30
 6 20067 1002061 2010-04-08 16:45:33
 7 20056 1003289 2010-04-12 10:50:55
 8 20056 1003290 2010-04-12 11:57:35
 9 20056 1003292 2010-04-12 12:05:29
10 20054 1002420 2010-04-14 15:24:12
11 20055 1001679 2010-04-14 19:46:04
12 20054 1010675 2010-04-14 15:23:53
13 20054 1002429 2010-04-14 17:52:45
14 20076 1002427 2010-04-14 19:35:39
15 20054 1003326 2010-04-20 12:54:44
16 20056 1002420 2010-04-15 11:24:49
17 20064 1002422 2010-04-15 11:35:54
18 20056 1003066 2010-04-15 11:43:01
19 20056 1003055 2010-04-15 11:43:06
20 20056 1010183 2010-04-15 11:45:24
21 20056 1002422 2010-04-15 11:45:49
22 20056 1003100 2010-04-15 11:45:54
23 20056 1003094 2010-04-15 11:45:57
24 20056 1003064 2010-04-15 11:46:04
25 20056 1010178 2010-04-15 16:15:20
26 20076 1003101 2010-04-15 16:37:27
27 20076 1003103 2010-04-15 16:37:05
28 20076 1003100 2010-04-15 16:37:18
29 20076 1003066 2010-04-15 16:37:31
30 20054 1003103 2010-04-15 16:40:14
31 20054 1003100 2010-04-15 16:40:16
代码:
package mapreduce;
  
  import java.io.IOException;
  import java.util.StringTokenizer;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.IntWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 
 
public class WordCount {
 
 public static class doMapper extends Mapper<Object, Text, Text, IntWritable>{
     //第一个object表示输入key的类型,第二个text表示输入value的类型;第三个text表示输出建的类型;
     //第四个INtWritable表示输出值的类型
     
 public static final IntWritable one = new IntWritable(1);
 public static Text word = new Text();
 @Override
 protected void map(Object key, Text value, Context context)
 //key value是输入的key value context是记录输入的key,value
 throws IOException, InterruptedException {
 StringTokenizer tokenizer = new StringTokenizer(value.toString(), "\t");
 //StringTokenizer是Java的工具包中的一个类,用于将字符串进行拆分
 word.set(tokenizer.nextToken());
//返回当前位置到下一个分隔符之间的字符串
 context.write(word, one);
 //讲word存到容器中计一个数 }
}
 public static class doReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
     //输入键类型,输入值类型 输出建类型,输出值类型
 private IntWritable result = new IntWritable();
 @Override
 protected void reduce(Text key, Iterable<IntWritable> values, Context context)
 throws IOException, InterruptedException {
 int sum = 0;
 for (IntWritable value : values) {
 sum += value.get();
 }
 result.set(sum);
context.write(key, result);
 }
 }
 public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
 Job job = Job.getInstance();
 job.setJobName("WordCount");
 job.setJarByClass(WordCount.class);
 job.setMapperClass(doMapper.class);
 job.setReducerClass(doReducer.class);
 job.setOutputKeyClass(Text.class);
 job.setOutputValueClass(IntWritable.class);
 Path in = new Path("hdfs://localhost:9000/mymapreduce1/in/buyer_favorite1");
 Path out = new Path("hdfs://localhost:9000/mymapreduce1/out");
 FileInputFormat.addInputPath(job, in);
 FileOutputFormat.setOutputPath(job, out);
 System.exit(job.waitForCompletion(true) ? 0 : 1);    
 }
 }

MapReduce 对表的直接显示让我很惊喜:少了很多繁杂的终端语句,可以直接查看数据增删改查的结果。