Hadoop MapReduce入门
一:配置pom
<dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-core</artifactId> <version>1.0.1</version> </dependency>
二:测试代码
package com.jachs.hadoop;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

/***
 * Classic Hadoop MapReduce word-count example using the old {@code mapred} API.
 *
 * @author zhanchaohan
 */
public class WordCount {
    /* Input file path. Test content of the file:
     * ------------------------
     * Hello Workd Bye World
     * Hello Hadoop Goodbye Hadoop
     * Hello Workd Bye World And
     * Hello Hadoop Goodbye Hadoop
     * ------------------------
     */
    private static final String InputFile = "/usr/jachs/hadoop/A";
    // Output directory (must not already exist when the job is submitted).
    private static final String OutDir = "/usr/jachs/hadoop/B";

    /**
     * Mapper: tokenizes each input line and emits a {@code <word, 1>} pair
     * per token, which the Reduce phase then sums per word.
     */
    public static class Map extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            String line = value.toString();
            StringTokenizer token = new StringTokenizer(line);
            while (token.hasMoreTokens()) {
                word.set(token.nextToken());
                // BUG FIX: emit the tokenized word, not the whole input line.
                // The original collected `value` (the full line), so the job
                // counted duplicate lines instead of individual words.
                output.collect(word, one);
            }
        }
    }

    /**
     * Reducer: sums the occurrence counts collected for each word key.
     */
    public static class Reduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    /**
     * Configures and submits the word-count job: text input/output formats,
     * {@code Text}/{@code IntWritable} output types, and the Map/Reduce
     * classes above.
     *
     * @throws IOException if job submission or execution fails
     */
    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordCount");
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);
        conf.setMapperClass(Map.class);
        conf.setReducerClass(Reduce.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        FileInputFormat.setInputPaths(conf, new Path(InputFile));
        FileOutputFormat.setOutputPath(conf, new Path(OutDir));
        JobClient.runJob(conf);
    }
}
三:在 Windows 下执行会报异常,需要修改源码。返回结果如下图,打印每个单词出现的个数
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek “源神”启动!「GitHub 热点速览」
· 微软正式发布.NET 10 Preview 1:开启下一代开发框架新篇章
· C# 集成 DeepSeek 模型实现 AI 私有化(本地部署与 API 调用教程)
· DeepSeek R1 简明指南:架构、训练、本地部署及硬件要求
· 2 本地部署DeepSeek模型构建本地知识库+联网搜索详细步骤