Hadoop 2.7.2 WordCount Example
1. Configure pom.xml
```xml
<!-- dependencies -->
<dependencies>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.7.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.7.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.7.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.19.0</version>
    </dependency>
</dependencies>

<!-- packaging -->
<build>
    <plugins>
        <plugin>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.8.1</version>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
            </configuration>
        </plugin>
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
                <archive>
                    <manifest>
                        <mainClass>com.hxy.mr.wordcount.WordCountDriver</mainClass>
                    </manifest>
                </archive>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
```
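With the assembly plugin bound to the `package` phase, running `mvn clean package` builds the project and additionally produces a self-contained `target/<artifact>-jar-with-dependencies.jar` whose manifest main class is `com.hxy.mr.wordcount.WordCountDriver`. That jar can then be submitted with `hadoop jar <artifact>-jar-with-dependencies.jar <input> <output>` (the artifact name and paths depend on your project settings).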
2. Mapper
```java
package com.hxy.mr.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordcountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // reuse the output key/value objects across map() calls
    Text k = new Text();
    IntWritable v = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // read one line of input
        String line = value.toString();

        // split the line into words
        String[] words = line.split(" ");

        // emit (word, 1) for every word
        for (String word : words) {
            k.set(word);
            context.write(k, v); // write to the output buffer
        }
    }
}
```
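For example, if the current input line is `hello hadoop hello`, this mapper emits the pairs (`hello`, 1), (`hadoop`, 1) and (`hello`, 1); the framework then sorts and groups these by key before the reduce phase.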
3. Reducer
```java
package com.hxy.mr.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    IntWritable v = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // sum the counts for this word
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        v.set(sum);
        context.write(key, v);
    }
}
```
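After the shuffle, the reducer for key `hello` receives the value list [1, 1] and writes (`hello`, 2). To make the end-to-end data flow concrete before wiring up the driver, here is a minimal, framework-free Java sketch of what the job computes; the class name and sample lines are invented for illustration:

```java
// A minimal, framework-free sketch of what the WordCount job computes:
// split each line into words, then sum the per-word counts.
// Illustrative only; the real job distributes this work across
// mappers and reducers.
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

public class WordCountLocalSketch {
    public static void main(String[] args) {
        List<String> lines = Arrays.asList("hello hadoop", "hello world");

        // "Map" phase: emit (word, 1) for every word in every line.
        // The TreeMap stands in for the framework's sort-and-group step,
        // and merge() plays the reducer's summation.
        Map<String, Integer> counts = new TreeMap<>();
        for (String line : lines) {
            for (String word : line.split(" ")) {
                counts.merge(word, 1, Integer::sum);
            }
        }

        // prints: hadoop 1, hello 2, world 1
        counts.forEach((w, c) -> System.out.println(w + "\t" + c));
    }
}
```

In the real job this grouping happens across the cluster during the shuffle rather than inside one JVM.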
4. Driver
```java
package com.hxy.mr.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordCountDriver {

    private static Configuration conf = new Configuration();

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        // get the job instance
        Job job = Job.getInstance(conf);

        // locate the jar via the driver class
        job.setJarByClass(WordCountDriver.class);

        // wire up the mapper and reducer
        job.setMapperClass(WordcountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        // mapper output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // final output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // input and output paths from the command line
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // submit the job and wait for it to finish
        job.waitForCompletion(true);
    }
}
```
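Because the reducer's integer addition is associative and commutative, the same `WordCountReducer` class can optionally also be registered as a combiner with `job.setCombinerClass(WordCountReducer.class);` to pre-aggregate counts on the map side and shrink the shuffle; this is a standard WordCount optimization and is not part of the original code above. It is also common to pass the boolean result of `waitForCompletion` to `System.exit` so the process exit code reflects job success.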
5. IDEA run configuration
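(The original post appears to have shown a screenshot of the IDEA run configuration here, which did not survive extraction.) To run `WordCountDriver` directly in IDEA, set the run configuration's program arguments to an input path followed by an output path, e.g. `d:/input d:/output` (illustrative paths only). Note that the output directory must not already exist; `FileOutputFormat` fails the job if it does.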
6. log4j.properties
```properties
# Send log messages of level DEBUG and above to the console appender defined below
log4j.rootLogger=DEBUG,console

# console appender configuration
log4j.appender.console = org.apache.log4j.ConsoleAppender
log4j.appender.console.Target = System.out
log4j.appender.console.Threshold=DEBUG
log4j.appender.console.layout = org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=[%c]-%m%n
```
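Note that this file uses the log4j 1.x properties format, which matches the log4j 1.2 that Hadoop 2.7.2 already pulls in transitively; the `log4j-core` 2.19.0 dependency declared in the pom does not read this file, so it could likely be dropped without affecting the logging shown here.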