1. 编程
pom.xml文件
| <?xml version="1.0" encoding="UTF-8"?> |
| <project xmlns="http://maven.apache.org/POM/4.0.0" |
| xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
| xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
| <modelVersion>4.0.0</modelVersion> |
| |
| <groupId>org.example</groupId> |
| <artifactId>mapreduce_code</artifactId> |
| <version>1.0-SNAPSHOT</version> |
| |
| <properties> |
| <!-- Single place to bump the Hadoop release for every Hadoop dependency below. --> |
| <hadoop.version>2.8.5</hadoop.version> |
| </properties> |
| |
| <dependencies> |
| <!-- Core Hadoop APIs: Configuration, FileSystem, Writable IO types. --> |
| <dependency> |
| <groupId>org.apache.hadoop</groupId> |
| <artifactId>hadoop-common</artifactId> |
| <version>${hadoop.version}</version> |
| </dependency> |
| |
| <!-- Client-side MapReduce APIs (Job, Mapper, Reducer) used to submit jobs. --> |
| <dependency> |
| <groupId>org.apache.hadoop</groupId> |
| <artifactId>hadoop-client</artifactId> |
| <version>${hadoop.version}</version> |
| </dependency> |
| |
| <!-- Unit testing only; not packaged into the job jar (test scope). --> |
| <dependency> |
| <groupId>junit</groupId> |
| <artifactId>junit</artifactId> |
| <version>4.12</version> |
| <scope>test</scope> |
| </dependency> |
| </dependencies> |
| </project> |
WCMapper
| package com.sxuek; |
| |
| import org.apache.hadoop.io.LongWritable; |
| import org.apache.hadoop.io.Text; |
| import org.apache.hadoop.mapreduce.Mapper; |
| import java.io.IOException; |
| |
| /** |
|  * Map stage of the word-count job. |
|  * |
|  * <p>Input:  &lt;byte offset of the line, line text&gt; |
|  * <br>Output: &lt;word, 1&gt; for every word on the line. |
|  */ |
| public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> { |
| |
|     // map() runs once per input line; reusing these output objects instead |
|     // of allocating a new Text/LongWritable per token avoids a large amount |
|     // of garbage on big inputs (standard Hadoop practice). |
|     private final Text word = new Text(); |
|     private static final LongWritable ONE = new LongWritable(1L); |
| |
|     /** |
|      * Emits (token, 1) for every whitespace-separated token of the line. |
|      * |
|      * @param key     byte offset of the line in the input split (unused) |
|      * @param value   the line of text |
|      * @param context sink for the (word, 1) pairs |
|      */ |
|     @Override |
|     protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { |
|         // Split on runs of whitespace. The previous split(" ") produced |
|         // empty tokens for consecutive or leading spaces, and those empty |
|         // strings were counted as words. |
|         String[] tokens = value.toString().split("\\s+"); |
|         for (String token : tokens) { |
|             if (token.isEmpty()) { |
|                 continue; // split() can still emit one leading empty token |
|             } |
|             word.set(token); |
|             context.write(word, ONE); |
|         } |
|     } |
| } |
WCReducer
| package com.sxuek; |
| |
| import org.apache.hadoop.io.LongWritable; |
| import org.apache.hadoop.io.Text; |
| import org.apache.hadoop.mapreduce.Reducer; |
| |
| import java.io.IOException; |
| |
| /** |
|  * Reduce stage of the word-count job: sums the per-word counts emitted |
|  * by the map stage and writes one &lt;word, total&gt; pair per word. |
|  */ |
| public class WCReducer extends Reducer<Text, LongWritable, Text, LongWritable> { |
| |
|     /** |
|      * Sums all partial counts for {@code key} and emits the total. |
|      * |
|      * @param key     the word |
|      * @param values  partial counts (one per map-side emission) |
|      * @param context sink for the (word, total) pair |
|      */ |
|     @Override |
|     protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { |
|         // Accumulate the occurrences of this word across all mappers. |
|         long total = 0L; |
|         for (LongWritable partial : values) { |
|             total += partial.get(); |
|         } |
|         context.write(key, new LongWritable(total)); |
|     } |
| } |
WCDriver驱动类的实现
| package com.sxuek; |
| |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.io.LongWritable; |
| import org.apache.hadoop.io.Text; |
| import org.apache.hadoop.mapreduce.Job; |
| import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; |
| import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; |
| |
| import java.io.IOException; |
| import java.net.URI; |
| import java.net.URISyntaxException; |
| |
| /** |
|  * Driver for the word-count job: wires WCMapper and WCReducer together, |
|  * points the job at its HDFS input/output paths and submits it. |
|  */ |
| public class WCDriver { |
|     public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException { |
|         Configuration conf = new Configuration(); |
|         // All relative HDFS paths below resolve against this namenode. |
|         conf.set("fs.defaultFS", "hdfs://node1:9000"); |
| |
|         Job job = Job.getInstance(conf); |
| |
|         // Required when submitting as a jar ("hadoop jar ..."): without it |
|         // the cluster cannot locate the job classes and fails with |
|         // ClassNotFoundException: Class com.sxuek.WCMapper not found. |
|         job.setJarByClass(WCDriver.class); |
| |
|         // Map stage and its output key/value types. |
|         job.setMapperClass(WCMapper.class); |
|         job.setMapOutputKeyClass(Text.class); |
|         job.setMapOutputValueClass(LongWritable.class); |
| |
|         // Reduce stage and the job's final output key/value types. |
|         job.setReducerClass(WCReducer.class); |
|         job.setOutputKeyClass(Text.class); |
|         job.setOutputValueClass(LongWritable.class); |
| |
|         FileInputFormat.setInputPaths(job, new Path("/wc.txt"), new Path("/p/wc.txt")); |
| |
|         // MapReduce refuses to run if the output directory already exists, |
|         // so delete any leftover from a previous run first. |
|         Path path = new Path("/output"); |
|         FileSystem fs = FileSystem.get(new URI("hdfs://node1:9000"), conf, "root"); |
|         if (fs.exists(path)) { |
|             fs.delete(path, true); |
|         } |
|         FileOutputFormat.setOutputPath(job, path); |
| |
|         // Submit and block until the job finishes; true = print progress. |
|         boolean flag = job.waitForCompletion(true); |
|         if (flag) { |
|             System.out.println("成功"+flag); |
|         } else { |
|             System.out.println("失败"+flag); |
|         } |
|     } |
| } |
运行
方式一 直接运行驱动类,成功
方式二 将项目打成jar包,在linux中用命令执行
| # 将项目打成jar包,上传 |
| [root@node1 data]# rz |
| |
| [root@node1 data]# ll |
| 总用量 12 |
| -rw-r--r--. 1 root root 5219 7月 25 10:51 mapreduce_code-1.0-SNAPSHOT.jar |
| -rw-r--r--. 1 root root 108 7月 25 09:32 wc.txt |
| |
| # 运行jar包 |
| [root@node1 data]# hadoop jar mapreduce_code-1.0-SNAPSHOT.jar com.sxuek.WCDriver |
| |
| # 报错找不到Error: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class com.sxuek.WCMapper not found |
| # 解决:在WCDriver类中添加如下代码 |
| job.setJarByClass(WCDriver.class); |
| |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?