每日总结2023/9/27(mapreduce案例)
参考例文:
MapReduce经典案例实战_mapreduce编程案例-CSDN博客
map代码(注:下方粘贴的实际是 InvertedIndexReducer,与后文 reduce 代码重复;InvertedIndexMapper 的源码在本文中未贴出)
package cn.com.sise.mapreduce.invertedindex;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reduce stage of the inverted-index job: concatenates every value received for a key into a
 * single list in which each entry is followed by {@code ;}.
 *
 * <p>Example carried over from the original listing: for the input
 * {@code <MapReduce file3:2>} the output is {@code <MapReduce file1:1;file2:1;file3:2;>}.
 *
 * <p>NOTE(review): this listing appears under the "map" heading of the article, but the class it
 * declares is the reducer; the mapper source is not shown at this position.
 */
public class InvertedIndexReducer extends Reducer<Text, Text, Text, Text> {

    /** Output value holder, reused across {@code reduce} calls instead of allocating per key. */
    private final Text result = new Text();

    /**
     * Writes one record per key whose value is all incoming values joined in iteration order,
     * each followed by a semicolon.
     *
     * @param key     the key shared by all {@code values}
     * @param values  the values grouped under {@code key}
     * @param context the task context the joined record is written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Build the document list with a StringBuilder; repeated String += copies the whole
        // accumulated list on every iteration.
        StringBuilder fileList = new StringBuilder();
        for (Text value : values) {
            fileList.append(value.toString()).append(';');
        }
        result.set(fileList.toString());
        context.write(key, result);
    }
}
reduce代码
package cn.com.sise.mapreduce.invertedindex;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reduce stage of the inverted-index job: concatenates every value received for a key into a
 * single list in which each entry is followed by {@code ;}.
 *
 * <p>Example carried over from the original listing: for the input
 * {@code <MapReduce file3:2>} the output is {@code <MapReduce file1:1;file2:1;file3:2;>}.
 */
public class InvertedIndexReducer extends Reducer<Text, Text, Text, Text> {

    /** Output value holder, reused across {@code reduce} calls instead of allocating per key. */
    private final Text result = new Text();

    /**
     * Writes one record per key whose value is all incoming values joined in iteration order,
     * each followed by a semicolon.
     *
     * @param key     the key shared by all {@code values}
     * @param values  the values grouped under {@code key}
     * @param context the task context the joined record is written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Build the document list with a StringBuilder; repeated String += copies the whole
        // accumulated list on every iteration.
        StringBuilder fileList = new StringBuilder();
        for (Text value : values) {
            fileList.append(value.toString()).append(';');
        }
        result.set(fileList.toString());
        context.write(key, result);
    }
}
runner代码
package cn.com.sise.mapreduce.invertedindex;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Entry point that wires the inverted-index mapper, combiner and reducer into a single job,
 * runs it against fixed HDFS input/output paths, and exits with the job's status.
 */
public class InvertedIndexDriver {

    /** HDFS directory the job reads its input from. */
    private static final String INPUT_PATH = "hdfs://localhost:9000/user/hadoop/inputdata";

    /** HDFS directory the job writes its results to. */
    private static final String OUTPUT_PATH = "hdfs://localhost:9000/user/hadoop/outputdata";

    /**
     * Configures and runs the job, then terminates the JVM with exit code 0 when
     * {@code waitForCompletion} returns {@code true} and 1 otherwise.
     *
     * @param args command-line arguments; not read by this driver
     * @throws ClassNotFoundException declared by the job submission call
     * @throws IOException            declared by job creation, path setup and submission
     * @throws InterruptedException   declared by the job submission call
     */
    public static void main(String[] args)
            throws ClassNotFoundException, IOException, InterruptedException {
        // Use Hadoop's own Configuration (org.apache.hadoop.conf) and hand it to the job;
        // the YARN service-API record class of the same name is not a job configuration.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(InvertedIndexDriver.class);
        job.setMapperClass(InvertedIndexMapper.class);
        job.setCombinerClass(InvertedIndexCombiner.class);
        job.setReducerClass(InvertedIndexReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));
        // Location where the results are stored once processing completes.
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));

        // Submit the job (per the original note: to the YARN cluster) and wait for it to finish.
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南