每日总结2023/9/27(mapreduce案例)
参考例文：
MapReduce经典案例实战_mapreduce编程案例-CSDN博客
map代码（注：下面粘贴的实际上是 InvertedIndexReducer 的代码，与后文的 reduce 代码完全重复；InvertedIndexMapper 的代码缺失，需要补上）
package cn.com.sise.mapreduce.invertedindex; import java.io.IOException; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; public class InvertedIndexReducer extends Reducer<Text, Text, Text, Text> { private static Text result = new Text(); //输入: <MapReduce file3:2> //输出: <MapReduce file1:1;file2:1;file3:2;> @Override protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { //生成文档列表 String fileList = new String(); for (Text value : values) { fileList += value.toString() +";"; } result.set(fileList); context.write(key, result); } }
reduce代码
package cn.com.sise.mapreduce.invertedindex; import java.io.IOException; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; public class InvertedIndexReducer extends Reducer<Text, Text, Text, Text> { private static Text result = new Text(); //输入: <MapReduce file3:2> //输出: <MapReduce file1:1;file2:1;file3:2;> @Override protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { //生成文档列表 String fileList = new String(); for (Text value : values) { fileList += value.toString() +";"; } result.set(fileList); context.write(key, result); } }
runner代码
package cn.com.sise.mapreduce.invertedindex; import java.io.IOException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input. FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.yarn.service.api.records.Configuration; public class InvertedIndexDriver { /** * @param args * @throws InterruptedException * @throws IOException * @throws ClassNotFoundException */ public static void main(String[] args) throws ClassNotFoundException,IOException,InterruptedException{ Configuration conf = new Configuration(); Job job = Job.getInstance(); job.setJarByClass(InvertedIndexDriver.class); job.setMapperClass(InvertedIndexMapper.class); job.setCombinerClass(InvertedIndexCombiner.class); job.setReducerClass(InvertedIndexReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(job, new Path("hdfs://localhost:9000/user/hadoop/inputdata")); //指定处理完成之后的结果所保存的位置 FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/user/hadoop/outputdata")); //向yarn集群提交这个job boolean res =job.waitForCompletion(true); System.exit(res? 0: 1); } }