Experiment 12-3
public static class WorldCount_Mapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value,
            Mapper<LongWritable, Text, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        // Print the input record of this split: <byte offset, line of text>
        System.out.println("split:<" + key + "," + value + ">");
        String[] strs = value.toString().split(" ");               // split the line into words on spaces
        for (String string : strs) {
            // Print the intermediate pair emitted for each word
            System.out.println("map:<" + string + ",1>");
            context.write(new Text(string), new IntWritable(1));   // emit <word, 1>
        }
    }
}
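To see what the map step produces, the short sketch below (a standalone illustration, not part of the experiment code; the class name MapSketch and the sample line are assumed) splits one line the same way and prints the <word, 1> pairs the mapper would emit.

    public class MapSketch {
        public static void main(String[] args) {
            String line = "hello hadoop hello";                      // hypothetical sample input line
            for (String word : line.split(" ")) {                    // same space-based split as the mapper
                System.out.println("map output: <" + word + ",1>");  // one <word, 1> pair per occurrence
            }
        }
    }

Running it prints <hello,1>, <hadoop,1>, <hello,1>; the framework then groups these pairs by word before they reach the reduce step.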
public static class WorldCount_Reducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Reducer<Text, IntWritable, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {
        int index = 0;                                   // running total for this word
        for (IntWritable intWritable : values) {         // iterate over all counts grouped under this key
            System.out.println("reduce:<" + key + "," + intWritable + ">");
            index += intWritable.get();                  // accumulate the counts
        }
        context.write(key, new IntWritable(index));      // emit <word, total count>
    }
}
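The shuffle phase groups all <word, 1> pairs with the same key before they reach the reducer. The sketch below (standalone; the key "hello" and its grouped counts are assumed, not taken from an actual run) repeats the reducer's accumulation on one such group.

    import java.util.Arrays;
    import java.util.List;

    public class ReduceSketch {
        public static void main(String[] args) {
            String key = "hello";                            // hypothetical key after grouping
            List<Integer> values = Arrays.asList(1, 1, 1);   // counts collected for that key by the shuffle
            int sum = 0;
            for (int v : values) {
                sum += v;                                    // same accumulation the reduce method performs
            }
            System.out.println("reduce output: <" + key + "," + sum + ">");   // prints <hello,3>
        }
    }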
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance();                           // create the job with a default Configuration
    job.setJarByClass(WorldCount.class);                   // main class used to locate the jar
    job.setOutputKeyClass(Text.class);                     // output key type
    job.setOutputValueClass(IntWritable.class);            // output value type
    job.setMapperClass(WorldCount_Mapper.class);           // Mapper class
    job.setReducerClass(WorldCount_Reducer.class);         // Reducer class
    FileInputFormat.addInputPath(job, new Path("hdfs://192.168.100.123:8020/input"));      // HDFS input path
    FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.100.123:8020/output"));   // HDFS output path (must not already exist)
    job.waitForCompletion(true);                           // submit the job and wait for it to finish
}
package hadoop.mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyWordCount {

    /*
     * KEYIN: key input to the map phase (the byte offset of the line)
     * VALUEIN: value input to the map phase (one line of the text file)
     * KEYOUT: key output by the map phase (a word)
     * VALUEOUT: value output by the map phase (the count for the word -- 1)
     *
     * Java primitive types:
     *   int, short, long, double, float, char, boolean, byte
     * Hadoop writable types:
     *   IntWritable, ShortWritable, LongWritable, DoubleWritable, FloatWritable,
     *   ByteWritable, BooleanWritable, NullWritable, Text
     * Text: a UTF-8 encoded text type
     */
    public static class WordCount_Mapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override   // override the map method
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            String[] line = value.toString().split(" ");             // split the received line into individual words on spaces
            for (String word : line) {                                // iterate over the array of words
                context.write(new Text(word), new IntWritable(1));   // count the word and write the intermediate result to the context
            }
        }
    }

    /*
     * KEYIN: key input to the reduce phase (a word)
     * VALUEIN: value input to the reduce phase (the counts for the word)
     * KEYOUT: key output by the reduce phase (the word)
     * VALUEOUT: value output by the reduce phase (the sum of the word's counts)
     *
     * In the generated reduce method, make the following changes:
     *   rename Text arg0 to Text key
     *   rename Iterable<IntWritable> arg1 to Iterable<IntWritable> values
     *   rename Context arg2 to Context context
     */
    public static class WordCount_Reducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            int sum = 0;                                    // variable that accumulates the total
            for (IntWritable intWritable : values) {        // iterate over the counts grouped under the same word
                sum += intWritable.get();                   // add up the counts for this word
            }
            context.write(key, new IntWritable(sum));       // write the computed result to the context
        }
    }

    // submit the job
    public static void main(String[] args) throws Exception {
        String inPath = "hdfs://192.168.182.10:8020/input.txt";
        String outPath = "hdfs://192.168.182.10:8020/output/";
        Configuration conf = new Configuration();
        Job job = Job.getInstance();                        // create the Job object
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(new Path(outPath))) {                 // delete the output directory if it already exists
            fs.delete(new Path(outPath), true);
        }
        job.setJarByClass(MyWordCount.class);               // set MyWordCount as the main class to run
        job.setMapperClass(WordCount_Mapper.class);         // set the Mapper class
        job.setReducerClass(WordCount_Reducer.class);       // set the Reducer class
        job.setOutputKeyClass(Text.class);                  // set the output key type
        job.setOutputValueClass(IntWritable.class);         // set the output value type
        // set the input path of the file (adjust to your own IP and HDFS address)
        FileInputFormat.addInputPath(job, new Path(inPath));
        // set the output path for the computed result (adjust to your own IP and HDFS address)
        FileOutputFormat.setOutputPath(job, new Path(outPath));
        System.exit(job.waitForCompletion(true) ? 0 : 1);   // submit the job and wait for it to finish
    }
}
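As a quick sanity check of the job's result, the Hadoop-free sketch below (the local file name input.txt is an assumption; use a local copy of the HDFS input file) counts words with a plain TreeMap. Its output should match the word counts the job writes to the output directory.

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.Map;
    import java.util.TreeMap;

    public class LocalWordCountCheck {
        public static void main(String[] args) throws IOException {
            Map<String, Integer> counts = new TreeMap<>();               // TreeMap keeps the words in sorted order
            for (String line : Files.readAllLines(Paths.get("input.txt"))) {
                for (String word : line.split(" ")) {
                    counts.merge(word, 1, Integer::sum);                 // same add-one-per-occurrence logic as map + reduce
                }
            }
            counts.forEach((word, count) -> System.out.println(word + "\t" + count));
        }
    }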