Submitting a Hadoop job with the old Java API
We will again use the word count example from before.
The custom Mapper class
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// In the old API a custom Mapper must extend MapReduceBase and implement the Mapper interface
public class JMapper extends MapReduceBase implements
        Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    public void map(LongWritable key, Text value,
            OutputCollector<Text, LongWritable> collector, Reporter reporter)
            throws IOException {
        // split the input line into words (the input file here is tab-separated)
        String[] ss = value.toString().split("\t");
        for (String s : ss) {
            // old API: use collector.collect instead of context.write
            collector.collect(new Text(s), new LongWritable(1));
        }
    }
}
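For comparison with the collector.collect call above, here is a rough sketch of the same Mapper written against the new API (org.apache.hadoop.mapreduce), where output goes through context.write; the class name NewJMapper is just an illustrative placeholder, not part of the original example.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// new API: extend org.apache.hadoop.mapreduce.Mapper instead of implementing the mapred interface
public class NewJMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] ss = value.toString().split("\t");
        for (String s : ss) {
            // new API: context.write instead of collector.collect
            context.write(new Text(s), new LongWritable(1));
        }
    }
}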
The custom Reducer class
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

// In the old API a custom Reducer must extend MapReduceBase and implement the Reducer interface
public class JReducer extends MapReduceBase implements
        Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    public void reduce(Text key, Iterator<LongWritable> value,
            OutputCollector<Text, LongWritable> collector, Reporter reporter)
            throws IOException {
        long sum = 0;
        // the values arrive as an Iterator rather than an Iterable,
        // so a while loop replaces the foreach loop of the new API
        while (value.hasNext()) {
            sum += value.next().get();
        }
        collector.collect(key, new LongWritable(sum));
    }
}
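Again for comparison, a sketch of the equivalent Reducer in the new API, where the values arrive as an Iterable and a foreach loop works; NewJReducer is just a placeholder name for this sketch.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// new API: extend org.apache.hadoop.mapreduce.Reducer
public class NewJReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        // new API: values is an Iterable, so foreach can be used directly
        for (LongWritable v : values) {
            sum += v.get();
        }
        context.write(key, new LongWritable(sum));
    }
}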
The JSubmit class that submits and runs the job
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class JSubmit {
    public static void main(String[] args) throws IOException,
            URISyntaxException, InterruptedException, ClassNotFoundException {
        Path outPath = new Path("hdfs://localhost:9000/out");
        Path inPath = new Path("/home/hadoop/word");
        Configuration conf = new Configuration();

        // delete the output directory if it already exists
        FileSystem fs = FileSystem.get(new URI("hdfs://localhost:9000"), conf);
        if (fs.exists(outPath)) {
            fs.delete(outPath, true);
        }

        // old API: use JobConf instead of Job
        JobConf job = new JobConf(conf, JSubmit.class);

        FileInputFormat.setInputPaths(job, inPath);
        job.setInputFormat(TextInputFormat.class);

        job.setMapperClass(JMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setReducerClass(JReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        FileOutputFormat.setOutputPath(job, outPath);
        job.setOutputFormat(TextOutputFormat.class);

        // old API: use JobClient.runJob instead of job.waitForCompletion
        JobClient.runJob(job);
    }
}
As you can see, the old API is not very different from the new one; only a handful of classes are swapped out.
Note, however, that although many class names in the old API are exactly the same as in the new API,
they live in different packages: the old API classes are all under org.apache.hadoop.mapred, while the new ones are under org.apache.hadoop.mapreduce.
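To make the package difference concrete, here is a rough sketch of what the same driver looks like with the new API, assuming a Hadoop release that provides Job.getInstance; the class name NewJSubmit and the reuse of NewJMapper/NewJReducer are just placeholders for this sketch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class NewJSubmit {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // new API: Job instead of JobConf
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(NewJSubmit.class);

        job.setMapperClass(NewJMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        job.setReducerClass(NewJReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // note the package change: mapreduce.lib.input / mapreduce.lib.output
        FileInputFormat.setInputPaths(job, new Path("/home/hadoop/word"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/out"));

        // new API: waitForCompletion instead of JobClient.runJob
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}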