MapReduce Testing on a YARN Cluster (1)
When the Hadoop cluster was set up, MapReduce was configured to run on the YARN framework:
[hadoop@master01 hadoop]$ mv mapred-site.xml.template mapred-site.xml
[hadoop@master01 hadoop]$ vi mapred-site.xml
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
Word counting by category is easiest to understand by analogy with a SQL GROUP BY:
the MapTask emits a count for each record according to how the key is constructed, and those counts are then handed to the ReduceTask, which aggregates them per key.
Test preparation:
First synchronize the clocks, then on master start the HDFS cluster and after that the YARN cluster (the full start-up sequence is sketched below); check with jps:
On master: first NameNode and SecondaryNameNode, then ResourceManager;
On the slaves: first DataNode, then NodeManager;
If HDFS and YARN start successfully on master but some of the slave nodes do not come up, the missing daemons can be started by hand with:
hadoop-daemon.sh start datanode
yarn-daemon.sh start nodemanager
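For reference, a minimal sketch of the usual start-up sequence on master, assuming the standard Hadoop sbin scripts are on the PATH:

[hadoop@master01 ~]$ start-dfs.sh     # starts NameNode, SecondaryNameNode and the DataNodes
[hadoop@master01 ~]$ start-yarn.sh    # starts ResourceManager and the NodeManagers
[hadoop@master01 ~]$ jps              # verify the daemons listed above are running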
Create a few txt files locally and upload them to the cluster under "/data/wordcount/src", for example:
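A minimal sketch of the upload; the file names word01.txt and word02.txt are only placeholders for whatever local files were created:

[hadoop@master01 ~]$ hdfs dfs -mkdir -p /data/wordcount/src
[hadoop@master01 ~]$ hdfs dfs -put word01.txt word02.txt /data/wordcount/src
[hadoop@master01 ~]$ hdfs dfs -ls /data/wordcount/src     # confirm the files arrived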
Word count:
Project structure diagram:
Code:
package com.mmzs.bigdata.yarn.mapreduce;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

/**
 * This is the Mapper class. Every MapReduce job must have a Mapper class; the Reducer class is optional.
 * The Mapper's main job is to select and filter the data.
 *
 * A custom Mapper must extend Hadoop's Mapper class and override its methods to carry out the MapTask.
 * The generic parameters of the Mapper superclass are, from left to right:
 * the key type of the input records, the value type of the input records,
 * the key type of the output, and the value type of the output.
 *
 * Hadoop ships its own serializable types optimized for network IO (e.g. LongWritable, Text),
 * which correspond to Java's native types: LongWritable maps to Long, Text maps to String.
 *
 * To convert a Hadoop type to a Java type, call get() (special case: Text is converted to String with toString()).
 * To convert a Java type to a Hadoop type, wrap it with the constructor, e.g.:
 *   Long k = 10L;
 *   LongWritable lw = new LongWritable(k);
 *
 * @author hadoop
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private Text outKey;
    private LongWritable outValue;

    /**
     * Instance-initialization method of the Mapper. Each MapTask owns one Mapper instance,
     * and setup() is called exactly once per MapTask, right after the Mapper is instantiated.
     */
    @Override
    protected void setup(Mapper<LongWritable, Text, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        outKey = new Text();
        outValue = new LongWritable();
    }

    /**
     * Called after setup() and before cleanup(); invoked once for every record of the file being processed.
     * key:     the byte offset of the record from the beginning of the file
     * value:   the content of the record itself
     * context: the context of the iteration
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {

        FileSplit fp = (FileSplit) context.getInputSplit();
        String fileName = fp.getPath().getName();
//      int i = fileName.lastIndexOf(".");
//      String fileNameSimple = fileName.substring(0, 1);

        String line = value.toString();
        String[] words = line.split("\\s+");
        for (String word : words) {
            outKey.set(fileName + ":: " + word);
            outValue.set(1);
            context.write(outKey, outValue);
        }
    }

    /**
     * Instance-destruction method of the Mapper:
     * called exactly once, after the instance has processed all of its data and before it is destroyed.
     */
    @Override
    protected void cleanup(Mapper<LongWritable, Text, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        outKey = null;
        outValue = null;
    }

}
package com.mmzs.bigdata.yarn.mapreduce;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * This is the Reducer class; it is optional, not mandatory. It is normally present whenever the
 * job needs counting or grouping.
 * A Reducer instance is driven by a ReduceTask; once the task finishes, the Reducer instance is destroyed.
 *
 * The four generic parameters are, from left to right:
 * the key type of the input records (the input comes from the MapTask output),
 * the value type of the input records,
 * the key type of the output records,
 * the value type of the output records.
 *
 * For a MapReduce job with a ReduceTask, the ReduceTask output is the final output of the whole job;
 * for a job without a ReduceTask, the MapTask output is the final output of the whole job.
 *
 * @author hadoop
 */
public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    private LongWritable outValue; // objects that are reused many times are kept as fields

    /**
     * Initializes the Reducer instance:
     * called exactly once, right after the Reducer is instantiated.
     */
    @Override
    protected void setup(Reducer<Text, LongWritable, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        outValue = new LongWritable(); // initialize once here
    }

    /**
     * The iteration method; called once per key group.
     * key:     a key produced by the MapTask
     * values:  the collection of values the MapTask emitted for that key
     * context: the context of the running ReduceTask
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values,
            Reducer<Text, LongWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        long sum = 0L;
        for (LongWritable count : values) {
            sum += count.get(); // add up all counts that share the same key
        }
        outValue.set(sum);
        context.write(key, outValue);
    }

    /**
     * Cleanup work before the Reducer instance is destroyed:
     * called exactly once.
     */
    @Override
    protected void cleanup(Reducer<Text, LongWritable, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        outValue = null; // release after use
    }

}
package com.mmzs.bigdata.yarn.mapreduce;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

    private static FileSystem fs;
    private static Configuration conf;
    static {
        String uri = "hdfs://master01:9000/";
        conf = new Configuration();
        try {
            fs = FileSystem.get(new URI(uri), conf, "hadoop");
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (null == args || args.length < 2) return;
        // HDFS path holding the data to be processed
        Path inputPath = new Path(args[0]);
        // output path for the results once the job has finished
        Path outputPath = new Path(args[1]);

        // the input directory must exist; an existing output directory is removed first
        if (!fs.exists(inputPath)) {
            return;
        }
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true); // true means delete recursively
        }
        //fs.mkdirs(outputPath);

        // obtain the Job instance
        Job wcJob = Job.getInstance(conf, "WordCountJob");
        // set the entry class of the jar that will be run
        // (the entry point of wcJob is the WordCountDriver class)
        wcJob.setJarByClass(WordCountDriver.class);
        // set the Mapper class the job uses
        wcJob.setMapperClass(WordCountMapper.class);
        // set the Reducer class the job uses (can be omitted if the job has no Reducer)
        wcJob.setReducerClass(WordCountReducer.class);

        // output key type of the MapTask
        wcJob.setMapOutputKeyClass(Text.class);
        // output value type of the MapTask
        wcJob.setMapOutputValueClass(LongWritable.class);

        // output key type of the whole job (can be omitted if the job has no Reducer)
        wcJob.setOutputKeyClass(Text.class);
        // output value type of the whole job (can be omitted if the job has no Reducer)
        wcJob.setOutputValueClass(LongWritable.class);

        // input path of the data the job processes
        FileInputFormat.setInputPaths(wcJob, inputPath);
        // output path of the job's results
        FileOutputFormat.setOutputPath(wcJob, outputPath);

        // submit the job to the cluster and wait for it to finish;
        // true means report the job's progress back to the client
        boolean flag = wcJob.waitForCompletion(true);
        System.exit(flag ? 0 : 1);
    }
}
Run-time arguments:
If running from Eclipse, the path arguments must carry the URI of the cluster's master, i.e. hdfs://master01:9000
Input path argument: /data/wordcount/src
Output path argument: /data/wordcount/dst
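When the project is packaged and submitted from the master node instead of Eclipse, the launch looks roughly like the sketch below; the jar name wordcount.jar is only a placeholder for whatever the project is exported as, while the driver class and paths are the ones used above:

[hadoop@master01 ~]$ hadoop jar wordcount.jar com.mmzs.bigdata.yarn.mapreduce.WordCountDriver \
    /data/wordcount/src /data/wordcount/dst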
Run results:
1. The first screenshot only suggests that the job probably succeeded, because the output directory was created;
2. Open part-r-00000 and inspect its contents to confirm that it really did succeed.
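The check can also be done from the command line; a small sketch using the output path chosen above:

[hadoop@master01 ~]$ hdfs dfs -ls /data/wordcount/dst
[hadoop@master01 ~]$ hdfs dfs -cat /data/wordcount/dst/part-r-00000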
Word count (per file):
Only the following snippet needs to be added to the map method of the WordCountMapper class from the word-count code:
FileSplit fp=(FileSplit)context.getInputSplit();
String fileName=fp.getPath().getName();
and when setting outKey, pass word + "\t" + fileName as the value;
Run-time arguments:
If running from Eclipse, the path arguments must carry the URI of the cluster's master, i.e. hdfs://master01:9000
Input path argument: /data/wordcount/src
Output path argument: /data/wordcount/dst
Run results:
Word count (occurrences per word in each file):
Project structure diagram:
Code:
package com.mmzs.bigdata.yarn.mapreduce;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class WordTimeMapper01 extends Mapper<LongWritable, Text, Text, LongWritable> {

    private Text outKey;
    private LongWritable outValue;

    @Override
    protected void setup(Mapper<LongWritable, Text, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        outKey = new Text();
        outValue = new LongWritable(1L);
    }

    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        // the name of the file this split belongs to
        FileSplit fp = (FileSplit) context.getInputSplit();
        String fileName = fp.getPath().getName();

        String line = value.toString();
        String[] words = line.split("\\s+");

        // emit ("word\tfileName", 1) so that the first job counts occurrences per word per file
        for (String word : words) {
            outKey.set(word + "\t" + fileName);
            context.write(outKey, outValue);
        }
    }

    @Override
    protected void cleanup(Mapper<LongWritable, Text, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        outKey = null;
        outValue = null;
    }

}
package com.mmzs.bigdata.yarn.mapreduce;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordTimeReducer01 extends Reducer<Text, LongWritable, Text, LongWritable> {

    private LongWritable outValue;

    @Override
    protected void setup(Reducer<Text, LongWritable, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        outValue = new LongWritable();
    }

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values,
            Reducer<Text, LongWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {

        long count = 0L;
        for (Iterator<LongWritable> its = values.iterator(); its.hasNext();) {
            count += its.next().get();
        }
        outValue.set(count);
        context.write(key, outValue); // key and outValue are separated by \t by default
    }

    @Override
    protected void cleanup(Reducer<Text, LongWritable, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        outValue = null;
    }

}
package com.mmzs.bigdata.yarn.mapreduce;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @author hadoop
 */
public class WordTimeDriver01 {

    private static FileSystem fs;
    private static Configuration conf;
    static {
        String uri = "hdfs://master01:9000/";
        conf = new Configuration();
        try {
            fs = FileSystem.get(new URI(uri), conf, "hadoop");
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        Job wcJob = getJob(args);
        if (null == wcJob) {
            return;
        }
        // submit the job to the cluster and wait for it to finish;
        // true means report the job's progress back to the client
        boolean flag = false;
        flag = wcJob.waitForCompletion(true);
        System.exit(flag ? 0 : 1);
    }

    /**
     * Builds the Job instance.
     * @param args input path and output path
     * @return the configured Job, or null if the arguments or the input path are invalid
     * @throws IOException
     */
    public static Job getJob(String[] args) throws IOException {
        if (null == args || args.length < 2) return null;
        // HDFS path holding the data to be processed
        Path inputPath = new Path(args[0]);
        // output path for the results once the job has finished
        Path outputPath = new Path(args[1]);

        // the input directory must exist; an existing output directory is removed first
        if (!fs.exists(inputPath)) {
            return null;
        }
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true); // true means delete recursively
        }
        //fs.mkdirs(outputPath);

        // obtain the Job instance
        Job wcJob = Job.getInstance(conf, "WordCountJob");
        // set the entry class of the jar that will be run
        // (the entry point of wcJob is the WordTimeDriver01 class)
        wcJob.setJarByClass(WordTimeDriver01.class);
        // set the Mapper class the job uses
        wcJob.setMapperClass(WordTimeMapper01.class);
        // set the Reducer class the job uses (can be omitted if the job has no Reducer)
        wcJob.setReducerClass(WordTimeReducer01.class);

        // output key type of the MapTask
        wcJob.setMapOutputKeyClass(Text.class);
        // output value type of the MapTask
        wcJob.setMapOutputValueClass(LongWritable.class);

        // output key type of the whole job (can be omitted if the job has no Reducer)
        wcJob.setOutputKeyClass(Text.class);
        // output value type of the whole job (can be omitted if the job has no Reducer)
        wcJob.setOutputValueClass(LongWritable.class);

        // input path of the data the job processes
        FileInputFormat.setInputPaths(wcJob, inputPath);
        // output path of the job's results
        FileOutputFormat.setOutputPath(wcJob, outputPath);

        return wcJob;
    }

}
package com.mmzs.bigdata.yarn.mapreduce;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class WordTimeMapper02 extends Mapper<LongWritable, Text, Text, Text> {

    private Text outKey;
    private Text outValue;

    @Override
    protected void setup(Mapper<LongWritable, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        outKey = new Text();
        outValue = new Text();
    }

    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {

        // split the first job's output back into its parts: word, file name, occurrence count
        String line = value.toString();
        String[] filesAndTimes = line.split("\t");
        String word = filesAndTimes[0];
        String fileName = filesAndTimes[1];
        String times = filesAndTimes[2];

        outKey.set(word);                      // group by the word alone
        outValue.set(fileName + "-" + times);  // output the file name together with its count
        context.write(outKey, outValue);       // write once per input record
    }

    @Override
    protected void cleanup(Mapper<LongWritable, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        outKey = null;
        outValue = null;
    }

}
package com.mmzs.bigdata.yarn.mapreduce;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordTimeReducer02 extends Reducer<Text, Text, Text, Text> {

    private Text outValue;

    @Override
    protected void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        outValue = new Text();
    }

    @Override
    protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        // concatenate all "fileName-times" pairs for this word, separated by tabs
        StringBuilder builder = new StringBuilder();
        Iterator<Text> its = values.iterator();
        while (its.hasNext()) {
            String fileNameAndTimes = its.next().toString();
            builder.append(fileNameAndTimes + "\t");
        }

        // drop the trailing tab
        if (builder.length() > 0) {
            builder.deleteCharAt(builder.length() - 1);
        }

        outValue.set(builder.toString());
        context.write(key, outValue);
    }

    @Override
    protected void cleanup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        outValue = null;
    }

}
package com.mmzs.bigdata.yarn.mapreduce;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordTimeDriver02 {

    private static FileSystem fs;
    private static Configuration conf;
    static {
        String uri = "hdfs://master01:9000/";
        conf = new Configuration();
        try {
            fs = FileSystem.get(new URI(uri), conf, "hadoop");
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        Job wcJob = getJob(args);
        if (null == wcJob) {
            return;
        }
        // submit the job to the cluster and wait for it to finish;
        // true means report the job's progress back to the client
        boolean flag = wcJob.waitForCompletion(true);
        System.exit(flag ? 0 : 1);
    }

    /**
     * Builds the Job instance.
     * @param args input path and output path
     * @return the configured Job, or null if the arguments or the input path are invalid
     * @throws IOException
     */
    public static Job getJob(String[] args) throws IOException {
        if (null == args || args.length < 2) return null;
        // HDFS path holding the data to be processed
        Path inputPath = new Path(args[0]);
        // output path for the results once the job has finished
        Path outputPath = new Path(args[1]);

        // the input directory must exist; an existing output directory is removed first
        if (!fs.exists(inputPath)) {
            return null;
        }
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true); // true means delete recursively
        }
        //fs.mkdirs(outputPath);

        // obtain the Job instance
        Job wcJob = Job.getInstance(conf, "WordCountJob");
        // set the entry class of the jar that will be run
        // (the entry point of wcJob is the WordTimeDriver02 class)
        wcJob.setJarByClass(WordTimeDriver02.class);
        // set the Mapper class the job uses
        wcJob.setMapperClass(WordTimeMapper02.class);
        // set the Reducer class the job uses (can be omitted if the job has no Reducer)
        wcJob.setReducerClass(WordTimeReducer02.class);

        // output key type of the MapTask
        wcJob.setMapOutputKeyClass(Text.class);
        // output value type of the MapTask
        wcJob.setMapOutputValueClass(Text.class);

        // output key type of the whole job (can be omitted if the job has no Reducer)
        wcJob.setOutputKeyClass(Text.class);
        // output value type of the whole job (can be omitted if the job has no Reducer)
        wcJob.setOutputValueClass(Text.class);

        // input path of the data the job processes
        FileInputFormat.setInputPaths(wcJob, inputPath);
        // output path of the job's results
        FileOutputFormat.setOutputPath(wcJob, outputPath);
        return wcJob;
    }
}
package com.mmzs.bigdata.yarn.mapreduce;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

public class WordTimeDriver {

    private static FileSystem fs;
    private static Configuration conf;
    private static final String TEMP = "hdfs://master01:9000/data/wordcount/tmp";
    static {
        String uri = "hdfs://master01:9000/";
        conf = new Configuration();
        try {
            fs = FileSystem.get(new URI(uri), conf, "hadoop");
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (null == args || args.length < 2) return; // both the input and the final output path are required

        String[] params01 = {args[0], TEMP};

        // run the first job: count how many times each word occurs in each file
        Job wcJob01 = WordTimeDriver01.getJob(params01);
        if (null == wcJob01) {
            return;
        }
        // submit the job to the cluster and wait for it to finish;
        // true means report the job's progress back to the client
        boolean flag01 = wcJob01.waitForCompletion(true);
        if (!flag01) {
            return;
        }

        // run the second job: group the first job's output by word
        String[] params02 = {TEMP, args[1]};
        Job wcJob02 = WordTimeDriver02.getJob(params02);
        if (null == wcJob02) {
            return;
        }
        // submit the job to the cluster and wait for it to finish;
        // true means report the job's progress back to the client
        boolean flag02 = wcJob02.waitForCompletion(true);
        if (flag02) { // once job02 has finished, delete the intermediate directory and exit
            fs.delete(new Path(TEMP), true);
            System.exit(0);
        }
        System.out.println("job is failing......");
        System.exit(1);
    }

}
Run-time arguments:
If running from Eclipse, the path arguments must carry the URI of the cluster's master, i.e. hdfs://master01:9000
Input path argument: /data/wordcount/src
Output path argument: /data/wordcount/dst
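The chained version is driven by WordTimeDriver, which runs the two jobs back to back and cleans up the intermediate directory itself; submitting it from the master looks roughly as follows (again, the jar name wordtime.jar is only a placeholder):

[hadoop@master01 ~]$ hadoop jar wordtime.jar com.mmzs.bigdata.yarn.mapreduce.WordTimeDriver \
    /data/wordcount/src /data/wordcount/dst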
Run results:
When testing is finished, shut down the YARN cluster first and then the HDFS cluster.
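A minimal shutdown sketch on master, assuming the standard Hadoop sbin scripts are on the PATH:

[hadoop@master01 ~]$ stop-yarn.sh    # stop ResourceManager and the NodeManagers first
[hadoop@master01 ~]$ stop-dfs.sh     # then stop NameNode, SecondaryNameNode and the DataNodes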
Web UIs for watching the job while it runs:
http://<master IP>:50070  (HDFS NameNode web UI)
http://<master IP>:8088   (YARN ResourceManager web UI)