A MapReduce job that counts word occurrences
1. Create a new Java project
2. Import the JARs
Add the MapReduce JARs from the Hadoop distribution, e.g. those under
E:\工具\大数据\大数据提升资料\01-软件资料\06-Hadoop\安装包\Java1.8环境下编译\hadoop-2.7.3\hadoop-2.7.3\share\hadoop\mapreduce,
plus the hdfs JARs and the common JARs from the same share\hadoop directory.
3. Write the code
3.1 WCMapper
package com.zy.wc;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/*
 * map: input <0, "tom lili tom">, output <"tom", 1>
 *
 * Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>:
 * the input key is the byte offset of the line (long), the input value is
 * the line itself (String); the output key is a word (String) and the
 * output value is a count (long).
 */
public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // the input value is one line of text, e.g. "tom lili tom"
        // split it on the tab character (the fields are tab-separated)
        String[] split = value.toString().split("\t");
        for (String name : split) {
            // emit <word, 1> for each word in the line
            context.write(new Text(name), new LongWritable(1));
        }
    }
}
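Note that the separator in split() must match the input file: the sample line "tom lili tom" looks space-separated, while the code splits on a tab. If your input uses arbitrary whitespace instead of tabs (an assumption about the data, not something the original states), a minimal variant is to replace the split line with:

    // assumption: tokens separated by one or more whitespace characters
    String[] split = value.toString().split("\\s+");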
3.2 WCReduce
package com.zy.wc;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// input <"tom", {1,1,1,1,1,1,1}>, output <"tom", 7>
public class WCReduce extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> value, Context context)
            throws IOException, InterruptedException {
        // accumulate the counts in the iterator
        long sum = 0;
        for (LongWritable longWritable : value) {
            sum += longWritable.get();
        }
        // emit <word, total count>
        context.write(key, new LongWritable(sum));
    }
}
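Because the reducer sums the actual values (rather than counting elements), it can also double as a combiner that pre-aggregates counts on the map side and cuts shuffle traffic. This is optional and not part of the original; if you want it, a single extra line in the driver (see 3.3) is enough:

    // optional: reuse the reducer as a map-side combiner
    job.setCombinerClass(WCReduce.class);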
3.3 WCApp
package com.zy.wc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WCApp {
    public static void main(String[] args) throws Exception {
        // create the configuration object
        Configuration configuration = new Configuration();
        // get a Job instance
        Job job = Job.getInstance(configuration);
        // the class used to locate the jar this job runs from
        job.setJarByClass(WCApp.class);
        // the mapper and its output key/value types
        job.setMapperClass(WCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        // the reducer and the job's final output key/value types
        job.setReducerClass(WCReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // input file on HDFS
        FileInputFormat.setInputPaths(job, new Path("/wc.txt"));
        // output directory on HDFS (must not exist yet)
        FileOutputFormat.setOutputPath(job, new Path("/myWCResult"));
        // submit the job and wait for it to finish
        job.waitForCompletion(true);
    }
}
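MapReduce refuses to start if the output directory already exists. A minimal sketch for clearing it before submission, using org.apache.hadoop.fs.FileSystem (placing this in main() before waitForCompletion is my assumption, not part of the original):

    // delete /myWCResult if a previous run left it behind
    FileSystem fs = FileSystem.get(configuration);
    Path output = new Path("/myWCResult");
    if (fs.exists(output)) {
        fs.delete(output, true); // true = delete recursively
    }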
4. Package and upload
Package the project (a Java project into a jar, a web project into a war), upload it to the Linux machine, and run it with hadoop jar WCApp.jar (pass the main class, e.g. com.zy.wc.WCApp, after the jar name if the jar's manifest does not specify one).
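For reference, a typical run, assuming the paths used above (wc.txt at the HDFS root, output written to /myWCResult):

    hadoop fs -put wc.txt /                     # upload the input file to HDFS
    hadoop jar WCApp.jar com.zy.wc.WCApp        # run the job
    hadoop fs -cat /myWCResult/part-r-00000     # inspect the result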