统计单词出现次数的mapreduce

1、新建Java项目

2、导包
E:\工具\大数据\大数据提升资料\01-软件资料\06-Hadoop\安装包\Java1.8
环境下编译\hadoop-2.7.3\hadoop-2.7.3\share\hadoop\mapreduce
+hdfs的那些包+common

3、写项目

3.1 WCMapper

package com.zy.wc;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Map phase of the word-count job.
 *
 * <p>Type parameters are {@code Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>}:
 * the input key is a {@link LongWritable}, the input value is one line of
 * text, and the output is a {@code <word, 1>} pair for each word on the line.
 *
 * <p>Example: input {@code <0, "tom\tlili\ttom">} produces
 * {@code <"tom", 1>}, {@code <"lili", 1>}, {@code <"tom", 1>}.
 */
public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable>{

    /** Constant count emitted for every word occurrence. */
    private static final LongWritable ONE = new LongWritable(1);

    /** Reused output key, so no new Text is allocated per token. */
    private final Text word = new Text();

    /**
     * Splits one input line on tab characters and emits {@code <word, 1>}
     * for every non-empty token.
     *
     * @param key     input key of the record (not used here)
     * @param value   one line of input text, words separated by tabs
     * @param context sink for the mapper output
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        // Words on a line are separated by the tab character.
        for (String name : value.toString().split("\t")) {
            // Blank lines and leading/consecutive tabs yield empty tokens;
            // they are not words and must not be counted.
            if (name.isEmpty()) {
                continue;
            }
            word.set(name);
            context.write(word, ONE);
        }
    }
}

3.2 WCReduce

package com.zy.wc;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.WordCount.Reduce;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reduce phase of the word-count job.
 *
 * <p>Receives a word together with all counts emitted for it and writes the
 * total. Example: input {@code <"tom", {1,1,1,1,1,1,1}>} produces
 * {@code <"tom", 7>}.
 */
public class WCReduce extends  Reducer<Text,LongWritable,Text,LongWritable>{

    /**
     * Sums every count received for {@code key} and emits {@code <key, total>}.
     *
     * @param key     the word being aggregated
     * @param value   all counts emitted for this word
     * @param context sink for the reducer output
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> value,
            Reducer<Text, LongWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
        long sum = 0;
        for (LongWritable count : value) {
            // Add the actual value rather than assuming it is 1, so the total
            // stays correct when incoming counts are already partial sums.
            sum += count.get();
        }
        context.write(key, new LongWritable(sum));
    }
}

3.3 WCApp

package com.zy.wc;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver for the word-count job: wires {@link WCMapper} and {@link WCReduce}
 * into a {@link Job}, points it at the input and output paths, and submits it.
 */
public class WCApp {

    /**
     * Configures and runs the word-count job, then terminates the JVM with
     * exit status 0 if the job succeeded and 1 otherwise.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the job cannot be created, configured or submitted
     */
    public static void main(String[] args) throws Exception {
        // Create the configuration object and obtain a job instance from it.
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        // Class used to locate the jar that contains the job.
        job.setJarByClass(WCApp.class);

        // Mapper and the key/value types it emits.
        job.setMapperClass(WCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Reducer and the key/value types of the final job output.
        // These are the job-level output setters, not the map-output ones.
        job.setReducerClass(WCReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Input file and output directory.
        FileInputFormat.setInputPaths(job, new Path("/wc.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/myWCResult"));

        // Submit the job, wait for it to finish, and report failure to the
        // caller through the process exit status.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }

}

4、打包上传

把项目打包  (java打成jar包,web项目打成war包),上传到linux,然后执行 hadoop jar WCAPP.jar com.zy.wc.WCApp 运行jar包(如果打包时已在MANIFEST中指定了Main-Class,可以省略主类名,直接 hadoop jar WCAPP.jar)

 

posted @ 2019-08-24 10:48  勤奋的园  阅读(907)  评论(0编辑  收藏  举报