MR框架-->Word2
省份浏览量统计功能实现:
1.统计各个省份的浏览量
- 省份浏览量统计之IP库解析
- 省份浏览量统计之日志解析
- 省份浏览量统计之功能实现
前期准备:
- 4个工具类:GetPageId、IPParser、IPSeeker、LogParser,以及IP库数据文件 qqwry.dat
Mapper类
//Mapper类 static class LogMapper extends Mapper<LongWritable, Text, Text, IntWritable>{ //定义ONE为全局变量 private IntWritable ONE = new IntWritable(1); @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { //获取一行数据 String log = value.toString(); //创建LogParser对象 LogParser logParser = new LogParser(); //从LogParser对象中获取ip Map<String, String> map = logParser.parse(log); String ip = map.get("ip"); //创建IPParser对象 RegionInfo analyseIp = IPParser.getInstance().analyseIp(ip); //通过get方法获取省份 String province = analyseIp.getProvince(); //对省份进行判断 if(StringUtils.isNotBlank(province)) { context.write(new Text(province), ONE); }else { context.write(new Text("-"), ONE); } } }
Reducer类
static class LogReducer extends Reducer<Text, IntWritable, Text, IntWritable>{ protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException { //定义一个空变量 int i = 0 ; //遍历 for (IntWritable value : values) { //结果累加 i += value.get(); } //写入到上下文中 context.write(key, new IntWritable(i)); } }
Submit类
public static void main(String[] args) throws Exception { //加载配置文件 Configuration conf = new Configuration(); //创建Job对象 Job job = Job.getInstance(conf); //设置提交主类 job.setJarByClass(LogApp.class); //设置Mapper类相关的参数 job.setMapperClass(LogMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); //设置Reducer类相关的参数 job.setReducerClass(LogReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); //设置输入路径 FileInputFormat.setInputPaths(job, new Path(args[0])); //设置输出路径 FileOutputFormat.setOutputPath(job, new Path(args[1])); //提交任务 job.waitForCompletion(true); }
如果hdfs操作不太熟悉请参考前面的Hadoop