Computing the Yearly Maximum with MapReduce

 

1. Test data file generator; for reference, see the link below (a minimal sketch also follows the link)

https://www.cnblogs.com/jonban/p/10555364.html
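
The actual generator is in the referenced post. For convenience, here is a minimal, hypothetical sketch of one that produces records in the fixed-width layout MaxMapper expects (year in characters 0-3, a 4-digit value in characters 8-11); the class name, output file name, year range, and filler columns are assumptions for illustration only.

package com.java.mapreduce;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Random;

/**
 * Hypothetical test data generator (the real one is in the referenced post).
 * Each line follows the fixed-width layout MaxMapper expects:
 * characters 0-3 hold the year, characters 8-11 hold a 4-digit value.
 */
public class TestDataGenerator {

    public static void main(String[] args) throws IOException {
        Random random = new Random();
        try (BufferedWriter writer = new BufferedWriter(new FileWriter("data.txt"))) {
            for (int i = 0; i < 10000; i++) {
                int year = 2000 + random.nextInt(20);   // years 2000-2019
                int value = random.nextInt(10000);      // 4-digit value 0-9999
                // Characters 4-7 are filler so the value starts at index 8.
                writer.write(String.format("%d0000%04d", year, value));
                writer.newLine();
            }
        }
    }
}

With this layout, a line such as 201900000847 is mapped to the pair (2019, 847).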

 

The example MapReduce program is as follows:

2. Create a new Maven project named hadoop

 

3. pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
        http://maven.apache.org/xsd/maven-4.0.0.xsd">


    <modelVersion>4.0.0</modelVersion>
    <groupId>com.java</groupId>
    <artifactId>hadoop</artifactId>
    <version>1.0.0</version>


    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>3.2.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>3.2.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.2.0</version>
        </dependency>

    </dependencies>

    <build>
        <finalName>${project.artifactId}</finalName>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.0</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

 

4.   MaxMapper.java

package com.java.mapreduce;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Maps each input record to a (year, value) pair so records are grouped by year.
 * 
 * @author Logan
 * @createDate 2019-03-18
 * @version 1.0.0
 *
 */
public class MaxMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        String line = value.toString();
        // Characters 0-3 of each record hold the year.
        String year = line.substring(0, 4);
        // Characters 8-11 hold a 4-digit value.
        int num = Integer.parseInt(line.substring(8, 12));

        context.write(new Text(year), new IntWritable(num));
    }

}

 

5.   MaxReducer.java

package com.java.mapreduce;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Computes the maximum value among each year's records.
 * 
 * @author Logan
 * @createDate 2019-03-18
 * @version 1.0.0
 *
 */
public class MaxReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {

        int max = Integer.MIN_VALUE;
        for (IntWritable value : values) {
            max = Math.max(max, value.get());
        }

        context.write(key, new IntWritable(max));
    }

}
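
Since taking a maximum is associative and commutative, this same MaxReducer class could also serve as a combiner, letting each map task pre-aggregate its output before the shuffle. This is an optional tweak, not part of the original job setup; it would be enabled in MaxJob with one extra line:

// Optional: pre-aggregate each map task's output locally before the shuffle.
job.setCombinerClass(MaxReducer.class);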

 

 

6.   MaxJob.java

package com.java.mapreduce;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Driver class: the main entry point for the job.
 * 
 * @author Logan
 * @createDate 2019-03-18
 * @version 1.0.0
 *
 */
public class MaxJob {
    public static void main(String[] args) {
        try {
            Job job = Job.getInstance();
            job.setJarByClass(MaxJob.class);
            job.setJobName("Get Max");

            // The first command-line argument is the input path.
            FileInputFormat.addInputPath(job, new Path(args[0]));

            // The second command-line argument is the output directory for the results.
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            job.setMapperClass(MaxMapper.class);
            job.setReducerClass(MaxReducer.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            job.waitForCompletion(true);

        } catch (Exception e) {
            e.printStackTrace();
        }

    }

}
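
To try the job, one possible workflow (the HDFS paths below are placeholders) is to package the project and submit the jar with the hadoop command; the <finalName> setting in the pom names the artifact target/hadoop.jar:

mvn clean package
hadoop jar target/hadoop.jar com.java.mapreduce.MaxJob /input/data.txt /output/max

Note that the output directory must not already exist, otherwise FileOutputFormat will fail the job.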

 

 

 

 

 

 
