每日总结

Mapreduce实例——Map端join

依赖：

<groupId>org.apache.hadoop</groupId>

<artifactId>hadoop-common</artifactId>

</dependency>

<groupId>org.apache.hadoop</groupId>

<artifactId>hadoop-mapreduce-client-app</artifactId>

</dependency>

<groupId>org.apache.hadoop</groupId>

<artifactId>hadoop-hdfs</artifactId>

</dependency>

</dependency>

<groupId>org.apache.hadoop</groupId>

<artifactId>hadoop-client</artifactId>

</dependency>

实验代码：

package mapreduce;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MapJoin {

    public static class MyMapper extends Mapper<Object, Text, Text, Text> {
        private Map<String, String> dict = new HashMap<>();

        @Override
        protected void setup(Context context) throws IOException,
                InterruptedException {
            String fileName = context.getLocalCacheFiles()[0].getName();
            System.out.println(fileName);
            BufferedReader reader = new BufferedReader(new FileReader(fileName));
            String codeandname = null;
            while (null != (codeandname = reader.readLine())) {
                String str[] = codeandname.split("\t");
                dict.put(str[0], str[2] + "\t" + str[3]);
            }
            reader.close();
        }

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] kv = value.toString().split("\t");
            if (dict.containsKey(kv[1])) {
                context.write(new Text(kv[1]), new Text(dict.get(kv[1]) + "\t" + kv[2]));
            }
        }
    }

    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text text : values) {
                context.write(key, text);
            }
        }
    }

    public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
        Job job = Job.getInstance();
        job.setJobName("mapjoin");
        job.setJarByClass(MapJoin.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        Path in = new Path("hdfs://hadoop102:8020/mymapreduce2/in/order_items1");
        Path out = new Path("hdfs://hadoop102:8020/mymapreduce2/out4");
        FileInputFormat.addInputPath(job, in);
        FileOutputFormat.setOutputPath(job, out);

        URI uri = new URI("hdfs://hadoop102:8020/mymapreduce2/in/orders1");
        job.addCacheFile(uri);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

posted @ 2021-12-03 18:48 chenghaixinag 阅读(29) 评论(0) 编辑收藏举报

刷新页面返回顶部

chenghaixinag

每日总结

公告