Hadoop 的 MultipleOutputs 多目录输出

复制代码
/**
 * Map-only MapReduce job that splits tagged input lines into one output
 * sub-directory per category by means of {@code MultipleOutputs}.
 *
 * <p>Each input line is searched for one of the markers listed in
 * {@link #ROUTES}. The text following the first matching marker is written,
 * as the key with a {@code NullWritable} value, to the named output and base
 * path associated with that marker. Lines containing none of the markers
 * are dropped.
 */
public class Demo1 {
    /**
     * Routing table shared by the mapper and the driver. Each row is
     * {@code {marker, namedOutput, baseOutputPath}}.
     *
     * <p>Row order is significant: the mapper tests markers top to bottom and
     * stops at the first one contained in the line, so a line carrying several
     * markers is routed by the earliest row that matches.
     *
     * <p>NOTE(review): the base paths end with "/", which presumably makes
     * MultipleOutputs create files inside a directory of that name
     * (e.g. {@code download/-m-00000}) - confirm against the Hadoop version in use.
     */
    private static final String[][] ROUTES = {
        {"DOWNLOAD:", "download", "download/"},
        {"LOGGING:", "logging", "logging/"},
        {"MONITOR:", "monitor", "monitor/"},
        {"ACTIVITIES:", "activities", "activities/"},
    };

    /**
     * Mapper that forwards the payload after a known marker to the matching
     * named output; it never writes to the regular {@code context} output.
     */
    public static class MultestMapper extends
            Mapper<Object, Text, Text, NullWritable> {
        /** Reused output key, overwritten for every routed record. */
        private final Text outkey = new Text("");
        /** Created in {@link #setup} and closed in {@link #cleanup}. */
        private MultipleOutputs<Text, NullWritable> mos;

        /**
         * Routes one input line according to {@link Demo1#ROUTES}.
         *
         * @param key     input key; not used
         * @param value   the input line
         * @param context task context; not written to directly
         * @throws IOException          if writing to the named output fails
         * @throws InterruptedException if the write is interrupted
         */
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            for (String[] route : ROUTES) {
                String marker = route[0];
                int at = line.indexOf(marker);
                if (at < 0) {
                    continue;
                }
                // Skip past the marker using its real length instead of a
                // hand-counted constant, so marker and offset cannot drift apart.
                outkey.set(line.substring(at + marker.length()));
                mos.write(route[1], outkey, NullWritable.get(), route[2]);
                return; // first matching marker wins
            }
        }

        /**
         * Creates the {@code MultipleOutputs} helper for this task.
         *
         * @param context task context handed to {@code MultipleOutputs}
         */
        @Override
        protected void setup(Context context) throws IOException,
                InterruptedException {
            mos = new MultipleOutputs<Text, NullWritable>(context);
            super.setup(context);
        }

        /**
         * Closes the {@code MultipleOutputs} helper opened in {@link #setup}.
         *
         * @param context task context
         */
        @Override
        protected void cleanup(Context context) throws IOException,
                InterruptedException {
            mos.close();
            super.cleanup(context);
        }
    }

    /**
     * Configures and runs the job.
     *
     * <p>Expects exactly two arguments remaining after generic Hadoop options
     * are parsed: the input path and the output path. Prints a usage message
     * and exits with status 2 otherwise. Exits with 0 when the job completes
     * successfully and 1 when it does not.
     *
     * @param args command-line arguments
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: <in> <out>");
            System.exit(2);
        }

        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "multest");
        job.setJarByClass(Demo1.class);
        job.setMapperClass(MultestMapper.class);
        job.setNumReduceTasks(0); // map-only: mapper output is the final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        // Register every named output the mapper may write to; driven by the
        // same table so the two lists cannot get out of sync.
        for (String[] route : ROUTES) {
            MultipleOutputs.addNamedOutput(job, route[1], TextOutputFormat.class,
                    Text.class, NullWritable.class);
        }

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
复制代码

 

posted @   沙漠里的小鱼  阅读(2475)  评论(0)  编辑  收藏  举报
编辑推荐:
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
阅读排行:
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
点击右上角即可分享
微信分享提示