分区数与reduce任务数必须匹配:reduce任务数不能小于自定义分区器返回的最大分区号+1(否则运行时抛出非法分区异常);若reduce任务数大于分区数,多出的reduce任务只会产生空的输出文件。
MyPartitioner类
| package com.sxuek.partitiontest; |
| |
| import org.apache.hadoop.io.Text; |
| import org.apache.hadoop.mapreduce.Partitioner; |
| |
| |
| |
| |
| |
| |
| |
| public class MyPartitioner extends Partitioner<Text, FlowBean> { |
| public int getPartition(Text text, FlowBean flowBean, int i) { |
| String head = text.toString().substring(0, 3); |
| if ("134".equals(head)) { |
| return 0; |
| } else if ("135".equals(head)) { |
| return 1; |
| } else if ("136".equals(head)) { |
| return 2; |
| } else if ("137".equals(head)) { |
| return 3; |
| } |
| return 4; |
| } |
| } |
Driver类
| 添加代码: |
| |
| job.setNumReduceTasks(5); |
| job.setPartitionerClass(MyPartitioner.class); |
| |
| package com.sxuek.partitiontest; |
| |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.io.NullWritable; |
| import org.apache.hadoop.io.Text; |
| import org.apache.hadoop.mapreduce.Job; |
| import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; |
| import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; |
| |
| import java.io.IOException; |
| import java.net.URI; |
| import java.net.URISyntaxException; |
| |
| public class FlowDriver { |
| public static void main(String[] args) throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException { |
| Configuration conf = new Configuration(); |
| conf.set("fs.defaultFS", "hdfs://node1:9000"); |
| |
| FileSystem fs = FileSystem.get(new URI("hdfs://node1:9000"), conf, "root"); |
| |
| Job job = Job.getInstance(conf); |
| job.setJarByClass(FlowDriver.class); |
| |
| job.setMapperClass(FlowMapper.class); |
| job.setMapOutputKeyClass(Text.class); |
| job.setMapOutputValueClass(FlowBean.class); |
| |
| job.setReducerClass(FlowReducer.class); |
| job.setOutputKeyClass(NullWritable.class); |
| job.setOutputValueClass(FlowBean.class); |
| |
| |
| job.setNumReduceTasks(5); |
| job.setPartitionerClass(MyPartitioner.class); |
| |
| FileInputFormat.setInputPaths(job, new Path("/phone_data.txt")); |
| |
| Path path = new Path("/output"); |
| if (fs.exists(path)) { |
| fs.delete(path, true); |
| } |
| FileOutputFormat.setOutputPath(job, path); |
| boolean flag = job.waitForCompletion(true); |
| |
| System.out.println(flag); |
| } |
| } |
FlowMapper
| package com.sxuek.partitiontest; |
| |
| import org.apache.hadoop.io.LongWritable; |
| import org.apache.hadoop.io.Text; |
| import org.apache.hadoop.mapreduce.Mapper; |
| |
| import java.io.IOException; |
| |
/**
 * Parses one line of phone traffic data and emits
 * (phone number, populated {@link FlowBean}) for downstream aggregation.
 */
public class FlowMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        // assumes fields are single-space separated — TODO confirm against
        // /phone_data.txt (tab-separated is also common for this dataset)
        String[] words = line.split(" ");

        // assumes the phone number is the second field — verify schema
        String phoneNumber = words[1];
        // NOTE(review): upFlow is read from the second-to-last field and
        // downFlow from the third-to-last. In the classic phone_data layout
        // these are usually the other way round (up at length-3, down at
        // length-2) — confirm against the actual input file.
        long upFlow = Long.parseLong(words[words.length-2]);
        long downFlow = Long.parseLong(words[words.length-3]);

        FlowBean flowBean = new FlowBean();
        flowBean.setPhoneNumber(phoneNumber);
        flowBean.setUpFlow(upFlow);
        flowBean.setDownFlow(downFlow);
        flowBean.setSumFlow(upFlow+downFlow);

        context.write(new Text(phoneNumber), flowBean);
    }
}
FlowReducer
| package com.sxuek.partitiontest; |
| |
| import org.apache.hadoop.io.NullWritable; |
| import org.apache.hadoop.io.Text; |
| import org.apache.hadoop.mapreduce.Reducer; |
| |
| import java.io.IOException; |
| |
| public class FlowReducer extends Reducer<Text, FlowBean, NullWritable, FlowBean> { |
| @Override |
| protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException { |
| FlowBean flowBean = new FlowBean(); |
| flowBean.setPhoneNumber(key.toString()); |
| for (FlowBean fb : values) { |
| flowBean.setUpFlow(flowBean.getUpFlow()+fb.getUpFlow()); |
| flowBean.setDownFlow(flowBean.getDownFlow()+fb.getDownFlow()); |
| flowBean.setSumFlow(flowBean.getSumFlow()+fb.getSumFlow()); |
| } |
| context.write(NullWritable.get(), flowBean); |
| } |
| } |
FlowBean
| package com.sxuek.partitiontest; |
| |
| import org.apache.hadoop.io.WritableComparable; |
| |
| import java.io.DataInput; |
| import java.io.DataOutput; |
| import java.io.IOException; |
| |
| |
| |
| |
| |
| |
| |
| |
| public class FlowBean implements WritableComparable<FlowBean> { |
| public int compareTo(FlowBean o) { |
| return 0; |
| } |
| |
| private Long upFlow = 0L; |
| private Long downFlow = 0L; |
| private Long sumFlow = 0L; |
| private String phoneNumber; |
| |
| |
| |
| |
| |
| |
| @Override |
| public String toString() { |
| return phoneNumber + "\t" + upFlow + "\t" + downFlow + "\t" + sumFlow; |
| } |
| |
| |
| |
| |
| public FlowBean() { |
| |
| } |
| |
| |
| |
| |
| |
| |
| public void write(DataOutput dataOutput) throws IOException { |
| dataOutput.writeUTF(phoneNumber); |
| dataOutput.writeLong(upFlow); |
| dataOutput.writeLong(downFlow); |
| dataOutput.writeLong(sumFlow); |
| } |
| |
| |
| |
| |
| |
| |
| public void readFields(DataInput dataInput) throws IOException { |
| phoneNumber = dataInput.readUTF(); |
| upFlow = dataInput.readLong(); |
| downFlow = dataInput.readLong(); |
| sumFlow = dataInput.readLong(); |
| } |
| public String getPhoneNumber() { |
| return phoneNumber; |
| } |
| |
| public void setPhoneNumber(String phoneNumber) { |
| this.phoneNumber = phoneNumber; |
| } |
| |
| public Long getUpFlow() { |
| return upFlow; |
| } |
| |
| public void setUpFlow(Long upFlow) { |
| this.upFlow = upFlow; |
| } |
| |
| public Long getDownFlow() { |
| return downFlow; |
| } |
| |
| public void setDownFlow(Long downFlow) { |
| this.downFlow = downFlow; |
| } |
| |
| public Long getSumFlow() { |
| return sumFlow; |
| } |
| |
| public void setSumFlow(Long sumFlow) { |
| this.sumFlow = sumFlow; |
| } |
| |
| } |
| |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?