利用Key的排序进行二次排序
业务描述:按订单号和商品价格进行排序,获取每个订单中价格最高的商品
第一步:job的描述
/**
 * Configures and submits the order secondary-sort job.
 *
 * <p>The job uses {@code OrderBean} as the map output key and as the final
 * output key, {@code OrderGroupingCompartor} as the reduce-side grouping
 * comparator, {@code OrderPatitioner} as the partitioner, and three reduce
 * tasks.
 */
public class OrderDriver {
    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws Exception if the job cannot be configured or fails while running
     */
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage message instead of an ArrayIndexOutOfBoundsException
        // when the input/output paths are missing.
        if (args == null || args.length < 2) {
            System.err.println("Usage: OrderDriver <input path> <output path>");
            System.exit(2);
            return;
        }

        // 1. Obtain the configuration and create the job.
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // 2. Locate the jar through the driver class.
        job.setJarByClass(OrderDriver.class);

        // 3. Register the mapper and reducer classes.
        job.setMapperClass(OrderMapper.class);
        job.setReducerClass(OrderReducer.class);

        // 4. Key/value types emitted by the mapper.
        job.setMapOutputKeyClass(OrderBean.class);
        job.setMapOutputValueClass(NullWritable.class);

        // 5. Key/value types of the final output.
        job.setOutputKeyClass(OrderBean.class);
        job.setOutputValueClass(NullWritable.class);

        // 6. Input and output paths.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // 7. Reduce-side grouping comparator (groups keys by order id).
        job.setGroupingComparatorClass(OrderGroupingCompartor.class);

        // 8. Custom partitioner.
        job.setPartitionerClass(OrderPatitioner.class);

        // 9. Number of reduce tasks.
        job.setNumReduceTasks(3);

        // 10. Submit, wait for completion, and map success/failure to the exit code.
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
第二步:Comparable比较器的实现,这个Bean就是key
/**
 * Composite key holding an order id and an item price.
 *
 * <p>Natural ordering: ascending by order id, then descending by price, so
 * within one order id the highest price sorts first.
 */
public class OrderBean implements WritableComparable<OrderBean> {
    private String orderId; // order id
    private Double price; // item price

    /** Creates an empty bean; fields are filled in later via setters or {@link #readFields}. */
    public OrderBean() {
        super();
    }

    /**
     * Creates a bean with both fields set.
     *
     * @param orderId the order id
     * @param price the item price
     */
    public OrderBean(String orderId, Double price) {
        super();
        this.orderId = orderId;
        this.price = price;
    }

    public String getOrderId() {
        return orderId;
    }

    public void setOrderId(String orderId) {
        this.orderId = orderId;
    }

    public Double getPrice() {
        return price;
    }

    public void setPrice(Double price) {
        this.price = price;
    }

    /**
     * Serializes the order id (as modified UTF-8) followed by the price.
     *
     * @param out the sink to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(orderId);
        out.writeDouble(price);
    }

    /**
     * Restores the fields in the same order {@link #write} emits them.
     *
     * @param in the source to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.orderId = in.readUTF();
        this.price = in.readDouble();
    }

    /**
     * Two-level comparison: order id ascending, then price descending.
     *
     * @param o the bean to compare against
     * @return negative, zero, or positive per the {@link Comparable} contract
     */
    @Override
    public int compareTo(OrderBean o) {
        // 1. Order by order id.
        int comResult = this.orderId.compareTo(o.getOrderId());
        if (comResult == 0) {
            // 2. Order by price, descending. Arguments are swapped to invert the
            // order; Double.compare returns 0 for equal prices, which keeps the
            // comparison antisymmetric (a plain "a > b ? -1 : 1" never returns 0).
            comResult = Double.compare(o.getPrice(), this.price);
        }
        return comResult;
    }

    /**
     * Equality consistent with {@link #compareTo}: same order id and same price.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof OrderBean)) {
            return false;
        }
        OrderBean other = (OrderBean) obj;
        // Fully qualified so that no additional import is required.
        return java.util.Objects.equals(orderId, other.orderId)
                && java.util.Objects.equals(price, other.price);
    }

    /** Hash code derived from the same fields as {@link #equals}. */
    @Override
    public int hashCode() {
        return java.util.Objects.hash(orderId, price);
    }

    /** Returns the order id and the price separated by a tab. */
    @Override
    public String toString() {
        return orderId + "\t" + price;
    }
}
第三步:分组对比器的实现
/**
 * Grouping comparator that treats two {@code OrderBean} keys as belonging to
 * the same group when their order ids compare equal; the price is ignored.
 */
public class OrderGroupingCompartor extends WritableComparator {

    /** No-argument constructor registering {@code OrderBean} as the key class. */
    public OrderGroupingCompartor() {
        super(OrderBean.class, true);
    }

    /**
     * Compares two keys by order id only.
     *
     * @param a the first key, expected to be an {@code OrderBean}
     * @param b the second key, expected to be an {@code OrderBean}
     * @return the result of comparing the two order id strings
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        String leftOrderId = ((OrderBean) a).getOrderId();
        String rightOrderId = ((OrderBean) b).getOrderId();
        return leftOrderId.compareTo(rightOrderId);
    }
}
第四步:分区的实现,目的是让含有相同订单ID的OrderBean分到同一个分区中(自定义分区)
/**
 * Partitioner that routes keys by the hash of their order id, so keys sharing
 * an order id always land in the same partition.
 */
public class OrderPatitioner extends Partitioner<OrderBean, NullWritable> {

    /**
     * Picks the partition for a key.
     *
     * @param key the key whose order id is hashed
     * @param value unused
     * @param numPartitions the total number of partitions
     * @return the order id hash, with the sign bit cleared, modulo {@code numPartitions}
     */
    @Override
    public int getPartition(OrderBean key, NullWritable value, int numPartitions) {
        // Clearing the sign bit keeps the hash non-negative before the modulo.
        int nonNegativeHash = key.getOrderId().hashCode() & Integer.MAX_VALUE;
        return nonNegativeHash % numPartitions;
    }
}
第五步:mapper的实现
/**
 * Parses tab-separated order lines into {@code OrderBean} keys.
 *
 * <p>Example input line (tab-separated): {@code Order_0000002 Pdt_03 522.8}.
 * Field 0 is used as the order id and field 2 as the price.
 */
public class OrderMapper extends Mapper<LongWritable, Text, OrderBean, NullWritable> {
    // Single bean instance reused for every record emitted by this mapper.
    OrderBean bean = new OrderBean();

    /**
     * Emits one {@code OrderBean} key per non-blank input line.
     *
     * <p>Blank or whitespace-only lines (for example a trailing empty line) are
     * skipped. Non-blank lines with fewer than three fields, or with a
     * non-numeric third field, still fail with the underlying runtime exception.
     *
     * @param key the input key supplied by the framework (unused)
     * @param value one line of input text
     * @param context the context used to emit output
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // 1. Read the line.
        String line = value.toString();

        // Blank lines carry no record; skipping them avoids an
        // ArrayIndexOutOfBoundsException on the field accesses below.
        if (line.trim().isEmpty()) {
            return;
        }

        // 2. Split into fields.
        String[] fields = line.split("\t");

        // 3. Populate the bean.
        bean.setOrderId(fields[0]);
        bean.setPrice(Double.parseDouble(fields[2]));

        // 4. Emit.
        context.write(bean, NullWritable.get());
    }
}
第六步:reducer的实现,对已排好序的数据取最大或者最小值
/**
 * Emits one key per reduce group: the key the group is entered with.
 *
 * <p>With keys sorted by order id and then by descending price, and groups
 * formed by order id, that key is the highest-priced record of the order.
 */
public class OrderReducer extends Reducer<OrderBean, NullWritable, OrderBean, NullWritable> {

    /**
     * Writes the group's first key.
     *
     * @param bean the key of the first record in the group
     * @param values the values of the group (not consumed)
     * @param context the context used to emit output
     * @throws IOException if writing the output fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(OrderBean bean, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        // Deliberately do NOT iterate over `values` before writing: Hadoop reuses
        // the key object and refills it as the value iterator advances, so
        // draining the iterator first would leave `bean` holding the LAST record
        // of the group (the lowest price) instead of the first (the highest).
        context.write(bean, NullWritable.get());
    }
}