不分发数据,使用单个reducer
set mapred.reduce.tasks=1; select * from dw.dw_app where dt>='2016-09-01' and dt <='2016-09-18' order by stime limit 30000;
包多一层,是用order by
select t.* from ( select * from dw.dw_app where dt>='2016-09-01' and dt <='2016-09-18' and app_id='16099' and msgtype = 'role.recharge' ) t order by t.stime limit 5000;
把所有具有相同的行最终都在一个reducer分区中,在在一个reducer中排序。 cluster by column=distribute by column+sort by colum
select * from dw.dw_app where dt>='2016-09-01' and dt <='2016-09-18' and app_id='16099' and msgtype = 'role.recharge' cluster by dt limit 30000;
查询每天前十名充值用户和充值总额
select t3.* from (select t2.* from (select dt, account_id, sum(recharge_money) as total_money, row_number() over(partition by dt order by sum(recharge_money) desc) rank from (select dt, account_id, recharge_money from dw.dw_app where dt >= '2016-09-01' and dt <= '2016-09-18' and app_id = '16099' and msgtype = 'role.recharge'
cluster by dt, account_id) t group by dt, account_id) t2 where t2.rank <= 10) t3 order by t3.dt asc, rank asc limit 300;