【Hadoop离线基础总结】Hive级联求和

Hive级联求和


  • 建表
    CREATE TABLE t_salary_detail( 
    username string,
    month string,
    salary INT ) 
    ROW format delimited FIELDS TERMINATED BY ',';
    
  • 导入数据
    LOAD DATA LOCAL inpath '/export/servers/hivedatas/click-part-r-00000' INTO TABLE t_salary_detail;
    
    +---------------------------+------------------------+-------------------------+--+
    | t_salary_detail.username  | t_salary_detail.month  | t_salary_detail.salary  |
    +---------------------------+------------------------+-------------------------+--+
    | A                         | 2015-01                | 5                       |
    | A                         | 2015-01                | 15                      |
    | B                         | 2015-01                | 5                       |
    | A                         | 2015-01                | 8                       |
    | B                         | 2015-01                | 25                      |
    | A                         | 2015-01                | 5                       |
    | A                         | 2015-02                | 4                       |
    | A                         | 2015-02                | 6                       |
    | B                         | 2015-02                | 10                      |
    | B                         | 2015-02                | 5                       |
    | A                         | 2015-03                | 7                       |
    | A                         | 2015-03                | 9                       |
    | B                         | 2015-03                | 11                      |
    | B                         | 2015-03                | 6                       |
    +---------------------------+------------------------+-------------------------+--+
    

  • 思路
    累积求和其实就是通过inner join表本身来实现。首先要先明白 统计每个用户每个月总共获得多少小费 的hql语句怎么写
    SELECT username,month,sum(salary)
    FROM t_salary_detail
    GROUP BY username,month;
    
    +-----------+----------+------+--+
    | username  |  month   | _c2  |
    +-----------+----------+------+--+
    | A         | 2015-01  | 33   |
    | A         | 2015-02  | 10   |
    | A         | 2015-03  | 16   |
    | B         | 2015-01  | 30   |
    | B         | 2015-02  | 15   |
    | B         | 2015-03  | 17   |
    +-----------+----------+------+--+
    
    现在通过inner join连接自己
    SELECT a.*,b.*
    FROM (
    SELECT username,month,sum(salary)
    FROM t_salary_detail
    GROUP BY username,month ) a
    INNER JOIN (
    SELECT username,month,sum(salary)
    FROM t_salary_detail
    GROUP BY username,month ) b
    ON a.username = b.username;
    
    +-------------+----------+--------+-------------+----------+--------+--+
    | a.username  | a.month  | a._c2  | b.username  | b.month  | b._c2  |
    +-------------+----------+--------+-------------+----------+--------+--+
    | A           | 2015-01  | 33     | A           | 2015-01  | 33     |
    | A           | 2015-01  | 33     | A           | 2015-02  | 10     |
    | A           | 2015-01  | 33     | A           | 2015-03  | 16     |
    | A           | 2015-02  | 10     | A           | 2015-01  | 33     |
    | A           | 2015-02  | 10     | A           | 2015-02  | 10     |
    | A           | 2015-02  | 10     | A           | 2015-03  | 16     |
    | A           | 2015-03  | 16     | A           | 2015-01  | 33     |
    | A           | 2015-03  | 16     | A           | 2015-02  | 10     |
    | A           | 2015-03  | 16     | A           | 2015-03  | 16     |
    | B           | 2015-01  | 30     | B           | 2015-01  | 30     |
    | B           | 2015-01  | 30     | B           | 2015-02  | 15     |
    | B           | 2015-01  | 30     | B           | 2015-03  | 17     |
    | B           | 2015-02  | 15     | B           | 2015-01  | 30     |
    | B           | 2015-02  | 15     | B           | 2015-02  | 15     |
    | B           | 2015-02  | 15     | B           | 2015-03  | 17     |
    | B           | 2015-03  | 17     | B           | 2015-01  | 30     |
    | B           | 2015-03  | 17     | B           | 2015-02  | 15     |
    | B           | 2015-03  | 17     | B           | 2015-03  | 17     |
    +-------------+----------+--------+-------------+----------+--------+--+
    
    得到以上结果可以看出,我们可以利用b表的salary列来进行累积求和,因为我们要求每个用户每个月总共获得小费,到2月份是要将2月和前一个月的小费累计,也就是表中33,33+10,33+10+16这样
    SELECT a.*,b.*
    FROM (
    SELECT username,month,sum(salary)
    FROM t_salary_detail
    GROUP BY username,month ) a
    INNER JOIN (
    SELECT username,month,sum(salary)
    FROM t_salary_detail
    GROUP BY username,month ) b
    ON a.username = b.username
    WHERE b.month <= a.month;
    
    +-------------+----------+--------+-------------+----------+--------+--+
    | a.username  | a.month  | a._c2  | b.username  | b.month  | b._c2  |
    +-------------+----------+--------+-------------+----------+--------+--+
    | A           | 2015-01  | 33     | A           | 2015-01  | 33     |
    | A           | 2015-02  | 10     | A           | 2015-01  | 33     |
    | A           | 2015-02  | 10     | A           | 2015-02  | 10     |
    | A           | 2015-03  | 16     | A           | 2015-01  | 33     |
    | A           | 2015-03  | 16     | A           | 2015-02  | 10     |
    | A           | 2015-03  | 16     | A           | 2015-03  | 16     |
    | B           | 2015-01  | 30     | B           | 2015-01  | 30     |
    | B           | 2015-02  | 15     | B           | 2015-01  | 30     |
    | B           | 2015-02  | 15     | B           | 2015-02  | 15     |
    | B           | 2015-03  | 17     | B           | 2015-01  | 30     |
    | B           | 2015-03  | 17     | B           | 2015-02  | 15     |
    | B           | 2015-03  | 17     | B           | 2015-03  | 17     |
    +-------------+----------+--------+-------------+----------+--------+--+
    
    得到以上结果后,只需要按照Month分组,对salary列用SUM函数即可
    SELECT SUM(bSalSum)
    FROM (
    SELECT a.month AS aMonth,a.username AS aUser,a.salSum AS aSalSum,
    b.month AS bMonth,b.username AS bUser,b.salSum AS bSalSum
    FROM (
    SELECT month,username,SUM(salary) AS salSum
    FROM t_salary_detail
    GROUP BY username,month ) a
    INNER JOIN (
    SELECT month,username,SUM(salary) AS salSum
    FROM t_salary_detail
    GROUP BY username,month ) b
    ON a.username = b.username
    WHERE b.month <= a.month ) t
    GROUP BY aUser,aMonth;
    
    +------+--+
    | _c0  |
    +------+--+
    | 33   |
    | 43   |
    | 59   |
    | 30   |
    | 45   |
    | 62   |
    +------+--+
    
posted @ 2020-03-12 23:06  _codeRookie  阅读(382)  评论(0编辑  收藏  举报