hive第一天

复制代码
1、模仿建表语句,创建subject表,并使用hdfs dfs -put 命令加载数据
CREATE TABLE `subject`(
  `subject_id` bigint COMMENT '科目id', 
  `subject_name` string COMMENT '科目名称')
COMMENT '科目表'
ROW FORMAT DELIMITED 
  FIELDS TERMINATED BY ',';
2、查询学生分数(输出:学号,姓名,班级,科目id,科目名称,成绩)
input tables: 3张表
不需要分组、聚合、排序,直接三张表关联即可

select  t1.id
        ,t1.name
        ,t1.clazz
        ,t2.score_id
        ,t2.score
        ,t3.subject_name
from students t1
left join score t2
on t1.id = t2.id
left join subject t3
on t2.score_id = t3.subject_id;

3、查询学生总分(输出:学号,姓名,班级,总分)
input tables: students score
要求总分,得按照学生id分组求sum
不需要排序,需要关联一次

关联次数:6000次
先关联再分组求和
select  t1.id
        ,t1.name
        ,t1.clazz
        ,sum(t2.score) as sum_score
from students t1
left join score t2
on t1.id = t2.id
group by t1.id,t1.name,t1.clazz
limit 10;

关联1000次
先分组求和,再关联
select  t1.id
        ,t1.name
        ,t1.clazz
        ,t2.sum_score
from students t1
left join(
    select  id
            ,sum(score) sum_score
    from score
    group by id
) t2 on t1.id = t2.id
limit 10;


4、查询全年级总分排名前三(不分文理科)的学生(输出:学号,姓名,班级,总分)
select  t1.id
        ,t1.name
        ,t1.clazz
        ,t2.sum_score
from students t1
left join(
    select  id
            ,sum(score) sum_score
    from score
    group by id
) t2 on t1.id = t2.id
order by t2.sum_score desc
limit 3;


5、查询文科一班学生总分排名前10的学生(输出:学号,姓名,班级,总分)
select  t1.id
        ,t1.name
        ,t1.clazz
        ,t2.sum_score
from students t1
left join(
    select  id
            ,sum(score) sum_score
    from score
    group by id
) t2 on t1.id = t2.id
where t1.clazz = "文科一班"
order by t2.sum_score desc
limit 10;


6、查询每个班级学生总分的平均成绩(输出:班级,平均分)
select  t1.clazz
        ,avg(t2.sum_score) avg_sum_score
from students t1
left join(
    select  id
            ,sum(score) sum_score
    from score
    group by id
) t2 on t1.id = t2.id
group by t1.clazz;

7、查询每个班级的最高总分(输出:班级,总分)
select  t1.clazz
        ,max(t2.sum_score) max_sum_score
from students t1
left join(
    select  id
            ,sum(score) sum_score
    from score
    group by id
) t2 on t1.id = t2.id
group by t1.clazz;

8、(思考)查询每个班级总分排名前三的学生(输出:学号,姓名,班级,总分)
窗口函数:row_number()
select  ttt1.name
        ,ttt1.score_sum
        ,ttt1.clazz
        ,ttt1.rn
from(
    select  name
            ,score_sum
            ,clazz
            ,row_number() over(partition by clazz order by score_sum desc) as rn
    from (
        select  t1.id
                ,t1.name
                ,t1.clazz
                ,t2.score_sum
        from students t1
        left join (
            select  id
                    ,sum(score) as score_sum
            from score
            group by id
        ) t2 on t1.id = t2.id
    ) tt1
) ttt1 where ttt1.rn <=3;
复制代码
复制代码
create table students(
    >  id bigint comment '学生 id',
    >  name string comment '学生姓名',
    >  age int comment '学生年龄',
    >  gender string comment '学生性别',
    >  clazz string comment '学生班级'
    > ) comment '学生信息表' 
    > ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
 create table score(
    >  id bigint comment '学生 id',
    >  score_id bigint comment '科目 id',
    >  score int comment '学生成绩'
    > ) comment '学生成绩表' 
    > ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
 create table subject(
    > subject_id bigint comment '科目id',
    > subject_name string comment '科目名称'
    > )comment '科目目表'
    > ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
复制代码

 

posted @   坤坤无敌  阅读(86)  评论(0编辑  收藏  举报
编辑推荐:
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
阅读排行:
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· DeepSeek在M芯片Mac上本地化部署
点击右上角即可分享
微信分享提示