相关显示参数设置

  • 显示参数设置
-- Hive CLI display settings.
-- Fix: Hive only supports '--' line comments; the original '//' text after each
-- statement is a syntax error when the file is run with `hive -f`.
set hive.cli.print.header=true;               -- print column names in query output
set hive.cli.print.row.to.vertical=true;      -- row-to-vertical display; requires print.header=true
set hive.cli.print.row.to.vertical.num=1;     -- number of columns shown per output line
  • 动态分区参数设置
-- Dynamic-partition settings.
-- Fix: the original had bare (uncommented) text after the ';', which is a
-- syntax error; also normalized SET keyword casing.
set hive.exec.dynamic.partition=true;                 -- enable dynamic partitioning
set hive.exec.dynamic.partition.mode=nonstrict;       -- allow every partition column to be dynamic
set hive.exec.max.dynamic.partitions=100000;          -- max dynamic partitions per statement overall
set hive.exec.max.dynamic.partitions.pernode=100000;  -- max dynamic partitions per mapper/reducer node

创建数据库

CREATE DATABASE [IF NOT EXISTS] userdb;

建表

  • 普通建表:
-- Plain (non-partitioned) managed table: one row per order record,
-- stored as comma-delimited text.
CREATE TABLE user_summary_with_tel (
    bill_time            STRING COMMENT '订单时间',
    tel                  STRING COMMENT '订购号码',
    bill_addr            STRING COMMENT '配送地址',
    county               STRING COMMENT '区县',
    street               STRING COMMENT '街道',
    community            STRING COMMENT '社区',
    campus               STRING COMMENT '校园',
    company              STRING COMMENT '公司',
    building             STRING COMMENT '大厦',
    residential_district STRING COMMENT '住宅小区',
    industrial_district  STRING COMMENT '工业区',
    city_village         STRING COMMENT '城中村',
    address              STRING COMMENT '地址名称',
    attribute            STRING COMMENT '属性',
    return_address       STRING COMMENT '返回地址',
    ordermonth           STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
  • 分区表
-- Partitioned table: data files live under day=<d>/class_name=<c> directories,
-- pipe-delimited text.
CREATE TABLE tb_class_info (
    id         INT,
    createtime TIMESTAMP,
    modifytime TIMESTAMP
)
PARTITIONED BY (day STRING, class_name STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
STORED AS TEXTFILE;
  • 临时表
-- Session-scoped scratch table. Fix: the surrounding note calls this a
-- temporary table, but the original DDL created a permanent one; TEMPORARY
-- (Hive 0.14+) makes it disappear automatically when the session ends.
CREATE TEMPORARY TABLE use_resident (
    tel       STRING,
    bill_addr STRING,
    address   STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

本地数据写入hive表

-- Load a local CSV into the table, replacing any existing data (OVERWRITE).
load data local inpath '/data/use_resident.csv' overwrite into table use_resident;

-- Static-partition load. Fix: partition values are literals and must be quoted
-- for string partition columns — the original `class_name=val2` (bare
-- identifier) is a syntax error.
load data local inpath '/home/hadoop/tbclass.txt' overwrite into table tb_class_info partition(day='2017-06-01', class_name='val2');

查询结果存储

  • 查询结果写入普通表
insert overwrite table tel_imsi_9551 select distinct(tel, imsi) from sgs_data_partition where tac='9551'; 
  • 查询结果写入分区表
-- Dynamic-partition insert: the trailing SELECT columns (day, tac) feed the
-- PARTITION (day, tac) spec in order; requires the dynamic-partition settings
-- from the top of these notes.
INSERT INTO TABLE sgs_data_part
PARTITION (day, tac)
SELECT
    tel,
    imsi,
    time,
    tac_cid AS day,
    tac
FROM sgs_data_src;
  • 查询结果写入本地文件
hive -e "user sgs;select a.imsi, b.tel_num, a.summary from user_summary a left outer join imsi_tel_9551 b on a.imsi=b.imsi;" >> user_summary.txt  

hive调用python的udf

udf文件

# encoding: utf-8
'''
@author: zcc
@license: (C) Copyright 2013-2017, Node Supply Chain Manager Corporation Limited.
@contact: ********@163.com
@software: pycharm
@file: hive_udf.py
@time: 8/25/17 9:00 AM
@desc:自定义hive函数处理合并后的用户信息
'''

import sys
from collections import Counter
for line in sys.stdin:
    imsi, tac_cids = line.strip().split('\t')
    tac_cids = tac_cids.split('|')
    #   单个用户16天内累计SGSIMSIAatach和SGSIMSIDetach次数小于10则剔除该用户
    if len(tac_cids) < 8: continue

    tac_cid_count = Counter(tac_cids)

    tac_cid_elements = list()

    for key, value in tac_cid_count.iteritems():
        if value <= 1: continue
        tac_cid_elements.append("{0}:{1}".format(key, value))

    print '%s\t%s' % (imsi, ','.join(tac_cid_elements))
-- Ship the Python script to the session, then stream every row of
-- user_info_merge_9551 through it; the script's tab-separated stdout is read
-- back as the two columns (imsi, tac_cids).
ADD FILE /home/hadoop/hive_udf.py;
INSERT OVERWRITE TABLE user_summary
SELECT TRANSFORM (*)
USING 'python hive_udf.py' AS (imsi, tac_cids)
FROM user_info_merge_9551;

清空表

truncate table tel_imsi_9551;

删除表

DROP TABLE [IF EXISTS] table_name;

删除数据库

 DROP DATABASE IF EXISTS userdb;

更改表的字段属性

ALTER TABLE table_one CHANGE old_name new_name net_type_name;

删除表的一个指定分区

alter table table_name drop partition (day=partition_name);

查询结果写入一张新表

CREATE TABLE table_name STORED AS SEQUENCEFILE SELECT * FROM src_table_name;
 posted on 2017-09-03 22:47  暴走小飞鼠  阅读(333)  评论(0编辑  收藏  举报