创建数据库

CREATE DATABASE [IF NOT EXISTS] userdb;

建表

普通建表：

create table user_summary_with_tel (
bill_time string comment '订单时间', 
tel string  comment '订购号码',
bill_addr string comment '配送地址',
county string comment '区县',
street string  comment '街道', 
community string  comment '社区', 
campus string  comment '校园', 
company string  comment '公司', 
building string  comment '大厦',
residential_district string  comment '住宅小区', 
industrial_district string  comment '工业区', 
city_village string  comment '城中村', 
address string  comment '地址名称', 
attribute string  comment '属性', 
return_address string  comment '返回地址',
ordermonth string)
ROW FORMAT DELIMITED 
FIELDS TERMINATED BY ',' 
STORED AS TEXTFILE;

分区表

create table tb_class_info
(id int,
createtime timestamp ,
modifytime timestamp)
PARTITIONED BY (day string, class_name string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
STORED AS TEXTFILE;

临时表

CREATE TABLE use_resident(
tel string, 
bill_addr string, 
address string)
ROW FORMAT DELIMITED 
FIELDS TERMINATED BY ',' 
STORED AS TEXTFILE;

本地数据写入hive表

load data local inpath '/data/use_resident.csv' overwrite into table use_resident;

load data local inpath '/home/hadoop/tbclass.txt' overwrite into table tb_class_info partition(day='2017-06-01', class_name=val2);

查询结果存储

查询结果写入普通表

insert overwrite table tel_imsi_9551 select distinct(tel, imsi) from sgs_data_partition where tac='9551';

查询结果写入分区表

INSERT INTO TABLE sgs_data_part
PARTITION (day, tac)
SELECT tel, imsi, time, tac_cid day, tac
FROM sgs_data_src;

查询结果写入本地文件

hive -e "user sgs;select a.imsi, b.tel_num, a.summary from user_summary a left outer join imsi_tel_9551 b on a.imsi=b.imsi;" >> user_summary.txt

hive调用python的udf

udf文件

# encoding: utf-8
'''
@author: zcc
@license: (C) Copyright 2013-2017, Node Supply Chain Manager Corporation Limited.
@contact: ********@163.com
@software: pycharm
@file: hive_udf.py
@time: 8/25/17 9:00 AM
@desc:自定义hive函数处理合并后的用户信息
'''

import sys
from collections import Counter
for line in sys.stdin:
    imsi, tac_cids = line.strip().split('\t')
    tac_cids = tac_cids.split('|')
    #   单个用户16天内累计SGSIMSIAatach和SGSIMSIDetach次数小于10则剔除该用户
    if len(tac_cids) < 8: continue

    tac_cid_count = Counter(tac_cids)

    tac_cid_elements = list()

    for key, value in tac_cid_count.iteritems():
        if value <= 1: continue
        tac_cid_elements.append("{0}:{1}".format(key, value))

    print '%s\t%s' % (imsi, ','.join(tac_cid_elements))

add file /home/hadoop/hive_udf.py;
insert overwrite table user_summary select transform(*) using 'python hive_udf.py' as (imsi, tac_cids) from user_info_merge_9551;

清空表

truncate table tel_imsi_9551;

删除表

DROP TABLE [IF EXISTS] table_name;

删除数据库

 DROP DATABASE IF EXISTS userdb;

更改表的字段属性

ALTER TABLE table_one CHANGE old_name new_name net_type_name;

删除表的一个指定分区

alter table table_name drop partition (day=partition_name);

查询结果写入一张新表

CREATE TABLE table_name STORED AS SEQUENCEFILE SELECT * FROM src_table_name;

posted on 2017-09-03 22:47 暴走小飞鼠阅读(333) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

相关显示参数设置

创建数据库

建表

本地数据写入hive表

查询结果存储

hive调用python的udf

清空表

删除表

删除数据库

更改表的字段属性

删除表的一个指定分区

查询结果写入一张新表

公告