瞌睡中的葡萄虎

博客园 首页 新随笔 联系 订阅 管理

ARRAY

一组有序字段,字段的类型必须相同。Array(1,2)

create table hive_array(ip string, uid array<string>)
row format delimited 
fields terminated by ','
collection items terminated by '|'
stored as textfile;

加载数据

load data local inpath "/home/spark/software/data/hive_array.txt" overwrite into table hive_array;

 

hive_array.txt

192.168.1.1,www.baidu.com|www.google.com|www.qq.com
192.168.1.2,www.baidu.com|www.sina.com|www.sohu.com
192.168.1.3,www.qq.com|www.163.com|www.youku.com
select * from hive_array;                                                                         
192.168.1.1     ["www.baidu.com","www.google.com","www.qq.com"]
192.168.1.2     ["www.baidu.com","www.sina.com","www.sohu.com"]
192.168.1.3     ["www.qq.com","www.163.com","www.youku.com"]

使用下标访问,下标从0开始:

select ip, uid[0] as id from hive_array;
192.168.1.1     www.baidu.com
192.168.1.2     www.baidu.com
192.168.1.3     www.qq.com

查看数据长度:

select size(uid) from hive_array;
3
3
3

数组查找:

select * from hive_array where array_contains(uid, "www.baidu.com");
192.168.1.1     ["www.baidu.com","www.google.com","www.qq.com"]
192.168.1.2     ["www.baidu.com","www.sina.com","www.sohu.com"]

 

MAP

一组无序的键值对,键的类型必须是原子的,值可以是任何类型,同一个映射的键的类型必须相同,值的类型也必须相同。Map('a',1,'b',2)

create table hive_map(ts string, ip string, type string, logtype string, request Map<string,string>, response Map<string, string>)
row format delimited fields terminated by '#'
collection items terminated by '&'
map keys terminated by '='
stored as textfile;

 hive_map.txt

2014-03-03 12:22:34#127.0.0.1#get#amap#src=123&code=456&cookie=789#status=success&time=2s
2014-03-03 11:22:34#127.0.0.1#get#autonavi#src=123&code=456#status=success&time=2s&cookie=789

加载数据:

load data local inpath "/home/spark/software/data/hive_map.txt" overwrite into table hive_map;

查看表结构: desc hive_map

ts                      string                  None                
ip                      string                  None                
type                    string                  None                
logtype                 string                  None                
request                 map<string,string>      None                
response                map<string,string>      None 

查看所有字段:

select * from hive_map;

2014-03-03 12:22:34     127.0.0.1       get     amap    {"src":"123","code":"456","cookie":"789"}       {"status":"success","time":"2s"}
2014-03-03 11:22:34     127.0.0.1       get     autonavi        {"src":"123","code":"456"}      {"status":"success","time":"2s","cookie":"789"}

查看map中指定的字段:

select request['src'], request['code'], request['cookie'] from hive_map;

123     456     789
123     456     NULL

 

STRUCT

一组命名的字段,字段类型可以不同。 Struct('a',1,2,0)

create table hive_struct(ip string, user struct<name:string, age:int>)
row format delimited fields terminated by '#'
collection  items terminated by ':'
stored as textfile;

hive_struct.txt

192.168.1.1#zhangsan:40
192.168.1.2#lisi:50
192.168.1.3#wangwu:60
192.168.1.4#zhaoliu:70

加载数据:

load data local inpath "/home/spark/software/data/hive_struct.txt" overwrite into table hive_struct;

查询所有字段:

select * from hive_struct;

192.168.1.1     {"name":"zhangsan","age":40}
192.168.1.2     {"name":"lisi","age":50}
192.168.1.3     {"name":"wangwu","age":60}
192.168.1.4     {"name":"zhaoliu","age":70}

查询指定字段:

select user.name, user.age from hive_struct;

zhangsan        40
lisi    50
wangwu  60
zhaoliu 70

 

posted on 2014-08-14 13:55  瞌睡中的葡萄虎  阅读(541)  评论(0编辑  收藏  举报