ARRAY
一组有序字段,字段的类型必须相同。Array(1,2)
create table hive_array(ip string, uid array<string>) row format delimited fields terminated by ',' collection items terminated by '|' stored as textfile;
加载数据
load data local inpath "/home/spark/software/data/hive_array.txt" overwrite into table hive_array;
hive_array.txt
192.168.1.1,www.baidu.com|www.google.com|www.qq.com 192.168.1.2,www.baidu.com|www.sina.com|www.sohu.com 192.168.1.3,www.qq.com|www.163.com|www.youku.com
select * from hive_array; 192.168.1.1 ["www.baidu.com","www.google.com","www.qq.com"] 192.168.1.2 ["www.baidu.com","www.sina.com","www.sohu.com"] 192.168.1.3 ["www.qq.com","www.163.com","www.youku.com"]
使用下标访问,下标从0开始:
select ip, uid[0] as id from hive_array; 192.168.1.1 www.baidu.com 192.168.1.2 www.baidu.com 192.168.1.3 www.qq.com
查看数据长度:
select size(uid) from hive_array; 3 3 3
数组查找:
select * from hive_array where array_contains(uid, "www.baidu.com"); 192.168.1.1 ["www.baidu.com","www.google.com","www.qq.com"] 192.168.1.2 ["www.baidu.com","www.sina.com","www.sohu.com"]
MAP
一组无序的键值对,键的类型必须是原子的,值可以是任何类型,同一个映射的键的类型必须相同,值的类型也必须相同。Map('a',1,'b',2)
create table hive_map(ts string, ip string, type string, logtype string, request Map<string,string>, response Map<string, string>) row format delimited fields terminated by '#' collection items terminated by '&' map keys terminated by '=' stored as textfile;
hive_map.txt
2014-03-03 12:22:34#127.0.0.1#get#amap#src=123&code=456&cookie=789#status=success&time=2s 2014-03-03 11:22:34#127.0.0.1#get#autonavi#src=123&code=456#status=success&time=2s&cookie=789
加载数据:
load data local inpath "/home/spark/software/data/hive_map.txt" overwrite into table hive_map;
查看表结构: desc hive_map
ts string None ip string None type string None logtype string None request map<string,string> None response map<string,string> None
查看所有字段:
select * from hive_map; 2014-03-03 12:22:34 127.0.0.1 get amap {"src":"123","code":"456","cookie":"789"} {"status":"success","time":"2s"} 2014-03-03 11:22:34 127.0.0.1 get autonavi {"src":"123","code":"456"} {"status":"success","time":"2s","cookie":"789"}
查看map中指定的字段:
select request['src'], request['code'], request['cookie'] from hive_map; 123 456 789 123 456 NULL
STRUCT
一组命名的字段,字段类型可以不同。 Struct('a',1,2,0)
create table hive_struct(ip string, user struct<name:string, age:int>) row format delimited fields terminated by '#' collection items terminated by ':' stored as textfile;
hive_struct.txt
192.168.1.1#zhangsan:40 192.168.1.2#lisi:50 192.168.1.3#wangwu:60 192.168.1.4#zhaoliu:70
加载数据:
load data local inpath "/home/spark/software/data/hive_struct.txt" overwrite into table hive_struct;
查询所有字段:
select * from hive_struct; 192.168.1.1 {"name":"zhangsan","age":40} 192.168.1.2 {"name":"lisi","age":50} 192.168.1.3 {"name":"wangwu","age":60} 192.168.1.4 {"name":"zhaoliu","age":70}
查询指定字段:
select user.name, user.age from hive_struct; zhangsan 40 lisi 50 wangwu 60 zhaoliu 70