hive splict, explode, lateral view, concat_ws
hive> create table arrays (x array<string>) > row format delimited fields terminated by '\001' > collection items terminated by '\002' > ; OK Time taken: 0.574 seconds hive> show tables; OK arrays jigou Time taken: 0.15 seconds, Fetched: 2 row(s) hive> show create table arrays; OK CREATE TABLE `arrays`( `x` array<string>) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\u0001' COLLECTION ITEMS TERMINATED BY '\u0002' STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' LOCATION 'hdfs://hdp1.hadoop.test:8020/apps/hive/warehouse/zhangchao.db/arrays' TBLPROPERTIES ( 'transient_lastDdlTime'='1441481876') Time taken: 0.399 seconds, Fetched: 13 row(s) hive> load data local inpath './arrays.text' into arrays; FAILED: ParseException line 1:44 missing TABLE at 'arrays' near '<EOF>' hive> load data local inpath './arrays.text' into table arrays; FAILED: SemanticException Line 1:23 Invalid path ''./arrays.text'': No files matching path file:/usr/java/arrays.text hive> load data local inpath '/home/zhangchao/arrays.text' into table arrays; Loading data to table zhangchao.arrays Table zhangchao.arrays stats: [numFiles=1, totalSize=10] OK Time taken: 1.322 seconds hive> select * from arrays; OK ["a","b"] ["c","d","e"] Time taken: 1.076 seconds, Fetched: 2 row(s)
hive> select explode(x) as xx from arrays; Query ID = zhangchao_20150906052727_87971c63-6a8a-4a15-9465-2564cf22c837 Total jobs = 1 Launching Job 1 out of 1 Status: Running (Executing on YARN cluster with App id application_1440440163499_0008) -------------------------------------------------------------------------------- VERTICES STATUS TOTAL COMPLETED RUNNING PENDING FAILED KILLED -------------------------------------------------------------------------------- Map 1 .......... SUCCEEDED 1 1 0 0 3 0 -------------------------------------------------------------------------------- VERTICES: 01/01 [==========================>>] 100% ELAPSED TIME: 11.95 s -------------------------------------------------------------------------------- OK a b c d e Time taken: 19.465 seconds, Fetched: 5 row(s)
hive> select concat_ws(',', '1','2','3','4') from arrays; OK 1,2,3,4 1,2,3,4 Time taken: 0.107 seconds, Fetched: 2 row(s) hive> select split(concat_ws(',', '1','2','3','4'),',') from arrays; OK ["1","2","3","4"] ["1","2","3","4"] Time taken: 0.128 seconds, Fetched: 2 row(s) hive> select split(concat_ws(',', '1','2','3','4'),',')[3] from arrays; OK 4 4 Time taken: 0.116 seconds, Fetched: 2 row(s)
hive> select explode(split(concat_ws(',', '1','2','3','4'),',')) from arrays; Query ID = zhangchao_20150906063535_e0067b77-0481-48aa-b3dc-055a7e6b6c3c Total jobs = 1 Launching Job 1 out of 1 Status: Running (Executing on YARN cluster with App id application_1440440163499_0012) -------------------------------------------------------------------------------- VERTICES STATUS TOTAL COMPLETED RUNNING PENDING FAILED KILLED -------------------------------------------------------------------------------- Map 1 .......... SUCCEEDED 1 1 0 0 1 0 -------------------------------------------------------------------------------- VERTICES: 01/01 [==========================>>] 100% ELAPSED TIME: 10.51 s -------------------------------------------------------------------------------- OK 1 2 3 4 1 2 3 4 Time taken: 11.757 seconds, Fetched: 8 row(s)
hive> select * ,sp from arrays lateral view explode(split(concat_ws(',','1','2','3','4'),',')) a as sp; Query ID = zhangchao_20150906064040_c275e654-c7d1-45d2-86a8-18fb5506d4f2 Total jobs = 1 Launching Job 1 out of 1 Status: Running (Executing on YARN cluster with App id application_1440440163499_0012) -------------------------------------------------------------------------------- VERTICES STATUS TOTAL COMPLETED RUNNING PENDING FAILED KILLED -------------------------------------------------------------------------------- Map 1 .......... SUCCEEDED 1 1 0 0 1 0 -------------------------------------------------------------------------------- VERTICES: 01/01 [==========================>>] 100% ELAPSED TIME: 5.94 s -------------------------------------------------------------------------------- OK ["a","b"] 1 1 ["a","b"] 2 2 ["a","b"] 3 3 ["a","b"] 4 4 ["c","d","e"] 1 1 ["c","d","e"] 2 2 ["c","d","e"] 3 3 ["c","d","e"] 4 4 Time taken: 6.906 seconds, Fetched: 8 row(s)
hive> select 'xx' ,sp from arrays lateral view explode(split(concat_ws(',','1','2','3','4'),',')) a as sp; Query ID = zhangchao_20150906064545_f1ce3669-80ff-45a5-a1a8-6211f56d77bd Total jobs = 1 Launching Job 1 out of 1 Status: Running (Executing on YARN cluster with App id application_1440440163499_0012) -------------------------------------------------------------------------------- VERTICES STATUS TOTAL COMPLETED RUNNING PENDING FAILED KILLED -------------------------------------------------------------------------------- Map 1 .......... SUCCEEDED 1 1 0 0 2 0 -------------------------------------------------------------------------------- VERTICES: 01/01 [==========================>>] 100% ELAPSED TIME: 7.20 s -------------------------------------------------------------------------------- OK xx 1 xx 2 xx 3 xx 4 xx 1 xx 2 xx 3 xx 4 Time taken: 8.23 seconds, Fetched: 8 row(s)
hive> select * from lateral_test; OK 999 Time taken: 0.087 seconds, Fetched: 1 row(s) hive> select * , sp from lateral_test lateral view explode(split(concat_ws(',','1','2','3','4','5'),',')) as sp; FAILED: ParseException line 1:104 extraneous input 'sp' expecting EOF near '<EOF>' hive> select * , sp from lateral_test lateral view explode(split(concat_ws(',','1','2','3','4','5'),',')) a as sp; Query ID = zhangchao_20150906065252_ed187ad7-b400-4b71-add9-c7dc005f4af1 Total jobs = 1 Launching Job 1 out of 1 Tez session was closed. Reopening... Session re-established. Status: Running (Executing on YARN cluster with App id application_1440440163499_0014) -------------------------------------------------------------------------------- VERTICES STATUS TOTAL COMPLETED RUNNING PENDING FAILED KILLED -------------------------------------------------------------------------------- Map 1 .......... SUCCEEDED 1 1 0 0 1 0 -------------------------------------------------------------------------------- VERTICES: 01/01 [==========================>>] 100% ELAPSED TIME: 7.21 s -------------------------------------------------------------------------------- OK 999 1 1 999 2 2 999 3 3 999 4 4 999 5 5 Time taken: 16.504 seconds, Fetched: 5 row(s)
hive> select lateral_test.* , sp from lateral_test lateral view explode(split(concat_ws(',','1','2','3','4','5'),',')) a as sp; Query ID = zhangchao_20150906065757_2db20ede-33d6-467f-886c-574a06995041 Total jobs = 1 Launching Job 1 out of 1 Status: Running (Executing on YARN cluster with App id application_1440440163499_0014) -------------------------------------------------------------------------------- VERTICES STATUS TOTAL COMPLETED RUNNING PENDING FAILED KILLED -------------------------------------------------------------------------------- Map 1 .......... SUCCEEDED 1 1 0 0 0 0 -------------------------------------------------------------------------------- VERTICES: 01/01 [==========================>>] 100% ELAPSED TIME: 7.01 s -------------------------------------------------------------------------------- OK 999 1 999 2 999 3 999 4 999 5 Time taken: 8.257 seconds, Fetched: 5 row(s)