Hive: creating partitioned tables
#Create a partitioned table
CREATE TABLE if not exists data_center.test_partition (
id int,
name string,
age int
)
PARTITIONED BY (date_id string)
row format delimited fields terminated by ','
stored as textfile
#The LOCATION clause is optional
LOCATION
'hdfs://master:9000/user/hive/warehouse/data_center.db/test_partition';
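A quick way to double-check the table and its partition column (nothing beyond the table name above is assumed):
DESCRIBE FORMATTED data_center.test_partition;
SHOW PARTITIONS data_center.test_partition;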
#Add partitions
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-06') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-06';
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-07') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07';
alter table tmp.soon_tbl_address add if not exists partition(date_id='2017-06-08') location 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-08';
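If the date_id=... directories already exist under the table's location, an alternative to adding partitions one by one is to let Hive discover them all at once:
MSCK REPAIR TABLE tmp.soon_tbl_address;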
#Insert data (writing compressed output):
set hive.exec.compress.output=true;
set mapred.output.compress=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
set io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec;
SET mapred.output.compression.type=BLOCK;
insert overwrite table seqfile_table select * from textfile_table;
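To see what the insert actually wrote, list the target table's directory; the path below is an assumption based on the default warehouse layout used elsewhere in these notes, and for a SequenceFile table the gzip/BLOCK compression lives inside the files rather than in a .gz suffix:
hadoop fs -ls /user/hive/warehouse/seqfile_table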
Column schema of soon_tbl_address (all columns are of type string): pass_address_id, order_id, address_type, receiver, receiver_tel, zip_code, province, city, district, address, x_coordinate, y_coordinate, del_flg, create_time, create_user, update_time, update_user, address_order, midway_order_status, street, order_type, out_order_id, poi_title
Copy the table structure
create table tmp.soon_tbl_address like default.soon_tbl_address;
#Copy data into tmp.soon_tbl_address, one partition at a time (the selects below read from ods.soon_tbl_address)
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-05') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 3;
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-06') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 6;
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-07') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 9;
insert overwrite table tmp.soon_tbl_address partition(date_id='2017-06-08') select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address limit 12;
Columns of tmp.soon_tbl_address (date_id is the partition column): pass_address_id, order_id, address_type, receiver, receiver_tel, zip_code, province, city, district, address, x_coordinate, y_coordinate, del_flg, create_time, create_user, update_time, update_user, address_order, midway_order_status, street, order_type, out_order_id, poi_title, etl_update, date_id
#Example partition directory and data file paths on HDFS
/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07
hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-02-08/000000_0
#Drop a partition and its data
ALTER TABLE soon_tbl_address DROP PARTITION (date_id='2017-06-07');
#Delete a table's data while keeping the table
insert overwrite table tmp.soon_tbl_address select pass_address_id,order_id,address_type,receiver,receiver_tel,zip_code,province,city,district,address,x_coordinate,y_coordinate,del_flg,create_time,create_user,update_time,update_user,address_order,midway_order_status,street,order_type,out_order_id,poi_title,etl_update from ods.soon_tbl_address where 1=0;
truncate table tmp.soon_tbl_address;
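TRUNCATE can also be limited to a single partition instead of clearing the whole table (a sketch using one of the partitions added above):
truncate table tmp.soon_tbl_address partition (date_id='2017-06-06');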
#List files in HDFS
[ds@master ~]$ hadoop fs -ls /user/hive/warehouse/tmp.db/soon_tbl_address
17/06/09 16:02:20 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 5 items
drwxr-xr-x - ds supergroup 0 2017-06-09 11:07 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-06
drwxr-xr-x - ds supergroup 0 2017-06-09 11:08 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07
drwxr-xr-x - ds supergroup 0 2017-06-09 14:23 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
drwxr-xr-x - ds supergroup 0 2017-06-09 14:56 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__b6a456c4_73b6_4fec_8dfa_ddbe38f55856
drwxr-xr-x - ds supergroup 0 2017-06-09 11:08 /user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-08
#Delete HDFS files and directories recursively
[ds@master ~]$ hadoop fs -rmr hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
rmr: DEPRECATED: Please use 'rm -r' instead.
17/06/09 16:03:24 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
17/06/09 16:03:25 INFO fs.TrashPolicyDefault: Moved: 'hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2' to trash at: hdfs://master:9000/user/ds/.Trash/Current/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__3bb2a9f8_441b_4a21_975b_fe26c1fb39c2
[ds@master ~]$
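When the trash copy is not wanted, -skipTrash removes the directory immediately, and -expunge empties the current user's trash (a sketch reusing the other leftover directory from the listing above):
hadoop fs -rm -r -skipTrash hdfs://master:9000/user/hive/warehouse/tmp.db/soon_tbl_address/date_id=2017-06-07__b6a456c4_73b6_4fec_8dfa_ddbe38f55856
hadoop fs -expunge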
#Export data to a local directory
INSERT OVERWRITE LOCAL DIRECTORY '/home/users/my' SELECT * FROM tmp.soon_tbl_address
#Load a local .txt file (on Linux) into a specific table partition
load data LOCAL INPATH '/home/users/my/test_table.txt' OVERWRITE into table data_center.test_table PARTITION(date='2017-06-07');
load data LOCAL INPATH '/home/users/my/test_partition.txt' OVERWRITE into table tmp.temp_test_partition PARTITION(date='2017-06-13');
#Create a non-partitioned table by hand
CREATE TABLE IF NOT EXISTS data_center.test_no_partition(
id int ,
name string ,
age int
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
stored as textfile;
Create an empty table with the same structure
CREATE TABLE data_center.test_no_partition LIKE tmp.test_no_partition;
#Change a column's name and type
ALTER TABLE test_no_partition CHANGE ip id int;
#Drop a table and its data
drop table test_no_partition;
#Delete the table's data while keeping the table structure
#(DELETE FROM only works on transactional/ACID tables, see the sketch below; on a plain textfile table use TRUNCATE or INSERT OVERWRITE instead)
delete from test_no_partition;
insert overwrite table test_no_partition select id,name,age from test_no_partition where id=3;
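A minimal sketch of a table that would accept DELETE, assuming the cluster has ACID configured (DbTxnManager and concurrency support enabled); the table name test_acid is hypothetical:
CREATE TABLE tmp.test_acid (
id int,
name string,
age int
)
CLUSTERED BY (id) INTO 2 BUCKETS
STORED AS ORC
TBLPROPERTIES ('transactional'='true');
delete from tmp.test_acid where id=3;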
#Grant permissions on a Hive database directory
hdfs dfs -chmod 777 hdfs://master:9000/user/hive/warehouse/tmp.db
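To apply the same permissions to the table and partition directories already sitting under the database directory, -R makes the change recursive:
hdfs dfs -chmod -R 777 hdfs://master:9000/user/hive/warehouse/tmp.db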
#Rename an HDFS directory
hadoop fs -mv hdfs://master:9000/user/hive/warehouse/ods.db/test_table__cb086c0f_88ee_4623_938c_311a1e717c8a hdfs://master:9000/user/hive/warehouse/ods.db/test_table
CREATE TABLE ods.test_table_tmp(
id int,
name_path string,
parent_path string
)
row format delimited fields terminated by ','
stored as textfile
#The LOCATION clause is optional
LOCATION
'hdfs://master:9000/user/hive/warehouse/ods.db/test_table_tmp'
#Export to a local file:
insert overwrite local directory '/home/ds/user/my' select * from ods.test_table;
#View the contents of an HDFS file
hdfs dfs -cat hdfs://master:9000/user/hive/warehouse/ods.db/test_table/000000_0__c2175f22_ec6f_4641_a17d_fdc37084713a
#Staging table used below for incremental updates (here date_id is an ordinary column, not a partition column)
CREATE TABLE `tmp.temp_test_partition`(
`id` int,
`name` string,
`age` int,
`date_id` string)
row format delimited fields terminated by ','
stored as textfile;
#Full DDL of data_center.test_partition (output in SHOW CREATE TABLE style)
CREATE TABLE `data_center.test_partition`(
`id` int,
`name` string,
`age` int)
PARTITIONED BY ( `date_id` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES ( 'field.delim'=',', 'serialization.format'=',')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'hdfs://master:9000/user/hive/warehouse/data_center.db/test_partition'
TBLPROPERTIES ( 'transient_lastDdlTime'='1497492633');
#Create a partitioned table by hand
CREATE TABLE if not exists data_center.test_partition (
id int,
name string,
age int
)
PARTITIONED BY (date string)
row format delimited fields terminated by ','
stored as textfile
#Modify a partition (change its location or rename it)
ALTER TABLE table_name PARTITION (dt='2008-08-08') SET LOCATION "new location";
ALTER TABLE data_center.test_partition PARTITION (date_id='2008-08-08') RENAME TO PARTITION (dt='20080808');
#Dynamic partition insert
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;
set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
insert overwrite table data_center.test_partition partition (date_id) select * from data_center.test_partition where name in ('ccc','lisi');
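Depending on the Hive version, a dynamic partition insert like this one may also need dynamic partitioning switched on explicitly (it defaults to false in some older releases):
set hive.exec.dynamic.partition=true;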
#Incremental partition update
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;
set mapreduce.reduce.shuffle.input.buffer.percent=0.5;
insert overwrite table data_center.test_partition partition(date_id)
select tmp.id,tmp.name,tmp.age,tmp.date_id from tmp.temp_test_partition tmp
union all select a.* from data_center.test_partition a
left outer join tmp.temp_test_partition b on a.id=b.id where b.id is null and a.date_id in ('2017-06-14','2017-06-15','2017-06-16')
#Load data exported from MySQL (/home/ds/users/wcy/mysql_to_hdfs_update.txt) into a copy table
CREATE TABLE zjs_cust_endurance_time_copy (
id int,
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date
)
row format delimited fields terminated by ','
stored as textfile
#Load local .txt data into the table
load data LOCAL INPATH '/home/ds/users/wcy/mysql_to_hdfs_update.txt'
OVERWRITE into table data_center.zjs_cust_endurance_time_copy
Existing data in the HDFS target table:
106 2017-06-21 120001 1062.22 2017-05-27
107 2017-06-21 310001 1387.85 2017-05-27
108 2017-06-21 111111 100.0 2017-05-27
Data extracted from MySQL:
106 2017-04-01 120000 2062.22 2017-04-27
107 2017-04-01 310000 2387.85 2017-04-27
Data in the HDFS target table after the run:
108 2017-06-21 111111 100.0 2017-05-27
106 2017-04-01 120000 2062.22 2017-04-27
107 2017-04-01 310000 2387.85 2017-04-27
#Variant: the table structure has no id column up front; id is added by hand as the last column
CREATE TABLE zjs_cust_endurance_time_copy (
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date,
id int
)
row format delimited fields terminated by ','
stored as textfile
Existing data in the HDFS target table:
2017-06-21 120001 1062.22 2017-05-27 106
2017-06-21 310001 1387.85 2017-05-27 107
2017-06-21 111111 100.0 2017-05-27 108
Data extracted from the MySQL table:
2017-04-01 120000 2062.22 2017-04-27 06:46:30.0 106
2017-04-01 310000 2387.85 2017-04-27 06:46:30.0 107
2017-04-01 320100 2027.64 2017-04-27 06:46:30.0 108
2017-04-01 330100 3763.16 2017-04-27 06:46:30.0 109
Data in the HDFS target table after the run:
2017-04-01 120000 2062.22 2017-04-27 106
2017-04-01 310000 2387.85 2017-04-27 107
2017-04-01 320100 2027.64 2017-04-27 108
2017-04-01 330100 3763.16 2017-04-27 109
#Load local .txt data into a partitioned table
CREATE TABLE data_center.zjs_cust_endurance_time_copy_part (
id int,
computer_time string ,
user_city_code string ,
avg_patient_time double,
update_time date
)
PARTITIONED BY (date string)
row format delimited fields terminated by ','
stored as textfile
load data LOCAL INPATH '/home/ds/users/wcy/mysql_to_hdfs_partupdate.txt'
OVERWRITE into table data_center.zjs_cust_endurance_time_copy_part PARTITION(date='2017-06-21')
Existing data in the HDFS target table:
select * from data_center.zjs_cust_endurance_time_copy_part;
108 2017-06-21 111111 100.0 2017-05-27 2017-06-21
106 2017-04-01 222222 99.0 2017-04-27 2017-06-21
Data extracted from MySQL:
106 2017-04-01 120000 2062.22 2017-04-27 06:46:30.0
107 2017-04-01 310000 2387.85 2017-04-27 06:46:30.0
108 2017-04-01 320100 2027.64 2017-04-27 06:46:30.0
insert overwrite table data_center.zjs_cust_endurance_time_copy_part partition(date)
select tmp.id,tmp.computer_time,tmp.user_city_code,tmp.avg_patient_time,tmp.update_time,tmp.date
from tmp.temp_zjs_cust_endurance_time_copy_part tmp
union all select a.* from data_center.zjs_cust_endurance_time_copy_part a
left outer join tmp.temp_zjs_cust_endurance_time_copy_part b
on a.id=b.id where b.id is null
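As with the earlier incremental example, this dynamic partition insert assumes the session has the dynamic partition settings applied; repeating the relevant ones here:
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;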
#Download HDFS files to the local filesystem
hadoop fs -get <source path> <local directory>
hadoop fs -get hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/word_category_weights/000000_0 /home/ds/mydata
hadoop fs -get hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/zjs_user_result/000000_0 /home/ds/mydata/zjs_user_result
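When a table directory holds several 000000_0-style files, -getmerge concatenates them into a single local file (a sketch reusing the path above; the local file name is hypothetical):
hadoop fs -getmerge hdfs://SAD-HDP-003:9000/user/hive/warehouse/data_center.db/zjs_user_result /home/ds/mydata/zjs_user_result_merged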
Download the file to the local workstation (sz, from lrzsz):
sz /home/ds/mydata/000000_0
create table data_center.word_category_weights(
keyword string,
category_id string,
weight_auto double,
category_name string,
del int
)
row format delimited fields terminated by ','
stored as textfile;
load data LOCAL INPATH '/home/users/wcy/000000_0'
OVERWRITE into table data_center.word_category_weights
load data LOCAL INPATH '/home/users/wcy/000000_0_more_data'
OVERWRITE into table data_center.word_category_weights
load data LOCAL INPATH '/home/users/wcy/zjs_user_result' OVERWRITE into table data_center.zjs_user_result
load data LOCAL INPATH '/home/users/wcy/goodsinfo' OVERWRITE into table data_center.goodsinfo
#Start the Hive metastore and HiveServer2 services
hive --service metastore &
hive --service hiveserver2 &
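A quick check that both services came up: each shows as a RunJar process in jps, and a JDBC client can connect to HiveServer2 (the host and default port 10000 below are assumptions about this cluster):
jps | grep RunJar
beeline -u jdbc:hive2://master:10000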
#DataX job log excerpt: the CSV reader falls back to its default config, and the RDBMS writer repeatedly rolls back batch writes and retries row by row because of "Unknown command"
UnstructuredStorageReaderUtil - CsvReader使用默认值[{"captureRawRecord":true,"columnCount":0,"comment":"#","currentRecord":-1,"delimiter":",","escapeMode":1,"headerCount":0,"rawRecord":"","recordDelimiter":"\u0000","safetySwitch":true,"skipEmptyRecords":true,"textQualifier":"\"","trimWhitespace":true,"useComments":false,"useTextQualifier":true,"values":[]}],csvReaderConfig值为[null]
2017-06-26 16:39:24.963 [0-0-0-writer] WARN CommonRdbmsWriter$Task - 回滚此次写入, 采用每次写入一行方式提交. 因为:Unknown command
2017-06-26 16:39:44.629 [job-0] INFO StandAloneJobContainerCommunicator - Total 2560 records, 45917 bytes | Speed 4.48KB/s, 256 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.006s | All Task WaitReaderTime 0.138s | Percentage 0.00%
2017-06-26 16:39:45.393 [0-0-0-writer] WARN CommonRdbmsWriter$Task - 回滚此次写入, 采用每次写入一行方式提交. 因为:Unknown command
2017-06-26 16:39:53.884 [0-0-0-writer] WARN CommonRdbmsWriter$Task - 回滚此次写入, 采用每次写入一行方式提交. 因为:Unknown command
2017-06-26 16:39:54.630 [job-0] INFO StandAloneJobContainerCommunicator - Total 4608 records, 81924 bytes | Speed 3.52KB/s, 204 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 11.211s | All Task WaitReaderT
UnstructuredStorageReaderUtil - CsvReader使用默认值[{"captureRawRecord":true,"columnCount":0,"comment":"#","currentRecord":-1,"delimiter":",","escapeMode":1,"headerCount":0,"rawRecord":"","recordDelimiter":"\u0000","safetySwitch":true,"skipEmptyRecords":true,"textQualifier":"\"","trimWhitespace":true,"useComments":false,"useTextQualifier":true,"values":[]}],csvReaderConfig值为[null]
2017-06-27 10:37:49.949 [0-0-0-writer] WARN CommonRdbmsWriter$Task - 回滚此次写入, 采用每次写入一行方式提交. 因为:Unknown command
#Remove a leftover .hive-staging directory
hadoop fs -rm hdfs://SAD-HDP-003:9000/user/hive/warehouse/ods.db/soon_tbl_address_part/.hive-staging_hive_2017-06-23_18-23-08_102_5432451293100937443-1
CREATE TABLE goodsinfo(
create_time string,
deal_code bigint ,
store_code bigint,
category_code1 bigint,
category_code2 bigint,
category_code3 bigint,
category_code4 bigint,
goods_code bigint,
term_code int,
term_name string,
store_name string,
goods_name string,
deal_name string,
goods_image string,
goods_pv int,
goods_uv int,
goods_pv_total int,
time_total int,
jump2_pv int,
collect_total int,
add_total int,
pay_total int,
pay_amount decimal(10,2))
row format delimited fields terminated by ','
stored as textfile;
CREATE TABLE zjs_user_result_1(
compute_day string,
member_id string,
mobile string,
uc_user_id string,
real_name string,
register_time string,
order_city_code string,
city_name string,
manage_amount double,
deduction double,
freight double,
actual_paid double,
insurance_amount double,
complete_orders string,
deduction_all double,
freight_all double,
complete_orders_all string,
last_complete_time string,
order_time string,
cancel_order_before string,
cancel_order_after string,
order_nums string,
invite_peoples string,
invite_peoples_all string,
share_orders string,
share_orders_all string)
row format delimited fields terminated by ','
stored as textfile;