@malloc

导航

HIVE小问题随记

1、float 与 double 类型比较:Hive、Java 等遵循 IEEE 754(IEEE 二进制浮点数算术标准)的系统中,float 0.3 实际存储的值略大于 0.3,因此 float 0.3 大于 double 0.3;Hive 存储浮点数建议使用 double 类型。

HIVE:
select cast(0.3 as float); -- 0.30000001192092896
JAVA:
        // Demonstrates why float 0.3 compares greater than double 0.3.
        public void floatTest(){
            float a = 0.3f;
            // prints: float :0.30000001192092896000
            System.out.printf("float :%.20f\n",a);
            // prints: double :0.30000000000000000000
            System.out.printf("double :%.20f\n",0.3);
            // The literal 0.3 is a double; for the comparison, the float `a` is
            // widened to double (its stored value 0.30000001192092896 is kept,
            // padded with zeros), so a > 0.3 evaluates to true.
            System.out.println("float>double:"+(a>0.3));
    }

 2、

常用命令:
-- Inspect table metadata and DDL.
desc formatted test;
show create table test;

show partitions test;
-- Non-strict partition mode: allows all partition columns of an INSERT to be dynamic.
-- (Original used '//' comments, which are not valid HiveQL; Hive uses '--'.)
set hive.exec.dynamic.partition.mode=nonstrict;
-- Enforce bucketing on insert so data lands in the declared number of buckets.
set hive.enforce.bucketing=true;

-- Add a column to a partitioned table. CASCADE propagates the metadata change to
-- all existing partitions; without it the new column reads as NULL in old partitions.
ALTER TABLE test  ADD COLUMNS (newColumn String) CASCADE;

-- Partitioned + bucketed Parquet table.
-- Fixes vs. original:
--   * LOCATION was dangling after the statement's terminating semicolon; it must be
--     part of the CREATE TABLE statement.
--   * ROW FORMAT SERDE with text-delimiter properties (field.delim, escape.delim,
--     serialization.null.format) is meaningless for Parquet — those apply to
--     text-based serdes such as LazySimpleSerDe. STORED AS PARQUET already implies
--     ParquetHiveSerDe with the correct input/output formats.
create table test(
pnl_id string,
event_timekey string
)
partitioned by (factory string)
clustered by (pnl_id) into 40 buckets
stored as parquet
location 'hdfs://nameservice1/user/hive/warehouse/mdw.db/test1';

LazySimpleSerDe默认\N表示空
以\n作为换行符:'escape.delim'='\n'和'escape.delim'='\\' 等效
以\t或#作为字段分隔符:'field.delim'='\t'、'field.delim'='#'
若已设置'serialization.null.format'='NULL',字段分隔符之间(##)为空,被读取为空字符串''
HBASE为空值,映射为hive表 待测试


-- Parse HTTP (Apache combined-format) request logs with RegexSerDe.
-- NOTE: this clause belongs inside a CREATE TABLE statement (one STRING column
-- per capture group). The original snippet was mangled by copy-paste: smart
-- quotes, a missing WITH SERDEPROPERTIES keyword, '*' quantifiers stripped from
-- the regex, and '$' dropped from the %N$s format specifiers.
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
  "input.regex" = "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") (-|[0-9]*) (-|[0-9]*)(?: ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\"))?",
  "output.format.string" = "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s"
) STORED AS TEXTFILE;


-- HIVE: create an external table mapped onto an existing HBase table.
-- Fixes vs. original:
--   * ROW FORMAT SERDE must not be combined with STORED BY — the storage handler
--     supplies its own serde (HBaseSerDe) automatically.
--   * Added the missing terminating semicolon.
-- ":key" maps the first Hive column to the HBase row key.
CREATE EXTERNAL TABLE test (rowkey string, area_code string, eqp_id string)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,CF:AREA_CODE,CF:EQP_ID")
TBLPROPERTIES ("hbase.table.name" = "MDW:DWR_UNIT");
-- HBase column names are case-sensitive; no spaces allowed between mapped columns.




-- Text table parsed by RegexSerDe: capture group i of input.regex populates
-- column i (three groups -> c0, c1, c2).
create table test(
c0 string comment '测试',
c1 string,
c2 string
)row format serde 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
with serdeproperties (
'serialization.null.format'='NULL',
-- \\[ ... \\] match literal brackets; (.*) and (\\d+) are the capture groups
'input.regex' = 'bduid\\[(.*)\\]uid\\[(\\d+)\\]uname\\[(.*)\\]',
-- used only when serializing rows back to text (e.g. INSERT ... SELECT into this table)
'output.format.string' = '%1$s\t%2$s'
) stored as textfile;
--开启事务(ACID 事务表要求 ORC 存储格式,serde 的 input/output format 也需为 ORC)
STORED AS ORC TBLPROPERTIES ('transactional'='true');

-- Replace a table's serde and set serde properties in one statement.
alter table test set serde 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' with serdeproperties('serialization.null.format'='NULL');
-- Set individual serde properties on an existing table.
ALTER TABLE test SET SERDEPROPERTIES ('serialization.encoding'='UTF-8');
alter table test set serdeproperties('serialization.null.format'='NULL');
-- Serde properties can also be set per-partition.
-- (Fixed: removed the stray leading space in the partition value, and added the
-- missing semicolon on the statement above.)
alter table test partition(dt='xxxxx') set serdeproperties('field.delim'='\t');
外部表的serdeproperties
--JsonSerDe、RegexSerDe jar包需要放到hive lib路径下
<property>
    <name>hive.aux.jars.path</name>
    <value>file:///home/zxq/apache-hive-2.1.1-bin/lib/hive-contrib-2.1.1.jar</value>
</property>

HIVE 0.14之后支持事务,之前可使用覆盖原表的方式进行删除。(https://www.cnblogs.com/rocky-AGE-24/p/7154499.html)
1.2之后可以指定insert 部分字段
-- "Delete" by overwriting the table with the rows to KEEP — the WHERE condition
-- must be the NEGATION of the delete predicate.
insert overwrite table t_table1 select * from t_table1 where XXXX;
-- Table columns are C1, C2, C3. VALUES are positional, so this sets
-- C1='111', C2='' (empty string — not NULL) and C3='222'.
insert into test values('111','','222');  
-- Load from a local file.
load data local inpath '/home/0' into table test2;
-- Export query results to a (local or HDFS) directory.
INSERT OVERWRITE [LOCAL] DIRECTORY directory1 SELECT ... FROM ...
-- Syntax templates for INSERT ... SELECT, optionally into partitions;
-- the last form lists target columns (supported since Hive 1.2).
INSERT OVERWRITE TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement;
INSERT INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement;
INSERT INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)](z, y) select_statement1 FROM from_statement;

posted on 2019-06-19 10:20  malloc+  阅读(665)  评论(0编辑  收藏  举报