性能测试
./start-thriftserver.sh --hiveconf hive.server2.thrift.port=10009
hive --service metastore &
hive --service hiveserver2 &
./beeline
!connect jdbc:hive2://node6:10009
1: ./spark-sql --master yarn-client
2: ./spark-sql --master yarn-client --total-executor-cores 20 --driver-memory 1g --executor-memory 6g --executor-cores 6 --num-executors 100 --conf spark.default.parallelism=1000 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3
3: ./spark-sql --master yarn-client --total-executor-cores 20 --driver-memory 1g --executor-memory 6g --executor-cores 6 --num-executors 200 --conf spark.default.parallelism=1200 --conf spark.storage.memoryFraction=0.4 --conf spark.shuffle.memoryFraction=0.4
./spark-sql --master yarn-client --total-executor-cores 20 --driver-memory 1g --executor-memory 6g --executor-cores 6 --num-executors 200 --conf spark.default.parallelism=1200 --conf spark.storage.memoryFraction=0.4 --conf spark.shuffle.memoryFraction=0.4 --conf spark.sql.shuffle.partitions=300
---------------------------------
select sum(num) from hivetest2;
1,93m
2,27m
3,23 20 18 16 16 18 16
---------------------------------
select sum(num) from hivetest2 where year='2019' and month='04';
1,6.3
2,2.6m
3,2.6m 2.6 2.1 2.1 2.2 2.2
---------------------------------
select sum(num) year,month from hivetest2 where year='2019' group by year,month;
1,37m
2,15.5m
3,15 12.6 12.8 12.5
---------------------------------
select count(*) from hivetest2 where year='2019' and month='04';
1,7m
2,2.1m
3,1.39 1.34 1.24 1.26 1.22
---------------------------------
select count(*) from hivetest2 where year='2019' ;
1,28m
2,7m
3,7.8 8.3 7.9 8
---------------------------------
select count(*) from hivetest2 where year='2016' and month='01';
1,3.7m
2,1.3m
3,1.1 1.0 1.2 1.04 1.2 1.05
select sum(num) from hivetest2 where year='2016' and month='01';
1,4.7m
2,1.8m
3,1.84 2.1 2.1 1.8 1.7 2.0
------------------------------------------------------
select tab.b,tab.c,tab.a,tab2.bb,tab2.cc,tab2.aa from (select sum(num) a,year b,month c from hivetest2 where year='2019' and month='04' group by year,month) tab,(select sum(num) aa,year bb,month cc from hivetest2 where year='2016' and month='04' group by year,month) tab2 where tab.c=tab2.cc;
2019 04 312499987500000 2016 04 49999995000000
16/09/28 07:47:12 INFO StatsReportListener: 16.0 B 16.0 B 16.0 B 16.0 B 16.0 B 16.0 B 16.0 B 16.0 B 70.0 B
Time taken: 6.309 seconds, Fetched 1 row(s)
select tab.b,tab.c,tab.a,tab2.bb,tab2.cc,tab2.aa from (select count(*) a,year b,month c from hivetest2 where year='2019' and month='04' group by year,month) tab,(select count(*) aa,year bb,month cc from hivetest2 where year='2016' and month='04' group by year,month) tab2 where tab.c=tab2.cc;
2019 04 25000001 2016 04 10000001
Time taken: 5.41 seconds, Fetched 1 row(s)
select count(*) from hivetest2 where year='2019' and month='04';
25000001
Time taken: 2.254 seconds, Fetched 1 row(s)
SELECT * FROM DEALING CREATE PROCEDURE PROC_DEALING AS DECLARE @DEALING_BUY TABLE ( ACCOUNT_CODE VARCHAR(32), FUND_CODE VARCHAR(32), STATUS VARCHAR(1), AMOUNT INT, TOTAL FLOAT ) INSERT INTO @DEALING_BUY(ACCOUNT_CODE,FUND_CODE,STATUS,AMOUNT,TOTAL) SELECT DEALING.ACCOUNT_CODE,DEALING.FUND_CODE,DEALING.STATUS,SUM(DEALING.AMOUNT),SUM(AMOUNT*VALUE) FROM DEALING WHERE STATUS='B' GROUP BY DEALING.ACCOUNT_CODE,DEALING.FUND_CODE,DEALING.STATUS
./spark-sql --master yarn-client --total-executor-cores 20 --driver-memory 1g --executor-memory 6g --executor-cores 6 --num-executors 200 --conf spark.default.parallelism=1200 --conf spark.storage.memoryFraction=0.4 --conf spark.shuffle.memoryFraction=0.4 --conf spark.sql.shuffle.partitions=300
---------------------------------------------------------------------------
select sum(num) year,month from hivetest2 where year='2019' group by year,month;
31m
./spark-sql --master yarn-client --total-executor-cores 20 --driver-memory 1g --executor-memory 6g --executor-cores 6 --num-executors 200 --conf spark.default.parallelism=1200 --conf spark.storage.memoryFraction=0.4 --conf spark.shuffle.memoryFraction=0.4
--------------------------------------------------------------------------
select sum(num) year,month from hivetest2 where year='2019' group by year,month;
27m
CREATE INDEX hivetest1_index ON TABLE hivetest1(name) AS 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler' WITH deferred REBUILD IN TABLE hivetest1_index_table;
alter index hivetest1_index on hivetest1 rebuild;
show index on int_drgs_med_rec_fee;
drop index fee_index on int_drgs_med_rec_fee;
create table INT_DRGS_MED_REC_FEE
(COMPCODE STRING,
UPDATETIMESTR STRING,
AMOUNT DEICIMAL(10, 2),
CASE_CHARGE_KIND_ID STRING,
ID STRING,
MED_REC_ID STRING,
ZB06 STRING,
ZB02 int(3, 0) NULL,
AAE040 STRING)
row format delimited fields terminated by '\t'
COMPCODE STRING,UPDATETIMESTR STRING,AAE036 STRING,AAE401 STRING,AKC190 STRING,ID STRING,ZA01C STRING,ZA03 STRING,ZA2 STRING,ZB01 STRING,ZB02 int,ZB03 STRING,AAA01 STRING,AAA02C STRING,AAA03 STRING,AAA04 int,AAA05C STRING,AAA06C STRING,AAA07 STRING,AAA40 int,AAA42 int,AAA08C STRING,AAA14C STRING,AAA17C STRING,AAA18C STRING,AAA19 STRING,AAA45 STRING,AAA46 STRING,AAA47 STRING,AAA49 STRING,AAA50 STRING,AAA51 STRING,AAA20 STRING,AAA21C STRING,AAA22 STRING,AAA23C STRING,AAA24 STRING,AAA25 STRING,AAA26C STRING,AAA27 STRING,AAA28 STRING,AAA29 int,AA30 STRING,AAB01 STRING,AAB02C STRING,AAB03 STRING,AAB06C STRING,AAC01 STRING,AAC02C STRING,AAC03 STRING,AAC04 STRING,AAD01C STRING,ABA01C STRING,ABA01N STRING,ABC01C STRING,ABC01N STRING,ABC03C STRING,ABF01C STRING,ABF01N STRING,ABF04 STRING,ABG01C STRING,ABG01N STRING,AEB01 STRING,AEB02C STRING,AED01C STRING,AED02 STRING,AED03 STRING,AED04 STRING,AEE01 STRING,AEE02 STRING,AEE03 STRING,AEE04 STRING,AEE05 STRING,AEE07 STRING,AEE08 STRING,AEE10 STRING,AEG01C STRING,AEG02C STRING,AEJ01 int,AEJ02 int,AEJ03 int,AEJ04 int,ADA00 int,AEI01C STRING,AEJ05 int,AEJ06 int,AEL01 int,AEM01C STRING,AEM02 STRING,AEM03C STRING,AEM04 STRING,AEN01 int,ADA0001 DEICIMAL(10, 2) ,ADA01 DEICIMAL(10, 2) ,ADA02 DEICIMAL(10, 2) ,ADA03 DEICIMAL(10, 2) ,ADA04 DEICIMAL(10, 2) ,ADA11 DEICIMAL(10, 2) ,ADA12 DEICIMAL(10, 2) ,ADA13 DEICIMAL(10, 2) ,ADA14 DEICIMAL(10, 2) ,ADA21 DEICIMAL(10, 2) ,ADA2101 DEICIMAL(10, 2) ,ADA22 DEICIMAL(10, 2) ,ADA2201 DEICIMAL(10, 2) ,ADA2202 DEICIMAL(10, 2) ,ADA31 DEICIMAL(10, 2) ,ADA41 DEICIMAL(10, 2) ,ADA51 DEICIMAL(10, 2) ,ADA5101 DEICIMAL(10, 2) ,ADA61 DEICIMAL(10, 2) ,ADA62 DEICIMAL(10, 2) ,ADA71 DEICIMAL(10, 2) ,ADA72 DEICIMAL(10, 2) ,ADA73 DEICIMAL(10, 2) ,ADA74 DEICIMAL(10, 2) ,ADA75 DEICIMAL(10, 2) ,ADA81 DEICIMAL(10, 2) ,ADA82 DEICIMAL(10, 2) ,ADA83 DEICIMAL(10, 2) ,ADA91 DEICIMAL(10, 2) ,AAC001 int,BALANCEID int,AKC021 STRING,AAE040 STRING
create table INT_DRGS_PINFO_M(COMPCODE STRING,UPDATETIMESTR STRING,AAE036 STRING,AAE401 STRING,AKC190 STRING,ID STRING,ZA01C STRING,ZA03 STRING,ZA2 STRING,ZB01 STRING,ZB02 int,ZB03 STRING,AAA01 STRING,AAA02C STRING,AAA03 STRING,AAA04 int,AAA05C STRING,AAA06C STRING,AAA07 STRING,AAA40 int,AAA42 int,AAA08C STRING,AAA14C STRING,AAA17C STRING,AAA18C STRING,AAA19 STRING,AAA45 STRING,AAA46 STRING,AAA47 STRING,AAA49 STRING,AAA50 STRING,AAA51 STRING,AAA20 STRING,AAA21C STRING,AAA22 STRING,AAA23C STRING,AAA24 STRING,AAA25 STRING,AAA26C STRING,AAA27 STRING,AAA28 STRING,AAA29 int,AA30 STRING,AAB01 STRING,AAB02C STRING,AAB03 STRING,AAB06C STRING,AAC01 STRING,AAC02C STRING,AAC03 STRING,AAC04 STRING,AAD01C STRING,ABA01C STRING,ABA01N STRING,ABC01C STRING,ABC01N STRING,ABC03C STRING,ABF01C STRING,ABF01N STRING,ABF04 STRING,ABG01C STRING,ABG01N STRING,AEB01 STRING,AEB02C STRING,AED01C STRING,AED02 STRING,AED03 STRING,AED04 STRING,AEE01 STRING,AEE02 STRING,AEE03 STRING,AEE04 STRING,AEE05 STRING,AEE07 STRING,AEE08 STRING,AEE10 STRING,AEG01C STRING,AEG02C STRING,AEJ01 int,AEJ02 int,AEJ03 int,AEJ04 int,ADA00 int,AEI01C STRING,AEJ05 int,AEJ06 int,AEL01 int,AEM01C STRING,AEM02 STRING,AEM03C STRING,AEM04 STRING,AEN01 int,ADA0001 DEICIMAL(10, 2) ,ADA01 DEICIMAL(10, 2) ,ADA02 DEICIMAL(10, 2) ,ADA03 DEICIMAL(10, 2) ,ADA04 DEICIMAL(10, 2) ,ADA11 DEICIMAL(10, 2) ,ADA12 DEICIMAL(10, 2) ,ADA13 DEICIMAL(10, 2) ,ADA14 DEICIMAL(10, 2) ,ADA21 DEICIMAL(10, 2) ,ADA2101 DEICIMAL(10, 2) ,ADA22 DEICIMAL(10, 2) ,ADA2201 DEICIMAL(10, 2) ,ADA2202 DEICIMAL(10, 2) ,ADA31 DEICIMAL(10, 2) ,ADA41 DEICIMAL(10, 2) ,ADA51 DEICIMAL(10, 2) ,ADA5101 DEICIMAL(10, 2) ,ADA61 DEICIMAL(10, 2) ,ADA62 DEICIMAL(10, 2) ,ADA71 DEICIMAL(10, 2) ,ADA72 DEICIMAL(10, 2) ,ADA73 DEICIMAL(10, 2) ,ADA74 DEICIMAL(10, 2) ,ADA75 DEICIMAL(10, 2) ,ADA81 DEICIMAL(10, 2) ,ADA82 DEICIMAL(10, 2) ,ADA83 DEICIMAL(10, 2) ,ADA91 DEICIMAL(10, 2) ,AAC001 int,BALANCEID int,AKC021 STRING,AAE040 STRING) row format delimited fields terminated by ',';
create table INT_DRGS_MED_REC_FEE(COMPCODE STRING,UPDATETIMESTR STRING,AMOUNT DECIMAL(10,2),CASE_CHARGE_KIND_ID STRING,ID STRING,MED_REC_ID STRING,ZB06 STRING,ZB02 int,AAE040 STRING) row format delimited fields terminated by '\t';
create table INT_DRGS_MED_REC_FEE(COMPCODE STRING,UPDATETIMESTR STRING,AMOUNT DECIMAL(10,2),CASE_CHARGE_KIND_ID STRING,ID STRING,MED_REC_ID STRING,ZB06STRING,ZB02 int,AAE040 STRING) row format delimited fields terminated by ',';
load data local inpath '/root/testdata/drgstxt/INT_DRGS_MED_REC_FEE.txt' into table INT_DRGS_MED_REC_FEE
CREATE INDEX fee_index ON TABLE INT_DRGS_MED_REC_FEE(zb06) AS 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler' WITH deferred REBUILD IN TABLE int_drgs_med_rec_fee_index_table;
alter index fee_index on int_drgs_med_rec_fee_index_table rebuild;
create table INT_DRGS_PINFO_M(COMPCODE STRING,UPDATETIMESTR STRING,AAE036 STRING,AAE401 STRING,AKC190 STRING,ID STRING,ZA01C STRING,ZA03 STRING,ZA2 STRING,ZB01 STRING,ZB02 int,ZB03 STRING,AAA01 STRING,AAA02C STRING,AAA03 STRING,AAA04 int,AAA05C STRING,AAA06C STRING,AAA07 STRING,AAA40 int,AAA42 int,AAA08C STRING,AAA14C STRING,AAA17C STRING,AAA18C STRING,AAA19 STRING,AAA45 STRING,AAA46 STRING,AAA47 STRING,AAA49 STRING,AAA50 STRING,AAA51 STRING,AAA20 STRING,AAA21C STRING,AAA22 STRING,AAA23C STRING,AAA24 STRING,AAA25 STRING,AAA26C STRING,AAA27 STRING,AAA28 STRING,AAA29 int,AA30 STRING,AAB01 STRING,AAB02C STRING,AAB03 STRING,AAB06C STRING,AAC01 STRING,AAC02C STRING,AAC03 STRING,AAC04 STRING,AAD01C STRING,ABA01C STRING,ABA01N STRING,ABC01C STRING,ABC01N STRING,ABC03C STRING,ABF01C STRING,ABF01N STRING,ABF04 STRING,ABG01C STRING,ABG01N STRING,AEB01 STRING,AEB02C STRING,AED01C STRING,AED02 STRING,AED03 STRING,AED04 STRING,AEE01 STRING,AEE02 STRING,AEE03 STRING,AEE04 STRING,AEE05 STRING,AEE07 STRING,AEE08 STRING,AEE10 STRING,AEG01C STRING,AEG02C STRING,AEJ01 int,AEJ02 int,AEJ03 int,AEJ04 int,ADA00 int,AEI01C STRING,AEJ05 int,AEJ06 int,AEL01 int,AEM01C STRING,AEM02 STRING,AEM03C STRING,AEM04 STRING,AEN01 int,ADA0001 DECIMAL(10, 2) ,ADA01 DECIMAL(10, 2) ,ADA02 DECIMAL(10, 2) ,ADA03 DECIMAL(10, 2) ,ADA04 DECIMAL(10, 2) ,ADA11 DECIMAL(10, 2) ,ADA12 DECIMAL(10, 2) ,ADA13 DECIMAL(10, 2) ,ADA14 DECIMAL(10, 2) ,ADA21 DECIMAL(10, 2) ,ADA2101 DECIMAL(10, 2) ,ADA22 DECIMAL(10, 2) ,ADA2201 DECIMAL(10, 2) ,ADA2202 DECIMAL(10, 2) ,ADA31 DECIMAL(10, 2) ,ADA41 DECIMAL(10, 2) ,ADA51 DECIMAL(10, 2) ,ADA5101 DECIMAL(10, 2) ,ADA61 DECIMAL(10, 2) ,ADA62 DECIMAL(10, 2) ,ADA71 DECIMAL(10, 2) ,ADA72 DECIMAL(10, 2) ,ADA73 DECIMAL(10, 2) ,ADA74 DECIMAL(10, 2) ,ADA75 DECIMAL(10, 2) ,ADA81 DECIMAL(10, 2) ,ADA82 DECIMAL(10, 2) ,ADA83 DECIMAL(10, 2) ,ADA91 DECIMAL(10, 2) ,AAC001 int,BALANCEID int,AKC021 STRING,AAE040 STRING) row format delimited fields terminated by ',';
load data local inpath '/root/testdata/drgstxt/INT_DRGS_PINFO_M.txt' into table INT_DRGS_PINFO_M
select f.*,m.AAE040 from INT_DRGS_MED_REC_FEE f left join INT_DRGS_PINFO_M m on f.MED_REC_ID =m.ID
create table MID_DRGS_OPERATION_ICD_BJ(DWID STRING,ATTR STRING,CATEGORY_CODE STRING,ICD_BJ_CODE STRING,ICD_BJ_ID STRING,ICD_BJ_NAME STRING,SPELL STRING,SUBORDER_CODE STRING) row format delimited fields terminated by ',';
load data local inpath '/root/testdata/drgstxt/MID_DRGS_OPERATION_ICD_BJ.txt' into table MID_DRGS_OPERATION_ICD_BJ
create table MID_DRGS_DIAGNOSIS_ICD_BJ(DWID STRING,ATTR STRING,CATEGORY_CODE STRING,ICD_BJ_CODE STRING,ICD_BJ_ID STRING,ICD_BJ_NAME STRING,SPELL STRING,SUBORDER_CODE STRING) row format delimited fields terminated by ',';
load data local inpath '/root/testdata/drgstxt/MID_DRGS_DIAGNOSIS_ICD_BJ.txt' into table MID_DRGS_DIAGNOSIS_ICD_BJ
create table INT_DRGS_PINFO_E2(COMPCODE string,UPDATETIMESTR string,AAE036 string,AAE401 string,ACA01C string,ACA01N string,ACA02 string,ACA03C string,ACA04 string,ACA05 string,AKC190 string,ID string,ZA01C string,ZA03 string,ZA2 string,ZB01 string,ZB02 int,ACA06 string,ACA07C string,ACA08C string,ACA09 string,ACA10 int,MAIN_ID string) row format delimited fields terminated by ',';
load data local inpath '/root/testdata/drgstxt/INT_DRGS_PINFO_E2.txt' into table INT_DRGS_PINFO_E2
create table INT_DRGS_PINFO_E1(COMPCODE string,UPDATETIMESTR string,AAE036 string,AAE401 string,ABD01C string,ABD01N string,ABD03C string,ABD04 int,AKC190 string,ID string,ZA01C string,ZA03 string,ZA2 string,ZB01 string,ZB02 int,MAIN_ID string) row format delimited fields terminated by ',' ;
load data local inpath '/root/testdata/drgstxt/INT_DRGS_PINFO_E1.txt' into table INT_DRGS_PINFO_E1
----------------------------------------------------
select compcode,case_charge_kind_id,sum(amount) from int_drgs_med_rec_fee group by case_charge_kind_id,compcode;
select sum(amount),case_charge_kind_id from int_drgs_med_rec_fee group by case_charge_kind_id;
select compcode,case_charge_kind_id,sum(amount) from int_drgs_med_rec_fee group by case_charge_kind_id,compcode order by compcode,case_charge_kind_id;
---- 创建视图
create view VIEW_INT_DRGS_PINFO_E1 as select COMPCODE ,UPDATETIMESTR ,AAE036 ,AAE401 ,ABD01C ,ABD01N ,ABD03C ,ABD04 ,AKC190 ,ID ,ZA01C ,ZA03 ,ZA2 ,ZB01 ,ZB02 ,MAIN_ID ,d.ICD_BJ_ID,ICD_BJ_NAME,d.CATEGORY_CODE from INT_DRGS_PINFO_E1 e left join MID_DRGS_DIAGNOSIS_ICD_BJ d on e.ABD01C = d.ICD_BJ_CODE
---hive:
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
---apark:
set hive.exec.dynamic.partition.mode=nonstrict
---- 创建存储视图数据表(分区表)
create table FIN_DRGS_PINFO_E1(UPDATETIMESTR string,AAE036 string,AAE401 string,ABD01C string,ABD01N string,ABD03C string,ABD04 int,AKC190 string,ID string,ZA01C string,ZA03 string,ZA2 string,ZB01 string,ZB02 int,MAIN_ID string,ICD_BJ_ID string,ICD_BJ_NAME string,CATEGORY_CODE string) partitioned by (COMPCODE string) row format delimited fields terminated by ',' ;
---- 动态分区,存储视图中的数据
INSERT OVERWRITE TABLE FIN_DRGS_PINFO_E1 PARTITION (COMPCODE) select UPDATETIMESTR ,AAE036 ,AAE401 ,ABD01C ,ABD01N ,ABD03C ,ABD04 ,AKC190 ,ID ,ZA01C ,ZA03 ,ZA2 ,ZB01 ,ZB02 ,MAIN_ID ,ICD_BJ_ID,ICD_BJ_NAME,CATEGORY_CODE,COMPCODE FROM view_int_drgs_pinfo_e1;
---- parquet
create table PARQUET_DRGS_PINFO_E1(UPDATETIMESTR string,AAE036 string,AAE401 string,ABD01C string,ABD01N string,ABD03C string,ABD04 int,AKC190 string,ID string,ZA01C string,ZA03 string,ZA2 string,ZB01 string,ZB02 int,MAIN_ID string,ICD_BJ_ID string,ICD_BJ_NAME string,CATEGORY_CODE string) partitioned by (COMPCODE string) row format delimited fields terminated by ',' STORED AS PARQUET ;
---- 动态分区,存储视图中的数据保存为parquet
INSERT OVERWRITE TABLE PARQUET_DRGS_PINFO_E1 PARTITION (COMPCODE) select UPDATETIMESTR ,AAE036 ,AAE401 ,ABD01C ,ABD01N ,ABD03C ,ABD04 ,AKC190 ,ID ,ZA01C ,ZA03 ,ZA2 ,ZB01 ,ZB02 ,MAIN_ID ,ICD_BJ_ID,ICD_BJ_NAME,CATEGORY_CODE,COMPCODE FROM view_int_drgs_pinfo_e1;
-----parquet测试
create table PARQUET_DRGS_MED_REC_FEE(UPDATETIMESTR STRING,AMOUNT DECIMAL(10,2),CASE_CHARGE_KIND_ID STRING,ID STRING,MED_REC_ID STRING,ZB06 STRING,ZB02 int,AAE040 STRING) partitioned by (COMPCODE string) row format delimited fields terminated by ',' STORED AS PARQUET;
INSERT OVERWRITE TABLE PARQUET_DRGS_MED_REC_FEE PARTITION (COMPCODE) select UPDATETIMESTR ,AMOUNT ,CASE_CHARGE_KIND_ID ,ID ,MED_REC_ID ,ZB06 ,ZB02 ,AAE040 ,COMPCODE FROM INT_DRGS_MED_REC_FEE;
-----orcfile测试
create table ORCFILE_DRGS_MED_REC_FEE(UPDATETIMESTR STRING,AMOUNT DECIMAL(10,2),CASE_CHARGE_KIND_ID STRING,ID STRING,MED_REC_ID STRING,ZB06 STRING,ZB02 int,AAE040 STRING) partitioned by (COMPCODE string) row format delimited fields terminated by ',' STORED AS ORC;
INSERT OVERWRITE TABLE ORCFILE_DRGS_MED_REC_FEE PARTITION (COMPCODE) select UPDATETIMESTR ,AMOUNT ,CASE_CHARGE_KIND_ID ,ID ,MED_REC_ID ,ZB06 ,ZB02 ,AAE040 ,COMPCODE FROM INT_DRGS_MED_REC_FEE;