Phoenix Tips (1)

1. 没有插入值的字段(即为Null的字段),在HBase中不会存储实际数据。


创建表: create table t1 (id varchar not null primary key, cf1.a varchar, cf1.b varchar, cf2.c varchar,cf2.d varchar, cf2.e varchar );

插入数据到c、d字段,a、b、e字段为Null :upsert into t1(id,c,d) values('0000000001','ccc','ddd');

查看数据:

select * from t1;
+------------+------------+------------+------------+------------+------------+
|     ID                |     A       |     B       |       C      |     D        |   E            |  
+------------+------------+------------+------------+------------+------------+
| 0000000001 | null       | null       | ccc        | ddd        |     null      |     
+------------+------------+------------+------------+------------+------------+


hbase(main):028:0> scan 'T1'
ROW                              COLUMN+CELL                                                                                   
 0000000001                       column=CF1:_0, timestamp=1415238625585, value=                                                
 0000000001                      column=CF2:C, timestamp=1415238625585, value=ccc                                              
 0000000001                      column=CF2:D, timestamp=1415238625585, value=ddd 


 

插入数据到a、d字段,b,c ,e字段为Null : upsert into t1 values('0000000002','aaa',null,null,'ddd',null);

select * from t1;
+------------+------------+------------+------------+------------+------------+
|     ID                |     A        |     B       |     C       |     D         |     E        |
+------------+------------+------------+------------+------------+------------+
| 0000000001 | null        | null       | ccc        | ddd        |     null    |
| 0000000002 | aaa        | null       | null       | ddd         |     null     |
+------------+------------+------------+------------+------------+------------+

hbase(main):029:0> scan 'T1'
ROW                              COLUMN+CELL                                                                                   
 0000000001                      column=CF1:_0, timestamp=1415238625585, value=                                                
 0000000001                      column=CF2:C, timestamp=1415238625585, value=ccc                                              
 0000000001                      column=CF2:D, timestamp=1415238625585, value=ddd      

                                        
 0000000002                      column=CF1:A, timestamp=1415238882597, value=aaa                                              
 0000000002                      column=CF1:_0, timestamp=1415238882597, value=                                                
 0000000002                      column=CF2:D, timestamp=1415238882597, value=ddd  


2、索引结构

创建索引:create index index_t1_a_b_in_c_d on t1(a,c) include (b,d)

实际创建一个表:

hbase(main):031:0> describe 'INDEX_T1_A_C_IN_B_D'
                                                                                    
 'INDEX_T1_A_C_IN_B_D', {TABLE_ATTRIBUTES => {MAX_FILESIZE => '7516192768', coprocessor$1 => '|org.apache.phoenix.coprocessor.ScanRegionObserver|1|', coprocessor$2 => '|org.apache.phoenix.coprocessor.UngroupedAggregateRegionObserver|1|', coprocessor$3 =>oprocessor$4=>'|org.apache.phoenix.coprocessor.ServerCachingEndpointImpl|1|',  coprocessor$5 => '|org.apache.hadoop.hbase.regionserver.LocalIndexSplitter|1|'}, { NAME =>'CF1', DATA_BLOCK_ENCODING => 'FAST_DIFF', BLOOMFILTER => 'ROW', REPLICATION_SCOPE => '0', VERSIONS => '1', COMPRESSION => 'NONE', MIN_VERSIONS => '0', TTL => 'FOREVER', KEEP_DELETED_CELLS => 'true', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'}, {NAME => 'CF2', DATA_BLOCK_ENCODING => 'FAST_DIFF', BLOOMFILTER => 'ROW', REPLICATION_SCOPE => '0', VERSIONS => '1', COMPRESSION => 'NONE', MIN_VERSIONS => '0',TTL => 'FOREVER', KEEP_DELETED_CELLS => 'true', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'} 


插入数据:

upsert into t1 values('0000000001','10','11','12','13',"14");

upsert into t1 values('0000000002','20','21','22','23',"24");

upsert into t1 values('0000000003','30','31','32','33',"24");


select * from t1;
+------------+------------+------------+------------+----------+------------+
|     ID                  |     A      |     B      |     C      |     D        |       E        |
+------------+------------+------------+------------+----------+------------+
| 0000000001 | 10         | 11         | 12         | 13         |      14        |
| 0000000002 | 20         | 21         | 22         | 23         |       24       |
| 0000000003 | 30         | 31         | 32         | 33         |       34       | 
+------------+------------+------------+------------+----------+------------+


索引INDEX_T1_A_C_IN_B_D

select * from INDEX_T1_A_C_IN_B_D;
+------------+------------+------------+------------+------------+
|   CF1:A    |   CF2:C    |    :ID               |   CF1:B    |   CF2:D    |
+------------+------------+------------+------------+------------+
| 10            | 12            | 0000000001 | 11             | 13         |
| 20            | 22            | 0000000002 | 21             | 23         |
| 30            | 32            | 0000000003 | 31             | 33         |
+------------+------------+------------+------------+------------+


scan 'INDEX_T1_A_C_IN_B_D'
ROW                                                   COLUMN+CELL                                                                                   
 10\x0012\x000000000001          column=CF1:CF1:B, timestamp=1415240129108, value=11                                           
 10
\x0012\x000000000001          column=CF1:_0, timestamp=1415240129108, value=                                                
 10\x0012\x000000000001          column=CF2:CF2:D, timestamp=1415240129108, value=13  

                                         
 20\x0022\x000000000002          column=CF1:CF1:B, timestamp=1415240135689, value=21                                           
 20
\x0022\x000000000002          column=CF1:_0, timestamp=1415240135689, value=                                                
 20\x0022\x000000000002          column=CF2:CF2:D, timestamp=1415240135689, value=23  

                                         
 30\x0032\x000000000003          column=CF1:CF1:B, timestamp=1415240148794, value=31                                           
 30
\x0032\x000000000003          column=CF1:_0, timestamp=1415240148794, value=                                                
 30\x0032\x000000000003          column=CF2:CF2:D, timestamp=1415240148794, value=33 


3、如果索引中有null

 

 

upsert into t1 values('0000000004',null,'31',null,'33', '34');


T1数据:
+------------+------------+------------+------------+----------+---------+
|     ID                  |     A      |     B      |     C      |     D        |   E         |
+------------+------------+------------+------------+---------+----------+
| 0000000004 | null       | 31         | null       | 33         |   34       |

+------------+------------+------------+------------+----------+---------+

索引INDEX_T1_A_C_IN_B_D数据:

+------------+------------+--------------------+-------------+------------+
|   CF1:A    |   CF2:C    |    :ID                  |   CF1:B    |   CF2:D    |
+------------+------------+--------------------+------------+------------+
| null          | null           | 0000000004   | 31             | 33           |

+------------+------------+--------------------+-------------+------------+


ROW                                              COLUMN+CELL                                                                                   
 \x00\x000000000004              column=CF1:CF1:B, timestamp=1415240916266, value=31                                           
 \x00\x000000000004              column=CF1:_0, timestamp=1415240916266, value=                                                
 \x00\x000000000004              column=CF2:CF2:D, timestamp=1415240916266, value=33 


4. 索引使用

其定义为:

create index index_t1_a_b_in_c_d on t1(a,c) include (b,d)


(1)、索引中没有包括字段 e, 故只有select字段中有e,就不会用到索引:

explain select  * from t1  where a='10' ;     

CLIENT PARALLEL 1-WAY FULL SCAN OVER T1

        SERVER FILTER BY CF1.A = '10'  

 

explain select  a, e from t1 where a='10'; 
CLIENT PARALLEL 1-WAY FULL SCAN OVER T1

        SERVER FILTER BY CF1.A = '10'  


(2)、不使用e,则可以

explain select a,b,c,d from t1 where a='10';

 CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['10'] 


explain select d,c,b,a from t1 where a='10' and c='12';

|CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['10','12']  


(3)、索引 a、b 的顺序可以调整

explain select d,c,b,a from t1 where c='10' and a='12';

 

CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['12','10'] 

 

 

explain select d,c,b,a from t1 where c>'10' and a='12'; 

CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_B_IN_C_D ['12','10'] - ['12',*]


explain select d,c,b,a from t1 where c='10' and a>'12'; 

CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['12'] - [*]

         SERVER FILTER BY C = '10'


explain select d,c,b,a from t1 where c>'10' and a>'12'; 

 CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['12'] - [*] 

         SERVER FILTER BY C > '10'


(4)、混合使用索引字段a 和include 字段b,可以用到索引

explain select a,b,c,d from t1 where a='10' and b='11';

CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['10']    

           SERVER FILTER BY CF1.B = '11' 


(5)、混合使用索引字段a 和非include 字段e,则用不到索引

explain select d,c,b,a from t1 where a='10' and e='12';

CLIENT PARALLEL 1-WAY FULL SCAN OVER T1  

          SERVER FILTER BY (CF1.A = '10' AND CF2.E = '12')  

 

(6)、Group By  可以使用索引  

explain select b, max(c), min(d) from t1 where a>'10' group by b 

CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['10'] - [*]  
     SERVER AGGREGATE INTO DISTINCT ROWS BY [CF1.B]  
  CLIENT MERGE SORT


explain select a, max(c), min(d) from t1 where b>'10' group by a

CLIENT PARALLEL 32-WAY FULL SCAN OVER INDEX_T1_A_C_IN_B_D 

      SERVER FILTER BY CF1.B > '10' 
     SERVER AGGREGATE INTO ORDERED DISTINCT ROWS BY [A] 
CLIENT MERGE SORT


explain select a, max(b), min(d) from t1 where c>'10' group by a;

 

 CLIENT PARALLEL 32-WAY FULL SCAN OVER INDEX_T1_A_C_IN_B_D 
     SERVER FILTER BY C > '10' 
     SERVER AGGREGATE INTO ORDERED DISTINCT ROWS BY [A]  
  CLIENT MERGE SORT 

 
explain select a, count(b), max(c), min(d) from t1 where a>'1' and b>'11' group by a 
CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['1'] - [*] 
      SERVER FILTER BY CF1.B > '11' 
      SERVER AGGREGATE INTO ORDERED DISTINCT ROWS BY [A]  
  CLIENT MERGE SORT

  

 

explain select a, count(b), max(c), min(d) from t1 where a>'1' and c>'11' group by a

CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['1'] - [*]  
      SERVER FILTER BY C > '11'  
      SERVER AGGREGATE INTO ORDERED DISTINCT ROWS BY [A]  

  CLIENT MERGE SORT  


explain select count(a), count(b), c, min(d) from t1 where a>'1' and c>'11' group by c ;

CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_C_IN_B_D ['1'] - [*]  
     SERVER FILTER BY C > '11'  
      SERVER AGGREGATE INTO DISTINCT ROWS BY [C]  
  CLIENT MERGE SORT 

 

explain select count(a), count(b), c, min(d) from t1 where a='10' and c>'11' group by c 
CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_B_IN_C_D ['10','11'] - ['10',*]  
     SERVER AGGREGATE INTO DISTINCT ROWS BY [C]  
 CLIENT MERGE SORT

 

explain select count(a), b, max(c), min(d) from t1 where a='10' and c>'11' group by b ;

CLIENT PARALLEL 32-WAY RANGE SCAN OVER INDEX_T1_A_B_IN_C_D ['10','11'] - ['10',*] 
      SERVER AGGREGATE INTO DISTINCT ROWS BY [CF1.B]  
 CLIENT MERGE SORT


 

posted @   lihui1625  阅读(179)  评论(0编辑  收藏  举报
努力加载评论中...
点击右上角即可分享
微信分享提示