内核开发之控制 Parts 生成数量.

 
目标
.Figure out part 生成逻辑
 
2.针对某个Insert,根据列中的哪些值生成parts.找出对应代码。SipHash值是如何计算的,计算的列中的哪些行。
 
以 a_local为例
PARTITION BY partition_key.
3.测试
4.Insert 语句是否是并行。答案是 否,见Pipeline部分.
5.查看生成Part的逻辑。 String 类型的Partition Key 生成part的逻辑.
6. min_insert_block_size_bytes这个参数可以控制block写入磁盘的大小。
7. $min_insert_block_size_rows (default 1048576). Part of number = Celling(total_rows/min_insert_block_size).

针对使用可以命中SquashingTransform的查询。 或者 insert_distributed_sync为false的查询.

  

基于版本
v21.10.6.2-stable
 
查看QueryPlan 
 
EXPLAIN PIPELINE
INSERT INTO test.a_local_test SELECT *
FROM test.a_local_
 
Query id: 5d9d0a3e-717f-47db-9891-e28bf54c0d17
 
┌─explain────────────────────────────────────────────────┐
│ digraph │
│ { │
│ rankdir="LR"; │
│ { node [shape = rect] │
│ n140102744993816[label="MergeTreeInOrder"]; │
│ n140102743747864[label="MergeTreeInOrder"]; │
│ n140102743749144[label="MergeTreeInOrder"]; │
│ n140102743750424[label="MergeTreeInOrder"]; │
│ n140106380912440[label="Concat"]; │
│ n140102743769304[label="ExpressionTransform"]; │
│ n140102743768664[label="ExpressionTransform"]; │
│ n140108220154264[label="CountingTransform"]; │
│ n140108220158104[label="SquashingTransform"]; │
│ n140108219190040[label="ConvertingTransform"]; │
│ n140108129453720[label="ReplicatedMergeTreeSink"]; │
│ n140107737463000[label="EmptySink"]; │
│ }
 

  

生成Part的逻辑

生成partition是和 partition_key 的值有关连。
 
# system.parts part 信息
SELECT
partition,
name
FROM system.parts
WHERE (table LIKE 'a_local_test') AND (database = 'test') AND (partition = 'usr_$alipay_avatar')
Query id: 097275f4-9332-42d5-922c-d83849df14eb
 
┌─path──────────────────────────────────────────────────────────────────────────────────────────────────────────┬─rows─┬─partition──────────┬─name───────────────────────────────────┐
│ /data/clickhouse/local/store/311/3119d1f0-f689-45b5-b119-d1f0f68925b5/3f5bc60eaef22f455b8d67065139cf85_0_0_0/ │ 6147 │ usr_$alipay_avatar │ 3f5bc60eaef22f455b8d67065139cf85_0_0_0 │
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴────────────────────┴────────────────────────────────────────┘
 
# 对应directory下信息
[root@clickhouse_ndoe2 0ea28b1a-bc20-48d9-8ea2-8b1abc20c8d9]# cat a99c06f78cd093607f3e220fe22a88a4_0_0_0/count.txt
6109[root@clickhouse_ndoe2 0ea28b1a-bc20-48d9-8ea2-8b1abc20c8dcd /data/clickhouse/local/store/311/3119d1f0-f689-45b5-b119-d1f0f68925b5/3f5bc60eaef22f455b8d67065139cf85_0_0_0/
[root@clickhouse_ndoe2 3f5bc60eaef22f455b8d67065139cf85_0_0_0]# ls
clickhouse_ndoeecksums.txt columns.txt count.txt data.bin data.mrk3 default_compression_codec.txt minmax_partition_key.idx partition.dat primary.idx
[root@clickhouse_ndoe2 3f5bc60eaef22f455b8d67065139cf85_0_0_0]# cat count.txt
6147
 
# 对应表中的记录
clickhouse_ndoe2 :) select count(0) from a_local_test where partition_key = 'usr_$alipay_avatar';
 
SELECT count(0)
FROM a_local_test
WHERE partition_key = 'usr_$alipay_avatar'
 
Query id: 6d700fcf-10ef-4e98-bc40-0b60d00c4bc0
 
┌─count()─┐
│ 6147 │
└─────────┘
 
1 rows in set. Elapsed: 0.003 sec.

  

max_parts_per_blockd

控制一个block可以创建parts的数量
ReplicatedMergeTreeSink.cpp
关键代码
auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context);
 
#将partition的expression表达式计算出值,并写入到这个block。可以通过设置max_parts_per_block来控制写入parts的数量.
一个Insert的block超过max_parts_per_block,就抛出异常。

  

min_insert_block_size_rows, min_insert_block_size_bytes

控制Sink的Block大小
max_block_size
max_insert_block_size
$min_insert_block_size_rows ,可以控制生成parts的最小行数。 特别针对insert select这种trivial sql.
min_insert_block_size_bytes 使用一个就可以
两个值会控制 SquashingBlockOutputStream的对block的大小。

  

One Part (Thread Read) Source

$min_insert_block_size_rows 控制part生成。
单part读入,生成两个parts.
 
│ $min_insert_block_size_rows │ 1048545 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │
 
clickhouse_ndoe2 :) insert into a_local_test select number, '123', '456', 'account1', '20220101', number from numbers(1048576);
 
INSERT INTO a_local_test SELECT
number,
'123',
'456',
'account1',
'20220101',
number
FROM numbers(1048576)
 
Query id: 5f4e4704-6355-4df3-8a19-a00614dfebf4
 
Ok.
 
0 rows in set. Elapsed: 0.280 sec. Processed 2.10 million rows, 16.78 MB (7.48 million rows/s., 59.83 MB/s.)
 
clickhouse_ndoe2 :) select * from system.parts where table = 'a_local_test';
 
 
# 生成
SELECT *
FROM system.parts
WHERE table = 'a_local_test'
 
Query id: 03e6a59b-7152-40d9-8e38-9a3ce7d3b604
 
┌─partition─┬─name───────────────────────────────────┬─uuid─────────────────────────────────┬─part_type─┬─active─┬─marks─┬────rows─┬─bytes_on_disk─┬─data_compressed_bytes─┬─data_uncompressed_bytes─┬─marks_bytes─┬─secondary_indices_compressed_bytes─┬─secondary_indices_uncompressed_bytes─┬─secondary_indices_marks_bytes─┬───modification_time─┬─────────remove_time─┬─refcount─┬───min_date─┬───max_date─┬────────────min_time─┬────────────max_time─┬─partition_id─────────────────────┬─min_block_number─┬─max_block_number─┬─level─┬─data_version─┬─primary_key_bytes_in_memory─┬─primary_key_bytes_in_memory_allocated─┬─is_frozen─┬─database──┬─table───────────┬─engine───────────────────────┬─disk_name───┬─path──────────────────────────────────────────────────────────────────────────────────────────────────────────┬─hash_of_all_files────────────────┬─hash_of_uncompressed_files───────┬─uncompressed_hash_of_compressed_files─┬─delete_ttl_info_min─┬─delete_ttl_info_max─┬─move_ttl_info.expression─┬─move_ttl_info.min─┬─move_ttl_info.max─┬─default_compression_codec─┬─recompression_ttl_info.expression─┬─recompression_ttl_info.min─┬─recompression_ttl_info.max─┬─group_by_ttl_info.expression─┬─group_by_ttl_info.min─┬─group_by_ttl_info.max─┬─rows_where_ttl_info.expression─┬─rows_where_ttl_info.min─┬─rows_where_ttl_info.max─┬─projections─┐
│ 123 │ 72bab4faa3b9c23f2b36e80fe09e76fc_0_0_0 │ 00000000-0000-0000-0000-000000000000 │ Wide │ 1 │ 129 │ 1048545 │ 8538051 │ 8517263 │ 42990345 │ 18576 │ 0 │ 0 │ 0 │ 2022-02-10 10:52:55 │ 1970-01-01 08:00:00 │ 1 │ 1970-01-01 │ 1970-01-01 │ 1970-01-01 08:00:00 │ 1970-01-01 08:00:00 │ 72bab4faa3b9c23f2b36e80fe09e76fc │ 0 │ 0 │ 0 │ 0 │ 3225 │ 10240 │ 0 │ test │ a_local_test │ ReplicatedReplacingMergeTree │ disk_name_0 │ /data/clickhouse/local/store/311/3119d1f0-f689-45b5-b119-d1f0f68925b5/72bab4faa3b9c23f2b36e80fe09e76fc_0_0_0/ │ 929491647a62bcc3788666a1c7a48b16 │ 7e2b7dfdf7b725c0bf7b509acb54dbbb │ cc4c83ae354900cbdf9dc2dcda455b6c │ 1970-01-01 08:00:00 │ 1970-01-01 08:00:00 │ [] │ [] │ [] │ LZ4 │ [] │ [] │ [] │ [] │ [] │ [] │ [] │ [] │ [] │ [] │
│ 123 │ 72bab4faa3b9c23f2b36e80fe09e76fc_1_1_0 │ 00000000-0000-0000-0000-000000000000 │ Compact │ 1 │ 2 │ 31 │ 735 │ 479 │ 1271 │ 208 │ 0 │ 0 │ 0 │ 2022-02-10 10:52:55 │ 1970-01-01 08:00:00 │ 1 │ 1970-01-01 │ 1970-01-01 │ 1970-01-01 08:00:00 │ 1970-01-01 08:00:00 │ 72bab4faa3b9c23f2b36e80fe09e76fc │ 1 │ 1 │ 0 │ 1 │ 50 │ 8256 │ 0 │ test │ a_local_test │ ReplicatedReplacingMergeTree │ disk_name_0 │ /data/clickhouse/local/store/311/3119d1f0-f689-45b5-b119-d1f0f68925b5/72bab4faa3b9c23f2b36e80fe09e76fc_1_1_0/ │ 987e5caeca119e27db86e2ed1b1c4d74 │ 36059fd8346a58235adc8285b8e77f1d │ 9b7dcef94fb22e3c690a980287c6b427 │ 1970-01-01 08:00:00 │ 1970-01-01 08:00:00 │ [] │ [] │ [] │ LZ4 │ [] │ [] │ [] │ [] │ [] │ [] │ [] │ [] │ [] │ [] │
└───────────┴────────────────────────────────────────┴──────────────────────────────────────┴───────────┴────────┴───────┴─────────┴───────────────┴───────────────────────┴─────────────────────────┴─────────────┴────────────────────────────────────┴──────────────────────────────────────┴───────────────────────────────┴─────────────────────┴─────────────────────┴──────────┴────────────┴────────────┴─────────────────────┴─────────────────────┴──────────────────────────────────┴──────────────────┴──────────────────┴───────┴──────────────┴─────────────────────────────┴───────────────────────────────────────┴───────────┴───────────┴─────────────────┴──────────────────────────────┴─────────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────────────────────────┴──────────────────────────────────┴───────────────────────────────────────┴─────────────────────┴─────────────────────┴──────────────────────────┴───────────────────┴───────────────────┴───────────────────────────┴───────────────────────────────────┴────────────────────────────┴────────────────────────────┴──────────────────────────────┴───────────────────────┴───────────────────────┴────────────────────────────────┴─────────────────────────┴─────────────────────────┴─────────────┘

  

Multiple Parts(Parallel Thread Read) Source

local -> local

insert into a_local_test select _id, '123', partition_key, _id, update_time, _id_int from a_local_;
clickhouse_ndoe2 clickhouse Settings
│ $min_insert_block_size_rows │ 1048545 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │
│ min_insert_block_size_bytes │ 268427520 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │
 
 
 
select name, rows, data_uncompressed_bytes from system.parts where table = 'a_local_test' and name like '%_0' order by name;
 
Query id: 3c54128f-138b-43e8-88b3-5d763e76e7c0
 
┌─name───────────────────────────────────────┬────rows─┬─data_uncompressed_bytes─┐
│ 72bab4faa3b9c23f2b36e80fe09e76fc_102_102_0 │ 1272762 │ 82376291 │
│ 72bab4faa3b9c23f2b36e80fe09e76fc_103_103_0 │ 1285862 │ 84035542 │
│ 72bab4faa3b9c23f2b36e80fe09e76fc_104_104_0 │ 12356 │ 790784 │
│ 72bab4faa3b9c23f2b36e80fe09e76fc_105_105_0 │ 1048545 │ 67106880 │
..... ..... ..... ..... .....
│ 72bab4faa3b9c23f2b36e80fe09e76fc_190_190_0 │ 810746 │ 53383978 │
│ 72bab4faa3b9c23f2b36e80fe09e76fc_191_191_0 │ 1272762 │ 83274958 │
│ 72bab4faa3b9c23f2b36e80fe09e76fc_192_192_0 │ 795663 │ 53309410 │
│ 72bab4faa3b9c23f2b36e80fe09e76fc_193_193_0 │ 810799 │ 54162075 │
│ 72bab4faa3b9c23f2b36e80fe09e76fc_194_194_0 │ 795895 │ 53324963 │
│ 72bab4faa3b9c23f2b36e80fe09e76fc_195_195_0 │ 807493 │ 54083962 │
│ 72bab4faa3b9c23f2b36e80fe09e76fc_196_196_0 │ 804569 │ 53762970 │
│ 72bab4faa3b9c23f2b36e80fe09e76fc_197_197_0 │ 795634 │ 53307478 │
│ 72bab4faa3b9c23f2b36e80fe09e76fc_198_198_0 │ 795665 │ 53309555 │
│ 72bab4faa3b9c23f2b36e80fe09e76fc_199_199_0 │ 795673 │ 53303917 │
└────────────────────────────────────────────┴─────────┴─────────────────────────┘

  

local->distribute
test.a_local_ -> test.user_all
 
clickhouse_ndoe1 :) system stop merges a_local_test_;
clickhouse_ndoe1 :)
clickhouse_ndoe1 :) select name, rows, data_uncompressed_bytes, data_compressed_bytes from system.parts where table = 'a_local_test_' and name like '%_0' order by name;
 
90 rows in set. Elapsed: 0.004 sec.
 
clickhouse_ndoe2 :) system stop merges a_local_test_;

  

distributed - > distributed

test.user_all -> test.user_all_
对应 (clickhouse_ndoe1's a_local_test_, clickhouse_ndoe2's a_local_test_) -> (clickhouse_ndoe1's a_local_test__, clickhouse_ndoe2's a_local_test__)

  

初始化环境
clickhouse_ndoe1 :) CREATE TABLE test.a_local_test__
(
`_id` UInt64,
`partition_key` String,
`_value` String,
`id1` String,
`update_time` DateTime64(3, 'Asia/Shanghai'),
`id1_int` UInt64
)
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/0/test.a_local_test__', '{replica}')
PARTITION BY partition_key
ORDER BY (_id, _id)
SETTINGS index_granularity = 8192, storage_policy = 'multiple_disk'
 
 
clickhouse_ndoe2 :) CREATE TABLE test.a_local_test__
(
`_id` UInt64,
`partition_key` String,
`_value` String,
`id1` String,
`update_time` DateTime64(3, 'Asia/Shanghai'),
`id1_int` UInt64
)
ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/1/test.a_local_test__', '{replica}')
PARTITION BY partition_key
ORDER BY (_id, _id)
SETTINGS index_granularity = 8192, storage_policy = 'multiple_disk'
;
 
CREATE TABLE test.user_all_
(
`_id` UInt64,
`partition_key` String,
`_value` String,
`id1` String,
`update_time` DateTime64(3, 'Asia/Shanghai'),
`id1_int` UInt64
)
ENGINE = Distributed('cluster__no_replica', 'test', 'a_local_test__', abs(javaHash(toString(_id))))

  

查询Pipeline
 
clickhouse_ndoe2 :) explain pipeline insert into user_all_ select _id, '123', partition_key, _id, update_time, _id_int from test.user_all;
 
EXPLAIN PIPELINE
INSERT INTO user_all_ SELECT
_id,
'123',
partition_key,
_id,
update_time,
_id_int
FROM test.user_all
 
Query id: 40a356c0-0d43-435f-b429-ea00e6c8fc46
 
┌─explain────────────────────────────────────────────┐
│ digraph │
│ { │
│ rankdir="LR"; │
│ { node [shape = rect] │
│ n140106606502168[label="MergeTreeInOrder"]; │
│ n140106606503448[label="MergeTreeInOrder"]; │
│ n140106547852568[label="MergeTreeInOrder"]; │
│ n140106377560088[label="MergeTreeInOrder"]; │
│ n140106377561368[label="MergeTreeInOrder"]; │
│ n140106547855128[label="MergeTreeInOrder"]; │
│ n140106377562648[label="MergeTreeInOrder"]; │
│ n140106377563928[label="MergeTreeInOrder"]; │
│ n140106377565208[label="MergeTreeInOrder"]; │
│ n140106377566488[label="MergeTreeInOrder"]; │
│ n140106377567768[label="MergeTreeInOrder"]; │
│ n140104577888600[label="Concat"]; │
│ n140107719944728[label="ExpressionTransform"]; │
│ n140106223818776[label="Remote"]; │
│ n140108793667416[label="Resize"]; │
│ n140106595924504[label="ExpressionTransform"]; │
│ n140108048613784[label="CountingTransform"]; │
│ n140107723734808[label="ConvertingTransform"]; │
│ n140108909441816[label="DistributedSink"]; │
│ n140107705624312[label="EmptySink"]; │
}

  

# Query clickhouse_ndoe1's parts
 
clickhouse_ndoe1 :) select part_type, name, rows, data_uncompressed_bytes, data_compressed_bytes from system.parts where table = 'a_local_test_' order by name;
 
SELECT
part_type,
name,
rows,
data_uncompressed_bytes,
data_compressed_bytes
FROM system.parts
WHERE table = 'a_local_test_'
ORDER BY name ASC
 
Query id: 3eef9df4-6531-41e4-9b8a-76e0bbe8baf3
 
┌─part_type─┬─name───────────────────────────────────────┬─────rows─┬─data_uncompressed_bytes─┬─data_compressed_bytes─┐
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_107_112_1 │ 2552357 │ 158412446 │ 22742232 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_113_117_1 │ 878071 │ 58139867 │ 8515481 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_118_123_1 │ 1288218 │ 84246272 │ 10378907 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_124_128_1 │ 849752 │ 56772165 │ 7301062 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_129_129_0 │ 400773 │ 26848735 │ 3463873 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_130_130_0 │ 400893 │ 26856713 │ 3465251 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_41_52_2 │ 8910396 │ 525713364 │ 39564323 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_53_81_2 │ 36703531 │ 2165508329 │ 255460351 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_82_87_1 │ 7865385 │ 464057715 │ 50227734 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_88_93_1 │ 7863302 │ 463934818 │ 49029853 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_94_106_2 │ 4500938 │ 271721024 │ 32701173 │
└───────────┴────────────────────────────────────────────┴──────────┴─────────────────────────┴───────────────────────┘
 
 
# Query clickhouse_ndoe2's parts
 
clickhouse_ndoe2 :) select part_type, name, rows, data_uncompressed_bytes, data_compressed_bytes from system.parts where table = 'a_local_test_' order by name;
 
SELECT
part_type,
name,
rows,
data_uncompressed_bytes,
data_compressed_bytes
FROM system.parts
WHERE table = 'a_local_test_'
ORDER BY name ASC
 
Query id: 10222393-e1ab-4a9e-92e5-ff39fe5ff803
 
┌─part_type─┬─name───────────────────────────────────────┬─────rows─┬─data_uncompressed_bytes─┬─data_compressed_bytes─┐
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_107_112_1 │ 2552357 │ 158412446 │ 22742232 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_113_117_1 │ 878071 │ 58139867 │ 8515481 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_118_123_1 │ 1288218 │ 84246272 │ 10378907 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_124_128_1 │ 849752 │ 56772165 │ 7301062 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_129_129_0 │ 400773 │ 26848735 │ 3463873 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_130_130_0 │ 400893 │ 26856713 │ 3465251 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_41_52_2 │ 8910396 │ 525713364 │ 39564323 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_53_81_2 │ 36703531 │ 2165508329 │ 255460351 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_82_87_1 │ 7865385 │ 464057715 │ 50227734 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_88_93_1 │ 7863302 │ 463934818 │ 49029853 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_94_106_2 │ 4500938 │ 271721024 │ 32701173 │
└───────────┴────────────────────────────────────────────┴──────────┴─────────────────────────┴───────────────────────┘
 
11 rows in set. Elapsed: 0.004 sec.
 
 
# 关闭local表 merge
clickhouse_ndoe2 :) system stop merges a_local_test__;
 
SYSTEM STOP MERGES a_local_test__
 
 
clickhouse_ndoe1 :) system stop merges a_local_test__;
 
SYSTEM STOP MERGES a_local_test__
 
Query id: 17a7883c-610a-4c3a-afc7-e6fe3e3f8336
 
Ok.
 
0 rows in set. Elapsed: 0.001 sec
 
 
clickhouse_ndoe1's parts: 66
 
clickhouse_ndoe1 :) select part_type, name, rows, data_uncompressed_bytes, data_compressed_bytes from system.parts where table = 'a_local_test__' and name like '%_0' order by name, rows;
 
 
SELECT
part_type,
name,
rows,
data_uncompressed_bytes,
data_compressed_bytes
FROM system.parts
WHERE (table = 'a_local_test__') AND (name LIKE '%_0')
ORDER BY
name ASC,
rows ASC
 
Query id: 6876744e-c5a6-4c99-8b69-10b5c59daef2
 
┌─part_type─┬─name─────────────────────────────────────┬────rows─┬─data_uncompressed_bytes─┬─data_compressed_bytes─┐
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_0_0_0 │ 1048545 │ 51378705 │ 4408214 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_10_10_0 │ 1073333 │ 52593317 │ 7564032 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_11_11_0 │ 1065714 │ 52219986 │ 7503235 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_12_12_0 │ 1067010 │ 52283490 │ 7493430 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_13_13_0 │ 1071376 │ 52497424 │ 7512853 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_14_14_0 │ 1059698 │ 51925202 │ 7394139 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_15_15_0 │ 1092360 │ 53525640 │ 7589944 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_16_16_0 │ 1088743 │ 53348407 │ 7607874 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_17_17_0 │ 1070764 │ 52467436 │ 7510621 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_18_18_0 │ 1068793 │ 52370857 │ 7524363 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_19_19_0 │ 1066306 │ 52248994 │ 7498712 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_1_1_0 │ 1056737 │ 51780113 │ 4428789 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_20_20_0 │ 1061694 │ 52023006 │ 7429082 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_21_21_0 │ 1152445 │ 56469805 │ 7786333 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_22_22_0 │ 1062407 │ 52057943 │ 7366254 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_23_23_0 │ 1062679 │ 52071271 │ 7403463 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_24_24_0 │ 1870465 │ 91652785 │ 12663450 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_25_25_0 │ 1069234 │ 52392466 │ 7421521 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_26_26_0 │ 1065874 │ 52227826 │ 7255113 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_27_27_0 │ 1089914 │ 53405786 │ 8223480 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_28_28_0 │ 1564496 │ 76660304 │ 11547210 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_29_29_0 │ 1509625 │ 73971625 │ 11104334 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_2_2_0 │ 1064929 │ 52181521 │ 4463029 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_30_30_0 │ 1171386 │ 57397914 │ 9518771 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_31_31_0 │ 719909 │ 35275541 │ 4803305 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_32_32_0 │ 1056253 │ 51756397 │ 6155222 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_33_33_0 │ 1048576 │ 51380224 │ 4394521 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_34_34_0 │ 1048576 │ 51380224 │ 4394501 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_35_35_0 │ 1063433 │ 52108217 │ 5996677 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_36_36_0 │ 1055685 │ 51728565 │ 7294406 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_37_37_0 │ 1055659 │ 51727291 │ 7459123 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_38_38_0 │ 1055910 │ 51739590 │ 7491917 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_39_39_0 │ 1055715 │ 51730035 │ 7491357 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_3_3_0 │ 1065285 │ 52198965 │ 4711622 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_40_40_0 │ 1055268 │ 51708132 │ 7484420 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_41_41_0 │ 1054190 │ 51655310 │ 7477190 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_42_42_0 │ 1056200 │ 51753800 │ 7490571 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_43_43_0 │ 1048985 │ 51400265 │ 7426935 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_44_44_0 │ 1051684 │ 51532516 │ 7427323 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_45_45_0 │ 1054421 │ 51666629 │ 7443089 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_46_46_0 │ 1049910 │ 51445590 │ 7348537 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_47_47_0 │ 1051318 │ 51514582 │ 7359462 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_48_48_0 │ 1053818 │ 51637082 │ 7413120 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_49_49_0 │ 1049394 │ 51420306 │ 7416588 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_4_4_0 │ 1356826 │ 66484474 │ 9235569 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_50_50_0 │ 1052175 │ 51556575 │ 7455267 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_51_51_0 │ 1049427 │ 51421923 │ 7421136 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_52_52_0 │ 1054768 │ 51683632 │ 7414282 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_53_53_0 │ 1053142 │ 51603958 │ 7362986 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_54_54_0 │ 1052943 │ 51594207 │ 7341491 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_55_55_0 │ 1054437 │ 51667413 │ 7408297 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_56_56_0 │ 1062588 │ 52066812 │ 7212140 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_57_57_0 │ 1050119 │ 51455831 │ 7244515 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_58_58_0 │ 1050757 │ 51487093 │ 7253409 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_59_59_0 │ 1050279 │ 51463671 │ 7287240 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_5_5_0 │ 1072782 │ 52566318 │ 7456574 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_60_60_0 │ 1054973 │ 51693677 │ 8661525 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_61_61_0 │ 1064216 │ 52146584 │ 7507033 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_62_62_0 │ 1050796 │ 51489004 │ 7350421 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_63_63_0 │ 1048569 │ 51379881 │ 8703986 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_64_64_0 │ 641668 │ 31441732 │ 4865774 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_65_65_0 │ 543330 │ 26623170 │ 4032487 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_6_6_0 │ 1064648 │ 52167752 │ 7506150 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_7_7_0 │ 1056596 │ 51773204 │ 7450610 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_8_8_0 │ 1075573 │ 52703077 │ 7580484 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_9_9_0 │ 1079333 │ 52887317 │ 7608502 │
└───────────┴──────────────────────────────────────────┴─────────┴─────────────────────────┴───────────────────────┘
 
 
clickhouse_ndoe2's parts: 67.
 
clickhouse_ndoe2 :) select part_type, name, rows, data_uncompressed_bytes, data_compressed_bytes from system.parts where table = 'a_local_test__' and name like '%_0' order by name, rows;
 
 
SELECT
part_type,
name,
rows,
data_uncompressed_bytes,
data_compressed_bytes
FROM system.parts
WHERE (table = 'a_local_test__') AND (name LIKE '%_0')
ORDER BY
name ASC,
rows ASC
 
Query id: e5367688-31c3-4fc3-bfd6-2e0b17400ea5
 
┌─part_type─┬─name─────────────────────────────────────┬────rows─┬─data_uncompressed_bytes─┬─data_compressed_bytes─┐
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_0_0_0 │ 1048545 │ 51378705 │ 4408322 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_10_10_0 │ 1056525 │ 51769725 │ 7448103 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_11_11_0 │ 1064144 │ 52143056 │ 7491296 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_12_12_0 │ 1062848 │ 52079552 │ 7463675 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_13_13_0 │ 1058482 │ 51865618 │ 7425197 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_14_14_0 │ 1070160 │ 52437840 │ 7458963 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_15_15_0 │ 1555536 │ 76221264 │ 10853233 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_16_16_0 │ 1056277 │ 51757573 │ 7381295 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_17_17_0 │ 1057019 │ 51793931 │ 7457516 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_18_18_0 │ 1061703 │ 52023447 │ 7475878 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_19_19_0 │ 1065710 │ 52219790 │ 7482082 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_1_1_0 │ 1056737 │ 51780113 │ 4428774 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_20_20_0 │ 1679952 │ 82317648 │ 11472094 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_21_21_0 │ 1067451 │ 52305099 │ 7400413 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_22_22_0 │ 1067179 │ 52291771 │ 7434351 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_23_23_0 │ 1330692 │ 65203908 │ 9039777 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_24_24_0 │ 1080993 │ 52968657 │ 7371276 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_25_25_0 │ 1062850 │ 52079650 │ 7292721 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_26_26_0 │ 1064791 │ 52174759 │ 7346997 │
│ Compact │ 72bab4faa3b9c23f2b36e80fe09e76fc_27_27_0 │ 8192 │ 401408 │ 34368 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_28_28_0 │ 1048545 │ 51378705 │ 7370922 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_29_29_0 │ 1072333 │ 52544317 │ 8031285 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_2_2_0 │ 1064929 │ 52181521 │ 4463115 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_30_30_0 │ 1489172 │ 72969428 │ 11606566 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_31_31_0 │ 1178732 │ 57757868 │ 9535958 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_32_32_0 │ 713813 │ 34976837 │ 4756513 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_33_33_0 │ 1056133 │ 51750517 │ 6154424 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_34_34_0 │ 1048576 │ 51380224 │ 4394528 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_35_35_0 │ 1048576 │ 51380224 │ 4394570 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_36_36_0 │ 1056536 │ 51770264 │ 5966228 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_37_37_0 │ 1048918 │ 51396982 │ 7206801 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_38_38_0 │ 1049730 │ 51436770 │ 7417516 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_39_39_0 │ 1049389 │ 51420061 │ 7445895 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_3_3_0 │ 1064573 │ 52164077 │ 4704393 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_40_40_0 │ 1049614 │ 51431086 │ 7445440 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_41_41_0 │ 1050091 │ 51454459 │ 7452778 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_42_42_0 │ 1050918 │ 51494982 │ 7457943 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_43_43_0 │ 1049340 │ 51417660 │ 7444296 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_44_44_0 │ 1056633 │ 51775017 │ 7479805 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_45_45_0 │ 1053272 │ 51610328 │ 7438722 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_46_46_0 │ 1051003 │ 51499147 │ 7421385 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_47_47_0 │ 1054835 │ 51686915 │ 7379184 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_48_48_0 │ 1052771 │ 51585779 │ 7394481 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_49_49_0 │ 1054516 │ 51671284 │ 7435018 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_4_4_0 │ 1344220 │ 65866780 │ 9125024 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_50_50_0 │ 1054226 │ 51657074 │ 7459335 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_51_51_0 │ 1052429 │ 51569021 │ 7458373 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_52_52_0 │ 1049021 │ 51402029 │ 7415228 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_53_53_0 │ 1059333 │ 51907317 │ 7435957 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_54_54_0 │ 1055202 │ 51704898 │ 7375629 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_55_55_0 │ 1052667 │ 51580683 │ 7337352 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_56_56_0 │ 1049526 │ 51426774 │ 7366823 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_57_57_0 │ 1060167 │ 51948183 │ 7157798 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_58_58_0 │ 1055812 │ 51734788 │ 7320566 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_59_59_0 │ 1055304 │ 51709896 │ 7258334 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_5_5_0 │ 1057076 │ 51796724 │ 7347057 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_60_60_0 │ 1064719 │ 52171231 │ 7423155 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_61_61_0 │ 1048576 │ 51380224 │ 7337024 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_62_62_0 │ 1055747 │ 51731603 │ 8254679 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_63_63_0 │ 1050727 │ 51485623 │ 8091288 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_64_64_0 │ 1055044 │ 51697156 │ 8692953 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_65_65_0 │ 647363 │ 31720787 │ 4496793 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_66_66_0 │ 429387 │ 21039963 │ 3537669 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_6_6_0 │ 1065210 │ 52195290 │ 7510975 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_7_7_0 │ 1073262 │ 52589838 │ 7565837 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_8_8_0 │ 1054285 │ 51659965 │ 7434474 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_9_9_0 │ 1050525 │ 51475725 │ 7407231 │
└───────────┴──────────────────────────────────────────┴─────────┴─────────────────────────┴───────────────────────┘
 
67 rows in set. Elapsed: 0.005 sec.

  

结论

公式符合
# 使用clickhouse_ndoe1's和clickhouse_ndoe2's的parts数量 符合公式
Part of number <= min(Celling(total_rows/$min_insert_block_size_rows), Celling(total_bytes/min_insert_block_size_bytes))
 
clickhouse_ndoe2 :) select count(0)/2/1048545 from user_all;
 
SELECT (count(0) / 2) / 1048545
FROM user_all
 
Query id: 0a7949db-0c60-4462-8557-af4a0ccbbcd5
 
┌─divide(divide(count(), 2), 1048545)─┐
│ 68.87030694915335 │
└─────────────────────────────────────┘
 
1 rows in set. Elapsed: 0.006 sec.

  

疑问

按照SquashingTransform的规则,这里的值是不对的的问题。(见评论部分)
应该是ReplacingMergeTree本身的原因导致的,出现连续小于$min_insert_block_size_rows的parts.
# 创建 一张 no replacing Engine的表
 
│ CREATE TABLE test.test_no_replace
(
`_id` UInt64,
`partition_key` String,
`_value` String,
`id1` String,
`update_time` DateTime64(3, 'Asia/Shanghai'),
`id1_int` UInt64
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/test.a_local_test_no_relace', '{replica}')
PARTITION BY partition_key
ORDER BY (_id, _id)
SETTINGS index_granularity = 8192, storage_policy = 'multiple_disk'
 

  

# Insert
insert into test_no_replace select _id, '123', partition_key, _id, update_time, _id_int from test.a_local_;
 
#禁止Part Merge.
clickhouse_ndoe2 :) system stop merges test_no_replace;
 
#查看各个part的大小
select part_type, name, rows, data_uncompressed_bytes, data_compressed_bytes from system.parts where table = 'test_no_replace' and name like '%_0' order by name;
 
 
clickhouse_ndoe2 :) select part_type, name, rows, data_uncompressed_bytes, data_compressed_bytes from system.parts where table = 'test_no_replace' and name like '%_0' order by name;
 
SELECT
part_type,
name,
rows,
data_uncompressed_bytes,
data_compressed_bytes
FROM system.parts
WHERE (table = 'test_no_replace') AND (name LIKE '%_0')
ORDER BY name ASC
 
Query id: 7c06378e-ca84-470c-b470-28525221ec56
 
┌─part_type─┬─name───────────────────────────────────────┬────rows─┬─data_uncompressed_bytes─┬─data_compressed_bytes─┐
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_100_100_0 │ 1331273 │ 86667436 │ 9557337 │
│ Compact │ 72bab4faa3b9c23f2b36e80fe09e76fc_101_101_0 │ 12356 │ 790784 │ 79199 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_102_102_0 │ 1048545 │ 67106880 │ 8789597 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_103_103_0 │ 1599668 │ 107115999 │ 11964926 │
│ Compact │ 72bab4faa3b9c23f2b36e80fe09e76fc_104_104_0 │ 12495 │ 799390 │ 78505 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_105_105_0 │ 1048545 │ 61864155 │ 4441143 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_106_106_0 │ 1048545 │ 61864155 │ 4435208 │
... ...
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_172_172_0 │ 1579148 │ 105802916 │ 11805609 │
│ Compact │ 72bab4faa3b9c23f2b36e80fe09e76fc_173_173_0 │ 6291 │ 427777 │ 29137 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_174_174_0 │ 1048545 │ 106601528 │ 11922007 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_193_193_0 │ 1588158 │ 106263433 │ 11849256 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_194_194_0 │ 1579223 │ 105807941 │ 11805845 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_195_195_0 │ 1579254 │ 105810018 │ 11806035 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_196_196_0 │ 795816 │ 53313498 │ 6868682 │
│ Wide │ 72bab4faa3b9c23f2b36e80fe09e76fc_99_99_0 │ 1289661 │ 83407129 │ 9197285 │
└───────────┴────────────────────────────────────────────┴─────────┴─────────────────────────┴───────────────────────┘
从ReplicatedMergeTree Engine中没有连续出现小于$min_insert_block_size_rows

  

何时Replacing

使用ReplicatedMergeTree数据量与原表一致,说明Replacing发生在MergeTree写入磁盘时。
#Source table
clickhouse_ndoe2 :) select count(0) from a_local_;
 
SELECT count(0)
FROM a_local_
 
Query id: fc986d6a-413a-4bb7-9df2-99e31dd1bd09
 
┌───count()─┐
│ 111705551 │
└───────────┘
 
1 rows in set. Elapsed: 0.002 sec.
 
# NoReplacing
clickhouse_ndoe2 :) select count(0) from test_no_replace;
 
SELECT count(0)
FROM test_no_replace
 
Query id: 87e8cbc8-ae4e-45f8-a8aa-98884a648d15
 
┌───count()─┐
│ 111705551 │
└───────────┘
 
1 rows in set. Elapsed: 0.002 sec.
 
 
 
# Replacing Enigne
clickhouse_ndoe2 :) select count(0) from test.a_local_test;
 
SELECT count(0)
FROM test.a_local_test
 
Query id: 285f31dd-8b3b-47d1-a751-add3601ecf93
 
┌──count()─┐
│ 95100029 │
└──────────┘
 
1 rows in set. Elapsed: 0.002 sec.

  

减少Parts措施结论

  1. 设置参数 利用SquashingTransform 来Sqush Block
设置 insert_distributed_sync = false && settings.min_insert_block_size_rows, 会符合下面的公式,但是有弊端,会造成在Client端以及Server端 Double Buffering. (目前能分析到Server端可能会造成Double bufferization (由于COW机制),但是Client端还未得出结论).
公式: $min_insert_block_size_rows (default 1048576).
Part of number <= min(Celling(total_rows/$min_insert_block_size_rows), Celling(total_bytes/min_insert_block_size_bytes))
 
  1. 按 Partition Key, Shard key 进行排序pipeline
利用 Order by operator 来完成 多个Stream -> 1个Stream的转换.
 
─explain────────────────────────────────────────────────┐
│ digraph │
│ { │
│ rankdir="LR"; │
│ { node [shape = rect] │
│ n140107768265112[label="MergeTreeThread"]; │
│ n140107768273176[label="MergeTreeThread"]; │
│ n140107417657752[label="MergeTreeThread"]; │
│ n140107417660440[label="MergeTreeThread"]; │
│ n140106582188056[label="ExpressionTransform"]; │
│ n140106582193176[label="ExpressionTransform"]; │
│ n140106582193816[label="ExpressionTransform"]; │
│ n140106582194456[label="ExpressionTransform"]; │
│ n140107768862232[label="PartialSortingTransform"]; │
│ n140107719907864[label="PartialSortingTransform"]; │
│ n140107720055320[label="PartialSortingTransform"]; │
│ n140107720093720[label="PartialSortingTransform"]; │
│ n140107418149848[label="LimitsclickhouseeckingTransform"]; │
│ n140107418150296[label="LimitsclickhouseeckingTransform"]; │
│ n140107418152536[label="LimitsclickhouseeckingTransform"]; │
│ n140106582123800[label="LimitsclickhouseeckingTransform"]; │
│ n140107720611480[label="MergeSortingTransform"]; │
│ n140106585597720[label="MergeSortingTransform"]; │
│ n140106415137816[label="MergeSortingTransform"]; │
│ n140106415138456[label="MergeSortingTransform"]; │
│ n140106415139096[label="MergingSortedTransform"]; │
│ n140107720146968[label="Remote"]; │
│ n140106415143576[label="MergingSortedTransform"]; │
│ n140106584297624[label="ExpressionTransform"]; │
│ n140107719990040[label="ExpressionTransform"]; │
│ n140108130358936[label="CountingTransform"]; │
│ n140107749038616[label="ConvertingTransform"]; │
│ n140107417662232[label="DistributedSink"]; │
│ n140107723845784[label="EmptySink"];
 
 
 
 
# 验证
1. 建一张表设置min_insert_block_size_rows = 1,使用Trivial Insert 写入2000行. 会产生2000个block,每个block一行.
 
1.准备环境
 
clickhouse1 :) set min_insert_block_size_rows=1;
clickhouse2 :) set min_insert_block_size_rows=1;
 
 
建一张Distributed (user_all) +Local(关闭Local表的 merge) user_all_test.
clickhouse1 :)
│ CREATE TABLE test_database.user_all
(
`_id` UInt64,
`_key` String,
`_value` String,
`_id` String,
`update_time` DateTime64(3, 'Asia/Shanghai'),
`_id_int` UInt64
)
ENGINE = Distributed('cluster__no_replica', 'test_database', 'user_local_test', abs(javaHash(toString(_id))))
 
clickhouse2 :)
CREATE TABLE test_database.user_all
(
`_id` UInt64,
`_key` String,
`_value` String,
`_id` String,
`update_time` DateTime64(3, 'Asia/Shanghai'),
`_id_int` UInt64
)
ENGINE = Distributed('cluster__no_replica', 'test_database', 'user_local_test', abs(javaHash(toString(_id))))
 
 
clickhouse1 :) system stop merges user_local_test;
clickhouse2 :) system stop merges user_local_test;
 
3.产生Source Data, 会产生1000个block;
 
insert into user_all select number, '123', '456', 'account1', '20220101', number from numbers(2000);
 
# 查看user_local_test parts
 
 
clickhouse2 :) select part_type, name, rows, data_uncompressed_bytes, data_compressed_bytes from system.parts where table = 'user_local_test' order by name, rows;
1000 rows in set. Elapsed: 0.009 sec. Processed 1.00 thousand rows, 115.15 KB (112.31 thousand rows/s., 12.93 MB/s.)
 
 
clickhouse1 :) select part_type, name, rows, data_uncompressed_bytes, data_compressed_bytes from system.parts where table = 'user_local_test' order by name, rows;
 
1000 rows in set. Elapsed: 0.009 sec. Processed 1.00 thousand rows, 115.14 KB (106.19 thousand rows/s., 12.23 MB/s.)
 
 
5.清理掉数据
 
clickhouse1 :) truncate table local_test__;
 
TRUNCATE TABLE local_test__
 
Query id: 05b0bbde-d0f5-44b1-8d89-eb7028afa178
 
Ok.
 
0 rows in set. Elapsed: 0.011 sec.
 
clickhouse2 :) truncate table local_test__;
 
TRUNCATE TABLE local_test__
 
Query id: 8bb1270b-5f57-4399-a7a2-c1d0bb990b50
 
Ok.
 
0 rows in set. Elapsed: 0.011 sec.
 
 
6.使用order by _key,_id 的方式,写入distributed table (user_all_),会产生1个block.将user_local_test表的parts增大到各1000个,我们预测local_test__表part数量各为1.
 
 
# 写入user_all
insert into user_all_ select * from user_all order by _key, _id;
 
# 查看 local_test__ parts 数量
 
clickhouse2 :) select part_type, name, rows, data_uncompressed_bytes, data_compressed_bytes from system.parts where table = 'local_test__' order by name, rows;
 
SELECT
part_type,
name,
rows,
data_uncompressed_bytes,
data_compressed_bytes
FROM system.parts
WHERE table = 'local_test__'
ORDER BY
name ASC,
rows ASC
 
Query id: ddbe926f-f570-4703-b97c-a177aa887ebd
 
┌─part_type─┬─name─────────────────────────────────────┬─rows─┬─data_uncompressed_bytes─┬─data_compressed_bytes─┐
│ Compact │ 72bab4faa3b9c23f2b36e80fe09e76fc_23_23_0 │ 1000 │ 41000 │ 8323 │
└───────────┴──────────────────────────────────────────┴──────┴─────────────────────────┴───────────────────────┘
 
1 rows in set. Elapsed: 0.004 sec.
 
clickhouse1 :) select part_type, name, rows, data_uncompressed_bytes, data_compressed_bytes from system.parts where table = 'local_test__' order by name, rows;
 
SELECT
part_type,
name,
rows,
data_uncompressed_bytes,
data_compressed_bytes
FROM system.parts
WHERE table = 'local_test__'
ORDER BY
name ASC,
rows ASC
 
Query id: 3d0ff397-77f1-4810-b79b-0f4ea1bebe81
 
┌─part_type─┬─name─────────────────────────────────────┬─rows─┬─data_uncompressed_bytes─┬─data_compressed_bytes─┐
│ Compact │ 72bab4faa3b9c23f2b36e80fe09e76fc_22_22_0 │ 1000 │ 41000 │ 8325 │
└───────────┴──────────────────────────────────────────┴──────┴─────────────────────────┴───────────────────────┘
 
1 rows in set. Elapsed: 0.004 sec.
 
# 反例
# 如果不使用 order by shard_id ,partition_key,在目标表还是1000parts.
 
insert into user_all_ select * from user_all;
 
 
clickhouse2 :) select part_type, name, rows, data_uncompressed_bytes, data_compressed_bytes from system.parts where table = 'local_test__' order by name, rows;
 
1000 rows in set. Elapsed: 0.010 sec. Processed 1.00 thousand rows, 116.90 KB (97.98 thousand rows/s., 11.45 MB/s.)
 
 
 
clickhouse1 :) select part_type, name, rows, data_uncompressed_bytes, data_compressed_bytes from system.parts where table = 'local_test__' order by name, rows;
1000 rows in set. Elapsed: 0.011 sec. Processed 1.00 thousand rows, 116.90 KB (92.57 thousand rows/s., 10.82 MB/s.)

  

 
posted @ 2022-02-10 22:34  博客记  阅读(240)  评论(0编辑  收藏  举报