clickhouse从MySQL导入数据
本节介绍如何从阿里云RDS MySQL中导入数据到ClickHouse。
操作步骤
- 依据MySQL表结构在ClickHouse中进行建表操作。
MySQL中数据类型与ClickHouse类型映射关系如下表。
MySQL ClickHouse UNSIGNED TINYINT UInt8 TINYINT Int8 UNSIGNED SMALLINT UInt16 SMALLINT Int16 UNSIGNED INT, UNSIGNED MEDIUMINT UInt32 INT, MEDIUMINT Int32 UNSIGNED BIGINT UInt64 BIGINT Int64 FLOAT Float32 DOUBLE Float64 DATE Date DATETIME, TIMESTAMP DateTime BINARY FixedString 说明 其他MySQL类型都转化为ClickHouse的String类型。MySQL建表语句中未指定NOT NULL的列,值可以为NULL,在ClickHouse的建表语句中,对应的列需要使用Nullable进行标识。MySQL建表语句示例如下。CREATE TABLE testdb.mysql_test_table ( id int NOT NULL, quarter tinyint unsigned DEFAULT NULL, month tinyint DEFAULT NULL, day_of_month smallint unsigned DEFAULT NULL, day_of_week smallint DEFAULT NULL, airline_id int DEFAULT NULL, carrier float DEFAULT NULL, origin double DEFAULT NULL, unique_carrier varchar(80) NOT NULL, flight_date date NOT NULL, tail_date datetime DEFAULT NULL, origin_airport_time timestamp, comment varchar(100) ) ENGINE=InnoDB
则对应的ClickHouse建表语句示例如下。--建立本地表 create table default.clickhouse_test_table ON CLUSTER default ( id Int32, quarter Nullable(UInt32), month Nullable(Int8), day_of_month Nullable(UInt16), day_of_week Nullable(Int16), airline_id Nullable(Int32), carrier Nullable(Float32), origin Nullable(Float64), unique_carrier String, flight_date Date, tail_date Nullable(Datetime), origin_airport_time Nullable(Datetime), comment Nullable(String) ) ENGINE = ReplicatedMergeTree( '/clickhouse/tables/clickhouse_test_table/{shard}', '{replica}', flight_date, (id, unique_carrier), 8192); --建立分布式表 CREATE TABLE clickhouse_test_table_distributed ON CLUSTER default AS clickhouse_test_table ENGINE = Distributed(default, default, clickhouse_test_table, rand());
- 导入数据。
insert into <table_name> select * from mysql('<host>:<port>', '<db_name>','<table_name>', '<username>', '<password>')
上述示例导入语句如下。
insert into clickhouse_test_table_distributed select * from mysql('<host>:<port>', 'test_db', 'mysql_test_table', '<username>', '<password>');
- 查询数据。
select count(*) from <table_name>