Hive to ClickHouse
Data flow: Hive -> Logstash (jdbc input) -> Kafka -> ClickHouse (Kafka engine + materialized view)
logstash.conf:
input {
# Hive source: polls HiveServer2 over JDBC on the schedule below and emits one event per row.
jdbc {
# Hive JDBC driver plus its transitive dependencies; paths are relative to the Logstash working directory.
jdbc_driver_library => "jars/hive-jdbc-2.1.1.jar,jars/libthrift-0.9.3.jar,jars/httpclient-4.4.jar,jars/httpcore-4.4.jar,jars/hive-service-2.1.1.jar,jars/hive-common-2.1.1.jar,jars/hive-metastore-2.1.1.jar,jars/hadoop-common-2.8.3.jar,jars/commons-logging-1.2.jar,jars/log4j-slf4j-impl-2.4.1.jar,jars/hive-service-rpc-2.1.1.jar,jars/commons-lang-2.6.jar,jars/commons-lang3-3.1.jar,jars/protobuf-java-2.5.0.jar,jars/hive-serde-2.1.1.jar"
jdbc_driver_class => "org.apache.hive.jdbc.HiveDriver"
jdbc_connection_string => "jdbc:hive2://127.0.0.1:10000/default"
# NOTE(review): empty credentials — presumably HiveServer2 auth is disabled; confirm for non-local deployments.
jdbc_user => ""
jdbc_password => ""
# NOTE(review): "n" is declared but the statement below never references :n — dead setting, or a missing "limit :n"? Verify intent.
parameters => {"n" => 100}
# Incremental load: persist the highest `ts` seen and only fetch newer rows on the next run.
use_column_value => true
tracking_column_type => "numeric"
tracking_column => "ts"
# :sql_last_value is the stored tracking value (0 on the very first run).
statement => "select ts,tag,cnt,val from test3 where ts > :sql_last_value"
# cron syntax: run once every minute
schedule => "* * * * *"
}
}
filter {
  mutate {
    # Drop Logstash bookkeeping fields so only the Hive columns reach Kafka.
    remove_field => ["@version", "@timestamp"]
  }
}
output {
  # Publish each row as a JSON document to the local Kafka broker.
  kafka {
    bootstrap_servers => "localhost:9092"
    topic_id => "events"
    codec => "json"
    # Very small producer batches — fine for a demo; raise for real throughput.
    batch_size => 5
  }
}
ClickHouse SQL:
-- Kafka consumer table: each SELECT (or attached materialized view) reads new
-- JSONEachRow messages from the `events` topic as consumer group `group1`.
CREATE TABLE event_stream
(
    ts  UInt64,   -- event time, epoch seconds as produced by the Hive query
    tag String,
    cnt Int64,
    val Float64   -- was `Double`: not ClickHouse's native type name; Float64 is the canonical 64-bit float
)
ENGINE = Kafka('192.168.182.1:9092', 'events', 'group1', 'JSONEachRow');
-- Persistence: drain the Kafka consumer table into a MergeTree-backed view.
-- The legacy positional form MergeTree(day, (day,ts,tag,cnt,val), 8192) is
-- deprecated (and rejected by modern ClickHouse); PARTITION BY / ORDER BY is
-- the supported equivalent (index_granularity 8192 is the default).
CREATE MATERIALIZED VIEW events
ENGINE = MergeTree
PARTITION BY day
ORDER BY (day, ts, tag, cnt, val)
AS
SELECT
    toDate(toDateTime(ts)) AS day,  -- derive the partition date from the epoch-seconds timestamp
    ts,
    tag,
    cnt,
    val
FROM event_stream;
-- Cluster-wide entry point: fans reads/writes out over the local `events`
-- table on every shard of cluster `perftest_3shards_1replicas`; rand()
-- spreads inserted rows uniformly across shards.
CREATE TABLE events_all AS events
ENGINE = Distributed(perftest_3shards_1replicas, default, events, rand());