ClickHouse

Loading data from Hive into ClickHouse

Data flow: Hive -> Logstash (JDBC input) -> Kafka -> ClickHouse (Kafka table engine)

logstash.conf

# Incrementally pulls rows from Hive over JDBC once a minute and feeds them
# into the pipeline. sql_last_value is the highest `ts` seen so far, so each
# run fetches only rows newer than the previous run.
input {
    jdbc {
        # Hive JDBC driver plus its transitive dependencies.
        jdbc_driver_library => "jars/hive-jdbc-2.1.1.jar,jars/libthrift-0.9.3.jar,jars/httpclient-4.4.jar,jars/httpcore-4.4.jar,jars/hive-service-2.1.1.jar,jars/hive-common-2.1.1.jar,jars/hive-metastore-2.1.1.jar,jars/hadoop-common-2.8.3.jar,jars/commons-logging-1.2.jar,jars/log4j-slf4j-impl-2.4.1.jar,jars/hive-service-rpc-2.1.1.jar,jars/commons-lang-2.6.jar,jars/commons-lang3-3.1.jar,jars/protobuf-java-2.5.0.jar,jars/hive-serde-2.1.1.jar"
        jdbc_driver_class => "org.apache.hive.jdbc.HiveDriver"
        jdbc_connection_string => "jdbc:hive2://127.0.0.1:10000/default"
        # NOTE(review): empty credentials — confirm this HiveServer2 instance
        # really accepts anonymous connections.
        jdbc_user => ""
        jdbc_password => ""
        # Incremental sync: track the numeric `ts` column; its last value is
        # substituted for :sql_last_value in the statement below.
        use_column_value => true
        tracking_column_type => "numeric"
        tracking_column => "ts"
        statement => "select ts,tag,cnt,val from test3 where ts > :sql_last_value"
        # Cron schedule: run every minute.
        schedule => "* * * * *"
    }
}

# Strip Logstash bookkeeping fields so the JSON published to Kafka contains
# only the Hive columns (ts, tag, cnt, val) expected by the ClickHouse
# JSONEachRow consumer.
filter {
    mutate {
        remove_field => ["@version","@timestamp"]
    }
}

# Publish each row as one JSON message to the Kafka "events" topic.
output {
	kafka {
		# NOTE(review): broker here is localhost:9092, but the ClickHouse
		# Kafka engine below consumes from 192.168.182.1:9092 — confirm both
		# actually point at the same Kafka cluster.
		bootstrap_servers => "localhost:9092" 
		topic_id => "events"
		# NOTE(review): batch_size of 5 is very small — presumably chosen for
		# demo latency; verify throughput is acceptable for real loads.
		batch_size => 5
		codec => "json"
	}
}

ClickHouse SQL (run in clickhouse-client):
-- Kafka consumer table: streams JSON rows from the "events" topic.
-- NOTE(review): the Logstash output above publishes to localhost:9092 while
-- this consumes from 192.168.182.1:9092 — confirm both refer to the same broker.
CREATE TABLE event_stream
(
    ts  UInt64,
    tag String,
    cnt Int64,
    val Float64  -- was "Double"; the canonical ClickHouse type is Float64
)
ENGINE = Kafka
SETTINGS kafka_broker_list = '192.168.182.1:9092',
         kafka_topic_list  = 'events',
         kafka_group_name  = 'group1',
         kafka_format      = 'JSONEachRow';

-- Persist the stream: the materialized view continuously reads from
-- event_stream and writes into its own MergeTree storage. PARTITION BY
-- toYYYYMM(day) + ORDER BY is the modern equivalent of the legacy
-- MergeTree(day, (day, ts, tag, cnt, val), 8192) monthly partitioning.
CREATE MATERIALIZED VIEW events
ENGINE = MergeTree
PARTITION BY toYYYYMM(day)
ORDER BY (day, ts, tag, cnt, val)
SETTINGS index_granularity = 8192
AS
SELECT
    toDate(toDateTime(ts)) AS day,  -- ts is assumed to be a unix timestamp in seconds — TODO confirm
    ts,
    tag,
    cnt,
    val
FROM event_stream;

-- Cluster-wide facade over `events` on cluster perftest_3shards_1replicas;
-- rand() sharding spreads inserts evenly across shards.
CREATE TABLE events_all AS events
ENGINE = Distributed(perftest_3shards_1replicas, default, events, rand());
Posted 2020-10-28 15:56 by fleam · Views: 308 · Comments: 0