八 Clickhouse 应用案例
1 用户和权限
在user.xml中添加用户配置
<?xml version="1.0"?> <yandex> <profiles> • <default> • <max_memory_usage>10000000000</max_memory_usage> • <use_uncompressed_cache>0</use_uncompressed_cache> • <load_balancing>random</load_balancing> • </default> • <readonly> • <readonly>1</readonly> • </readonly> </profiles> <users> • <default> • <password></password> • <networks incl="networks" replace="replace"> • <ip>::/0</ip> • </networks> • <profile>default</profile> • <quota>default</quota> • </default> <!-- 定义一个用户 用户名为hangge 密码使用明文 root --> <hangge> <password_sha256_hex>f493c8a7a3c37088731336766459cc37e4b094e95b918038726660cc42013fcd</password_sha256_hex> • <networks incl="networks" replace="replace"> • <ip>::/0</ip> • </networks> • <profile>default</profile> • <quota>default</quota> • </hangge> </users> <quotas> • <default> • <interval> • <duration>3600</duration> • <queries>0</queries> • <errors>0</errors> • <result_rows>0</result_rows> • <read_rows>0</read_rows> • <execution_time>0</execution_time> • </interval> • </default> </quotas> </yandex>
明文密码
<password></password> 里面没有配置说明是没有密码
SHA256加密:
在使用SHA256加密算法的时候,需要通过password_sha256_hex标签定义密码
[root@ck1 ~]# echo -n hangge | openssl dgst -sha256 (stdin)= f493c8a7a3c37088731336766459cc37e4b094e95b918038726660cc42013fcd
<hangge> <password_sha256_hex>f493c8a7a3c37088731336766459cc37e4b094e95b918038726660cc42013fcd</password_sha256_hex> <networks incl="networks" replace="replace"> <ip>::/0</ip> </networks> <profile>default</profile> <quota>default</quota> </hangge>
double_sha1加密:
在使用double_sha1加密算法的时候,则需要通过 password_double_sha1_hex标签定义密码,
<password_double_sha1_hex>23ae809ddacaf96af0fd78ed04b6a265e05aa257</password_double_sha1_hex> \# echo -n 123 | openssl dgst -sha1 -binary | openssl dgst -sha1 (stdin)= 23ae809ddacaf96af0fd78ed04b6a265e05aa257
用户权限控制'
<hangge> <password_sha256_hex>60cd41aedc4e47e8883682b416109e7b7e345e15decc63c2c98ecdab5e8e053a</password_sha256_hex> <networks incl="networks" /> <profile>readonly</profile> <quota>default</quota> <allow_databases> <database>default</database> </allow_databases> </hangge> ## 3 JDBC和客户端工具 ### 3.1 JDBC •```xml <!-- https://mvnrepository.com/artifact/ru.yandex.clickhouse/clickhouse-jdbc --> <dependency> <groupId>ru.yandex.clickhouse</groupId> <artifactId>clickhouse-jdbc</artifactId> <version>0.2.4</version> </dependency> public class Demo1 { public static void main(String[] args) throws Exception { Class.forName("ru.yandex.clickhouse.ClickHouseDriver"); String url = "jdbc:clickhouse://linux01:8123/default"; String username = "default"; String password = ""; Connection con = DriverManager.getConnection(url, username, password); Statement stmt = con.createStatement(); ResultSet resultSet = stmt.executeQuery("select * from tb_demo1"); while (resultSet.next()) { int id = resultSet.getInt("id"); String name = resultSet.getString("name"); System.out.println(id + ":" + name); } con.close(); stmt.close(); resultSet.close(); } }
高可用模式允许设置多个host地址,每次会从可用的地址中随机选择一个进行连接,在高可用模式下,需要通过BalancedClickhouseDataSource对象获取连接
public static void main(String[] args) throws Exception { // 初始化驱动 Class.forName("ru.yandex.clickhouse.ClickHouseDriver"); // url String url = "jdbc:clickhouse://linux01:8123,linux02:8123,linux03:8123/default"; //设置JDBC参数 ClickHouseProperties clickHouseProperties = new ClickHouseProperties(); clickHouseProperties.setUser("default"); //声明数据源 BalancedClickhouseDataSource balanced = new BalancedClickhouseDataSource(url, clickHouseProperties); //对每个host进行ping操作, 排除不可用的dead连接 balanced.actualize(); //获得JDBC连接 Connection con = balanced.getConnection(); Statement stmt = con.createStatement(); ResultSet resultSet = stmt.executeQuery("select * from demo3_all"); while (resultSet.next()) { int id = resultSet.getInt("id"); String name = resultSet.getString("name"); System.out.println(id + ":" + name); } con.close(); stmt.close(); resultSet.close(); }
3.2 客户端工具DBeaver
4.1 windowFunnel函数
(参数一) 时间的单位 窗口的大小 时间的单位 (时间 , 事件链条)
uid1 event1 1551398404 uid1 event2 1551398406 uid1 event3 1551398408 uid2 event2 1551398412 uid2 event3 1551398415 uid3 event3 1551398410 uid3 event4 1551398413 ———————————————— -- 建表 drop table if exists test_funnel ; CREATE TABLE test_funnel( uid String, eventid String, eventTime UInt64) ENGINE = MergeTree ORDER BY (uid, eventTime) ; -- 导入数据 insert into test_funnel values ('uid1','event1',1551398404), ('uid1','event2',1551398406), ('uid1','event3',1551398408), ('uid2','event2',1551398412), ('uid2','event3',1551398415), ('uid3','event3',1551398410), ('uid3','event4',1551398413); -- 查看数据 ┌─uid──┬─eventid─┬──eventTime─┐ │ uid1 │ event1 │ 1551398404 │ │ uid1 │ event2 │ 1551398406 │ │ uid1 │ event3 │ 1551398408 │ │ uid2 │ event2 │ 1551398412 │ │ uid2 │ event3 │ 1551398415 │ │ uid3 │ event3 │ 1551398410 │ │ uid3 │ event4 │ 1551398413 │ └──────┴─────────┴────────────┘ select uid , windowFunnel(4)( toDateTime(eventTime), eventid='event1' , eventid='event2' , eventid='event3' ) as funnel from test_funnel group by uid ; ┌─uid──┬─funnel─┐ │ uid3 │ 0 │ │ uid1 │ 3 │ │ uid2 │ 0 │ └──────┴────────┘ select uid , windowFunnel(4)( toDateTime(eventTime), eventid='event2' , eventid='event3' ) as funnel from test_funnel group by uid ; ┌─uid──┬─funnel─┐ │ uid3 │ 0 │ │ uid1 │ 2 │ │ uid2 │ 2 │ └──────┴────────┘
案例
建表 导入数据
clickhouse-client -q 'insert into test1.ods_log format JSONAsString' < event.log drop table if exists test_log ; create table test_log engine=MergeTree() order by (id,ts) as with visitParamExtractUInt(line,'timeStamp') as ts , visitParamExtractString(line ,'account')as account, visitParamExtractString(line ,'deviceId')as deviceId, visitParamExtractString(line ,'sessionId')as sessionId, visitParamExtractString(line ,'ip')as ip, visitParamExtractString(line ,'eventId')as eventId, visitParamExtractRaw(line ,'properties')as properties select if(account='' , deviceId , account) id , account , deviceId, sessionId, ip, eventId, properties, ts from tb_ods_log ;
select id , windowFunnel(100000)( toDateTime(ts), eventId='productView' , eventId='adClick' , eventId='productView' , eventId='collect' ) as funnel from test_log group by id ;
4.2 sequenceCount
sequenceCount满足要求的次数
SELECT id, sequenceCount('(?1)') ( FROM_UNIXTIME(ts) , eventId='adShow' , eventId='productView' , eventId='collect' , eventId='addCart' )AS cnt FROM test_log GROUP BY id having id='0T7136zA3BZI';
4.3 sequenceMatch
这个函数都需要指定模式串、时间列和期望的事件序列(最多可指定32个事件)。模式串的语法有以下三种: (?N):表示时间序列中的第N个事件,从1开始。例如上述SQL中,(?2)即表示event_type = 'shtKkclick' AND column_type = 'homePage'。 (?t op secs):插入两个事件之间,表示它们发生时需要满足的时间条件(单位为秒)。例如上述SQL中,(?1)(?t<=15)(?2)即表示事件1和2发生的时间间隔在15秒以内。 .*:表示任意的非指定事件。
SELECT id, sequenceMatch('(?1)(?t<=10)(?2)(?3).*(?4)')( FROM_UNIXTIME(ts) , eventId='adClick' , eventId='productView' , eventId='collect' , eventId='addCart' ) AS is_match FROM log GROUP BY id having id='dGHDHV7WOrpJ'; SELECT id, sequenceCount('(?1)(?t<=10)(?2)(?3).*(?4)')( FROM_UNIXTIME(ts) , eventId='adClick' , eventId='productView' , eventId='collect' , eventId='addCart' ) AS cnt FROM test_log GROUP BY id ; having id='dGHDHV7WOrpJ 0T7136zA3BZI'; select id ,eventId from test_log where id = 'dGHDHV7WOrpJ'