citus real-time 分析demo( 来自官方文档)
citus 对于多租户以及实时应用的开发都是比较好的,官方也提供了demo
环境准备
- docker-compose 文件
version: '2.1'
services:
graphql-engine:
image: hasura/graphql-engine:v1.0.0-alpha26
ports:
- "8080:8080"
command: >
/bin/sh -c "
graphql-engine --database-url postgres://postgres@master/postgres serve --enable-console;
"
master:
container_name: "${COMPOSE_PROJECT_NAME:-citus}_master"
image: 'citusdata/citus:7.5.1'
ports: ["${MASTER_EXTERNAL_PORT:-5432}:5432"]
labels: ['com.citusdata.role=Master']
worker:
image: 'citusdata/citus:7.5.1'
labels: ['com.citusdata.role=Worker']
ports:
- "5433:5432"
depends_on: { manager: { condition: service_healthy } }
manager:
container_name: "${COMPOSE_PROJECT_NAME:-citus}_manager"
image: 'citusdata/membership-manager:0.2.0'
volumes: ['/var/run/docker.sock:/var/run/docker.sock']
depends_on: { master: { condition: service_healthy } }
- 数据准备
curl https://examples.citusdata.com/tutorial/users.csv > users.csv
curl https://examples.citusdata.com/tutorial/events.csv > events.csv
- 数据表床架吗
CREATE TABLE github_events
(
event_id bigint,
event_type text,
event_public boolean,
repo_id bigint,
payload jsonb,
repo jsonb,
user_id bigint,
org jsonb,
created_at timestamp
);
CREATE TABLE github_users
(
user_id bigint,
url text,
login text,
avatar_url text,
gravatar_id text,
display_login text
);
- 添加索引以及分布式表
CREATE INDEX event_type_index ON github_events (event_type);
CREATE INDEX payload_index ON github_events USING GIN (payload jsonb_path_ops);
SELECT create_distributed_table('github_users', 'user_id');
SELECT create_distributed_table('github_events', 'user_id');
- 导入数据
使用psql copy,注意文件目录
\copy github_users from 'users.csv' with csv
\copy github_events from 'events.csv' with csv
运行查询
- 查询总数
select count(*) from github_users;
- 根据event 类似类型,分析每分钟的push event commit 数
SELECT date_trunc('minute', created_at) AS minute,
sum((payload->>'distinct_size')::int) AS num_commits
FROM github_events
WHERE event_type = 'PushEvent'
GROUP BY minute
ORDER BY minute;
- 数据join 查询,top 10
SELECT login, count(*)
FROM github_events ge
JOIN github_users gu
ON ge.user_id = gu.user_id
WHERE event_type = 'CreateEvent' AND payload @> '{"ref_type": "repository"}'
GROUP BY login
ORDER BY count(*) DESC LIMIT 10;
graphql 集成
- 添加graphql 支持
- 一个时间分组view
create view commitdataview as
SELECT date_trunc('minute', created_at) AS minute,
sum((payload->>'distinct_size')::int) AS num_commits
FROM github_events
WHERE event_type = 'PushEvent'
GROUP BY minute
ORDER BY minute;
- graphql 查询
query {
github_users(limit:10,where:{
user_id:{
_eq:331
}
}) {
user_id
gravatar_id
login
display_login
}
commitdataview(limit:10,order_by:{
num_commits:desc
}) {
num_commits
minute
}
}
- 效果
说明
核心就是基于时间的数据聚合分析,处理
参考资料
https://docs.citusdata.com/en/v7.5/get_started/tutorial_realtime_analytics.html
https://github.com/rongfengliang/citus-hasuar-graphql