springboot整合mysql和clickhouse多数据源
1、添加依赖
<!-- MyBatis-Plus Starter --> <dependency> <groupId>com.baomidou</groupId> <artifactId>mybatis-plus-boot-starter</artifactId> <version>3.2.0</version> </dependency> <!-- MySQL JDBC Connector --> <dependency> <groupId>mysql</groupId> <artifactId>mysql-connector-java</artifactId> <version>8.0.26</version> </dependency> <!-- ClickHouse JDBC Connector --> <dependency> <groupId>ru.yandex.clickhouse</groupId> <artifactId>clickhouse-jdbc</artifactId> <version>0.2.4</version> </dependency>
2、配置文件
spring.datasource.dynamic.primary=master spring.datasource.dynamic.strict=false spring.datasource.dynamic.datasource.master.url=jdbc:mysql://172.16.22.80:3306/hw-manage?useUnicode=true&characterEncoding=utf8&serverTimezone=GMT%2b8&useSSL=false spring.datasource.dynamic.datasource.master.username=root spring.datasource.dynamic.datasource.master.password=root spring.datasource.dynamic.datasource.master.driver-class-name=com.mysql.jdbc.Driver spring.datasource.dynamic.datasource.click.url=jdbc:clickhouse://41.6.251.1:8123,41.6.251.2:8123,41.6.251.3:8123/vehicle_dev spring.datasource.dynamic.datasource.click.type=ru.yandex.clickhouse.BalancedClickhouseDataSource spring.datasource.dynamic.datasource.click.username=11 spring.datasource.dynamic.datasource.click.password=22 spring.datasource.dynamic.datasource.click.driver-class-name=ru.yandex.clickhouse.ClickHouseDriver
3、添加配置类
package com.zygh.hzhw.manage.config; import com.baomidou.dynamic.datasource.creator.AbstractDataSourceCreator; import com.baomidou.dynamic.datasource.creator.DataSourceCreator; import com.baomidou.dynamic.datasource.spring.boot.autoconfigure.DataSourceProperty; import org.apache.commons.lang3.StringUtils; import org.springframework.context.annotation.Configuration; import org.springframework.core.annotation.Order; import ru.yandex.clickhouse.BalancedClickhouseDataSource; import ru.yandex.clickhouse.settings.ClickHouseProperties; import javax.sql.DataSource; import java.util.Objects; // 保证执行顺序在前面,否则 会被其他加载器给创建,导致自定义的加载器执行不到 @Order(0) @Configuration public class DynamicDataSourceConfig extends AbstractDataSourceCreator implements DataSourceCreator { @Override public DataSource doCreateDataSource(DataSourceProperty dataSourceProperty) { ClickHouseProperties properties = new ClickHouseProperties(); properties.setUser("11"); properties.setPassword("22"); properties.setMaxThreads(10); properties.setAsync(true); properties.setMaxTotal(2000); properties.setDefaultMaxPerRoute(1000); properties.setBufferSize(2000); properties.setApacheBufferSize(2000); properties.setSocketTimeout(Integer.MAX_VALUE); properties.setMaxQuerySize(Long.MAX_VALUE); properties.setConnectionTimeout(10 * 60 * 1000); BalancedClickhouseDataSource clickhouseDataSource = new BalancedClickhouseDataSource("jdbc:clickhouse://41.6.251.1:8123,41.6.251.2:8123,41.6.251.3:8123/vehicle_dev", properties); return clickhouseDataSource; } @Override public boolean support(DataSourceProperty dataSourceProperty) { // 适配器模式,判断是否需要使用当前动态数据源创建 Class<? extends DataSource> type = dataSourceProperty.getType(); if (Objects.isNull(type)) { return Boolean.FALSE; } return StringUtils.equals(type.getName(), BalancedClickhouseDataSource.class.getName()); } }
clickhouse常用日期函数:
函数 效果 描述
now() 2022-07-13 14:28:33 取当前时间
toUnixTimestamp(now()) 1657695998 获取当前时间戳
toYear(now()) 2022 取日期中的年份
toMonth(now()) 7 取日期中的月份
today() 2022-07-13 今天的日期
toDate(now()) 2022-07-13 取日期中的日期
yesterday() 2022-07-12 昨天的日期
toQuarter(now()) 3 获取当前日期季度
toDayOfMonth(now()) 13 当前月份的天数
toDayOfYear(now()) 194 日期化为天数
toDayOfWeek(now()) 3 获取星期几
toMonday(now()) 2022-07-11 当前周的第一天
toHour(now()) 14 取日期时间中的小时
toMinute(now()) 42 取日期时间中的分钟
toSecond(now()) 40 取日期时间中的秒
toStartOfQuarter(now()) 2022-07-01 当前季度的第一天
toStartOfMinute(now()) 2022-07-13 14:57:00 当前起始分钟时间
toStartOfHour(now()) 2022-07-13 14:00:00 当前起始小时时间
toStartOfDay(now()) 2022-07-13 00:00:00 当天起始时间
toStartOfYear(now()) 2022-01-01 当前年份的第一天
toStartOfMonth(now()) 2022-07-01 当前月份的第一天
利用clickhouse中的toStartOf*()函数,将时间归属到相同点,然后利用分组统计即可。
toStartOf*()函数:
- toStartOfInterval()
- toStartOfHour()
- toStartOfFifteenMinutes()
- toStartOfFiveMinute()
- toStartOfDay()
- toStartOfMonth()
- toStartOfQuarter()
- toStartOfWeek()
时间函数参考:https://blog.csdn.net/master_hunter/article/details/125762575
参考:https://www.cnblogs.com/lalala1/p/15644885.html
clickhouse优化:
建表考虑分区索引
表参数:index_granularity表索引粒度,ttl设置
实时数据,批量插入,不要一条一条插入数据
配置在config和user文件下面
内存,cup,io
调整:默认线程16,
count()快
explain sytax查看执行计划
查询优化:
prewhere和where
采样数据 sample
orderby 结合wehre,limit使用
虚拟列避免使用,消耗资源
uniqcombined
视图
查询熔断、关闭虚拟内存
argmax 分组取最后一条
常用查询sql
package com.zygh.track.dal.mapper.analysis; import java.util.Arrays; import java.util.Date; import com.baomidou.dynamic.datasource.annotation.DS; import com.baomidou.mybatisplus.core.mapper.BaseMapper; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import com.zygh.track.common.pojo.PageResult; import com.zygh.track.config.mybatis.query.LambdaQueryWrapperX; import com.zygh.track.controller.dict.vo.data.DictDataPageReqVO; import com.zygh.track.dal.dataobject.dict.DictDataDO; import com.zygh.track.dto.ActivityFrequencyParamDTO; import com.zygh.track.dto.CarParamDTO; import com.zygh.track.dto.SearchCarParamDTO; import com.zygh.track.entity.VehicleTrack; import com.zygh.track.vo.analysis.ActivityTimePatternVO; import com.zygh.track.vo.analysis.SearchCarVO; import org.apache.ibatis.annotations.Param; import org.apache.ibatis.annotations.Select; import org.springframework.cache.annotation.Cacheable; import java.util.List; /** * @author admin * @createDate 2023-06-28 14:10:41 * @Entity generator.entity.DriverInfo */ @DS("clickhouse") public interface TrajectoryAnalysisMapper extends BaseMapper<VehicleTrack> { /** * 日期维度分析 * * @param activityFrequencyParamDTO * @return */ @Select("<script> " + "select" + " formatDateTime(vga.pass_time,'%Y-%m-%d') as passTime," + "count(plate_info) as counts " + " from" + " vehicle_gangqu_all vga" + "<where>" + "<if test='activityFrequencyParamDTO.startTime != null and activityFrequencyParamDTO.startTime.length >0 '>" + " and vga.pass_time BETWEEN #{activityFrequencyParamDTO.startTime} and #{activityFrequencyParamDTO.endTime} " + "</if>" + "<if test='activityFrequencyParamDTO.plateInfo != null and activityFrequencyParamDTO.plateInfo.length >0 '>" + " and vga.plate_info = #{activityFrequencyParamDTO.plateInfo}" + "</if>" + "<if test='activityFrequencyParamDTO.vehicleType != null and activityFrequencyParamDTO.vehicleType.length >0 '>" + " and vga.vehicle_type = #{activityFrequencyParamDTO.vehicleType}" + "</if>" + "</where>" + "group by " + " formatDateTime(vga.pass_time,'%Y-%m-%d') </script>") List<ActivityTimePatternVO.DayActivityTime> activityTimePattern(@Param("activityFrequencyParamDTO") CarParamDTO activityFrequencyParamDTO); /** * 时段数据查询 * * @param activityFrequencyParamDTO * @return */ @Select("<script> " + "select" + " formatDateTime(vga.pass_time,'%H') as passTime," + "count(plate_info) as counts " + " from" + " vehicle_gangqu_all vga" + "<where>" + "<if test='activityFrequencyParamDTO.startTime != null and activityFrequencyParamDTO.startTime.length >0 '>" + " and vga.pass_time BETWEEN #{activityFrequencyParamDTO.startTime} and #{activityFrequencyParamDTO.endTime} " + "</if>" + "<if test='activityFrequencyParamDTO.plateInfo != null and activityFrequencyParamDTO.plateInfo.length >0 '>" + " and vga.plate_info = #{activityFrequencyParamDTO.plateInfo}" + "</if>" + "<if test='activityFrequencyParamDTO.vehicleType != null and activityFrequencyParamDTO.vehicleType.length >0 '>" + " and vga.vehicle_type = #{activityFrequencyParamDTO.vehicleType}" + "</if>" + "</where>" + "group by " + " formatDateTime(vga.pass_time,'%H') order by passTime </script>") List<ActivityTimePatternVO.DayActivityTime> hourActivityTimes(@Param("activityFrequencyParamDTO") CarParamDTO activityFrequencyParamDTO); /** * 查询一年内的第一次数据,根据车牌号分组 * * @param activityFrequencyParamDTO * @return */ @Select("<script> " + "select" + " formatDateTime(vga.pass_time,'%H:%M') as passTime," + "count(plate_info) as counts " + " from" + " vehicle_gangqu_all vga" + "<where>" + "<if test='activityFrequencyParamDTO.startTime != null and activityFrequencyParamDTO.startTime.length >0 '>" + " and vga.pass_time BETWEEN #{activityFrequencyParamDTO.startTime} and #{activityFrequencyParamDTO.endTime} " + "</if>" + "<if test='activityFrequencyParamDTO.plateInfo != null and activityFrequencyParamDTO.plateInfo.length >0 '>" + " and vga.plate_info = #{activityFrequencyParamDTO.plateInfo}" + "</if>" + "<if test='activityFrequencyParamDTO.plateType != null and activityFrequencyParamDTO.plateType.length >0 '>" + " and vga.plate_type = #{activityFrequencyParamDTO.plateType}" + "</if>" + "</where>" + "group by " + " formatDateTime(vga.pass_time,'%H:%M') </script>") List<ActivityTimePatternVO.DayActivityTime> firstTimes(@Param("activityFrequencyParamDTO") ActivityFrequencyParamDTO activityFrequencyParamDTO); /** * 根据设备code和时间范围查询过车数据 * * @return */ @Select("<script> select\n" + " vga.plate_info ,\n" + " vga .vehicle_type,\n" + " vga.vehicle_model,\n" + " vga.vehicle_color,\n" + " count(vga.plate_info) as counts \n" + " from\n" + " vehicle_gangqu_all vga\n" + " <where>\n" + " <if test=\"crossCode !=null and crossCode !='' \">\n" + " and vga.cross_code = #{crossCode}\n" + " </if>\n" + " <if test=\"startTime !=null and startTime !='' \">\n" + " and vga.pass_time BETWEEN #{startTime} and #{endTime}\n" + " </if>\n" + " </where>\n" + " group by vga.plate_info,vga .vehicle_type,vga.vehicle_model,vga.vehicle_color</script>") Page<SearchCarVO> selectAllByPlateInfoAndPassTime(@Param("page") Page page, @Param("crossCode") String crossCode, @Param("startTime") String startTime, @Param("endTime") String endTime); @Select("<script> select\n" + " toStartOfHour(pass_time) as passTime ,\n" + " count(*) counts,\n" + "vehicle_type as vehicleType,\n" + "vehicle_model as vehicleModel,\n" + "vehicle_color as vehicleColor,\n" + "plate_info\n" + "from\n" + "vehicle_gangqu_all " + "where\n" + " pass_time BETWEEN #{startTime} and #{endTime} " + "<if test= \"plateInfoList !=null and plateInfoList !='' \">\n" + " and plate_info in" + " <foreach collection=\"plateInfoList\" item=\"item\" open=\"(\" separator=\",\" close=\")\">\n" + " #{item}\n" + " </foreach>\n" + " </if>\n" + "group by\n" + " toStartOfHour(pass_time),\n" + " plate_info ,\n" + "\tvehicle_type ,\n" + "\tvehicle_model ,\n" + "vehicle_color " + "</script>") List<SearchCarVO> vehicleDuring(@Param("plateInfoList") List<String> plateInfoList, @Param("startTime") String startTime, @Param("endTime") String endTime); /** * 案件串并分析 * * @param page * @param startTime * @param endTime * @return */ @Select("<script> select " + " vga.plate_info ,\n" + " vga .vehicle_type,\n" + " vga.vehicle_model,\n" + " vga.vehicle_color,\n" + " vga.pass_time,\n" + " vga.pic_vehicle,\n" + " vga.cross_code\n" + " from" + " vehicle_gangqu_all vga" + " <where>" + " <if test= \"crossIds !=null and crossIds !='' \">\n" + " and vga.cross_code in" + " <foreach collection=\"crossIds\" item=\"item\" open=\"(\" separator=\",\" close=\")\">\n" + " #{item}\n" + " </foreach>\n" + " </if>\n" + " <if test=\"startTime !=null and startTime !='' \">\n" + " and vga.pass_time BETWEEN #{startTime} and #{endTime} " + " </if> " + "order by vga.pass_time desc "+ " </where> " + " </script>") Page<SearchCarVO> caseAnalysis(@Param("page") Page page, @Param("crossIds") List<String> crossIds, @Param("startTime") String startTime, @Param("endTime") String endTime); @Select("<script> select " + " vga.plate_info ,\n" + " vga .vehicle_type,\n" + " vga.vehicle_model,\n" + " vga.vehicle_color,\n" + " vga.pass_time,\n" + " vga.pic_vehicle\n" + " from" + " vehicle_gangqu_all vga" + " <where>" + " <if test=\"startTime !=null and startTime !='' \">\n" + " and vga.pass_time BETWEEN #{startTime} and #{endTime} order by pass_time asc" + " </if> " + " </where> " + " </script>") Page<SearchCarVO> firstTime(@Param("page") Page page, @Param("startTime") String startTime, @Param("endTime") String endTime); /** * 根据车牌号和时间分组查询 * * @param page * @return */ @Select("<script> select " + " vga.plate_info ," + " vga .vehicle_type," + " vga.vehicle_model," + " vga.vehicle_color," + " count(vga.plate_info) as counts\n" + " from" + " vehicle_gangqu_all as vga" + " <where>" + " <if test=\"searchCarParamDTO.plateInfo !=null and searchCarParamDTO.plateInfo !='' \">\n" + " and vga.plate_info != #{searchCarParamDTO.plateInfo}\n" + " </if>\n" + " <if test=\"searchCarParamDTO.startTime !=null and searchCarParamDTO.startTime !='' \">\n" + " and toStartOfDay(vga.pass_time) BETWEEN #{searchCarParamDTO.startTime} and #{searchCarParamDTO.endTime} " + " </if> " + " </where> " + " group by vga.plate_info,vga.vehicle_type ,vga .vehicle_model ,vga .vehicle_color" + " </script>") Page<SearchCarVO> accompanyVehicle(@Param("page") Page page, @Param("searchCarParamDTO") SearchCarParamDTO searchCarParamDTO); /** * 根据车牌号和时间查询 * * @param page * @return */ @Select("<script> select " + " vga.plate_info ,\n" + " vga.pic_vehicle,\n" + " vga.cross_code,\n" + " vga.pass_time\n" + " from" + " vehicle_gangqu_all as vga" + " <where>" + " <if test=\"searchCarParamDTO.plateInfo !=null and searchCarParamDTO.plateInfo !='' \">\n" + " and vga.plate_info != #{searchCarParamDTO.plateInfo}\n" + " </if>\n" + " <if test=\"searchCarParamDTO.startTime !=null and searchCarParamDTO.startTime !='' \">\n" + " and toStartOfHour(vga.pass_time) BETWEEN #{searchCarParamDTO.startTime} and #{searchCarParamDTO.endTime} " + " </if> " + " </where> " + " </script>") Page<SearchCarVO> accompanyVehicleRecord(@Param("page") Page page, @Param("searchCarParamDTO") SearchCarParamDTO searchCarParamDTO); }
类型:
date
datetime
datetime64
megtree引擎可以修改表结构
分组取每组前n条
-- 数据库分组取每个库下最大的三个表 SELECT database, table, formatReadableSize(sum(data_compressed_bytes)) AS bytes FROM system.parts GROUP BY database,table ORDER BY database,bytes DESC limit 3 by database ┌─database───────────────┬─table──────────────────────────┬─bytes──────┐ │ default │ insert_test_76 │ 9.93 MiB │ │ default │ insert_test_77 │ 9.92 MiB │ │ default │ insert_test_67 │ 9.42 MiB │ │ system │ trace_log │ 8.31 MiB │ │ system │ metric_log │ 74.76 MiB │ │ tutorial │ hdfs2ch │ 5.32 MiB │ │ tutorial │ visits_v1 │ 310.43 MiB │ │ tutorial │ hits_v1 │ 1.18 GiB │ ...
查看集群信息:
select * from system.clusters;
查看数据库版本
select version();
常用建表语句:
CREATE DATABASE IF NOT EXISTS vehicle on cluster ck_cluster drop database vehicle on cluster ck_cluster drop table vehicle.vehicle_pass on cluster ck_cluster truncate table vehicle.vehicle_pass on cluster ck_cluster; CREATE TABLE vehicle.vehicle_pass on cluster ck_cluster ( `project_id` UInt8, `plate_info` String, `plate_type` String, `plate_color` String, `vehicle_speed` UInt16, `vehicle_type` String, `vehicle_color` String, `vehicle_logo` String, `vehicle_model` String, `vehicle_state` String, `pass_time` DateTime, `cross_code` String, `cross_name` String, `lane_id` UInt8, `longitude` String, `latitude` String, `alarm_action` String, `pic_plate` String, `pic_vehicle` String, `pic_vehicle1` String, `pic_vehicle2` String, `create_date` DateTime DEFAULT now() ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/vehicle/vehicle_pass','{replica}') PARTITION BY toYYYYMMDD(pass_time) ORDER BY pass_time SETTINGS index_granularity = 8192; CREATE TABLE vehicle.vehicle_pass_all on cluster ck_cluster ( `project_id` UInt8, `plate_info` String, `plate_type` String, `plate_color` String, `vehicle_speed` UInt16, `vehicle_type` String, `vehicle_color` String, `vehicle_logo` String, `vehicle_model` String, `vehicle_state` String, `pass_time` DateTime, `cross_code` String, `cross_name` String, `lane_id` UInt8, `longitude` String, `latitude` String, `alarm_action` String, `pic_plate` String, `pic_vehicle` String, `pic_vehicle1` String, `pic_vehicle2` String, `create_date` DateTime DEFAULT now() ) ENGINE = Distributed('ck_cluster', 'vehicle', 'vehicle_pass', rand()); -- vehicle.vehicle_pass_buffer definition CREATE TABLE vehicle.vehicle_pass_buffer on cluster ck_cluster ( `project_id` UInt8, `plate_info` String, `plate_type` String, `plate_color` String, `vehicle_speed` UInt16, `vehicle_type` String, `vehicle_color` String, `vehicle_logo` String, `vehicle_model` String, `vehicle_state` String, `pass_time` DateTime, `cross_code` String, `cross_name` String, `lane_id` UInt8, `longitude` String, `latitude` String, `alarm_action` String, `pic_plate` String, `pic_vehicle` String, `pic_vehicle1` String, `pic_vehicle2` String, `create_date` DateTime DEFAULT now() ) ENGINE = Buffer('vehicle', 'vehicle_pass_all', 16, 10, 100, 1000, 10000, 1000000, 10000000); //运维 //当前连接数 SELECT * FROM system.metrics WHERE metric LIKE '%Connection'; //存储空间统计 SELECT name, path, formatReadableSize(free_space) AS free, formatReadableSize(total_space) AS total, formatReadableSize(keep_free_space) AS reserved FROM system.disks //各数据库占用空间统计 SELECT database, formatReadableSize(sum(bytes_on_disk)) AS on_disk FROM system.parts GROUP BY database //查看库表资源占用情况 select database, table, sum(rows) AS "总行数", formatReadableSize(sum(data_uncompressed_bytes)) as "原始大小", formatReadableSize(sum(data_compressed_bytes)) AS "压缩大小", round((sum(data_compressed_bytes) / sum(data_uncompressed_bytes)) * 100.,2) AS "压缩率/%" from system.parts group by database,table order by database
primary_key不唯一,
分区就是本地磁盘的目录,如果不区分,就是all文件夹。
如果10个分区,就是10个线程,一般按照天来分区。
数据不是直接插入到分区里面的,批次插入会产生一个临时分区,合并的时候插入到分区。
触发合并:optimize table xxx final;
稀疏索引:8192,间隔多少个记录一次
order by必须的
order by 必须是注解的前缀字段
ttl,表建好之后可以添加ttl,可以字段或者是表。
mergetree和replacingmergetree
分区内去重。合并的时候去重,根据orderby字段。
summingmerge引擎,分区内聚合,时间是最早的那一条。
update和delete数据,通过alter table语法。
不支持事务。
分布式表,分片
internal_replication是否开启分片内副本同步
集群分片读取,读哪个,通过错误统计次数,读少的。
优化:
数据不要存储null值,磁盘会生成一个目录。
删除分布式表中的数据:
ALTER TABLE vehicle_stroke on cluster ck_cluster DELETE WHERE 1=1 ;
合并分区数据:
OPTIMIZE TABLE vehicle_stroke FINAL;
clickhouse和jdbc:参考:https://magicpenta.tech/docs/clickhouse/ClickHouse%20JDBC
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?