-- Doris 笔记 (Doris notes / cheat sheet)
-- Show all partitions of a table
SHOW PARTITIONS FROM stg_pay_dt;
-- Drop a partition (removes the partition and all data in it)
ALTER TABLE stg_pay_dt DROP PARTITION p20211011;
-- Delete rows inside a specific partition by condition
DELETE FROM user.dailyitemsold PARTITION p20210611 WHERE clt_date = "2021-06-11";
-- Add a partition (idempotent thanks to IF NOT EXISTS)
ALTER TABLE stg_pay_dt ADD PARTITION IF NOT EXISTS p20211011 VALUES LESS THAN ('20211012');
-- Drop a table
DROP TABLE IF EXISTS `stg_kafka_device_message_data`;
-- Create an AGGREGATE-model table, range-partitioned by day on dt
CREATE TABLE stg_kafka_device_message_data (
    dt INT COMMENT '分区时间(采用的数据时间而非消费数据的时间)',
    message VARCHAR(4000) COMMENT '原始数据',
    -- REPLACE: on aggregation, keep the latest value for this column
    receive_time DATETIME REPLACE COMMENT '接收时间'
)
AGGREGATE KEY ( dt, message )
PARTITION BY RANGE ( dt ) (
    PARTITION p20211229 VALUES LESS THAN ( '20211230' )
)
DISTRIBUTED BY HASH ( dt ) BUCKETS 10
PROPERTIES ( "replication_num" = "3" );
-- Next: create a routine-load job consuming Kafka; default offsets: OFFSET_BEGINNING / OFFSET_END
-- Routine-load job: continuously consume topic device.message and load into the staging table.
-- dt is derived from the epoch-milliseconds '$.timestamp' JSON field (divide by 1000 -> seconds),
-- receive_time is stamped at consume time on the Doris side.
CREATE ROUTINE LOAD load_kafka_device_message_data ON stg_kafka_device_message_data COLUMNS
( dt=from_unixtime( CAST( get_json_string ( message, '$.timestamp' ) AS BIGINT ) / 1000, '%Y%m%d' ),
message,
receive_time=DATE_FORMAT(now(),'%Y-%m-%d %H:%i:%s') )
PROPERTIES ( "desired_concurrent_number" = "1", "max_error_number" = "1000" )
FROM KAFKA ( "kafka_broker_list" = "172.17.46.202:9092",
"kafka_topic" = "device.message",
"property.group.id" = "kafka-consumer2-data-import",
"property.kafka_default_offsets" = "OFFSET_BEGINNING" );
-- List routine-load jobs
SHOW ROUTINE LOAD;
-- Show the CREATE statement of a routine-load job
SHOW CREATE ROUTINE LOAD FOR stg_kafka_device_message_data;
-- Pause a Kafka routine-load job
PAUSE ROUTINE LOAD FOR load_kafka_iot_realtime_data;
-- Resume a paused routine-load job
RESUME ROUTINE LOAD FOR load_kafka_iot_realtime_data;
-- Stop (permanently remove) a Kafka routine-load job
STOP ROUTINE LOAD FOR load_kafka_iot_realtime_data;
-- Show the CREATE statement of a table
SHOW CREATE TABLE ods_fireHydrant_dt;
-- Modify a column definition (type widening + aggregation/comment)
ALTER TABLE ods_fireHydrant_dt MODIFY COLUMN address VARCHAR(400) REPLACE COMMENT "设备地址";
-- Create a table with dynamic partitioning. time_unit: HOUR / DAY / WEEK / MONTH.
-- (With HOUR, generated partition-name suffixes use yyyyMMddHH, e.g. 2020032501;
-- an hour-grained partition column must be DATETIME, not DATE.)
CREATE TABLE tb1 ( k1 DATE ) PARTITION BY RANGE(k1) () DISTRIBUTED BY HASH(k1)
PROPERTIES ( "replication_num" = "3", "dynamic_partition.enable" = "true", "dynamic_partition.time_unit" = "DAY", "dynamic_partition.end" = "1", "dynamic_partition.prefix" = "p", "dynamic_partition.buckets" = "8" );
-- Dynamic-partition properties can be changed later; use ALTER TABLE to turn the
-- feature off/on for an existing table.
ALTER TABLE site_access SET("dynamic_partition.enable"="false");
ALTER TABLE site_access SET("dynamic_partition.enable"="true");
-- Scheduling interval of the dynamic-partition thread; default 600 s (runs every 10 minutes)
ADMIN SET FRONTEND CONFIG ("dynamic_partition_check_interval_seconds" = "3600");
-- Rename a table
ALTER TABLE tb1 RENAME tb2;
-- Add a column that defaults to NULL
-- (表名 = table name, 字段名 = column name, 字段类型 = column type — placeholders)
ALTER TABLE 表名 ADD COLUMN 字段名 字段类型 DEFAULT NULL;
-- Drop a column
ALTER TABLE 表名 DROP COLUMN 字段名;
-- Swap two tables atomically (both schemas must be identical)
ALTER TABLE raw_mongo_fi_iot_hld_history_record REPLACE WITH TABLE stg_kafka_mongo_history_data PROPERTIES('swap' = 'true');
-- Empty a table
TRUNCATE TABLE stg_strem_mango_history_ws;
-- Change the default replica count.
/* The maximum replica count is bounded by the number of distinct BE IPs in the
   cluster (not the number of BE processes): Doris never places two replicas of
   one tablet on the same physical machine, identified by IP. So several BE
   instances sharing one IP still allow only replication_num = 1. */
ALTER TABLE tableName SET ("default.replication_num" = "3");
-- List BE (backend) nodes
SHOW BACKENDS;
-- Describe a table (column names, types, keys, defaults)
DESC TABLE_NAME;
-- Doris Kafka-consumption example; default offsets: OFFSET_BEGINNING / OFFSET_END.
-- Every target column is extracted from the raw JSON 'message' via get_json_string;
-- '$.timestamp' is epoch milliseconds, hence the / 1000 before from_unixtime.
-- NOTE(review): the job name equals the target table name here — legal but easy to
-- confuse with the table in SHOW/STOP commands; consider a load_* prefix.
CREATE ROUTINE LOAD stg_kafka_device_message_data
ON stg_kafka_device_message_data
COLUMNS (
dt = from_unixtime( CAST( get_json_string ( message, '$.timestamp' ) AS BIGINT ) / 1000, '%Y%m%d' )
,message_id = get_json_string ( message, '$.messageId' )
,message_type = get_json_string ( message, '$.messageType' )
,product_id = get_json_string ( message, '$.headers.productId' )
,device_id = get_json_string ( message, '$.deviceId' )
,device_name = get_json_string ( message, '$.headers.deviceName' )
,collect_time = from_unixtime( CAST( get_json_string ( message, '$.timestamp' ) AS BIGINT ) / 1000, '%Y-%m-%d %H:%i:%s' )
,receive_time = DATE_FORMAT(now(),'%Y-%m-%d %H:%i:%s')
,headers = get_json_string ( message, '$.headers' )
,properties = get_json_string ( message, '$.properties' )
,message
)
-- strict_mode=true: rows whose column transforms fail are counted as errors (up to max_error_number)
PROPERTIES ( "desired_concurrent_number" = "50", "max_error_number" = "2000", "strict_mode" = "true" )
FROM KAFKA ( "kafka_broker_list" = "172.17.46.202:9092",
"kafka_topic" = "device.message",
"property.group.id" = "stg_kafka_device_message_data",
"property.kafka_default_offsets" = "OFFSET_BEGINNING" );
-- List routine-load jobs
SHOW routine Load;
-- Pause a Kafka routine-load job
PAUSE ROUTINE LOAD FOR load_kafka_iot_realtime_data;
-- Resume a paused routine-load job
RESUME ROUTINE LOAD FOR load_kafka_iot_realtime_data;
-- Stop (permanently remove) a Kafka routine-load job
STOP ROUTINE LOAD FOR load_kafka_iot_realtime_data;
-- Generate DROP statements for every table in a database
-- ('数据库名' = database-name placeholder; run the generated output afterwards)
select concat('DROP TABLE IF EXISTS ', table_name, ';')
FROM information_schema.tables
WHERE table_schema = '数据库名';
-- Show all columns of a table (FULL adds aggregation type, default, comment, ...)
show full COLUMNS from table_name;
-- Show the data size of a database or a table
show data from stg_kafka_device_message_data;
-- 1) Create a BITMAP index
CREATE INDEX idx_device_id ON stg_kafka_device_message_data (device_id) USING BITMAP COMMENT 'device_id索引';
-- 2) Show the indexes configured on a table
SHOW INDEX FROM site_access_duplicate;
-- 3) Drop an index
DROP INDEX idx_device_id ON stg_kafka_device_message_data_back;
-- Create a user named 'test' with password '1234', allowed to connect from any host ('%')
create user 'test'@'%' identified by '1234';
-- Drop the localhost-scoped account 'test'
drop user test@localhost ;
-- If the user was created with host '%', drop it like this
drop user test@'%';
-- Full privileges: grant user 'test' (any host) everything on database db_name;
-- for read-only access grant SELECT_PRIV instead
grant all on db_name.* to 'test'@'%';
-- Full privileges on the built-in information_schema database
grant all on information_schema.* to 'test'@'%';
-- Change password (takes effect immediately): set user 'test' password to '1122'
set password for test =password('1122');
-- Reload privilege tables
-- NOTE(review): FLUSH PRIVILEGES is a MySQL habit; confirm whether Doris needs it at all
flush privileges;
-- Apache Doris 分页 SQL 语法 (pagination syntax)
-- 需要 ORDER BY 字段,否则分页结果顺序不确定
-- 客户端传入 {pageNo: 1, pageSize: 10}
-- Generic pattern (pageNo/pageSize are client-side values substituted before execution,
-- not SQL identifiers — shown as a comment because it is not directly executable):
--   SELECT * FROM table ORDER BY ID LIMIT pageSize OFFSET (pageNo - 1) * pageSize;
-- Page 1 with pageSize = 10:
select * from table order by ID limit 10 offset 0;
-- NOTE: OFFSET pagination scans and discards the skipped rows; for deep pages prefer
-- keyset pagination (WHERE ID > :last_id ORDER BY ID LIMIT 10).