基于dremio arp sdk 开发一个cratedb 连接器
目的比较简单,就是学习下dremio 基于arp模式的连接器开发,目前我们可以看到一些官方的demo
但是还是自己尝试下,同时也记录下开发过程中踩的坑
环境准备
基于dremio 13 版本
- maven 项目结构
为了方便jar 包的分发,使用了shade 扩展
├── README.md
├── pom.xml
├── src
│ ├── main
│ │ ├── java
│ │ │ └── com
│ │ │ └── dremio
│ │ │ └── exec
│ │ │ └── store
│ │ │ └── jdbc
│ │ │ └── conf
│ │ │ └── CrateConf.java
│ │ └── resources
│ │ ├── arp
│ │ │ └── implementation
│ │ │ └── crate-arp.yaml
│ │ └── sabot-module.conf
- 代码说明
pom.xml 主要是依赖以及插件配置,对于集成我们主要包含了关于cratedb jdbc 驱动,方便分发使用
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.dalong</groupId>
  <artifactId>demodremio-driver</artifactId>
  <version>1.2-SNAPSHOT</version>

  <properties>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <version.dremio>13.0.0-202101272034330307-20fb9275</version.dremio>
  </properties>

  <dependencies>
    <!-- Dremio community JDBC plugin; version is taken from the version.dremio property. -->
    <dependency>
      <groupId>com.dremio.community.plugins</groupId>
      <artifactId>dremio-ce-jdbc-plugin</artifactId>
      <version>${version.dremio}</version>
      <scope>compile</scope>
    </dependency>
    <!-- CrateDB JDBC driver; the shade plugin below bundles it into the plugin jar. -->
    <dependency>
      <groupId>io.crate</groupId>
      <artifactId>crate-jdbc</artifactId>
      <version>2.6.0</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>3.2.3</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <!-- Only the CrateDB driver is included in the shaded jar. -->
              <artifactSet>
                <includes>
                  <include>io.crate:crate-jdbc</include>
                </includes>
              </artifactSet>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>

  <!-- Repositories use https: Maven 3.8.1+ blocks plain-http repositories by default. -->
  <repositories>
    <repository>
      <id>tencent-public</id>
      <url>https://mirrors.cloud.tencent.com/nexus/repository/maven-public/</url>
    </repository>
    <repository>
      <id>dremio-public</id>
      <url>https://maven.dremio.com/public/</url>
    </repository>
    <repository>
      <id>dremio-free</id>
      <url>https://maven.dremio.com/free/</url>
    </repository>
  </repositories>
</project>
sabot-module.conf 关于插件注册类扫描配置的,比较重要的配置(基于hocon)
看到网上好多的插件扫描的包都是com.dremio.exec.store.jdbc,经过测试实际上这并不是必须的,也可以使用其他的包名
#
# Copyright (C) 2017-2019 Dremio Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
// This file tells Dremio to consider this module when class path scanning.
// This file can also include any supplementary configuration information.
// This file is in HOCON format, see https://github.com/typesafehub/config/blob/master/HOCON.md for more information.
// Appends (+=) one package to the scan list. CrateConf is declared in
// com.dremio.exec.store.jdbc.conf, a sub-package of the one listed here
// (presumably scanning includes sub-packages; confirm against Dremio's scanner).
dremio.classpath.scanning.packages += "com.dremio.exec.store.jdbc"
类型映射处理配置(很方便,对于sql 的处理我们基于配置就可以搞定了,比如分页。。。,尽管cratedb 兼容pg,但是不是pg,分页是不一样的)
此文件内容比较多,实际可以参考pg 的,目前就做了分页的处理,实际上cratedb 有自己的数据类型的 (代码附带在github 中)
# Excerpt of the ARP definition for CrateDB: source metadata and SQL syntax options.
metadata:
  name: Cratedb
  apiname: crate
  spec_version: '1'

syntax:
  identifier_quote: '"'
  allows_boolean_literal: true
  inject_numeric_cast_project: true
  supports_catalogs: false
  supports_schemas: false
核心代码
CrateConf.java 是dremio arp plugin 开发的一个约定,主要实现类型配置,driver 链接,以及关于sql 方言的处理
同时关于db 连接的信息也是可以基于配置说明的,如果需要优化布局,可以自己定义一个layout 的json 文件
package com.dremio.exec.store.jdbc.conf;
import com.dremio.exec.catalog.conf.DisplayMetadata;
import com.dremio.exec.catalog.conf.NotMetadataImpacting;
import com.dremio.exec.catalog.conf.Secret;
import com.dremio.exec.catalog.conf.SourceType;
import com.dremio.exec.store.jdbc.CloseableDataSource;
import com.dremio.exec.store.jdbc.DataSources;
import com.dremio.exec.store.jdbc.JdbcPluginConfig;
import com.dremio.exec.store.jdbc.JdbcSchemaFetcherImpl;
import com.dremio.exec.store.jdbc.dialect.arp.ArpDialect;
import com.dremio.exec.store.jdbc.dialect.arp.ArpYaml;
import com.dremio.options.OptionManager;
import com.dremio.security.CredentialsService;
import com.google.common.annotations.VisibleForTesting;
import io.protostuff.Tag;
import java.sql.SQLException;
import java.util.Properties;
import static com.google.common.base.Preconditions.checkNotNull;
@SourceType(value = "CRATEDB", label = "CRATEDB", uiConfig = "crate-layout.json")
public class CrateConf extends AbstractArpConf<CrateConf> {
private static final String ARP_FILENAME = "arp/implementation/crate-arp.yaml";
// 基于yaml 文件生成sql 方言处理(注意yaml schema 的学习,可以通过源码,或者反编译官方jdbc plugin 的源码)
private static final ArpDialect ARP_DIALECT = AbstractArpConf.loadArpFile(ARP_FILENAME, CratedbDialect::new);
private static final String DRIVER = "io.crate.client.jdbc.CrateDriver";
static class CratedbSchemaFetcher extends JdbcSchemaFetcherImpl {
public CratedbSchemaFetcher(JdbcPluginConfig config) {
super(config);
}
protected boolean usePrepareForColumnMetadata() {
return true;
}
protected boolean usePrepareForGetTables() {
return true;
}
}
// 主要实现关于方言的处理,目前比较简单,主要是关于schema 的,同时我们关于cratedb 一些特殊sql
的处理也可以在这里编写
static class CratedbDialect extends ArpDialect {
public CratedbDialect(ArpYaml yaml) {
super(yaml);
}
@Override
public JdbcSchemaFetcherImpl newSchemaFetcher(JdbcPluginConfig config) {
return new CratedbSchemaFetcher(config);
}
public boolean supportsNestedAggregations() {
return false;
}
}
// ui 元素描述
@Tag(1)
@DisplayMetadata(label = "username")
@NotMetadataImpacting
public String username = "crate";
@Tag(2)
@DisplayMetadata(label = "host")
public String host;
@Tag(3)
@Secret
@DisplayMetadata(label = "password")
@NotMetadataImpacting
public String password = "";
@Tag(4)
@DisplayMetadata(label = "port")
@NotMetadataImpacting
public int port = 5432;
@Tag(5)
@DisplayMetadata(label = "Record fetch size")
@NotMetadataImpacting
public int fetchSize = 200;
@Tag(6)
@NotMetadataImpacting
@DisplayMetadata(label = ENABLE_EXTERNAL_QUERY_LABEL)
public boolean enableExternalQuery = false;
@VisibleForTesting
public String toJdbcConnectionString() {
final String username = checkNotNull(this.username, "Missing username.");
// format crate://localhost:5433/
final String format = String.format("crate://%s:%d/", this.host, this.port);
return format;
}
// 比较核心的,关于插件数据库连接的处理
@Override
@VisibleForTesting
public JdbcPluginConfig buildPluginConfig(
JdbcPluginConfig.Builder configBuilder,
CredentialsService credentialsService,
OptionManager optionManager
) {
return configBuilder.withDialect(getDialect())
.withFetchSize(fetchSize)
.withSkipSchemaDiscovery(true)
.clearHiddenSchemas()
.addHiddenSchema("sys")
.withDatasourceFactory(this::newDataSource)
.withAllowExternalQuery(enableExternalQuery)
.build();
}
// 数据源创建的说明
private CloseableDataSource newDataSource() throws SQLException {
Properties properties = new Properties();
CloseableDataSource dataSource = DataSources.newGenericConnectionPoolDataSource(DRIVER,
toJdbcConnectionString(), this.username, this.password, properties, DataSources.CommitMode.DRIVER_SPECIFIED_COMMIT_MODE);
return dataSource;
}
@Override
public ArpDialect getDialect() {
return ARP_DIALECT;
}
}
- 构建
mvn clean package
使用
基于docker 运行
- docker 镜像
# Start from the Dremio OSS 13.0 image.
FROM dremio/dremio-oss:13.0
# Copy the shaded connector jar (plugin plus CrateDB JDBC driver) into Dremio's jars directory.
COPY demodremio-driver-1.2-SNAPSHOT.jar /opt/dremio/jars/
- 启动试用
基于docker-compose
具体配置,参考 https://github.com/rongfengliang/dremio-cluster-learning
# Local test stack: ZooKeeper, two Dremio nodes, CrateDB, PostgreSQL, MongoDB and MinIO.
version: "3"
services:
  zookeeper:
    image: zookeeper
    ports:
      - "2181:2181"
      - "8080:8080"
  dremio1:
    image: dalongrong/dremio-oss:13.0
    environment:
      # "$$" is Compose's escape for a literal "$"; the JVM receives UTF-16LE$en_US.
      - DREMIO_JAVA_SERVER_EXTRA_OPTS=-Dsaffron.default.charset=UTF-16LE -Dsaffron.default.nationalcharset=UTF-16LE -Dsaffron.default.collation.name=UTF-16LE$$en_US
    volumes:
      - "./dremio1.conf:/opt/dremio/conf/dremio.conf"
      - "./datas/data:/opt/dremio/data"
    ports:
      - "9047:9047"
      - "31010:31010"
  crate:
    image: crate
    ports:
      - "4200:4200"
      # Container port 5432 is published on host port 5433 (host 5432 is taken by pg below).
      - "5433:5432"
  dremio2:
    image: dalongrong/dremio-oss:13.0
    environment:
      - DREMIO_JAVA_SERVER_EXTRA_OPTS=-Dsaffron.default.charset=UTF-16LE -Dsaffron.default.nationalcharset=UTF-16LE -Dsaffron.default.collation.name=UTF-16LE$$en_US
    volumes:
      - "./dremio3.conf:/opt/dremio/conf/dremio.conf"
    ports:
      - "9048:9047"
      - "31011:31010"
  pg:
    image: postgres:12
    environment:
      - "POSTGRES_PASSWORD=dalong"
    ports:
      - "5432:5432"
  mongo:
    image: mongo
    ports:
      - "27017:27017"
  minio:
    image: minio/minio
    command: server /data
    ports:
      - "9000:9000"
    environment:
      - "MINIO_ACCESS_KEY=minio"
      - "MINIO_SECRET_KEY=minio123"
- 使用插件
启动之后,可以在cratedb 创建一些测试数据
配置
sql 查询
一些问题
- schema 的问题
因为cratedb 的特殊性,在处理查询的时候总是不对(schema 处理没问题),所以后边就禁用了schema 的自动发现(withSkipSchemaDiscovery(true)),同时对于schema 获取的处理
都使用了true 的返回值(usePrepareForColumnMetadata,usePrepareForGetTables) 此处是一个比较重要的,不然很费事(我折腾了好久)
- 布局问题
我们可以自己定义ui 元素的布局,目前官方文档暂时缺少完整的说明,但是可以结合源码学习
- 图标问题
自己开发的plugin 需要使用自己的图标,格式为svg,同时注意文件命名为自己SourceType 的value 名称
- 数据反射问题
因为默认schema 不自动发现了,开始的时候反射是不好使的,但是在运行之后schema 会有cache的,我们依然就可以使用dremio强大的反射能力了
说明
以上是一个简单的dremio 插件开发的说明,详细代码可以参考github,同时多看官方文档,以及源码会比较好
参考资料
https://github.com/rongfengliang/cratedb-dremio-connector
https://www.dremio.com/tutorials/how-to-create-an-arp-connector/
https://github.com/narendrans/dremio-snowflake