升级版本的dremio cratedb arp 开发
去年开发过一个 CrateDB 的 ARP 扩展,比较粗糙,基本能用,但问题不少。以下是一个更加规范的版本,并且更好地集成了官方的
test 框架(关于 Dremio 的测试,我之前已经简单介绍过,很方便,也比较友好)
改进版本的开发
- 核心部分
主要涉及自动元数据发现以及数据查询。以前的版本为了方便查询,忽略了元数据发现;这个问题主要源于官方 ce plugin 默认对 catalog 处理的 bug:如果配置了忽略 catalog,protected CanonicalizeTablePathResponse getDatasetHandleViaGetTables(CanonicalizeTablePathRequest request, Connection connection) 方法应该对此做处理,但默认的 JdbcSchemaFetcherImpl 实现没有处理,导致出现问题
- 修复
重写该方法处理一下即可,或者通过 ArpDialect 方言的 yaml 定义来处理
- 参考代码
package com.dremio.exec.store.jdbc.conf;
import com.dremio.exec.catalog.conf.DisplayMetadata;
import com.dremio.exec.catalog.conf.NotMetadataImpacting;
import com.dremio.exec.catalog.conf.Secret;
import com.dremio.exec.catalog.conf.SourceType;
import com.dremio.exec.store.jdbc.*;
import com.dremio.exec.store.jdbc.dialect.arp.ArpDialect;
import com.dremio.exec.store.jdbc.dialect.arp.ArpYaml;
import com.dremio.options.OptionManager;
import com.dremio.security.CredentialsService;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import io.protostuff.Tag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Properties;
import static com.google.common.base.Preconditions.checkNotNull;
/**
 * Dremio ARP source configuration for CrateDB.
 *
 * <p>The class bundles three things:
 * <ul>
 *   <li>the user-editable connection settings (the {@code @Tag}-annotated public fields),</li>
 *   <li>{@link CratedbDialect}, an {@link ArpDialect} that declares catalogs as unsupported and
 *       supplies the schema-listing query,</li>
 *   <li>{@link CratedbSchemaFetcherV2}, a schema fetcher whose table lookup never emits a catalog
 *       in the canonicalized path.</li>
 * </ul>
 */
@SourceType(value = "CRATEDB", label = "CRATEDB", uiConfig = "crate-layout.json")
public class CrateConf extends AbstractArpConf<CrateConf> {
    /** Classpath location of the ARP YAML definition for this dialect. */
    private static final String ARP_FILENAME = "arp/implementation/crate-arp.yaml";
    /** Dialect shared by all instances; built once from {@link #ARP_FILENAME}. */
    private static final ArpDialect ARP_DIALECT = AbstractArpConf.loadArpFile(ARP_FILENAME, CratedbDialect::new);
    /** Fully qualified class name of the CrateDB JDBC driver. */
    private static final String DRIVER = "io.crate.client.jdbc.CrateDriver";

    /**
     * Schema fetcher that resolves a table through {@link DatabaseMetaData#getTables} and returns
     * only schema and table in the canonical path (no catalog).
     */
    static class CratedbSchemaFetcherV2 extends ArpDialect.ArpSchemaFetcher {
        private static final Logger logger = LoggerFactory.getLogger(CratedbSchemaFetcherV2.class);
        private final JdbcPluginConfig config;

        /**
         * @param query  SQL used by the parent fetcher to list tables
         * @param config plugin configuration (dialect, hidden schemas, ...)
         */
        public CratedbSchemaFetcherV2(String query, JdbcPluginConfig config) {
            super(query, config);
            this.config = config;
            logger.info("query schema:{}", query);
        }

        /**
         * Looks up the requested table via JDBC metadata and returns its canonical path.
         *
         * <p>Rows whose schema is listed in the configured hidden schemas are skipped; the first
         * remaining row wins. The catalog is deliberately left unset in the response because
         * CrateDB has no catalog level.
         *
         * @param request    path components of the table being resolved
         * @param connection open JDBC connection used to read the metadata
         * @return the canonical schema/table, or the default (empty) response if no visible table matches
         * @throws SQLException if reading the metadata fails
         */
        @Override
        protected JdbcFetcherProto.CanonicalizeTablePathResponse getDatasetHandleViaGetTables(
                JdbcFetcherProto.CanonicalizeTablePathRequest request, Connection connection) throws SQLException {
            DatabaseMetaData metaData = connection.getMetaData();
            FilterDescriptor filter =
                    new FilterDescriptor(request, supportsCatalogsWithoutSchemas(this.config.getDialect(), metaData));
            ResultSet tablesResult =
                    metaData.getTables(filter.catalogName, filter.schemaName, filter.tableName, (String[]) null);
            Throwable failure = null;
            try {
                while (tablesResult.next()) {
                    // Column 2 of getTables() is TABLE_SCHEM, column 3 is TABLE_NAME.
                    String currSchema = tablesResult.getString(2);
                    if (!Strings.isNullOrEmpty(currSchema) && this.config.getHiddenSchemas().contains(currSchema)) {
                        continue;
                    }
                    JdbcFetcherProto.CanonicalizeTablePathResponse.Builder responseBuilder =
                            JdbcFetcherProto.CanonicalizeTablePathResponse.newBuilder();
                    // CrateDB does not support catalogs; the default implementation would set one, so omit it.
                    if (!Strings.isNullOrEmpty(currSchema)) {
                        responseBuilder.setSchema(currSchema);
                    }
                    responseBuilder.setTable(tablesResult.getString(3));
                    return responseBuilder.build();
                }
                return JdbcFetcherProto.CanonicalizeTablePathResponse.getDefaultInstance();
            } catch (Throwable ex) {
                // Remember the primary failure so a close() failure can be attached to it.
                failure = ex;
                throw ex;
            } finally {
                closeResource(failure, tablesResult);
            }
        }

        /**
         * Closes the result set on a best-effort basis.
         *
         * <p>If {@code primary} is set, a close failure is added to it as a suppressed exception;
         * otherwise the close failure is logged and not propagated.
         *
         * @param primary   exception already in flight, or {@code null}
         * @param resultSet result set to close; may be {@code null}
         */
        private static void closeResource(Throwable primary, ResultSet resultSet) {
            if (resultSet == null) {
                return;
            }
            try {
                resultSet.close();
            } catch (Exception closeFailure) {
                if (primary != null) {
                    primary.addSuppressed(closeFailure);
                } else {
                    logger.warn("Failed to close table metadata result set", closeFailure);
                }
            }
        }

        /**
         * Translates a canonicalize request into the catalog/schema/table patterns passed to
         * {@link DatabaseMetaData#getTables}.
         */
        protected static class FilterDescriptor {
            private final String catalogName;
            private final String schemaName;
            private final String tableName;

            /**
             * @param request                   path components of the requested table
             * @param hasCatalogsWithoutSchemas whether a single-level container should be treated
             *                                  as a catalog ({@code true}) or as a schema ({@code false})
             */
            public FilterDescriptor(JdbcFetcherProto.CanonicalizeTablePathRequest request,
                                    boolean hasCatalogsWithoutSchemas) {
                this.tableName = request.getTable();
                if (!Strings.isNullOrEmpty(request.getSchema())) {
                    // Two-level path: first component is the catalog, second the schema.
                    this.schemaName = request.getSchema();
                    this.catalogName = request.getCatalogOrSchema();
                } else {
                    // One-level path: the single component is either the catalog or the schema.
                    this.catalogName = hasCatalogsWithoutSchemas ? request.getCatalogOrSchema() : "";
                    this.schemaName = hasCatalogsWithoutSchemas ? "" : request.getCatalogOrSchema();
                }
            }
        }
    }

    /**
     * ARP dialect for CrateDB: no catalog support, no nested aggregations, and a custom
     * schema-listing query over {@code information_schema}.
     */
    static class CratedbDialect extends ArpDialect {
        public CratedbDialect(ArpYaml yaml) {
            super(yaml);
        }

        /**
         * Builds the fetcher with a query listing all tables and views outside the system schemas
         * and the configured hidden schemas.
         *
         * <p>The exclusion list is matched against the schema column ({@code sch}): the list holds
         * schema names, and filtering on the catalog column only works when the server reports the
         * schema name as the catalog.
         *
         * @param config plugin configuration providing the hidden schemas
         * @return a {@link CratedbSchemaFetcherV2} using the generated query
         */
        @Override
        public ArpSchemaFetcher newSchemaFetcher(JdbcPluginConfig config) {
            String hiddenSchemas = Joiner.on("','").join(config.getHiddenSchemas());
            String query = String.format("SELECT NULL, SCH, NME from ( select table_catalog CAT, table_schema SCH, table_name NME from information_schema.\"tables\" union all select table_catalog CAT, table_schema SCH,table_name NME from information_schema.views ) t where sch not in ('information_schema','pg_catalog','sys', '%s')", hiddenSchemas);
            return new CratedbSchemaFetcherV2(query, config);
        }

        /** CrateDB has no catalog level. */
        @Override
        public ContainerSupport supportsCatalogs() {
            return ContainerSupport.UNSUPPORTED;
        }

        /** Nested aggregations are not pushed down. */
        @Override
        public boolean supportsNestedAggregations() {
            return false;
        }
    }

    /** User name passed to the connection pool. */
    @Tag(1)
    @DisplayMetadata(label = "username")
    @NotMetadataImpacting
    public String username = "crate";

    /** Host name or address used in the JDBC URL. */
    @Tag(2)
    @DisplayMetadata(label = "host")
    public String host;

    /** Password passed to the connection pool. */
    @Tag(3)
    @Secret
    @DisplayMetadata(label = "password")
    @NotMetadataImpacting
    public String password = "";

    /** Port used in the JDBC URL. */
    @Tag(4)
    @DisplayMetadata(label = "port")
    @NotMetadataImpacting
    public int port = 5432;

    /** Fetch size handed to the plugin config builder. */
    @Tag(5)
    @DisplayMetadata(label = "Record fetch size")
    @NotMetadataImpacting
    public int fetchSize = 200;

    /** Maximum idle connections handed to the connection pool. */
    @Tag(6)
    @DisplayMetadata(
            label = "Maximum idle connections"
    )
    @NotMetadataImpacting
    public int maxIdleConns = 8;

    /** Idle time in seconds handed to the connection pool. */
    @Tag(7)
    @DisplayMetadata(
            label = "Connection idle time (s)"
    )
    @NotMetadataImpacting
    public int idleTimeSec = 60;

    /**
     * Builds the JDBC URL in the form {@code crate://<host>:<port>/}.
     *
     * @return the connection string for the configured host and port
     * @throws NullPointerException if {@code username} or {@code host} is {@code null}
     */
    @VisibleForTesting
    public String toJdbcConnectionString() {
        checkNotNull(this.username, "Missing username.");
        // Without this check a missing host would silently yield "crate://null:<port>/".
        checkNotNull(this.host, "Missing host.");
        // format crate://localhost:5433/
        return String.format("crate://%s:%d/", this.host, this.port);
    }

    /**
     * Completes the plugin configuration: sets the dialect and fetch size, replaces the hidden
     * schemas with just {@code sys}, and registers the data source factory.
     *
     * @param configBuilder      builder pre-populated by the caller
     * @param credentialsService not used by this source
     * @param optionManager      not used by this source
     * @return the finished plugin configuration
     */
    @Override
    @VisibleForTesting
    public JdbcPluginConfig buildPluginConfig(
            JdbcPluginConfig.Builder configBuilder,
            CredentialsService credentialsService,
            OptionManager optionManager
    ) {
        return configBuilder.withDialect(getDialect())
                .withFetchSize(fetchSize)
                .clearHiddenSchemas()
                .addHiddenSchema("sys")
                .withDatasourceFactory(this::newDataSource)
                .build();
    }

    /** Creates the pooled data source from the driver class, URL, credentials and pool settings. */
    private CloseableDataSource newDataSource() {
        Properties properties = new Properties();
        return DataSources.newGenericConnectionPoolDataSource(DRIVER,
                toJdbcConnectionString(), this.username, this.password, properties,
                DataSources.CommitMode.DRIVER_SPECIFIED_COMMIT_MODE, this.maxIdleConns, this.idleTimeSec);
    }

    /** @return the shared CrateDB ARP dialect */
    @Override
    public ArpDialect getDialect() {
        return ARP_DIALECT;
    }
}
单元测试
- 参考pom.xml
核心是 pom 依赖,注意 jackson 版本以及 JDK 版本,JDK 最好使用 8
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the CrateDB ARP connector; the shaded jar bundles the CrateDB JDBC driver. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.dalong</groupId>
<artifactId>cratedb-driver</artifactId>
<!-- The artifact version is tied to the Dremio version it is built against. -->
<version>${version.dremio}</version>
<properties>
<!-- Compile for Java 8. -->
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<!-- Single place to change the Dremio version; used by all Dremio dependencies below. -->
<version.dremio>20.0.0-202201050826310141-8cc7162b</version.dremio>
</properties>
<dependencies>
<!-- Dremio community JDBC plugin (declared with compile scope). -->
<dependency>
<groupId>com.dremio.community.plugins</groupId>
<artifactId>dremio-ce-jdbc-plugin</artifactId>
<version>${version.dremio}</version>
<scope>compile</scope>
</dependency>
<!-- CrateDB JDBC driver; this is the only artifact the shade plugin bundles. -->
<dependency>
<groupId>io.crate</groupId>
<artifactId>crate-jdbc</artifactId>
<version>2.6.0</version>
</dependency>
<!-- Dremio test-classifier jars, test scope only. -->
<dependency>
<groupId>com.dremio.sabot</groupId>
<artifactId>dremio-sabot-kernel</artifactId>
<classifier>tests</classifier>
<scope>test</scope>
<version>${version.dremio}</version>
</dependency>
<dependency>
<groupId>com.dremio</groupId>
<artifactId>dremio-common</artifactId>
<classifier>tests</classifier>
<scope>test</scope>
<version>${version.dremio}</version>
</dependency>
<!-- Jackson version is pinned explicitly; optional so it is not propagated to consumers. -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.11.4</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
<version>3.12.4</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Shade at package time, including only io.crate:crate-jdbc in the output jar. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.2.3</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<artifactSet>
<includes>
<include>io.crate:crate-jdbc</include>
</includes>
</artifactSet>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<!-- NOTE(review): these repositories use plain http URLs; Maven 3.8.1+ blocks external http
     repositories by default. Consider switching to https after confirming the endpoints support it. -->
<repositories>
<repository>
<id>tencent-public</id>
<url>http://mirrors.cloud.tencent.com/nexus/repository/maven-public/</url>
</repository>
<repository>
<id>dremio-public</id>
<url>http://maven.dremio.com/public/</url>
</repository>
<repository>
<id>dremio-free</id>
<url>http://maven.dremio.com/free/</url>
</repository>
</repositories>
</project>
- 简单代码
继承 BaseTestQuery 就可以实现测试了,还是很简单的
package com.dremio.jdbc;
import com.dremio.BaseTestQuery;
import com.dremio.TestResult;
import com.dremio.exec.ExecConstants;
import com.dremio.exec.store.CatalogService;
import com.dremio.exec.store.jdbc.conf.CrateConf;
import com.dremio.options.OptionValue;
import com.dremio.service.namespace.source.proto.SourceConfig;
import org.junit.Before;
import org.junit.Test;
/**
 * Query test for the CrateDB source, built on Dremio's {@link BaseTestQuery} harness.
 *
 * <p>The source is registered against 127.0.0.1:5433, so a CrateDB instance is expected to be
 * listening there when the test runs.
 */
public class MyPluginTest extends BaseTestQuery {
    private CrateConf crateConf;

    /** Registers a source named {@code cratedb} (if it does not exist yet) before each test. */
    @Before
    public void initSource() {
        getSabotContext().getOptionManager().setOption(
                OptionValue.createLong(OptionValue.OptionType.SYSTEM, ExecConstants.ELASTIC_ACTION_RETRIES, 3));

        // Connection settings for the CrateDB instance.
        crateConf = new CrateConf();
        crateConf.host = "127.0.0.1";
        crateConf.port = 5433;
        crateConf.username = "crate";

        final SourceConfig sourceConfig = new SourceConfig();
        sourceConfig.setName("cratedb");
        sourceConfig.setConnectionConf(crateConf);
        sourceConfig.setMetadataPolicy(CatalogService.DEFAULT_METADATA_POLICY);

        getSabotContext().getCatalogService().createSourceIfMissingWithThrow(sourceConfig);
    }

    /**
     * Selects everything from {@code cratedb.doc.demoapp} and compares it, ignoring row order,
     * against a baseline of columns {@code id} and {@code name} with a single all-null row.
     */
    @Test
    public void test() throws Exception {
        testBuilder()
                .sqlQuery("select * from cratedb.doc.demoapp")
                .unOrdered()
                .baselineColumns("id", "name")
                .baselineValues(null, null)
                .go();
    }
}
参考使用
- 构建
mvn clean package -Dmaven.test.skip
- 安装arp
拷贝 jar 到 dremio 的 jars 目录中;因为 jar 已通过 shade 内置了 CrateDB 驱动,不需要再额外处理驱动
说明
目前的版本还不是特别完善,但已经比较符合官方推荐的 ARP 开发方式了,具体代码参考 github;目前还缺少关于用户密码的处理,后期再完善
以前关于 dremio 级联模式的处理也是类似的
参考资料
https://www.dremio.com/resources/tutorials/how-to-create-an-arp-connector/
https://github.com/rongfengliang/cratedb-dremio-connector