升级版本的dremio cratedb arp 开发

记得去年开发过一个 cratedb 的 arp 扩展,比较粗糙,基本能用,但是并不完善。以下是一个更加规范的版本,而且更好地集成了官方的
test(关于 dremio 的测试,我已经简单介绍过,很方便,而且比较友好)

改进版本的开发

  • 核心部分
    主要涉及自动元数据发现以及数据查询。以前的版本为了方便查询,忽略了元数据发现。这个问题的根源主要还是官方 ce plugin 默认对 catalog 处理的
    bug:如果配置了忽略 catalog,protected CanonicalizeTablePathResponse getDatasetHandleViaGetTables(CanonicalizeTablePathRequest request, Connection connection) 方法应该做相应处理,但是默认的 JdbcSchemaFetcherImpl 实现没有处理,导致出现问题
  • 修复
    在自定义的 SchemaFetcher 中重写该方法,返回结果时忽略 catalog 即可;或者通过 ArpDialect 方言的 yaml 定义来处理
  • 参考代码
 
package com.dremio.exec.store.jdbc.conf;
 
import com.dremio.exec.catalog.conf.DisplayMetadata;
import com.dremio.exec.catalog.conf.NotMetadataImpacting;
import com.dremio.exec.catalog.conf.Secret;
import com.dremio.exec.catalog.conf.SourceType;
import com.dremio.exec.store.jdbc.*;
import com.dremio.exec.store.jdbc.dialect.arp.ArpDialect;
import com.dremio.exec.store.jdbc.dialect.arp.ArpYaml;
import com.dremio.options.OptionManager;
import com.dremio.security.CredentialsService;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import io.protostuff.Tag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
 
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Properties;
 
import static com.google.common.base.Preconditions.checkNotNull;
 
@SourceType(value = "CRATEDB", label = "CRATEDB", uiConfig = "crate-layout.json")
public class CrateConf extends AbstractArpConf<CrateConf> {
    private static final String ARP_FILENAME = "arp/implementation/crate-arp.yaml";
    private static final ArpDialect ARP_DIALECT = AbstractArpConf.loadArpFile(ARP_FILENAME, CratedbDialect::new);
    private static final String DRIVER = "io.crate.client.jdbc.CrateDriver";
 
    static class CratedbSchemaFetcherV2 extends ArpDialect.ArpSchemaFetcher {
        private static final Logger logger = LoggerFactory.getLogger(CratedbSchemaFetcherV2.class);
        private final JdbcPluginConfig config;
 
        public CratedbSchemaFetcherV2(String query, JdbcPluginConfig config) {
            super(query, config);
            this.config = config;
            logger.info("query schema:{}", query);
        }
 
        @Override
        protected JdbcFetcherProto.CanonicalizeTablePathResponse getDatasetHandleViaGetTables(JdbcFetcherProto.CanonicalizeTablePathRequest request, Connection connection) throws SQLException {
            DatabaseMetaData metaData = connection.getMetaData();
            FilterDescriptor filter = new FilterDescriptor(request, supportsCatalogsWithoutSchemas(this.config.getDialect(), metaData));
            ResultSet tablesResult = metaData.getTables(filter.catalogName, filter.schemaName, filter.tableName, (String[]) null);
            Throwable throwable = null;
 
            JdbcFetcherProto.CanonicalizeTablePathResponse canonicalizeTablePathResponse;
            try {
                String currSchema;
                do {
                    if (!tablesResult.next()) {
                        return JdbcFetcherProto.CanonicalizeTablePathResponse.getDefaultInstance();
                    }
                    currSchema = tablesResult.getString(2);
                } while (!Strings.isNullOrEmpty(currSchema) && this.config.getHiddenSchemas().contains(currSchema));
                com.dremio.exec.store.jdbc.JdbcFetcherProto.CanonicalizeTablePathResponse.Builder responseBuilder = JdbcFetcherProto.CanonicalizeTablePathResponse.newBuilder();
                // cratedb not support catalog,but default implement fetch it so omit it
                if (!Strings.isNullOrEmpty(currSchema)) {
                    responseBuilder.setSchema(currSchema);
                }
                responseBuilder.setTable(tablesResult.getString(3));
                canonicalizeTablePathResponse = responseBuilder.build();
            } catch (Throwable ex) {
                throwable = ex;
                throw ex;
            } finally {
                if (tablesResult != null) {
                    try {
                        closeResource(throwable, tablesResult);
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
 
            }
            return canonicalizeTablePathResponse;
        }
 
        private static void closeResource(Throwable throwable, AutoCloseable autoCloseable) throws Exception {
            if (throwable != null) {
                try {
                    autoCloseable.close();
                } catch (Throwable throwable1) {
                    throwable.addSuppressed(throwable1);
                }
            } else {
                autoCloseable.close();
            }
 
        }
 
        protected static class FilterDescriptor {
            private final String catalogName;
            private final String schemaName;
            private final String tableName;
 
            public FilterDescriptor(JdbcFetcherProto.CanonicalizeTablePathRequest request, boolean hasCatalogsWithoutSchemas) {
                this.tableName = request.getTable();
                if (!Strings.isNullOrEmpty(request.getSchema())) {
                    this.schemaName = request.getSchema();
                    this.catalogName = request.getCatalogOrSchema();
                } else {
                    this.catalogName = hasCatalogsWithoutSchemas ? request.getCatalogOrSchema() : "";
                    this.schemaName = hasCatalogsWithoutSchemas ? "" : request.getCatalogOrSchema();
                }
 
            }
        }
    }
 
    static class CratedbDialect extends ArpDialect {
        public CratedbDialect(ArpYaml yaml) {
            super(yaml);
        }
 
        @Override
        public ArpSchemaFetcher newSchemaFetcher(JdbcPluginConfig config) {
            String query = String.format("SELECT NULL, SCH, NME from ( select table_catalog CAT, table_schema SCH, table_name NME from information_schema.\"tables\" union all select table_catalog CAT, table_schema SCH,table_name NME from information_schema.views ) t where cat not in ('information_schema','pg_catalog','sys', '%s')", new Object[]{Joiner.on("','").join(config.getHiddenSchemas())});
            return new CratedbSchemaFetcherV2(query, config);
        }
 
        @Override
        public ContainerSupport supportsCatalogs() {
            return ContainerSupport.UNSUPPORTED;
        }
 
        @Override
        public boolean supportsNestedAggregations() {
            return false;
        }
    }
 
    @Tag(1)
    @DisplayMetadata(label = "username")
    @NotMetadataImpacting
    public String username = "crate";
 
    @Tag(2)
    @DisplayMetadata(label = "host")
    public String host;
 
    @Tag(3)
    @Secret
    @DisplayMetadata(label = "password")
    @NotMetadataImpacting
    public String password = "";
 
    @Tag(4)
    @DisplayMetadata(label = "port")
    @NotMetadataImpacting
    public int port = 5432;
 
    @Tag(5)
    @DisplayMetadata(label = "Record fetch size")
    @NotMetadataImpacting
    public int fetchSize = 200;
 
 
    @Tag(6)
    @DisplayMetadata(
            label = "Maximum idle connections"
    )
    @NotMetadataImpacting
    public int maxIdleConns = 8;
 
    @Tag(7)
    @DisplayMetadata(
            label = "Connection idle time (s)"
    )
    @NotMetadataImpacting
    public int idleTimeSec = 60;
 
    @VisibleForTesting
    public String toJdbcConnectionString() {
        checkNotNull(this.username, "Missing username.");
        // format crate://localhost:5433/
        final String format = String.format("crate://%s:%d/", this.host, this.port);
        return format;
    }
 
    @Override
    @VisibleForTesting
    public JdbcPluginConfig buildPluginConfig(
            JdbcPluginConfig.Builder configBuilder,
            CredentialsService credentialsService,
            OptionManager optionManager
    ) {
 
        return configBuilder.withDialect(getDialect())
                .withFetchSize(fetchSize)
                .clearHiddenSchemas()
                .addHiddenSchema("sys")
                .withDatasourceFactory(this::newDataSource)
                .build();
    }
 
    private CloseableDataSource newDataSource() {
        Properties properties = new Properties();
        CloseableDataSource dataSource = DataSources.newGenericConnectionPoolDataSource(DRIVER,
                toJdbcConnectionString(), this.username, this.password, properties, DataSources.CommitMode.DRIVER_SPECIFIED_COMMIT_MODE, this.maxIdleConns, this.idleTimeSec);
        return dataSource;
    }
 
    @Override
    public ArpDialect getDialect() {
        return ARP_DIALECT;
    }
}

单元测试

  • 参考pom.xml
    核心是pom 依赖,注意jackson 版本以及jdk 版本,最好使用8
 
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
 
    <groupId>com.dalong</groupId>
    <artifactId>cratedb-driver</artifactId>
    <version>${version.dremio}</version>
 
    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <version.dremio>20.0.0-202201050826310141-8cc7162b</version.dremio>
    </properties>
   <dependencies>
       <dependency>
           <groupId>com.dremio.community.plugins</groupId>
           <artifactId>dremio-ce-jdbc-plugin</artifactId>
           <version>${version.dremio}</version>
           <scope>compile</scope>
       </dependency>
       <dependency>
           <groupId>io.crate</groupId>
           <artifactId>crate-jdbc</artifactId>
           <version>2.6.0</version>
       </dependency>
       <dependency>
           <groupId>com.dremio.sabot</groupId>
           <artifactId>dremio-sabot-kernel</artifactId>
           <classifier>tests</classifier>
           <scope>test</scope>
           <version>${version.dremio}</version>
       </dependency>
       <dependency>
           <groupId>com.dremio</groupId>
           <artifactId>dremio-common</artifactId>
           <classifier>tests</classifier>
           <scope>test</scope>
           <version>${version.dremio}</version>
       </dependency>
       <dependency>
           <groupId>com.fasterxml.jackson.core</groupId>
           <artifactId>jackson-databind</artifactId>
           <version>2.11.4</version>
           <optional>true</optional>
       </dependency>
       <dependency>
           <groupId>org.mockito</groupId>
           <artifactId>mockito-core</artifactId>
           <scope>test</scope>
           <version>3.12.4</version>
       </dependency>
       <dependency>
           <groupId>junit</groupId>
           <artifactId>junit</artifactId>
           <version>4.13.2</version>
           <scope>test</scope>
       </dependency>
   </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <includes>
                                    <include>io.crate:crate-jdbc</include>
                                </includes>
                            </artifactSet>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
    <!-- Repositories are declared with https: Maven 3.8.1 and later block plain-http
         external repositories by default, which would make dependency resolution fail. -->
    <repositories>
        <repository>
            <id>tencent-public</id>
            <url>https://mirrors.cloud.tencent.com/nexus/repository/maven-public/</url>
        </repository>
        <repository>
            <id>dremio-public</id>
            <url>https://maven.dremio.com/public/</url>
        </repository>
        <repository>
            <id>dremio-free</id>
            <url>https://maven.dremio.com/free/</url>
        </repository>
    </repositories>
</project> 
  • 简单代码
    继承 BaseTestQuery 就可以实现测试了,还是很简单的
 
package com.dremio.jdbc;
 
import com.dremio.BaseTestQuery;
import com.dremio.TestResult;
import com.dremio.exec.ExecConstants;
import com.dremio.exec.store.CatalogService;
import com.dremio.exec.store.jdbc.conf.CrateConf;
import com.dremio.options.OptionValue;
import com.dremio.service.namespace.source.proto.SourceConfig;
import org.junit.Before;
import org.junit.Test;
 
public class MyPluginTest extends BaseTestQuery {
    private CrateConf crateConf;
 
    @Before
    public  void initSource(){
        getSabotContext().getOptionManager().setOption(OptionValue.createLong(OptionValue.OptionType.SYSTEM, ExecConstants.ELASTIC_ACTION_RETRIES, 3));
        SourceConfig sc = new SourceConfig();
        sc.setName("cratedb");
        crateConf  = new CrateConf();
        crateConf.host="127.0.0.1";
        crateConf.port=5433;
        crateConf.username="crate";
        sc.setConnectionConf(crateConf);
        sc.setMetadataPolicy(CatalogService.DEFAULT_METADATA_POLICY);
        getSabotContext().getCatalogService().createSourceIfMissingWithThrow(sc);
    }
 
    @Test
    public  void test() throws Exception {
        String query  = "select * from cratedb.doc.demoapp";
        TestResult testResult=  testBuilder()
                .sqlQuery(query)
                .unOrdered()
                .baselineColumns("id", "name")
                .baselineValues(null,    null)
                .go();
    }
}
 
 

参考使用

  • 构建
mvn clean package -Dmaven.test.skip
  • 安装arp
    拷贝jar 到dremio 的jars 中,因为已经内置了cratedb驱动,不需要额外处理了

 

 

 

 


 

 

说明

目前的版本并不是特别完善,但是比较符合官方推荐的 arp 开发方式了,具体代码参考 github。目前还缺少关于用户密码的处理,后期完善
以前关于dremio 级联模式的处理也是类似的

参考资料

https://www.dremio.com/resources/tutorials/how-to-create-an-arp-connector/
https://github.com/rongfengliang/cratedb-dremio-connector

posted on 2022-01-27 23:39  荣锋亮  阅读(66)  评论(0编辑  收藏  举报

导航