dremio 自定义arp 开发的几个细节
官方关于 ARP 的开发说明比较简单,如果需要开发一个兼容 JDBC 的数据库连接器,还是有一些问题需要解决的
标准格式
- conf 类
主要包含了 UI 以及方言类的定义,同时也包含了数据源的管理;schema 的处理也比较重要
- 参考格式
/**
 * Source configuration for a Snowflake ARP (Advanced Relational Pushdown) connector.
 *
 * <p>The class declares the user-editable source fields (JDBC URL, credentials, fetch
 * size, external-query flag), loads the ARP YAML file into a dialect singleton, and
 * builds the {@link JdbcPluginConfig} that wires the dialect, the datasource factory and
 * the hidden schemas together.
 */
@SourceType(value = "SNOWFLAKE", label = "Snowflake")
public class SnowflakeConf extends AbstractArpConf<SnowflakeConf> {

    // ARP YAML resource; it carries the type mappings so that type handling does not
    // have to be hand-coded.
    private static final String ARP_FILENAME = "arp/implementation/snowflake-arp.yaml";

    // Dialect singleton, built once from the ARP file when the class is loaded.
    private static final ArpDialect ARP_DIALECT =
            AbstractArpConf.loadArpFile(ARP_FILENAME, (SnowflakeDialect::new));

    private static final String DRIVER = "net.snowflake.client.jdbc.SnowflakeDriver";

    private static final Logger logger = Logger.getLogger(SnowflakeConf.class);

    /**
     * Schema fetcher for Snowflake.
     *
     * <p>ArpSchemaFetcher could be used directly instead to simplify development, for
     * example when only the schema-listing query needs to be customized.
     */
    static class SnowflakeSchemaFetcher extends JdbcSchemaFetcherImpl {

        public SnowflakeSchemaFetcher(JdbcPluginConfig config) {
            super(config);
        }

        // NOTE(review): presumably overrides a hook of JdbcSchemaFetcherImpl; the
        // superclass is not visible here, so @Override is intentionally left off -
        // confirm against the Dremio version in use and add it if the hook exists.
        protected boolean usePrepareForColumnMetadata() {
            return true;
        }
    }

    /**
     * Dialect implementation: supplies the schema fetcher and reports that nested
     * aggregations are not supported.
     */
    static class SnowflakeDialect extends ArpDialect {

        public SnowflakeDialect(ArpYaml yaml) {
            super(yaml);
        }

        @Override
        public JdbcSchemaFetcherImpl newSchemaFetcher(JdbcPluginConfig config) {
            return new SnowflakeSchemaFetcher(config);
        }

        @Override
        public boolean supportsNestedAggregations() {
            return false;
        }
    }

    /*
     Check Snowflake JDBC connection docs for more details: https://docs.snowflake.net/manuals/user-guide/jdbc-configure.html
     */
    @Tag(1)
    @DisplayMetadata(label = "JDBC URL (Ex: jdbc:snowflake://<account_name>.snowflakecomputing.com/?param1=value&param2=value)")
    public String jdbcURL;

    @Tag(2)
    @DisplayMetadata(label = "Username")
    public String username;

    @Tag(3)
    @Secret
    @DisplayMetadata(label = "Password")
    public String password;

    @Tag(4)
    @DisplayMetadata(label = "Record fetch size")
    @NotMetadataImpacting
    public int fetchSize = 2000;

    @Tag(5)
    @NotMetadataImpacting
    @DisplayMetadata(label = "Grant External Query access (External Query allows creation of VDS from a Snowflake query. Learn more here: https://docs.dremio.com/data-sources/external-queries.html#enabling-external-queries)")
    public boolean enableExternalQuery = false;

    /**
     * Returns the configured JDBC URL unchanged.
     *
     * <p>{@code checkNotNull} rejects a missing URL with the message
     * "JDBC URL is required" before the value is returned.
     *
     * @return the value of {@link #jdbcURL}
     */
    @VisibleForTesting
    public String toJdbcConnectionString() {
        checkNotNull(this.jdbcURL, "JDBC URL is required");
        return jdbcURL;
    }

    /**
     * Builds the plugin configuration for this source.
     *
     * <p>The builder receives the dialect singleton, the configured fetch size, the
     * datasource factory and the external-query flag. Hidden schemas are cleared first
     * and then only {@code SYSTEM} is hidden.
     *
     * @param configBuilder builder to populate
     * @param credentialsService not used by this implementation
     * @param optionManager not used by this implementation
     * @return the built plugin configuration
     */
    @Override
    @VisibleForTesting
    public JdbcPluginConfig buildPluginConfig(
            JdbcPluginConfig.Builder configBuilder,
            CredentialsService credentialsService,
            OptionManager optionManager
    ) {
        logger.info("Connecting to Snowflake");
        return configBuilder.withDialect(getDialect())
                .withFetchSize(fetchSize)
                .withDatasourceFactory(this::newDataSource)
                .clearHiddenSchemas()
                .addHiddenSchema("SYSTEM")
                .withAllowExternalQuery(enableExternalQuery)
                .build();
    }

    /**
     * Creates a pooled datasource for the Snowflake driver from the configured URL,
     * username and password, leaving the commit mode to the driver.
     */
    private CloseableDataSource newDataSource() {
        return DataSources.newGenericConnectionPoolDataSource(DRIVER,
                toJdbcConnectionString(), username, password, null,
                DataSources.CommitMode.DRIVER_SPECIFIED_COMMIT_MODE);
    }

    /** Returns the dialect singleton loaded from the ARP file. */
    @Override
    public ArpDialect getDialect() {
        return ARP_DIALECT;
    }

    /** Static accessor for the same dialect singleton, intended for tests. */
    @VisibleForTesting
    public static ArpDialect getDialectSingleton() {
        return ARP_DIALECT;
    }
}
几个问题
- 参考流程
调用链如下。schema 对于 dremio 还是很重要的,参考此调用链我们可以了解其处理流程,方便 arp 开发时的调试
at org.apache.commons.dbcp2.DelegatingStatement.executeQuery(DelegatingStatement.java:206)
at org.apache.commons.dbcp2.DelegatingStatement.executeQuery(DelegatingStatement.java:206)
at com.dremio.exec.store.jdbc.JdbcSchemaFetcherImpl.executeQueryAndGetFirstLong(JdbcSchemaFetcherImpl.java:357)
at com.dremio.exec.store.jdbc.JdbcSchemaFetcherImpl.getRowCount(JdbcSchemaFetcherImpl.java:311)
at com.dremio.exec.store.jdbc.JdbcDatasetMetadata.build(JdbcDatasetMetadata.java:85)
at com.dremio.exec.store.jdbc.JdbcSchemaFetcherImpl.getTableMetadata(JdbcSchemaFetcherImpl.java:643)
at com.dremio.exec.store.jdbc.JdbcStoragePlugin$JdbcDatasetHandle.getTableMetadataResponse(JdbcStoragePlugin.java:505)
at com.dremio.exec.store.jdbc.JdbcStoragePlugin.listPartitionChunks(JdbcStoragePlugin.java:270)
at com.dremio.exec.catalog.DatasetSaver.save(DatasetSaver.java:93)
at com.dremio.exec.catalog.DatasetSaver.save(DatasetSaver.java:142)
at com.dremio.exec.catalog.DatasetManager.getTableFromPlugin(DatasetManager.java:364)
at com.dremio.exec.catalog.DatasetManager.getTable(DatasetManager.java:209)
at com.dremio.exec.catalog.CatalogImpl.getTable(CatalogImpl.java:194)
at com.dremio.exec.catalog.SourceAccessChecker.lambda$getTable$3(SourceAccessChecker.java:121)
at com.dremio.exec.catalog.SourceAccessChecker.getIfVisible(SourceAccessChecker.java:90)
at com.dremio.exec.catalog.SourceAccessChecker.getTable(SourceAccessChecker.java:121)
at com.dremio.exec.catalog.DelegatingCatalog.getTable(DelegatingCatalog.java:83)
at com.dremio.exec.catalog.CachingCatalog.getTable(CachingCatalog.java:93)
- catalog && schema 处理
这个可以基于配置,同时对于特定数据库我们可能需要开发自己的schema fetch
配置参考
syntax:
  identifier_quote: '"'
  allows_boolean_literal: true
  inject_numeric_cast_project: true
  supports_catalogs: false
  supports_schemas: true