基于odbc 开发一个高性能的dremio cube.js driver
我以前简单写过,基于odbc 的cube.js driver 开发简单说明,一直没有实现, 最近研究下了,发现实现起来还是很简单的
所以就写了一个,方便使用,对比下来性能提升是很大的
参考开发
核心是基于odbc 对于query以及testConnection 的实现,部分借鉴了http 协议的实现
- driver 代码
const odbc = require('odbc')
const SqlString = require('sqlstring');
const { BaseDriver } = require('@cubejs-backend/base-driver');
const DremioQuery = require('./DremioQuery');
const applyParams = (query, params) => SqlString.format(query, params);
class DremioDriver extends BaseDriver {
static dialectClass() {
return DremioQuery;
}
/**
* Returns default concurrency value.
*/
static getDefaultConcurrency() {
return 2;
}
/**
* Class constructor.
*/
constructor(config = {}) {
super();
this.config = {
connectionString:
config.connectionString || '',
loginTimeout:
config.loginTimeout || 10,
connectionTimeout:
config.connectionTimeout || 10,
};
this.pool = odbc.pool(this.config)
}
/**
* @public
* @return {Promise<void>}
*/
async testConnection() {
try {
const pool = await this.pool;
return await pool.query('SELECT 1');
} catch (e) {
throw e;
}
}
quoteIdentifier(identifier) {
return `"${identifier}"`;
}
async query(query, values) {
const queryString = applyParams(
query,
(values || []).map(s => (typeof s === 'string' ? {
toSqlString: () => SqlString.escape(s).replace(/\\\\\\([_%])/g, '\\$1').replace(/\\'/g, '\'\'')
} : s))
);
try {
const pool = await this.pool;
const res = await pool.query(queryString);
return res;
} catch (error) {
throw new Error(
`Dremio odbc Query error,${error.message}`,
);
}
}
informationSchemaQuery() {
const q = `SELECT columns.column_name as ${this.quoteIdentifier('column_name')},
columns.table_name as ${this.quoteIdentifier('table_name')},
columns.table_schema as ${this.quoteIdentifier('table_schema')},
columns.data_type as ${this.quoteIdentifier('data_type')}
FROM information_schema.columns
join information_schema.views
on columns.table_name=views.table_name
WHERE columns.table_schema NOT IN ('information_schema', 'mysql', 'performance_schema', 'sys') AND columns.table_schema NOT IN ('INFORMATION_SCHEMA', 'sys.cache')`;
return q;
}
}
module.exports = DremioDriver;
参考使用
- cube.js 配置
// Cube.js configuration options: https://cube.dev/docs/config
const { DremioDriver, DremioQuery } = require("dremio-odbc-cubejs-driver")
module.exports = {
dialectFactory: (dataSource) => {
return DremioQuery
},
dbType: ({ dataSource } = {}) => {
return "dremio-odbc"
},
driverFactory: ({ dataSource } = {}) => {
const username = "xxxx";
const password = "xxxxx"
const host = "xxxxxxxx"
const port = 32010
const ssl = false
const database = "xxxxxxx"
const connectionConfig = {
# 此路径实际上是odbc dsn 的配置,我使用了绝对路径,同时是mac 的配置,不同系统的不一样,sql 协议支持基于了apache arrow flight sql
connectionString: `DRIVER=/Library/Dremio/ODBC/lib/libarrow-flight-sql-odbc.dylib;Host=${host};ConnectionType=Direct;Schema=${database};Port=${port};useEncryption=${ssl};UID=${username};PWD=${password}`,
connectionTimeout: 10,
loginTimeout: 10,
}
return new DremioDriver(connectionConfig)
}
};
说明
相关npm 包我已经发布到npm 官方仓库了,对于基于dremio+cube.js 进行数据bi 分析的同学,可以直接使用,同时也欢迎提pr以及issue
具体npm 包为dremio-odbc-cubejs-driver
可以直接搜索到,目前npm 版本号与官方保持一致,但是不确保都是可以兼容的(尽量多更新)
参考资料
https://github.com/rongfengliang/cubejs-dremio-odbc-driver
https://www.npmjs.com/package/odbc
https://github.com/cube-js/cube.js/tree/master/packages
https://www.cnblogs.com/rongfengliang/p/16037324.html
https://www.npmjs.com/package/dremio-odbc-cubejs-driver