基于odbc 开发一个高性能的dremio cube.js driver

我以前简单写过,基于odbc 的cube.js driver 开发简单说明,一直没有实现, 最近研究下了,发现实现起来还是很简单的
所以就写了一个,方便使用,对比下来性能提升是很大的

参考开发

核心是基于odbc 对于query以及testConnection 的实现,部分借鉴了http 协议的实现

  • driver 代码
 
const odbc = require('odbc')
const SqlString = require('sqlstring');
const { BaseDriver } = require('@cubejs-backend/base-driver');
const DremioQuery = require('./DremioQuery');
const applyParams = (query, params) => SqlString.format(query, params);
 
class DremioDriver extends BaseDriver {
 
  static dialectClass() {
    return DremioQuery;
  }
  /**
     * Returns default concurrency value.
     */
  static getDefaultConcurrency() {
    return 2;
  }
  /**
 * Class constructor.
 */
  constructor(config = {}) {
    super();
    this.config = {
      connectionString:
        config.connectionString || '',
      loginTimeout:
        config.loginTimeout || 10,
      connectionTimeout:
        config.connectionTimeout || 10,
      ...config
    };
    this.pool = odbc.pool(this.config)
  }
 
  /**
   * @public
   * @return {Promise<void>}
   */
  async testConnection() {
    try {
      const pool = await this.pool;
      return await pool.query('SELECT 1');
    } catch (e) {
      throw e;
    }
  }
 
  quoteIdentifier(identifier) {
    return `"${identifier}"`;
  }
 
  async query(query, values) {
    const queryString = applyParams(
      query,
      (values || []).map(s => (typeof s === 'string' ? {
        toSqlString: () => SqlString.escape(s).replace(/\\\\\\([_%])/g, '\\$1').replace(/\\'/g, '\'\'')
      } : s))
    );
    try {
      const pool = await this.pool;
      const res = await pool.query(queryString);
      return res;
    } catch (error) {
      throw new Error(
        `Dremio odbc Query error,${error.message}`,
      );
    }
  }
 
  informationSchemaQuery() {
    const q = `SELECT columns.column_name as ${this.quoteIdentifier('column_name')},
    columns.table_name as ${this.quoteIdentifier('table_name')},
    columns.table_schema as ${this.quoteIdentifier('table_schema')},
    columns.data_type as ${this.quoteIdentifier('data_type')}
FROM information_schema.columns
join information_schema.views
on columns.table_name=views.table_name
WHERE columns.table_schema NOT IN ('information_schema', 'mysql', 'performance_schema', 'sys')  AND columns.table_schema NOT IN ('INFORMATION_SCHEMA', 'sys.cache')`;
    return q;
  }
}
module.exports = DremioDriver;

参考使用

  • cube.js 配置
// Cube.js configuration options: https://cube.dev/docs/config
const { DremioDriver, DremioQuery } = require("dremio-odbc-cubejs-driver")
module.exports = {
    dialectFactory: (dataSource) => {
        return DremioQuery
    },
    dbType: ({ dataSource } = {}) => {
        return "dremio-odbc"
    },
    driverFactory: ({ dataSource } = {}) => {
        const username = "xxxx";
        const password = "xxxxx"
        const host = "xxxxxxxx"
        const port = 32010
        const ssl = false
        const database = "xxxxxxx"
        const connectionConfig = {
          # 此路径实际上是odbc dsn 的配置,我使用了绝对路径,同时是mac 的配置,不同系统的不一样,sql 协议支持基于了apache arrow flight sql 
            connectionString: `DRIVER=/Library/Dremio/ODBC/lib/libarrow-flight-sql-odbc.dylib;Host=${host};ConnectionType=Direct;Schema=${database};Port=${port};useEncryption=${ssl};UID=${username};PWD=${password}`,
            connectionTimeout: 10,
            loginTimeout: 10,
        }
        return new DremioDriver(connectionConfig)
    }
};

说明

相关npm 包我已经发布到npm 官方仓库了,对于基于dremio+cube.js 进行数据bi 分析的同学,可以直接使用,同时也欢迎提pr以及issue
具体npm 包为dremio-odbc-cubejs-driver 可以直接搜索到,目前npm 版本号与官方保持一致,但是不确保都是可以兼容的(尽量多更新)

参考资料

https://github.com/rongfengliang/cubejs-dremio-odbc-driver
https://www.npmjs.com/package/odbc
https://github.com/cube-js/cube.js/tree/master/packages
https://www.cnblogs.com/rongfengliang/p/16037324.html
https://www.npmjs.com/package/dremio-odbc-cubejs-driver

posted on 2023-03-02 19:37  荣锋亮  阅读(53)  评论(0编辑  收藏  举报

导航