python连接clickhouse常用的三种方式
推荐运行环境
- python 3.8.3
- clickhouse_driver==0.2.3
- clickhouse_sqlalchemy==0.2.0
- sqlalchemy==1.4.32
一、clickhouse_driver连接的两种方式
注意端口都使用tcp端口9000
1.Client
from clickhouse_driver import Client client = Client(host=host, port=9000, database=database,user=user ,password=pw) sql = 'SHOW TABLES' res = client.execute(sql)
2.connect
from clickhouse_driver import connect #账号:密码@主机名:端口号/数据库 conn = connect(f'clickhouse://{user}:{pw}@{host}:9000/{database}') cursor = conn.cursor() cursor.execute('SHOW TABLES')
二、clickhouse_sqlalchemy连接方式
使用较复杂,推荐使用上述两种,注意使用端口为http端口8123。
from clickhouse_sqlalchemy import make_session from sqlalchemy import create_engine import pandas as pd conf = { "user": "xxx", "password": "xxx", "server_host": "xx.xxx.xx.xxx", "port": "8123", "db": "xxx" } connection = 'clickhouse://{user}:{password}@{server_host}:{port}/{db}'.format(**conf) engine = create_engine(connection, pool_size=100, pool_recycle=3600, pool_timeout=20) sql = 'SHOW TABLES' session = make_session(engine) cursor = session.execute(sql) try: fields = cursor._metadata.keys df = pd.DataFrame([dict(zip(fields, item)) for item in cursor.fetchall()]) finally: cursor.close() session.close()
简单连接示例:
#!/usr/bin/env python # coding: utf-8 # In[24]: import pandas as pd import numpy as np import datetime import os from email.mime.multipart import MIMEMultipart import smtplib from email.mime.text import MIMEText np.set_printoptions(suppress=True) # 取消科学计数法输出 from clickhouse_driver import Client client = Client(host='x.x.x.x',port='9000',user='xxx',password='xxxxxx') today = datetime.datetime.today().strftime('%Y%m%d') result_path = "/home/ad/workanalysis/result/" print(result_path) print(today) isExists=os.path.exists(result_path) if not isExists: os.makedirs(result_path) result_name = "失败记录_"+today+".xlsx" result_xlsx = result_path+result_name print(result_xlsx) dn_sql =''' SELECT a.SEQ,a.MER_NAME,b.MID,a.MER_ID,a.AMOUNT,a.SETTLE_AMOUNT,(CASE WHEN a.FAIL_REASON='打款系统返回错误:失败,数据已变更,无法修改新结算信息' THEN c.FAIL_REASON ELSE a.FAIL_REASON END),a.CREATE_TIME,a.BATCH_DATE FROM (SELECT*FROM pe_data_prod.dwd_settle_batch_detail_allattr WHERE SETTLE_STATUS='failed' AND CURRENT_FLAG='current' AND MER_ID IN (SELECT M_PEM_NO FROM isv_data_prod.dwd_merchant_allattr WHERE M_RELATIONSHIP_NAME LIKE '%V0008724%')) a LEFT JOIN (SELECT*FROM isv_data_prod.dwd_merchant_allattr WHERE M_RELATIONSHIP_NAME LIKE '%V0008724%' AND M_PEM_NO IN (SELECT MER_ID FROM pe_data_prod.dwd_settle_batch_detail_allattr)) b ON a.MER_ID=b.M_PEM_NO LEFT JOIN (SELECT SEQ,SEQ_SUBSTR,FAIL_REASON FROM (SELECT SEQ,SEQ_SUBSTR,FAIL_REASON,row_number () OVER (PARTITION BY SEQ_SUBSTR ORDER BY CREATE_TIME DESC) AS rank FROM (SELECT*FROM pe_data_prod.dwd_settle_batch_detail_allattr WHERE SEQ_SUBSTR IN (SELECT SEQ_SUBSTR FROM pe_data_prod.dwd_settle_batch_detail_allattr WHERE SETTLE_STATUS='failed' AND CURRENT_FLAG='current' AND (FAIL_REASON='打款系统返回错误:失败,数据已变更,无法修改新结算信息' OR FAIL_REASON IS NULL)) AND FAIL_REASON !='打款系统返回错误:失败,数据已变更,无法修改新结算信息') ORDER BY SEQ_SUBSTR ASC SETTINGS allow_experimental_window_functions=1) AS a WHERE a.rank='1') c ON a.SEQ_SUBSTR=c.SEQ_SUBSTR ''' dn_all = client.execute(dn_sql) dn_all=pd.DataFrame(dn_all,columns= ["SEQ","MER_NAME","MID","MER_ID","AMOUNT","SETTLE_AMOUNT","FAIL_REASON","CREATE_TIME","BATCH_DATE"]) #wx.to_excel(result_xlsx) dn_all.rename(columns = {'SEQ':'打款流水号','MER_NAME':'商户名称','MID':'商户M商编','MER_ID':'商户PE商编','AMOUNT':'应打款金额','SETTLE_AMOUNT':'实际打款金额','FAIL_REASON':'打款失败原因','CREATE_TIME':'创建时间','BATCH_DATE':'订单结算日期'}, inplace=True) print(dn_all) #wx.to_excel(result_xlsx) writer=pd.ExcelWriter(result_xlsx) dn_all.to_excel(writer,sheet_name='xxxx',index=0) writer.save() #writer.close() receivers = ['xxxx@126.com'] cc_mail = ['xxxx@126.com'] sender = "xxx@126.com" mail_pass = "xxxx" mail_subject = "失败记录" mail_context = "您好:\n 附件明细数据为 :"+result_name msg = MIMEMultipart() msg["From"] = sender msg["To"] = ";".join(receivers) msg['Cc'] = ','.join(cc_mail) msg["Subject"] = mail_subject msg.attach(MIMEText(mail_context, 'plain', 'utf-8')) att = MIMEText(open(writer, "rb").read(), "base64", "utf-8") att["Content-Type"] = "application/octet-stream" att.add_header("Content-Disposition", "attachment", filename=("gbk", "", result_name)) msg.attach(att) try: smtpObj = smtplib.SMTP_SSL('x.x.x.x', 465) smtpObj.login('xxxx@126.com', 'xxxxxx') smtpObj.sendmail(sender, receivers+cc_mail, msg.as_string()) print('Success!') smtpObj.quit() except smtplib.SMTPException as e: print(e) # In[ ]: