深度求索的DeepSeek 作为 DuckDB 的自然语言接口
2023年3月,使用 OpenAI 的 ChatGPT 作为 DuckDB 的自然语言接口
已有使用 OpenAI’s ChatGPT API,自然语言来使用Duckdb,
https:
QuackingDuck: Using OpenAI's ChatGPT as Natural Language Interface for DuckDB
原始代码的地址 : https:
使用DuckDB数据库作为演示的demo,
DuckDB数据库在进程中运行,并且可以使用简单的 pip install 命令进行安装,因此它非常适合小型独立演示
2025年02月,趁着Deepseek的API开发平台,也来使用一下,
代码参考Ask QuackingDuck。主要逻辑没有变动,在这里仅做学习使用
目前DeepSeek资源紧张,所以会出现API 返回空行的现象,以及
限速 非流式请求:持续返回空行
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
这个问题,后续DS资源稳定后会更好的,也可以在代码中加一些处理的逻辑
使用程序要申请DeepSeek 的 API-key
OpenAI的接口
OpenAI的 ChatCompletion 和 Completion 都是自然语言生成模型的接口,但它们的用途和应用场景略有不同
from openai import OpenAI
client = OpenAI(api_key="<DeepSeek API Key>", base_url="https://api.deepseek.com")
response = client.chat.completions.create(
response.choices[0].message.content.strip("\n")
import openai
openai.ChatCompletion.create
2023年3月2日,OpenAI 放出了一个直接可以进行对话聊天的接口。这个接口叫做 ChatCompletion
防止超标,可能需要在对话开始前设置一个允许最大的token阈值,比如 MAX_TOKEN_LIMIT = 2000,
再设置一个小于某个数量就需要提醒的警告值,比如 MIN_TOKEN_LIMIT = 200,对话前初
messages必须是消息对象的数组,其中每个对象都有一个角色(“system”、“user”、“assistant”)和内容(消息的内容)
openai.api_key = ""
openai.Model.list()
)["choices"][0]["message"]["content"].strip("\n")
代码示例
import os
import duckdb
from openai import OpenAI
auto_sk = r"sk-*****************"
client = OpenAI(api_key=auto_sk, base_url="https://api.deepseek.com")
model_nm = "deepseek-chat"
class QuackingDuck:
def __init__(self, conn):
self.conn = conn
self.schemas = self._get_schemas()
def _get_schemas(self):
tables = self.conn.execute("PRAGMA show_tables_expanded").fetchall()
schemas = ""
for table in tables:
name = table[2]
columns = [f"{table[3][i]}({table[4][i]})" for i in range(len(table[3]))]
first_rows_md = self.conn.execute(f"SELECT * from {name} LIMIT 1;").fetchdf().to_markdown()
schemas = schemas + f"{name} ({', '.join(columns)}):\n5 row sample:\n" + first_rows_md + "\n" + "\n"
return schemas
def explain_content(self, detail="one sentence"):
print(self._schema_summary_internal(detail)[1])
def _schema_summary_internal(self, detail="one sentence"):
prompt = f"""SQL schema of my database:{self.schemas} Explain in {detail} what the data is about:
"""
explanation = client.chat.completions.create(
model=model_nm,
messages=[{"role": "system",
"content": "你是一个乐于助人的助手,你可以根据数据库的元数据生成人类可阅读的关于数据库详情的总结"},
{"role": "user", "content": prompt}, ],
temperature=0,
).choices[0].message.content.strip("\n")
return prompt, explanation
def _generate_sql(self, question, debug=False):
(summary_prompt, summary) = self._schema_summary_internal()
sql_prompt = f"""输出一个SQL查询语句,不需要任何解释也不要输出和查询无关的语句,除非这个问题不是关于数据库的数据的,这种情况下回复我不知道
确保只使用来自之前提到的元数据中的表格和字段同时写一个查询语句回答以下问题:"{question}"
"""
sql_query = client.chat.completions.create(
model=model_nm,
messages=[{"role": "system",
"content": "你是一个乐于助人的助手,你可以根据用户的输入生成 Postgresql代码,你只要给出SQL代码,不给出其任何他人类可阅读文本解释"},
{"role": "user", "content": summary_prompt},
{"role": "assistant", "content": summary},
{"role": "user", "content": sql_prompt},
],
temperature=0,
).choices[0].message.content.strip("\n")
if debug:
print("Prompt: \n" + sql_prompt)
print("SQL Query: \n" + sql_query)
if "I don't know" in sql_query:
raise Exception("Question cannot be answered based on the data in the database.")
return summary_prompt, summary, sql_prompt, sql_query
def _regenerate_sql(self, content_prompt, content_summary, sql_prompt, sql_query, error, debug=False):
sql_query = client.chat.completions.create(
model=model_nm,
messages=[
{"role": "system",
"content": "你是一个乐于助人的助手,你可以根据用户的输入生成 Postgresql代码,你只要给出SQL代码,不给出其任何他人类可阅读文本解释"},
{"role": "user", "content": content_prompt},
{"role": "assistant", "content": content_summary},
{"role": "user", "content": sql_prompt},
{"role": "assistant", "content": sql_query},
{"role": "user",
"content": f"我得到了一个这样的错误: {error}. 请改正这个查询语句并且只给出SQL代码"},
],
temperature=0,
).choices[0].message.content.strip("\n")
if debug:
print("Corrected SQL Query: \n" + sql_query)
return sql_query
def ask(self, question, debug=False):
summary_prompt, summary, sql_prompt, sql_query = self._generate_sql(question, debug)
try:
result = self.conn.execute(sql_query).fetchdf()
result_markdown = result.head(10).to_markdown()
except Exception as e:
print("查询语句有一个错误: " + str(e) + "\n 将尝试修改它.\n")
sql_query = self._regenerate_sql(summary_prompt, summary, sql_prompt, sql_query, str(e), debug)
result = self.conn.execute(sql_query).fetchdf()
result_markdown = result.head(10).to_markdown()
answer_prompt = f"""查询结果: {result_markdown}根据查询结果的信息,用自然语言回答问题. """
answer = client.chat.completions.create(
model=model_nm,
messages=[
{"role": "system", "content": "你是一个乐于助人的助."},
{"role": "user", "content": summary_prompt},
{"role": "assistant", "content": summary},
{"role": "user", "content": sql_prompt},
{"role": "assistant", "content": sql_query},
{"role": "user", "content": answer_prompt},
],
temperature=0,
).choices[0].message.content.strip("\n")
if debug:
print("debug :Prompt: \n" + answer_prompt)
print("debug :Answer: \n" + answer)
else:
print(answer)
return result
con = duckdb.connect()
con.execute('''
CREATE TABLE IF NOT EXISTS customers (
customer_id char(10) NOT NULL,
firstname varchar(32) default NULL,
lastname varchar(32) default NULL,
city varchar(32) default NULL,
address varchar(128) default NULL,
email varchar(128) default NULL,
PRIMARY KEY (customer_id) )
''')
con.execute('''
CREATE TABLE IF NOT EXISTS laptops (
model char(4) NOT NULL default '',
speed double default NULL,
ram int default NULL,
hd int default NULL,
screen double default NULL,
price double default NULL,
PRIMARY KEY (model) )
''')
con.execute('''
CREATE TABLE IF NOT EXISTS pcs (
model char(4) NOT NULL,
speed double NOT NULL,
ram int NOT NULL,
hd int NOT NULL,
price double NOT NULL,
PRIMARY KEY (model) )
''')
con.execute('''CREATE TABLE IF NOT EXISTS products (
maker char(1) default NULL,
model char(4) NOT NULL default '',
type varchar(10) default NULL,
PRIMARY KEY (model) )
''')
con.execute('''CREATE TABLE IF NOT EXISTS sales (
customer_id char(10) NOT NULL default '',
model char(4) NOT NULL default '',
quantity int default NULL,
day date NOT NULL default '0000-00-00',
paid double default NULL,
type_of_payment varchar(32) default NULL,
PRIMARY KEY (customer_id,model,day) )
''')
con.execute('''
INSERT INTO customers (customer_id, firstname, lastname, city, address, email)
VALUES
('1122334455', '赵', '天', '京', '胡同', 'ann@123.com'),
('1231231231', '钱', '地', '沪', '弄堂', NULL),
('1234567890', '孙', '玄', '穗深', '巷子', NULL),
('9876543210', '李', '黄', '宁汉', '垵', 'jack@123.com'),
('9999999999', '周', '宇', '蓉杭', '洞', 'nj@123.com');
''')
con.execute('''
INSERT INTO laptops (model, speed, ram, hd, screen, price)
VALUES
('2001', 2, 2048, 240, 20.1, 3673),
('2002', 1.73, 1024, 80, 17, 949),
('2003', 1.8, 512, 60, 15.4, 549),
('2004', 2, 512, 60, 13.3, 1150),
('2005', 2.16, 1024, 120, 17, 2500),
('2006', 2, 2048, 80, 15.4, 1700),
('2007', 1.83, 1024, 120, 13.3, 1429),
('2010', 2, 2048, 160, 15.4, 2300);
''')
con.execute('''
INSERT INTO pcs (model, speed, ram, hd, price)
VALUES
('1001', 2.66, 1024, 250, 2114),
('1002', 2.1, 512, 250, 995),
('1003', 1.42, 512, 80, 478),
('1004', 2.8, 1024, 250, 649),
('1005', 3.2, 512, 250, 630),
('1006', 3.2, 1024, 320, 1049),
('1007', 2.2, 1024, 200, 510),
('1008', 3.06, 512, 80, 529);
''')
con.execute('''
INSERT INTO products (maker, model, type)
VALUES
('A', '1001', 'pc'),
('A', '1002', 'pc'),
('A', '1003', 'pc'),
('B', '1004', 'pc'),
('B', '1006', 'pc'),
('C', '1007', 'pc'),
('E', '2002', 'laptop'),
('A', '2004', 'laptop'),
('A', '2005', 'laptop');
''')
con.execute('''
INSERT INTO sales (customer_id, model, quantity, day, paid, type_of_payment)
VALUES
('1122334455', '2010', 1, '2020-12-19', 2300, '微信'),
('1122334455', '3001', 1, '2020-12-18', 99, '现金'),
('1231231231', '2002', 2, '2020-12-19', 1898, '支付宝'),
('1231231231', '3002', 1, '2020-12-18', 239, '信用卡'),
('1234567890', '1001', 1, '2020-12-20', 1902.6, '支票'),
('9999999999', '3007', 2, '2020-12-20', 360, '现金');
''')
quack = QuackingDuck(con)
quack.ask("谁买了最多的 PCs, 请给出用户的名字?", debug=True)
参考
https://www.cnblogs.com/ghj1976/p/openaichatcompletioncreate-jie-kou-can-shu-shuo-mi.html
https://platform.openai.com/docs/api-reference/completions
OpenAI.Completion.create 接口参数说明 https://mp.weixin.qq.com/s/RhHd4oPaagUMvBIfshkWaQ
https://colab.research.google.com/drive/1BdJWrigPYzOrGL8acXo1Uf7WdTy3U4rb
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· CSnakes vs Python.NET:高效嵌入与灵活互通的跨语言方案对比
· 【.NET】调用本地 Deepseek 模型
· Plotly.NET 一个为 .NET 打造的强大开源交互式图表库
· 上周热点回顾(2.17-2.23)
· 如何使用 Uni-app 实现视频聊天(源码,支持安卓、iOS)