AI_使用_DeepSeek_作为 DuckDB 的自然语言接口

深度求索的DeepSeek 作为 DuckDB 的自然语言接口

2023年3月，使用 OpenAI 的 ChatGPT 作为 DuckDB 的自然语言接口
已有使用 OpenAI’s ChatGPT API,自然语言来使用Duckdb，
https://tdoehmen.github.io/blog/2023/03/07/quackingduck.html
QuackingDuck: Using OpenAI's ChatGPT as Natural Language Interface for DuckDB
原始代码的地址： https://colab.research.google.com/drive/1BdJWrigPYzOrGL8acXo1Uf7WdTy3U4rb
使用DuckDB数据库作为演示的demo，
DuckDB数据库在进程中运行，并且可以使用简单的 pip install 命令进行安装，因此它非常适合小型独立演示
2025年02月，趁着Deepseek的API开发平台，也来使用一下，
代码参考Ask QuackingDuck。主要逻辑没有变动，在这里仅做学习使用
目前DeepSeek资源紧张，所以会出现API 返回空行的现象，以及
限速非流式请求：持续返回空行
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
这个问题，后续DS资源稳定后会更好的，也可以在代码中加一些处理的逻辑
使用程序要申请DeepSeek 的 API-key

OpenAI的接口

OpenAI的 ChatCompletion 和 Completion 都是自然语言生成模型的接口，但它们的用途和应用场景略有不同
from openai import OpenAI
client = OpenAI(api_key="", base_url="https://api.deepseek.com")
response = client.chat.completions.create(
response.choices[0].message.content.strip("\n")

import openai
openai.ChatCompletion.create
2023年3月2日，OpenAI 放出了一个直接可以进行对话聊天的接口。这个接口叫做 ChatCompletion
防止超标，可能需要在对话开始前设置一个允许最大的token阈值，比如 MAX_TOKEN_LIMIT = 2000，
再设置一个小于某个数量就需要提醒的警告值，比如 MIN_TOKEN_LIMIT = 200，对话前初
messages必须是消息对象的数组，其中每个对象都有一个角色（“system”、“user”、“assistant”）和内容（消息的内容）
openai.api_key = ""
openai.Model.list()
)["choices"][0]["message"]["content"].strip("\n")

代码示例

import os
import duckdb
from openai import OpenAI

auto_sk = r"sk-*****************"
client = OpenAI(api_key=auto_sk, base_url="https://api.deepseek.com")
model_nm = "deepseek-chat"

class QuackingDuck:
def init(self, conn):
self.conn = conn
self.schemas = self._get_schemas()

def _get_schemas(self):
tables = self.conn.execute("PRAGMA show_tables_expanded").fetchall()
schemas = ""
for table in tables:
name = table[2]
columns = [f"{table[3][i]}({table[4][i]})" for i in range(len(table[3]))]
first_rows_md = self.conn.execute(f"SELECT * from {name} LIMIT 1;").fetchdf().to_markdown()
schemas = schemas + f"{name} ({', '.join(columns)}):\n5 row sample:\n" + first_rows_md + "\n" + "\n"
return schemas

def explain_content(self, detail="one sentence"):
print(self._schema_summary_internal(detail)[1])

def _schema_summary_internal(self, detail="one sentence"):
prompt = f"""SQL schema of my database:{self.schemas} Explain in {detail} what the data is about:
"""
explanation = client.chat.completions.create(
model=model_nm,
messages=[{"role": "system",
"content": "你是一个乐于助人的助手，你可以根据数据库的元数据生成人类可阅读的关于数据库详情的总结"},
{"role": "user", "content": prompt}, ],
temperature=0,
).choices[0].message.content.strip("\n")
return prompt, explanation

def _generate_sql(self, question, debug=False):
(summary_prompt, summary) = self._schema_summary_internal()
sql_prompt = f"""输出一个SQL查询语句，不需要任何解释也不要输出和查询无关的语句，除非这个问题不是关于数据库的数据的，这种情况下回复我不知道
确保只使用来自之前提到的元数据中的表格和字段同时写一个查询语句回答以下问题:"{question}"
"""
sql_query = client.chat.completions.create(
model=model_nm,
messages=[{"role": "system",
"content": "你是一个乐于助人的助手，你可以根据用户的输入生成 Postgresql代码，你只要给出SQL代码，不给出其任何他人类可阅读文本解释"},
{"role": "user", "content": summary_prompt},
{"role": "assistant", "content": summary},
{"role": "user", "content": sql_prompt},
],
temperature=0,
).choices[0].message.content.strip("\n")

if debug:
print("Prompt: \n" + sql_prompt)
print("SQL Query: \n" + sql_query)
if "I don't know" in sql_query:
raise Exception("Question cannot be answered based on the data in the database.")

return summary_prompt, summary, sql_prompt, sql_query

def _regenerate_sql(self, content_prompt, content_summary, sql_prompt, sql_query, error, debug=False):
sql_query = client.chat.completions.create(
model=model_nm,
messages=[
{"role": "system",
"content": "你是一个乐于助人的助手，你可以根据用户的输入生成 Postgresql代码，你只要给出SQL代码，不给出其任何他人类可阅读文本解释"},
{"role": "user", "content": content_prompt},
{"role": "assistant", "content": content_summary},
{"role": "user", "content": sql_prompt},
{"role": "assistant", "content": sql_query},
{"role": "user",
"content": f"我得到了一个这样的错误: {error}. 请改正这个查询语句并且只给出SQL代码"},
],
temperature=0,
).choices[0].message.content.strip("\n")

if debug:
print("Corrected SQL Query: \n" + sql_query)

return sql_query

def ask(self, question, debug=False):
summary_prompt, summary, sql_prompt, sql_query = self._generate_sql(question, debug)
try:
result = self.conn.execute(sql_query).fetchdf()
result_markdown = result.head(10).to_markdown()
except Exception as e:
print("查询语句有一个错误: " + str(e) + "\n 将尝试修改它.\n")
sql_query = self._regenerate_sql(summary_prompt, summary, sql_prompt, sql_query, str(e), debug)
result = self.conn.execute(sql_query).fetchdf()
result_markdown = result.head(10).to_markdown()

answer_prompt = f"""查询结果: {result_markdown}根据查询结果的信息，用自然语言回答问题. """
answer = client.chat.completions.create(
model=model_nm,
messages=[
{"role": "system", "content": "你是一个乐于助人的助."},
{"role": "user", "content": summary_prompt},
{"role": "assistant", "content": summary},
{"role": "user", "content": sql_prompt},
{"role": "assistant", "content": sql_query},
{"role": "user", "content": answer_prompt},
],
temperature=0,
).choices[0].message.content.strip("\n")
if debug:
print("debug ：Prompt: \n" + answer_prompt)
print("debug ：Answer: \n" + answer)
else:
print(answer)
return result

con = duckdb.connect()
# Customers Table
# 创建表格 #客户表 customers 笔记本laptops，台式机 pcs 产品表 Products 销售表sales
con.execute('''
CREATE TABLE IF NOT EXISTS customers (
customer_id char(10) NOT NULL,
firstname varchar(32) default NULL,
lastname varchar(32) default NULL,
city varchar(32) default NULL,
address varchar(128) default NULL,
email varchar(128) default NULL,
PRIMARY KEY (customer_id) )
''')

Laptops Table

con.execute('''
CREATE TABLE IF NOT EXISTS laptops (
model char(4) NOT NULL default '',
speed double default NULL,
ram int default NULL,
hd int default NULL,
screen double default NULL,
price double default NULL,
PRIMARY KEY (model) )
''')

PCs Table

con.execute('''
CREATE TABLE IF NOT EXISTS pcs (
model char(4) NOT NULL,
speed double NOT NULL,
ram int NOT NULL,
hd int NOT NULL,
price double NOT NULL,
PRIMARY KEY (model) )
''')

Products Table

con.execute('''CREATE TABLE IF NOT EXISTS products (
maker char(1) default NULL,
model char(4) NOT NULL default '',
type varchar(10) default NULL,
PRIMARY KEY (model) )
''')

Sales Table

con.execute('''CREATE TABLE IF NOT EXISTS sales (
customer_id char(10) NOT NULL default '',
model char(4) NOT NULL default '',
quantity int default NULL,
day date NOT NULL default '0000-00-00',
paid double default NULL,
type_of_payment varchar(32) default NULL,
PRIMARY KEY (customer_id,model,day) )
''')
### 插入数据
# Customers Table
con.execute('''
INSERT INTO customers (customer_id, firstname, lastname, city, address, email)
VALUES
('1122334455', '赵', '天', '京', '胡同', 'ann@123.com'),
('1231231231', '钱', '地', '沪', '弄堂', NULL),
('1234567890', '孙', '玄', '穗深', '巷子', NULL),
('9876543210', '李', '黄', '宁汉', '垵', 'jack@123.com'),
('9999999999', '周', '宇', '蓉杭', '洞', 'nj@123.com');
''')

Laptops Table

con.execute('''
INSERT INTO laptops (model, speed, ram, hd, screen, price)
VALUES
('2001', 2, 2048, 240, 20.1, 3673),
('2002', 1.73, 1024, 80, 17, 949),
('2003', 1.8, 512, 60, 15.4, 549),
('2004', 2, 512, 60, 13.3, 1150),
('2005', 2.16, 1024, 120, 17, 2500),
('2006', 2, 2048, 80, 15.4, 1700),
('2007', 1.83, 1024, 120, 13.3, 1429),
('2010', 2, 2048, 160, 15.4, 2300);
''')

PCs Table

con.execute('''
INSERT INTO pcs (model, speed, ram, hd, price)
VALUES
('1001', 2.66, 1024, 250, 2114),
('1002', 2.1, 512, 250, 995),
('1003', 1.42, 512, 80, 478),
('1004', 2.8, 1024, 250, 649),
('1005', 3.2, 512, 250, 630),
('1006', 3.2, 1024, 320, 1049),
('1007', 2.2, 1024, 200, 510),
('1008', 3.06, 512, 80, 529);
''')

Products Table

con.execute('''
INSERT INTO products (maker, model, type)
VALUES
('A', '1001', 'pc'),
('A', '1002', 'pc'),
('A', '1003', 'pc'),
('B', '1004', 'pc'),
('B', '1006', 'pc'),
('C', '1007', 'pc'),
('E', '2002', 'laptop'),
('A', '2004', 'laptop'),
('A', '2005', 'laptop');
''')

Sales Table

con.execute('''
INSERT INTO sales (customer_id, model, quantity, day, paid, type_of_payment)
VALUES
('1122334455', '2010', 1, '2020-12-19', 2300, '微信'),
('1122334455', '3001', 1, '2020-12-18', 99, '现金'),
('1231231231', '2002', 2, '2020-12-19', 1898, '支付宝'),
('1231231231', '3002', 1, '2020-12-18', 239, '信用卡'),
('1234567890', '1001', 1, '2020-12-20', 1902.6, '支票'),
('9999999999', '3007', 2, '2020-12-20', 360, '现金');
''')

quack = QuackingDuck(con)
# quack.explain_content()
quack.ask("谁买了最多的 PCs, 请给出用户的名字?", debug=True)

另外一种方式

import requests
import json

url = "https://api.deepseek.com/chat/completions"

auto_sk = r"sk-****************"
headers = {
'Content-Type': 'application/json',
'Accept': 'application/json',
'Authorization': 'Bearer ' + auto_sk
}

message_content = [
{"content": "You are a helpful assistant", "role": "system"},
{"role": "user", "content": "9.11 and 9.8, which is greater?"}]

input_info = {
"messages": message_content,
"model": "deepseek-chat",
"frequency_penalty": 0,
"max_tokens": 2048,
"presence_penalty": 0,
"response_format": {"type": "text"},
"stop": None,
"stream": False,
"stream_options": None,
"temperature": 1,
"top_p": 1,
"tools": None,
"tool_choice": "none",
"logprobs": False,
"top_logprobs": None,

}
payload = json.dumps(input_info)
response = requests.request("POST", url, headers=headers, data=payload)
response.encoding = 'utf-8'
status = response.status_code
print("status",status)
if response.status_code == 200:
print("response.text :",response.text)
response_info = response.json()
assistant_content = response_info.get("choices")[0].get("message")["content"]
print(status, assistant_content)
else:
print(f"请求失败，状态码: {response.status_code}")
# Round 2
if assistant_content is not None:
message_content.append({'role': 'assistant', 'content': assistant_content})
message_content.append({'role': 'user', 'content': "How many Rs are there in the word 'strawberry'?"})
print("message_content",message_content)

input_info["messages"] = message_content
payload = json.dumps(input_info)
response = requests.request("POST", url, headers=headers, data=payload)
status = response.status_code
if response.status_code == 200:
print("response.text 2 :",response.text)
response_info = response.json()
assistant_content = response_info.get("choices")[0].get("message")["content"]
print(status, assistant_content)
else:
print(f"请求失败，状态码: {response.status_code}")

参考

https://www.cnblogs.com/ghj1976/p/openaichatcompletioncreate-jie-kou-can-shu-shuo-mi.html
https://platform.openai.com/docs/api-reference/completions
OpenAI.Completion.create 接口参数说明 https://mp.weixin.qq.com/s/RhHd4oPaagUMvBIfshkWaQ
https://colab.research.google.com/drive/1BdJWrigPYzOrGL8acXo1Uf7WdTy3U4rb

posted on 2025-02-07 17:33 辰令阅读(319) 评论(0) 收藏举报