Running a Local Model with Conversation History
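The snippet below loads ChatGLM3-6B from a local path with trust_remote_code=True, pre-seeds a two-message history, and passes it to model.chat so that follow-up questions can refer back to earlier turns.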
from transformers import AutoTokenizer, AutoModel

# Load the tokenizer and model from the local ChatGLM3-6B checkpoint.
modelPath = "/home/cmcc/server/model/chatglm3-6b"
tokenizer = AutoTokenizer.from_pretrained(modelPath, trust_remote_code=True)
model = AutoModel.from_pretrained(modelPath, trust_remote_code=True).half().cuda()
model.eval()

# Pre-seed the conversation with a prior user/assistant exchange.
history = [
    {"role": "user", "content": "你好"},
    {"role": "assistant", "content": "我是人工智能助手,我叫小明"}
]

# Each call returns the reply plus the updated history, which is fed back in.
response, history = model.chat(tokenizer, "你好", history=history)
print(response)

response, history = model.chat(tokenizer, "你叫什么名字", history=history)
print(response)
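Because model.chat returns both the reply and the updated history, the same pattern extends to any number of turns by feeding the returned history back into the next call. Below is a minimal interactive loop as a sketch, assuming the model and tokenizer loaded above; the prompt labels and exit keyword are illustrative and not part of the original example.

# Keep chatting until the user types "exit"; history carries the context between turns.
history = []
while True:
    query = input("User: ").strip()
    if query.lower() == "exit":
        break
    response, history = model.chat(tokenizer, query, history=history)
    print("Assistant:", response)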