HF transformers to OpenAI API (using chatglm4 as an example)

Server:

import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
from typing import Dict, List, Optional

app = FastAPI()

# Request body following the OpenAI chat completions schema
class Query(BaseModel):
    messages: List[Dict[str, str]]
    model: Optional[str] = None
    request_id: Optional[str] = None
    do_sample: Optional[bool] = None
    stream: Optional[bool] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    max_tokens: Optional[int] = None
    stop: Optional[List[str]] = None

# Load the model with 4-bit NF4 quantization to reduce GPU memory usage
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model_path = '/data/glm-4-9b-chat-1m'
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto",
    trust_remote_code=True,
    quantization_config=nf4_config, 
)
tok = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)


@app.post("/chat/completions")
async def generate_response(query: Query):
    # apply_chat_template tokenizes by default and returns the prompt token ids
    iids = tok.apply_chat_template(
        query.messages,
        add_generation_prompt=True,
    )
    # generate() already falls back to model.generation_config, so only pass
    # the options the client actually set; note that OpenAI's max_tokens maps
    # to transformers' max_new_tokens.
    overrides = dict(
        max_new_tokens=query.max_tokens,
        temperature=query.temperature,
        do_sample=query.do_sample,
        top_p=query.top_p,
    )
    gen_cfg = {k: v for k, v in overrides.items() if v is not None}
    oids = model.generate(
        inputs=torch.tensor([iids]).to(model.device),
        **gen_cfg,
    )
    # Drop the prompt tokens and the trailing EOS token, then decode
    oids = oids[0][len(iids):-1].tolist()
    output = tok.decode(oids)
    return {
        "choices": [{
            'index': 0, 
            'message': {'role': 'assistant', 'content': output}
        }]
    }

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=6006)
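
The Query model above also declares stream and stop fields that this handler ignores. If you want token-by-token streaming in the OpenAI SSE style, a minimal sketch could look like the following. It reuses the model, tok, app, and Query objects defined above; the /chat/completions/stream route name and the 512-token fallback are illustrative choices, not part of the original code:

import json
import threading
from fastapi.responses import StreamingResponse
from transformers import TextIteratorStreamer

@app.post("/chat/completions/stream")
async def generate_stream(query: Query):
    iids = tok.apply_chat_template(query.messages, add_generation_prompt=True)
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
    gen_kwargs = dict(
        inputs=torch.tensor([iids]).to(model.device),
        streamer=streamer,
        max_new_tokens=query.max_tokens or 512,  # assumed fallback cap
    )
    # generate() blocks, so run it in a background thread and read the streamer here
    threading.Thread(target=model.generate, kwargs=gen_kwargs).start()

    def sse():
        for text in streamer:
            chunk = {"choices": [{"index": 0, "delta": {"role": "assistant", "content": text}}]}
            yield f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(sse(), media_type="text/event-stream")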

Client:

from openai import OpenAI

# The SDK requires an api_key even though our server never checks it
api = OpenAI(base_url='http://192.168.60.105:6006/', api_key='EMPTY')
r = api.chat.completions.create(
    messages=[{'role': 'user', 'content': '你好'}],
    model='',
)
r.choices[0].message.content
# '\n你好👋!很高兴遇见你,如果你想聊聊或者有需要帮助的地方,随时告诉我哦!'
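
If you just want to sanity-check the endpoint without the OpenAI SDK, a plain requests call against the same route works as well (host and port mirror the example above):

import requests

resp = requests.post(
    'http://192.168.60.105:6006/chat/completions',
    json={'messages': [{'role': 'user', 'content': '你好'}]},
)
print(resp.json()['choices'][0]['message']['content'])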