mindie-llm Image Guide (RC2)

  • Base image: mindie_1.0.rc2:cann_8.0.rc2-py_3.10-ubuntu_22.04

  • Base container launch command (only single-card inference is done from here on):

    docker run -itd --name mindie-llm -e ASCEND_VISIBLE_DEVICES=0 -v /app:/app -p 9000:8900 mindie_1.0.rc2:cann_8.0.rc2-py_3.10-ubuntu_22.04 bash
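
    Optional sanity check: verify the NPU is visible inside the container. This assumes the Ascend Docker runtime is installed on the host, which is what makes ASCEND_VISIBLE_DEVICES take effect:

    docker exec -it mindie-llm npu-smi info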
    
  • Dependencies:

    pip install transformers decorator sympy attrs psutil sentencepiece accelerate scipy tiktoken==0.5.2 einops==0.7.0 transformers_stream_generator==0.0.4 numpy==1.26.4
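
    A quick check that the pinned versions resolved correctly:

    python3 -c "import transformers, numpy, einops, tiktoken, sentencepiece; print(transformers.__version__, numpy.__version__, einops.__version__)"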
    
  • Configuration files to modify:

    vi /usr/local/Ascend/atb-models/atb_llm/models/telechat/config.py
    max_position_embeddings=8192
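
    The same edit can be made non-interactively; the sed pattern assumes the assignment is written without spaces, so run the grep first to confirm the current form:

    grep -n "max_position_embeddings" /usr/local/Ascend/atb-models/atb_llm/models/telechat/config.py
    sed -i 's/max_position_embeddings=[0-9]*/max_position_embeddings=8192/' /usr/local/Ascend/atb-models/atb_llm/models/telechat/config.py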
    
    vi /usr/local/Ascend/mindie/latest/mindie-service/conf/config.json
    "cacheBlockSize" : 96	# telechat要小于等于96
    "ipAddress" : "172.17.0.2"
    "port" : 9000
    "httpsEnabled" : false,
    "interNodeTLSEnabled" : false
    "npuDeviceIds" : [[0]] # 多卡:[[0,1,2,3,4,5,6,7]]
    "worldSize" : 1 # 显卡数量,等于npuDeviceIds列举的数量,与docker run中的-e ASCEND_VISIBLE_DEVICES数量保持一致
    "maxSeqLen" : 8192, # 最大序列长度。即输入的长度+输出的长度
    "modelName" : "telechat" # 根据实际模型配置,千问用qwen
    "modelWeightPath" : "/app/model/telechat/TeleChat-12B-v2" # 模型路径,可以通过docker的-v注入
    "maxIterTimes" : 1024, # 迭代次数,即一句话最大可生成长度,取值范围[1, maxSeqLen-1]
    
    vi /app/model/telechat/TeleChat-12B-v2/config.json # TeleChat
    vi /app/model/qwen2.5/Qwen2.5-7B-Instruct/config.json # Qwen
    "torch_dtype": "float16" # must be float16 for both telechat and qwen
    
  • Service launch command:

    cd /usr/local/Ascend/mindie/latest/mindie-service
    nohup ./bin/mindieservice_daemon > output.log 2>&1 &
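
    To confirm the service came up, watch the log and probe the configured port (8900 here, per config.json):

    tail -f output.log    # Ctrl-C once startup completes
    curl -s -o /dev/null -w '%{http_code}\n' http://127.0.0.1:8900/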
    
  • Write the startup script start.sh (it needs to load the conda environment and the various Ascend environments):

    #!/bin/bash
    # >>> conda initialize >>>
    __conda_setup="$('/root/miniconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
    if [ $? -eq 0 ]; then
        eval "$__conda_setup"
    else
        if [ -f "/root/miniconda3/etc/profile.d/conda.sh" ]; then
            . "/root/miniconda3/etc/profile.d/conda.sh"
        else
            export PATH="/root/miniconda3/bin:$PATH"
        fi
    fi
    unset __conda_setup
    # <<< conda initialize <<<
    conda activate MindIE_1.0.RC2
    export LANG=C.UTF-8
    export LC_ALL=C.UTF-8
    source /usr/local/Ascend/ascend-toolkit/set_env.sh
    source /usr/local/Ascend/nnal/atb/set_env.sh
    source /usr/local/Ascend/atb-models/set_env.sh
    source /usr/local/Ascend/mindie/set_env.sh
    # set the listen IP; this variable overrides the ipAddress property in config.json
    export MIES_CONTAINER_IP=$(ifconfig eth0 | grep -w inet | awk '{print $2}')
    /usr/local/Ascend/mindie/latest/mindie-service/bin/mindieservice_daemon
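
    Before the commit step below, the script must exist inside the container at the path referenced by CMD; a minimal way to stage it from the host:

    chmod +x start.sh
    docker cp start.sh mindie-llm:/usr/local/Ascend/mindie/latest/mindie-service/start.sh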
    
  • Build the standard image:

    # commit with the startup script baked in as the image's default command
    docker commit --change='CMD ["bash", "/usr/local/Ascend/mindie/latest/mindie-service/start.sh"]' mindie-llm mindie-llm:latest
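
    To verify the default command was baked in:

    docker inspect --format '{{.Config.Cmd}}' mindie-llm:latest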
    
  • Prepare the per-model configuration files (used to overwrite /usr/local/Ascend/mindie/latest/mindie-service/conf/config.json):

    # note: the modelWeightPath property must point to a path that matches the -v mount used at container startup
    # Qwen2.5-7B: config.json.qwen2.5-7b
    # TeleChat2-12B: config.json.telechat2-12b
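
    Before mounting them, a quick check that each prepared file is valid JSON (paths as used in the docker run commands below):

    python3 -m json.tool /root/mindie-llm/conf/config.json.telechat2-12b > /dev/null && echo "telechat config OK"
    python3 -m json.tool /root/mindie-llm/conf/config.json.qwen2.5-7b > /dev/null && echo "qwen config OK"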
    
  • Launch the containers:

    # launch TeleChat-12B-v2
    docker run -itd --shm-size="48G" --name telechat2-12b-lora -e ASCEND_VISIBLE_DEVICES=0 -v /app:/app -v /root/mindie-llm/conf/config.json.telechat2-12b:/usr/local/Ascend/mindie/latest/mindie-service/conf/config.json -p 8000:8900 mindie-llm:latest
    # launch Qwen2.5-7B
    docker run -itd --shm-size="48G" --name mindie-qwen2.5-7b -e ASCEND_VISIBLE_DEVICES=1 -v /app:/app -v /root/mindie-llm/conf/config.json.qwen2.5-7b:/usr/local/Ascend/mindie/latest/mindie-service/conf/config.json -p 8001:8900 mindie-llm:latest
    
  • Test commands:

    # TeleChat does not yet expose a standard OpenAI-compatible endpoint
    curl -H "Accept: application/json" -H "Content-Type: application/json" -X POST -d '{"inputs": "<_user>推荐本书<_bot>", "stream": false,"parameters": {"temperature": 1.0, "top_k": 10,"top_p": 0.95, "max_new_tokens": 200, "do_sample": true, "seed": null, "repetition_penalty": 1.03, "details": false, "typical_p": 0.5, "watermark": false}}' http://localhost:8000
    
    # OpenAI-compatible endpoint
    curl -H "Accept: application/json" -H "Content-Type: application/json" -X POST -d '{"model": "qwen","messages": [{"role": "system", "content": "You are a helpful assistant."},{"role": "user", "content": "推荐本书"}],"max_tokens": 512, "presence_penalty": 1.03, "frequency_penalty": 1.0, "seed": null, "temperature": 0.5, "top_p": 0.95, "stream": false}' http://localhost:8001/v1/chat/completions
    
