Loading

vLLM docker-compose configuration for serving a fine-tuned Qwen2.5-14B model.

# Docker Compose service: vLLM OpenAI-compatible API server hosting a
# GPTQ-Int4 quantized Qwen2.5-14B fine-tune on GPU 0.
# Host port 11110 is mapped to vLLM's default server port 8000.
version: '3'  # NOTE: the top-level `version` key is obsolete in the Compose spec; Compose v2 ignores it

services:
  moonlit-vllm-qwen-hotvideo:
    image: vllm/vllm-openai:v0.6.2
    container_name: hitvideos_api
    restart: always
    # vLLM server CLI arguments. Block-sequence style (one item per line) so
    # individual options can be commented in/out without breaking the list
    # syntax, unlike the previous flow-style `[...]` form.
    command:
      - "--served-model-name"
      - "qwen2.5-14b-hitvideos"
      - "--model"
      # NOTE(review): "Insruct" looks like a typo for "Instruct", but this path
      # must match the actual directory name inside the mounted volume below —
      # confirm on disk before renaming either side.
      - "/root/models/Qwen2.5-14B-Insruct-GPTQ-Int4-1113"
      # - "--api-key"
      # - "sk-REDACTED"  # never commit real keys; inject via env/secret store
      # - "--max-model-len"
      # - "512"
      - "--tool-call-parser"
      - "hermes"
      - "--enable-auto-tool-choice"
      - "--enforce_eager"
      - "--gpu-memory-utilization"
      - "0.5"
      # - "--max_num_seqs"
      # - "256"
      - "--cpu-offload-gb"
      - "2"
    volumes:
      # Host model directory mounted read into the container's model root.
      - /data/preview/base/models/SFT/hitvideos:/root/models
    ports:
      - "11110:8000"  # quoted to avoid YAML sexagesimal parsing of host:container
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              capabilities: [gpu]
              device_ids: ['0']  # pin the service to GPU 0 only
    environment:
      TZ: Asia/Shanghai
#    networks:
#      - moonlit-vllm
#
#networks:
#    moonlit-vllm:
#        external: true

posted @ 2024-11-18 10:35  踩坑大王  阅读(8)  评论(0编辑  收藏  举报