NVIDIA创建容器
==================

.. code-block:: shell

   docker run --runtime nvidia --gpus '"device=0"' \
     -v ~/.cache/huggingface:/root/.cache/huggingface \
     --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
     -p 8000:8000 \
     --ipc=host \
     vllm/vllm-openai:latest \
     --model Qwen/Qwen3-0.6B

docker-compose(NVIDIA)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. code-block:: yaml

   services:
     vllm:
       container_name: vllm-qwen2.5-7b
       restart: always
       image: vllm/vllm-openai:v0.8.1
       ipc: host
       volumes:
         - /mnt/data/llm_models:/llm_models
       environment:
         - CUDA_VISIBLE_DEVICES=0
       command:
         - --model
         - /llm_models/Qwen/Qwen2.5-7B-Instruct
         - --served-model-name
         - Qwen2.5-7B-Instruct
         - --port
         - "10013"
       ports:
         - "10013:10013"
       deploy:
         resources:
           reservations:
             devices:
               - driver: nvidia
                 count: all
                 capabilities: [gpu]