NVIDIA GPU 容器创建（docker run）
# Launch the vLLM OpenAI-compatible server in a container with GPU access.
# NOTE: `--gpus 0` requests a COUNT of zero GPUs (the bare-number form of the
# flag is a count, not a device index) — the server would start with no GPU.
# Use `--gpus all`, or `--gpus '"device=0"'` to pin a specific device.
docker run --runtime nvidia --gpus all \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
  -p 8000:8000 \
  --ipc=host \
  vllm/vllm-openai:latest \
  --model Qwen/Qwen3-0.6B
docker-compose（NVIDIA GPU）
# Compose service: vLLM serving a local Qwen2.5-7B-Instruct model on port 10013.
services:
  vllm:
    container_name: vllm-qwen2.5-7b
    restart: always
    image: vllm/vllm-openai:v0.8.1
    # Shared memory for PyTorch tensor-parallel / dataloader workers.
    ipc: host
    volumes:
      # Host model directory mounted read into the container.
      - /mnt/data/llm_models:/llm_models
    environment:
      # Restrict vLLM to GPU 0 even though all devices are reserved below.
      - CUDA_VISIBLE_DEVICES=0
    # Arguments appended to the image's vLLM server entrypoint.
    command:
      - --model
      - /llm_models/Qwen/Qwen2.5-7B-Instruct
      - --served-model-name
      - Qwen2.5-7B-Instruct
      - --port
      - "10013"
    ports:
      # Quoted: unquoted digit:digit scalars are sexagesimal ints in YAML 1.1.
      - "10013:10013"
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]