# Text inference.
# Sends a single non-streaming chat-completion request (system + user message)
# for model "Qwen3-8B" to the local endpoint on 127.0.0.1:1025 and reports the
# wall-clock time of the call via `time`.
# NOTE(review): "stop1"/"stop2" look like placeholder stop strings -- replace
# them with real stop sequences or confirm they are intentional.
# NOTE(review): several sampling fields (repetition_penalty, top_k,
# include_stop_str_in_output, skip_special_tokens, ignore_eos) are presumably
# server-specific extensions; verify the target server accepts them.
time curl \
  --url http://127.0.0.1:1025/v1/chat/completions \
  --request POST \
  --header 'Accept: application/json' \
  --header 'Content-type: application/json' \
  --data '{
"model": "Qwen3-8B",
"messages": [
{"role": "system", "content": "你是一个数学家"},
{"role": "user", "content": "介绍下泰勒公式"}
],
"stream": false,
"max_tokens": 1024,
"presence_penalty": 1.03,
"frequency_penalty": 1.0,
"repetition_penalty": 1.0,
"temperature": 0.5,
"top_p": 0.95,
"top_k": 10,
"seed": null,
"stop": ["stop1", "stop2"],
"include_stop_str_in_output": false,
"skip_special_tokens": true,
"ignore_eos": false
}'
# Multimodal inference.
# Sends one chat-completion request for model "Qwen2.5-VL-72B" whose single
# user message carries an image (referenced by URL) followed by a text
# question, to the local endpoint on 127.0.0.1:1025, and times the call.
# NOTE(review): temperature 0.01 / top_p 0.001 / top_k 1 presumably aim for
# near-deterministic output even though do_sample is true -- confirm intent.
# NOTE(review): the image is given as a remote URL; whether the server or the
# client fetches it is not visible here -- verify network access as needed.
time curl \
  --url http://127.0.0.1:1025/v1/chat/completions \
  --request POST \
  --header 'Accept: application/json' \
  --header 'Content-type: application/json' \
  --data '{
"model": "Qwen2.5-VL-72B",
"messages": [{
"role": "user",
"content": [
{"type": "image_url", "image_url": {"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"}},
{"type": "text", "text": "图片中有什么"}
]
}],
"max_tokens": 512,
"do_sample": true,
"repetition_penalty": 1.00,
"temperature": 0.01,
"top_p": 0.001,
"top_k": 1
}'