推理请求
==================

.. code-block:: shell

   # 文本推理
   time curl -H "Accept: application/json" -H "Content-type: application/json" -X POST -d '{
     "model": "Qwen3-8B",
     "messages": [
       {"role": "system", "content": "你是一个数学家"},
       {"role": "user", "content": "介绍下泰勒公式"}
     ],
     "stream": false,
     "max_tokens": 1024,
     "presence_penalty": 1.03,
     "frequency_penalty": 1.0,
     "repetition_penalty": 1.0,
     "temperature": 0.5,
     "top_p": 0.95,
     "top_k": 10,
     "seed": null,
     "stop": ["stop1", "stop2"],
     "include_stop_str_in_output": false,
     "skip_special_tokens": true,
     "ignore_eos": false
   }' http://127.0.0.1:1025/v1/chat/completions

   # 多模态
   time curl -H "Accept: application/json" -H "Content-type: application/json" -X POST -d '{
     "model": "Qwen2.5-VL-72B",
     "messages": [{
       "role": "user",
       "content": [
         {"type": "image_url", "image_url": {"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"}},
         {"type": "text", "text": "图片中有什么"}
       ]
     }],
     "max_tokens": 512,
     "do_sample": true,
     "repetition_penalty": 1.00,
     "temperature": 0.01,
     "top_p": 0.001,
     "top_k": 1
   }' http://127.0.0.1:1025/v1/chat/completions