推理请求

# Text inference: a non-streaming chat request (system + user message) sent to
# the local /v1/chat/completions endpoint on port 1025. The `time` prefix
# prints how long the whole curl call took.
time curl \
    --request POST \
    --header "Accept: application/json" \
    --header "Content-type: application/json" \
    --data '{
    "model": "Qwen3-8B",
    "messages": [
        {"role": "system", "content": "你是一个数学家"},
        {"role": "user", "content": "介绍下泰勒公式"}
    ],
    "stream": false,
    "max_tokens": 1024,
    "presence_penalty": 1.03,
    "frequency_penalty": 1.0,
    "repetition_penalty": 1.0,
    "temperature": 0.5,
    "top_p": 0.95,
    "top_k": 10,
    "seed": null,
    "stop": ["stop1", "stop2"],
    "include_stop_str_in_output": false,
    "skip_special_tokens": true,
    "ignore_eos": false
}' \
    http://127.0.0.1:1025/v1/chat/completions

# Multimodal: a single user message whose content combines an image URL with a
# text question, sent to the same local /v1/chat/completions endpoint. The
# `time` prefix prints how long the whole curl call took.
time curl \
    --request POST \
    --header "Accept: application/json" \
    --header "Content-type: application/json" \
    --data '{
    "model": "Qwen2.5-VL-72B",
    "messages": [{
    "role": "user",
    "content": [
        {"type": "image_url", "image_url": {"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"}},
        {"type": "text", "text": "图片中有什么"}
    ]
    }],
    "max_tokens": 512,
    "do_sample": true,
    "repetition_penalty": 1.00,
    "temperature": 0.01,
    "top_p": 0.001,
    "top_k": 1
}' \
    http://127.0.0.1:1025/v1/chat/completions