纯模型推理

单机多卡

# Single-node run. run.sh is invoked by relative path, so only start it once
# the cd into the modeltest directory has succeeded (&& skips it otherwise).
# '[[512,512]]' is quoted because an unquoted [...] word is a glob pattern in
# bash and could be replaced by a matching file name in the current directory.
cd /usr/local/Ascend/atb-models/tests/modeltest &&
  bash run.sh pa_fp16 performance '[[512,512]]' 16 qwen /data/llm_models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/ 8

多机多卡

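# parallel_params: [dp, tp, sp, moe_tp, moe_ep, pp, microbatch_size]
# NOTE: the list above names 7 fields, but the commands below pass only 6
# values ([2,8,4,4,-1,-1]). Confirm the field order and count expected by the
# installed run.sh version before relying on either.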

# server1
# run.sh is a relative path: run this from the directory that contains it
# (the single-node example uses /usr/local/Ascend/atb-models/tests/modeltest).
# The bracketed arguments are quoted because an unquoted [...] word is a glob
# pattern in bash; [2,8,4,4,-1,-1] would be replaced by a matching
# single-character file name (e.g. a stray file called 1 or 2) if one exists
# in the current directory.
# NOTE(review): the only difference from the server2 command is the 0 after
# "16 2" -- presumably this node's starting rank id; confirm against run.sh.
bash run.sh pa_bf16 performance '[[256,256]]' 192 8 deepseekv2 /data2/deepseek-r1-bf16/ /data2/ranktable_4.json 16 2 0 172.16.66.1 '[2,8,4,4,-1,-1]'

# server2
# run.sh is a relative path: run this from the directory that contains it
# (the single-node example uses /usr/local/Ascend/atb-models/tests/modeltest).
# The bracketed arguments are quoted because an unquoted [...] word is a glob
# pattern in bash; [2,8,4,4,-1,-1] would be replaced by a matching
# single-character file name (e.g. a stray file called 1 or 2) if one exists
# in the current directory.
# NOTE(review): the only difference from the server1 command is the 8 after
# "16 2" -- presumably this node's starting rank id; confirm against run.sh.
bash run.sh pa_bf16 performance '[[256,256]]' 192 8 deepseekv2 /data2/deepseek-r1-bf16/ /data2/ranktable_4.json 16 2 8 172.16.66.1 '[2,8,4,4,-1,-1]'