纯模型推理
单机多卡
# run.sh is invoked by relative path, so switch to its directory first.
cd /usr/local/Ascend/atb-models/tests/modeltest
# The bracketed list is single-quoted so the shell passes it to run.sh
# literally instead of treating it as a filename glob pattern.
bash run.sh pa_fp16 performance '[[512,512]]' 16 qwen /data/llm_models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/ 8
多机多卡
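# parallel_params (the last bracketed argument): [dp, tp, sp, moe_tp, moe_ep, pp, microbatch_size]
# NOTE(review): this list names seven fields, but the commands below pass only six values ([2,8,4,4,-1,-1]) - confirm the field order against the run.sh version in use.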
# server1
# Both bracketed list arguments are single-quoted so the shell passes them
# to run.sh literally; unquoted, [2,8,4,4,-1,-1] is a glob that matches any
# one-character filename such as "1" or "2" in the current directory.
# NOTE(review): the value after "16 2" is 0 here and 8 in the server2
# command - presumably this node's starting rank id; confirm.
bash run.sh pa_bf16 performance '[[256,256]]' 192 8 deepseekv2 /data2/deepseek-r1-bf16/ /data2/ranktable_4.json 16 2 0 172.16.66.1 '[2,8,4,4,-1,-1]'
# server2
# Both bracketed list arguments are single-quoted so the shell passes them
# to run.sh literally; unquoted, [2,8,4,4,-1,-1] is a glob that matches any
# one-character filename such as "1" or "2" in the current directory.
# NOTE(review): the value after "16 2" is 8 here and 0 in the server1
# command - presumably this node's starting rank id; confirm.
bash run.sh pa_bf16 performance '[[256,256]]' 192 8 deepseekv2 /data2/deepseek-r1-bf16/ /data2/ranktable_4.json 16 2 8 172.16.66.1 '[2,8,4,4,-1,-1]'