Skip to content
Open

Ck #7869

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
9f6c3c0
support eb5 fp4 cuda_graph
lonelygsh Apr 15, 2026
55d1a05
update
lonelygsh Apr 15, 2026
3509714
merge develop
lonelygsh Apr 18, 2026
deebd2a
Merge branch 'develop' of https://github.com/PaddlePaddle/FastDeploy …
lonelygsh Apr 18, 2026
dd4118d
Support FP4 communication quantization
lonelygsh Apr 19, 2026
3fdbc08
fix
lonelygsh Apr 19, 2026
1226b27
Merge branch 'develop' of https://github.com/PaddlePaddle/FastDeploy …
lonelygsh Apr 20, 2026
6c3cc4b
update
lonelygsh Apr 21, 2026
e89dff7
fix
lonelygsh Apr 21, 2026
24d07c6
support mix_quant and nvfp4
lonelygsh Apr 22, 2026
19a7019
support prefill cuda_graph
lonelygsh Apr 24, 2026
842feba
support fp4 communication quantization
lonelygsh Apr 28, 2026
141ac55
support
lonelygsh May 14, 2026
4c076ce
fix
lonelygsh May 15, 2026
b643683
fix
lonelygsh May 15, 2026
2f4151c
add test
lonelygsh May 15, 2026
22ac5a0
merge develop
lonelygsh May 15, 2026
8443d62
update develop
lonelygsh May 15, 2026
d7f98f0
fix
lonelygsh May 15, 2026
264dbd8
fix
lonelygsh May 15, 2026
eae9a55
delete
May 18, 2026
9205ac7
update
lizexu123 May 18, 2026
3d64926
update
lizexu123 May 18, 2026
d2545c1
add document
lizexu123 May 18, 2026
ca03742
Merge branch 'develop' of https://github.com/PaddlePaddle/FastDeploy …
lizexu123 May 18, 2026
7a58d12
fix
lizexu123 May 19, 2026
aad041d
revert helper.h to develop
lizexu123 May 19, 2026
fb82351
Merge branch 'develop' of https://github.com/PaddlePaddle/FastDeploy …
lizexu123 May 19, 2026
e282850
Merge branch 'develop' of https://github.com/PaddlePaddle/FastDeploy …
lizexu123 May 19, 2026
a96f2a5
make_scale_interleaved
lizexu123 May 19, 2026
e20676d
fix
lizexu123 May 19, 2026
5284879
fix
lizexu123 May 20, 2026
3a80481
Merge remote-tracking branch 'origin/develop' into kkc
lizexu123 May 20, 2026
c22d3ea
fix
lizexu123 May 20, 2026
7eb643b
fix
lizexu123 May 20, 2026
9654004
fix
lizexu123 May 20, 2026
47bee0a
update skills
lizexu123 May 20, 2026
9655daa
Merge branch 'develop' of https://github.com/PaddlePaddle/FastDeploy …
lizexu123 May 20, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
245 changes: 164 additions & 81 deletions .claude/skills/benchmark-compare/scripts/extract_metrics.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
#!/usr/bin/env python3
"""extract_metrics.py — 从 benchmark 结果文件提取指标,输出结构化 JSON

支持框架: fd (FastDeploy) / sg (SGLang) / vllm (vLLM)
任意框架结果均可缺省,缺省的不参与对比。

用法:
python3 extract_metrics.py \
--fd-result <FD_RESULT.txt> \
--sg-result <SG_RESULT.txt> \
--vllm-result <VLLM_RESULT.txt> \
--model-path <MODEL_PATH> \
--fd-config '{"gpu":"H800","tp":1,"concurrency":32}' \
--sg-config '{"gpu":"H800","tp":1,"concurrency":32}' \
--vllm-config '{"gpu":"H800","tp":1,"concurrency":32}' \
--output <metrics.json>
"""

Expand All @@ -18,12 +23,16 @@
import subprocess
import sys

# Identifiers of the supported frameworks: fd (FastDeploy), sg (SGLang), vllm (vLLM).
# The tuple order is the order in which frameworks are iterated when metrics are
# collected and when the summary columns are printed; the same tuple is also used
# as the set of allowed values for the --baseline command-line option.
FRAMEWORKS = ("fd", "sg", "vllm")


def parse_benchmark_result(filepath):
"""解析 benchmark_serving.py 的输出文件,提取所有指标"""
metrics = {}
if not os.path.isfile(filepath):
print(f"[WARN] 结果文件不存在: {filepath}", file=sys.stderr)
if not filepath or not os.path.isfile(filepath):
if filepath:
print(f"[WARN] 结果文件不存在: {filepath}", file=sys.stderr)
return metrics

with open(filepath, "r") as f:
Expand Down Expand Up @@ -110,70 +119,104 @@ def get_model_info(model_path):
return info


def compute_comparison(fd_metrics, sg_metrics):
"""计算对比指标(差异百分比、胜出方)"""
# Throughput-style metrics: a larger value is better.
# For keys in this set the comparison picks the framework with the maximum value
# as the winner.
HIGHER_IS_BETTER = {
    "total_token_throughput",
    "output_token_throughput",
    "request_throughput",
    "mean_decode",
    "median_decode",
    "p80_decode",
    "p95_decode",
    "p99_decode",
}

# Latency-style metrics: a smaller value is better.
# For keys in this set the comparison picks the framework with the minimum value
# as the winner.
LOWER_IS_BETTER = {
    "mean_ttft",
    "median_ttft",
    "p80_ttft",
    "p95_ttft",
    "p99_ttft",
    "mean_tpot",
    "median_tpot",
    "p80_tpot",
    "p95_tpot",
    "p99_tpot",
    "mean_itl",
    "median_itl",
    "p80_itl",
    "p95_itl",
    "p99_itl",
    "mean_e2el",
    "median_e2el",
    "p80_e2el",
    "p95_e2el",
    "p99_e2el",
    "benchmark_duration",
}


def compute_comparison(all_metrics, baseline="sg"):
"""计算多框架对比指标。

all_metrics: {"fd": {...}, "sg": {...}, "vllm": {...}}(任意 key 可为空 dict)
baseline: 用于计算 diff_pct 的基准框架(默认 SGLang)

返回:
{
metric_key: {
"fd": ..., "sg": ..., "vllm": ...,
"diff_pct": {"fd": ..., "vllm": ...}, # 相对 baseline
"winner": "fd" | "sg" | "vllm" | "tie"
}
}
"""
comparison = {}

# 吞吐类指标:越高越好
higher_is_better = {
"total_token_throughput",
"output_token_throughput",
"request_throughput",
"mean_decode",
"median_decode",
"p80_decode",
"p95_decode",
"p99_decode",
}
# 只比较实际有数据的框架
active = [fw for fw in FRAMEWORKS if all_metrics.get(fw)]
if not active:
return comparison

# 延迟类指标:越低越好
lower_is_better = {
"mean_ttft",
"median_ttft",
"p80_ttft",
"p95_ttft",
"p99_ttft",
"mean_tpot",
"median_tpot",
"p80_tpot",
"p95_tpot",
"p99_tpot",
"mean_itl",
"median_itl",
"p80_itl",
"p95_itl",
"p99_itl",
"mean_e2el",
"median_e2el",
"p80_e2el",
"p95_e2el",
"p99_e2el",
"benchmark_duration",
}

all_keys = set(fd_metrics.keys()) | set(sg_metrics.keys())
# 收集所有指标 key
all_keys = set()
for fw in active:
all_keys |= set(all_metrics[fw].keys())

for key in sorted(all_keys):
fd_val = fd_metrics.get(key)
sg_val = sg_metrics.get(key)

if fd_val is None or sg_val is None:
entry = {}
per_fw_val = {}
for fw in active:
val = all_metrics[fw].get(key)
if val is None:
continue
entry[fw] = val
per_fw_val[fw] = val

if len(per_fw_val) < 2:
# 单框架数据,无法对比但仍记录
comparison[key] = entry
continue

entry = {"fd": fd_val, "sg": sg_val}

# 计算差异百分比 (FD 相对于 SG)
if sg_val != 0:
diff_pct = round((fd_val - sg_val) / sg_val * 100, 2)
else:
diff_pct = 0
entry["diff_pct"] = diff_pct
# 计算相对 baseline 的差异百分比
diff_pct = {}
base_val = per_fw_val.get(baseline)
for fw, val in per_fw_val.items():
if fw == baseline or base_val is None:
continue
if base_val != 0:
diff_pct[fw] = round((val - base_val) / base_val * 100, 2)
else:
diff_pct[fw] = 0
if diff_pct:
entry["diff_pct"] = diff_pct

# 判断胜出方
if key in higher_is_better:
entry["winner"] = "fd" if fd_val > sg_val else "sg"
elif key in lower_is_better:
entry["winner"] = "fd" if fd_val < sg_val else "sg"
if key in HIGHER_IS_BETTER:
entry["winner"] = max(per_fw_val, key=per_fw_val.get)
elif key in LOWER_IS_BETTER:
entry["winner"] = min(per_fw_val, key=per_fw_val.get)
else:
entry["winner"] = "tie"

Expand All @@ -184,40 +227,65 @@ def compute_comparison(fd_metrics, sg_metrics):

def main():
parser = argparse.ArgumentParser(description="从 benchmark 结果提取指标并生成对比 JSON")
parser.add_argument("--fd-result", required=True, help="FastDeploy 结果文件路径")
parser.add_argument("--sg-result", required=True, help="SGLang 结果文件路径")
parser.add_argument("--fd-result", default=None, help="FastDeploy 结果文件路径")
parser.add_argument("--sg-result", default=None, help="SGLang 结果文件路径")
parser.add_argument("--vllm-result", default=None, help="vLLM 结果文件路径")
parser.add_argument("--model-path", required=True, help="模型权重目录路径")
parser.add_argument("--fd-config", default="{}", help="FD 部署配置 JSON 字符串")
parser.add_argument("--sg-config", default="{}", help="SG 部署配置 JSON 字符串")
parser.add_argument("--vllm-config", default="{}", help="vLLM 部署配置 JSON 字符串")
parser.add_argument(
"--baseline", default="sg", choices=FRAMEWORKS, help="对比基准框架(计算 diff_pct 用),默认 sg"
)
parser.add_argument("--output", default="metrics.json", help="输出 JSON 路径")
args = parser.parse_args()

print(f"[INFO] 解析 FD 结果: {args.fd_result}")
fd_metrics = parse_benchmark_result(args.fd_result)
print(f"[INFO] 解析 SG 结果: {args.sg_result}")
sg_metrics = parse_benchmark_result(args.sg_result)
# 至少需要一份结果
if not any([args.fd_result, args.sg_result, args.vllm_result]):
parser.error("至少需要提供 --fd-result / --sg-result / --vllm-result 中的一个")

result_paths = {
"fd": args.fd_result,
"sg": args.sg_result,
"vllm": args.vllm_result,
}
config_strs = {
"fd": args.fd_config,
"sg": args.sg_config,
"vllm": args.vllm_config,
}
framework_display = {"fd": "FastDeploy", "sg": "SGLang", "vllm": "vLLM"}

all_metrics = {}
for fw in FRAMEWORKS:
path = result_paths[fw]
if path:
print(f"[INFO] 解析 {framework_display[fw]} 结果: {path}")
all_metrics[fw] = parse_benchmark_result(path)
else:
all_metrics[fw] = {}

print(f"[INFO] 读取模型信息: {args.model_path}")
model_info = get_model_info(args.model_path)

print("[INFO] 计算对比指标...")
comparison = compute_comparison(fd_metrics, sg_metrics)
print(f"[INFO] 计算对比指标 (baseline={args.baseline})...")
comparison = compute_comparison(all_metrics, baseline=args.baseline)

# 解析部署配置
fd_config = json.loads(args.fd_config) if args.fd_config else {}
sg_config = json.loads(args.sg_config) if args.sg_config else {}
configs = {}
for fw in FRAMEWORKS:
try:
configs[fw] = json.loads(config_strs[fw]) if config_strs[fw] else {}
except json.JSONDecodeError as e:
print(f"[WARN] 解析 --{fw}-config 失败: {e}", file=sys.stderr)
configs[fw] = {}

output = {
"model": model_info,
"config": {
"fd": fd_config,
"sg": sg_config,
},
"raw_metrics": {
"fd": fd_metrics,
"sg": sg_metrics,
},
"config": configs,
"raw_metrics": all_metrics,
"comparison": comparison,
"baseline": args.baseline,
}

with open(args.output, "w") as f:
Expand All @@ -236,14 +304,29 @@ def main():
"mean_decode",
"benchmark_duration",
]
active = [fw for fw in FRAMEWORKS if all_metrics.get(fw)]
if not active:
print("[WARN] 没有任何有效的结果数据")
return

print("\n========== 核心指标摘要 ==========")
print(f"{'Metric':<30} {'FD':>12} {'SG':>12} {'Diff%':>8} {'Winner':>8}")
print("-" * 72)
header = f"{'Metric':<30}"
for fw in active:
header += f" {framework_display[fw]:>12}"
header += f" {'Winner':>10}"
print(header)
print("-" * len(header))
for key in key_metrics:
if key in comparison:
c = comparison[key]
print(f"{key:<30} {c['fd']:>12.2f} {c['sg']:>12.2f} {c['diff_pct']:>+7.1f}% {c['winner']:>8}")
print("=" * 72)
if key not in comparison:
continue
c = comparison[key]
line = f"{key:<30}"
for fw in active:
val = c.get(fw)
line += f" {val:>12.2f}" if isinstance(val, (int, float)) else f" {'-':>12}"
line += f" {c.get('winner', '-'):>10}"
print(line)
print("=" * len(header))


if __name__ == "__main__":
Expand Down
Loading
Loading