Skip to content

Commit f7a21c8

Browse files
committed
changed for infaas in system metrics
1 parent df32fa9 commit f7a21c8

1 file changed

Lines changed: 69 additions & 27 deletions

File tree

vidur/metrics/system_metrics.py

Lines changed: 69 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
loader utilities plus pure functions that compute aggregate metrics. It is
66
purpose-built for comparing two scheduler stacks:
77
- Llumnix (global) + Llumlet (replica)
8-
- LOR (global) + vLLM (replica)
8+
- INFaaS (global) + vLLM (replica)
99
1010
Metrics computed per run:
1111
- end-to-end latency (mean, p99)
@@ -16,7 +16,7 @@
1616
- resource usage (average instance count) and cost vs latency target
1717
- optional priority-aware slices (mean/p99 for highest-priority requests)
1818
19-
Comparison helpers then compute speedups (LOR→Llumnix) so results line up with
19+
Comparison helpers then compute speedups (INFaaS→Llumnix) so results line up with
2020
the Llumnix paper reporting style.
2121
"""
2222

@@ -79,8 +79,12 @@ def _load_batch_metrics(run_dir: Path) -> pd.DataFrame:
7979

8080
def _load_request_df(run_dir: Path) -> pd.DataFrame:
8181
chrome_trace_path = run_dir / "chrome_trace.json"
82-
trace_events = la._load_trace_events(chrome_trace_path) if chrome_trace_path.exists() else []
83-
request_priorities = la._extract_request_priorities(trace_events) if trace_events else {}
82+
trace_events = (
83+
la._load_trace_events(chrome_trace_path) if chrome_trace_path.exists() else []
84+
)
85+
request_priorities = (
86+
la._extract_request_priorities(trace_events) if trace_events else {}
87+
)
8488
return la._load_request_metrics(run_dir, request_priorities)
8589

8690

@@ -116,7 +120,9 @@ def _fragmentation_metrics(batch_df: pd.DataFrame, config: Dict) -> Dict[str, ob
116120
return {"avg_fragmentation": None, "series": pd.DataFrame()}
117121

118122
sched_cfg = config.get("cluster_config", {}).get("replica_scheduler_config", {})
119-
block_size = sched_cfg.get("block_size") or config.get("cluster_config", {}).get("replica_config", {}).get("block_size")
123+
block_size = sched_cfg.get("block_size") or config.get("cluster_config", {}).get(
124+
"replica_config", {}
125+
).get("block_size")
120126
num_blocks = sched_cfg.get("num_blocks")
121127

122128
if not block_size or not num_blocks:
@@ -137,7 +143,9 @@ def _fragmentation_metrics(batch_df: pd.DataFrame, config: Dict) -> Dict[str, ob
137143
return {"avg_fragmentation": _safe_mean(frag_series), "series": series_df}
138144

139145

140-
def _resource_usage(config: Dict, latency_target: Optional[float]) -> Dict[str, Optional[float]]:
146+
def _resource_usage(
147+
config: Dict, latency_target: Optional[float]
148+
) -> Dict[str, Optional[float]]:
141149
cluster_cfg = config.get("cluster_config", {})
142150
replica_cfg = cluster_cfg.get("replica_config", {})
143151
num_replicas = cluster_cfg.get("num_replicas") or 0
@@ -162,7 +170,9 @@ def _resource_usage(config: Dict, latency_target: Optional[float]) -> Dict[str,
162170
}
163171

164172

165-
def _priority_slice_metrics(request_df: pd.DataFrame, column: str) -> Dict[str, Optional[float]]:
173+
def _priority_slice_metrics(
174+
request_df: pd.DataFrame, column: str
175+
) -> Dict[str, Optional[float]]:
166176
if column not in request_df.columns or "priority" not in request_df.columns:
167177
return {"mean": None, "p99": None}
168178
high_prio = request_df["priority"].max()
@@ -175,15 +185,22 @@ def _priority_slice_metrics(request_df: pd.DataFrame, column: str) -> Dict[str,
175185
}
176186

177187

178-
def compute_run_metrics(run_dir: Path, system: str, name: Optional[str] = None, latency_target: Optional[float] = None) -> Tuple[RunData, Dict]:
188+
def compute_run_metrics(
189+
run_dir: Path,
190+
system: str,
191+
name: Optional[str] = None,
192+
latency_target: Optional[float] = None,
193+
) -> Tuple[RunData, Dict]:
179194
"""Load a single run directory and compute aggregate metrics."""
180195
request_df = _load_request_df(run_dir)
181196
batch_df = _load_batch_metrics(run_dir)
182197
config = _load_config(run_dir)
183198

184199
latency = _latency_stats(request_df, "request_e2e_time")
185200
prefill = _latency_stats(request_df, "prefill_e2e_time")
186-
decode = _latency_stats(request_df, "decode_time_execution_plus_preemption_normalized")
201+
decode = _latency_stats(
202+
request_df, "decode_time_execution_plus_preemption_normalized"
203+
)
187204
preemption = _preemption_metrics(request_df)
188205
fragmentation = _fragmentation_metrics(batch_df, config)
189206
resource = _resource_usage(config, latency_target or latency["p99"])
@@ -221,39 +238,64 @@ def _speedup(baseline: Optional[float], contender: Optional[float]) -> Optional[
221238
return float(baseline / contender)
222239

223240

224-
def compare_runs(llumnix_metrics: Dict, lor_metrics: Dict) -> Dict[str, Optional[float]]:
241+
def compare_runs(
242+
llumnix_metrics: Dict, infaas_metrics: Dict
243+
) -> Dict[str, Optional[float]]:
225244
"""
226-
Compute speedups using LOR as baseline and Llumnix as contender.
245+
Compute speedups using INFaaS as baseline and Llumnix as contender.
227246
Speedup > 1.0 means Llumnix is faster.
228247
"""
229248

230249
return {
231-
"e2e_mean_speedup": _speedup(lor_metrics["latency"]["mean"], llumnix_metrics["latency"]["mean"]),
232-
"e2e_p99_speedup": _speedup(lor_metrics["latency"]["p99"], llumnix_metrics["latency"]["p99"]),
233-
"prefill_mean_speedup": _speedup(lor_metrics["prefill"]["mean"], llumnix_metrics["prefill"]["mean"]),
234-
"prefill_p99_speedup": _speedup(lor_metrics["prefill"]["p99"], llumnix_metrics["prefill"]["p99"]),
235-
"decode_mean_ratio": _speedup(lor_metrics["decode"]["mean"], llumnix_metrics["decode"]["mean"]),
236-
"decode_p99_ratio": _speedup(lor_metrics["decode"]["p99"], llumnix_metrics["decode"]["p99"]),
250+
"e2e_mean_speedup": _speedup(
251+
infaas_metrics["latency"]["mean"], llumnix_metrics["latency"]["mean"]
252+
),
253+
"e2e_p99_speedup": _speedup(
254+
infaas_metrics["latency"]["p99"], llumnix_metrics["latency"]["p99"]
255+
),
256+
"prefill_mean_speedup": _speedup(
257+
infaas_metrics["prefill"]["mean"], llumnix_metrics["prefill"]["mean"]
258+
),
259+
"prefill_p99_speedup": _speedup(
260+
infaas_metrics["prefill"]["p99"], llumnix_metrics["prefill"]["p99"]
261+
),
262+
"decode_mean_ratio": _speedup(
263+
infaas_metrics["decode"]["mean"], llumnix_metrics["decode"]["mean"]
264+
),
265+
"decode_p99_ratio": _speedup(
266+
infaas_metrics["decode"]["p99"], llumnix_metrics["decode"]["p99"]
267+
),
237268
"preemption_rate_delta": None
238-
if lor_metrics["preemption"]["rate"] is None or llumnix_metrics["preemption"]["rate"] is None
239-
else float(lor_metrics["preemption"]["rate"] - llumnix_metrics["preemption"]["rate"]),
269+
if infaas_metrics["preemption"]["rate"] is None
270+
or llumnix_metrics["preemption"]["rate"] is None
271+
else float(
272+
infaas_metrics["preemption"]["rate"] - llumnix_metrics["preemption"]["rate"]
273+
),
240274
"preemption_loss_delta": None
241-
if lor_metrics["preemption"]["loss"] is None or llumnix_metrics["preemption"]["loss"] is None
242-
else float(lor_metrics["preemption"]["loss"] - llumnix_metrics["preemption"]["loss"]),
275+
if infaas_metrics["preemption"]["loss"] is None
276+
or llumnix_metrics["preemption"]["loss"] is None
277+
else float(
278+
infaas_metrics["preemption"]["loss"] - llumnix_metrics["preemption"]["loss"]
279+
),
243280
"fragmentation_delta": None
244-
if lor_metrics["fragmentation"]["avg"] is None or llumnix_metrics["fragmentation"]["avg"] is None
245-
else float(lor_metrics["fragmentation"]["avg"] - llumnix_metrics["fragmentation"]["avg"]),
281+
if infaas_metrics["fragmentation"]["avg"] is None
282+
or llumnix_metrics["fragmentation"]["avg"] is None
283+
else float(
284+
infaas_metrics["fragmentation"]["avg"]
285+
- llumnix_metrics["fragmentation"]["avg"]
286+
),
246287
"cost_ratio": _speedup(
247-
lor_metrics["resource"]["run_cost"], llumnix_metrics["resource"]["run_cost"]
288+
infaas_metrics["resource"]["run_cost"],
289+
llumnix_metrics["resource"]["run_cost"],
248290
),
249291
"cost_per_latency_ratio": _speedup(
250-
lor_metrics["resource"]["cost_vs_latency_target"],
292+
infaas_metrics["resource"]["cost_vs_latency_target"],
251293
llumnix_metrics["resource"]["cost_vs_latency_target"],
252294
),
253295
"priority_mean_speedup": _speedup(
254-
lor_metrics["priority"]["mean"], llumnix_metrics["priority"]["mean"]
296+
infaas_metrics["priority"]["mean"], llumnix_metrics["priority"]["mean"]
255297
),
256298
"priority_p99_speedup": _speedup(
257-
lor_metrics["priority"]["p99"], llumnix_metrics["priority"]["p99"]
299+
infaas_metrics["priority"]["p99"], llumnix_metrics["priority"]["p99"]
258300
),
259301
}

0 commit comments

Comments (0)