55loader utilities plus pure functions that compute aggregate metrics. It is
66purpose-built for comparing two scheduler stacks:
77 - Llumnix (global) + Llumlet (replica)
8- - LOR (global) + vLLM (replica)
8+ - INFaaS (global) + vLLM (replica)
99
1010Metrics computed per run:
1111 - end-to-end latency (mean, p99)
1616 - resource usage (average instance count) and cost vs latency target
1717 - optional priority-aware slices (mean/p99 for highest-priority requests)
1818
19- Comparison helpers then compute speedups (LOR → Llumnix) so results line up with
19+ Comparison helpers then compute speedups (INFaaS → Llumnix) so results line up with
2020the Llumnix paper reporting style.
2121"""
2222
@@ -79,8 +79,12 @@ def _load_batch_metrics(run_dir: Path) -> pd.DataFrame:
7979
def _load_request_df(run_dir: Path) -> pd.DataFrame:
    """Load the per-request metrics for one run directory.

    If ``run_dir / "chrome_trace.json"`` exists, its trace events are loaded
    and used to build the request-priority mapping passed to the metrics
    loader. When the file is missing, or yields no events, an empty mapping
    is passed instead.

    Args:
        run_dir: Directory holding the outputs of a single run.

    Returns:
        Whatever ``la._load_request_metrics`` returns for ``run_dir`` and the
        priority mapping (annotated here as a ``pd.DataFrame``).
    """
    chrome_trace_path = run_dir / "chrome_trace.json"
    # The trace is optional: a run without one simply carries no priorities.
    trace_events = (
        la._load_trace_events(chrome_trace_path) if chrome_trace_path.exists() else []
    )
    # Skip extraction entirely when there are no events to inspect.
    request_priorities = (
        la._extract_request_priorities(trace_events) if trace_events else {}
    )
    return la._load_request_metrics(run_dir, request_priorities)
8589
8690
@@ -116,7 +120,9 @@ def _fragmentation_metrics(batch_df: pd.DataFrame, config: Dict) -> Dict[str, ob
116120 return {"avg_fragmentation" : None , "series" : pd .DataFrame ()}
117121
118122 sched_cfg = config .get ("cluster_config" , {}).get ("replica_scheduler_config" , {})
119- block_size = sched_cfg .get ("block_size" ) or config .get ("cluster_config" , {}).get ("replica_config" , {}).get ("block_size" )
123+ block_size = sched_cfg .get ("block_size" ) or config .get ("cluster_config" , {}).get (
124+ "replica_config" , {}
125+ ).get ("block_size" )
120126 num_blocks = sched_cfg .get ("num_blocks" )
121127
122128 if not block_size or not num_blocks :
@@ -137,7 +143,9 @@ def _fragmentation_metrics(batch_df: pd.DataFrame, config: Dict) -> Dict[str, ob
137143 return {"avg_fragmentation" : _safe_mean (frag_series ), "series" : series_df }
138144
139145
140- def _resource_usage (config : Dict , latency_target : Optional [float ]) -> Dict [str , Optional [float ]]:
146+ def _resource_usage (
147+ config : Dict , latency_target : Optional [float ]
148+ ) -> Dict [str , Optional [float ]]:
141149 cluster_cfg = config .get ("cluster_config" , {})
142150 replica_cfg = cluster_cfg .get ("replica_config" , {})
143151 num_replicas = cluster_cfg .get ("num_replicas" ) or 0
@@ -162,7 +170,9 @@ def _resource_usage(config: Dict, latency_target: Optional[float]) -> Dict[str,
162170 }
163171
164172
165- def _priority_slice_metrics (request_df : pd .DataFrame , column : str ) -> Dict [str , Optional [float ]]:
173+ def _priority_slice_metrics (
174+ request_df : pd .DataFrame , column : str
175+ ) -> Dict [str , Optional [float ]]:
166176 if column not in request_df .columns or "priority" not in request_df .columns :
167177 return {"mean" : None , "p99" : None }
168178 high_prio = request_df ["priority" ].max ()
@@ -175,15 +185,22 @@ def _priority_slice_metrics(request_df: pd.DataFrame, column: str) -> Dict[str,
175185 }
176186
177187
178- def compute_run_metrics (run_dir : Path , system : str , name : Optional [str ] = None , latency_target : Optional [float ] = None ) -> Tuple [RunData , Dict ]:
188+ def compute_run_metrics (
189+ run_dir : Path ,
190+ system : str ,
191+ name : Optional [str ] = None ,
192+ latency_target : Optional [float ] = None ,
193+ ) -> Tuple [RunData , Dict ]:
179194 """Load a single run directory and compute aggregate metrics."""
180195 request_df = _load_request_df (run_dir )
181196 batch_df = _load_batch_metrics (run_dir )
182197 config = _load_config (run_dir )
183198
184199 latency = _latency_stats (request_df , "request_e2e_time" )
185200 prefill = _latency_stats (request_df , "prefill_e2e_time" )
186- decode = _latency_stats (request_df , "decode_time_execution_plus_preemption_normalized" )
201+ decode = _latency_stats (
202+ request_df , "decode_time_execution_plus_preemption_normalized"
203+ )
187204 preemption = _preemption_metrics (request_df )
188205 fragmentation = _fragmentation_metrics (batch_df , config )
189206 resource = _resource_usage (config , latency_target or latency ["p99" ])
@@ -221,39 +238,64 @@ def _speedup(baseline: Optional[float], contender: Optional[float]) -> Optional[
221238 return float (baseline / contender )
222239
223240
def compare_runs(
    llumnix_metrics: Dict, infaas_metrics: Dict
) -> Dict[str, Optional[float]]:
    """
    Compute speedups using INFaaS as baseline and Llumnix as contender.
    Speedup > 1.0 means Llumnix is faster.

    Both arguments are nested metric dicts indexed as ``metrics[section][field]``.
    The sections read here are ``latency``, ``prefill``, ``decode``,
    ``preemption``, ``fragmentation``, ``resource`` and ``priority``.

    Ratio entries (``*_speedup``, ``*_ratio``) are delegated to ``_speedup`` with
    the INFaaS value as baseline and the Llumnix value as contender. Delta
    entries (``*_delta``) are ``INFaaS - Llumnix``, or ``None`` when either
    side is ``None``.

    Raises:
        KeyError: If a required section or field is missing from either dict.
    """

    def ratio(section: str, field: str) -> Optional[float]:
        # Baseline (INFaaS) first, contender (Llumnix) second.
        return _speedup(
            infaas_metrics[section][field], llumnix_metrics[section][field]
        )

    def delta(section: str, field: str) -> Optional[float]:
        baseline = infaas_metrics[section][field]
        contender = llumnix_metrics[section][field]
        # A delta is only meaningful when both runs reported the metric.
        if baseline is None or contender is None:
            return None
        return float(baseline - contender)

    return {
        "e2e_mean_speedup": ratio("latency", "mean"),
        "e2e_p99_speedup": ratio("latency", "p99"),
        "prefill_mean_speedup": ratio("prefill", "mean"),
        "prefill_p99_speedup": ratio("prefill", "p99"),
        "decode_mean_ratio": ratio("decode", "mean"),
        "decode_p99_ratio": ratio("decode", "p99"),
        "preemption_rate_delta": delta("preemption", "rate"),
        "preemption_loss_delta": delta("preemption", "loss"),
        "fragmentation_delta": delta("fragmentation", "avg"),
        "cost_ratio": ratio("resource", "run_cost"),
        "cost_per_latency_ratio": ratio("resource", "cost_vs_latency_target"),
        "priority_mean_speedup": ratio("priority", "mean"),
        "priority_p99_speedup": ratio("priority", "p99"),
    }
0 commit comments