
Commit 6f17ca0

ltiao authored and facebook-github-bot committed
Refactor get_trace and extend get_opt_trace_by_steps to MOO/constrained
Summary:
We have a method `get_opt_trace_by_steps` that was used extensively during our Ax 1.0 benchmarking campaign. It duplicates the basic logic of `get_trace` but differs in that it operates along `(trial_index, MAP_KEY)` pairs and respects ordering by timestamp (i.e. chronological order). However, it is limited to single-objective unconstrained problems, and our current needs (multi-objective and/or constrained) have outgrown it.

We reconcile the two by extracting three core building blocks of `get_trace`:

1. `_pivot_data_with_feasibility`: Pivots data to wide format with feasibility information and metric completeness checks.
2. `_compute_trace_values`: Computes per-observation trace values (hypervolume for MOO, objective value for SOO), with cumulative-best support.
3. `_aggregate_and_cumulate_trace`: Aggregates values by groups and computes the cumulative best across groups.

These are implemented in a more general way that respects arbitrary groupings and orderings. We then refactor `get_trace` (and its helpers `_prepare_data_for_trace` and `get_trace_by_arm_pull_from_data`) to use these building blocks, and leverage them in `get_opt_trace_by_steps` to extend its support to multi-objective and constrained problems.

Additionally:
- The timestamp-based sorting in `get_opt_trace_by_steps` is preserved, which is critical for correct cumulative hypervolume computation (without it, observations would be processed in `(trial_index, arm_name, MAP_KEY)` order rather than in chronological order).
- Tests are updated to replace the `NotImplementedError` checks with actual MOO and constrained test cases that verify the correctness of the new functionality.

Differential Revision: D79581270
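To make the extracted pipeline concrete, here is a self-contained pandas sketch of the pivot -> per-observation value -> aggregate -> cumulative-best flow for a single-objective minimization problem. It is an illustration only: the actual helpers in `ax.service.utils.best_point` also handle feasibility, metric completeness, and hypervolume computation, and their exact signatures are not reproduced here.

import pandas as pd

# Long-format data: one row per (trial, arm, step, metric) observation.
long_df = pd.DataFrame(
    {
        "trial_index": [0, 0, 1, 1],
        "arm_name": ["0_0", "0_0", "1_0", "1_0"],
        "step": [0, 1, 0, 1],
        "metric_name": ["objective"] * 4,
        "mean": [5.0, 3.0, 4.0, 2.0],
    }
)

# 1. Pivot to wide format, one column per metric
#    (cf. _pivot_data_with_feasibility).
wide = long_df.pivot_table(
    index=["trial_index", "arm_name", "step"], columns="metric_name", values="mean"
).reset_index()

# 2. Per-observation trace value; for SOO this is the objective itself
#    (cf. _compute_trace_values).
wide["value"] = wide["objective"]

# 3. Aggregate within each (trial_index, step) group, then take the running
#    best across groups (cf. _aggregate_and_cumulate_trace); minimization here.
trace = wide.groupby(["trial_index", "step"], sort=False)["value"].min().cummin()
print(trace.to_numpy())  # [5. 3. 3. 2.]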
1 parent 8a706b8 commit 6f17ca0

4 files changed

Lines changed: 346 additions & 129 deletions


ax/benchmark/benchmark.py

Lines changed: 72 additions & 51 deletions
@@ -54,9 +54,13 @@
 from ax.generation_strategy.generation_strategy import GenerationStrategy
 from ax.orchestration.orchestrator import Orchestrator
 from ax.service.utils.best_point import (
+    _aggregate_and_cumulate_trace,
+    _compute_trace_values,
+    _pivot_data_with_feasibility,
     _prepare_data_for_trace,
     derelativize_opt_config,
     get_trace,
+    is_row_feasible,
 )
 from ax.service.utils.best_point_mixin import BestPointMixin
 from ax.service.utils.orchestrator_options import OrchestratorOptions, TrialType
@@ -791,64 +795,80 @@ def get_opt_trace_by_steps(experiment: Experiment) -> npt.NDArray:
     that is in terms of steps, with one element added each time a step
     completes.
 
+    Supports single-objective, multi-objective, and constrained problems.
+    For multi-objective problems, the trace is in terms of hypervolume.
+
     Args:
         experiment: An experiment produced by `benchmark_replication`; it must
            have `BenchmarkTrialMetadata` (as produced by `BenchmarkRunner`) for
            each trial, and its data must have a "step" column.
     """
     optimization_config = none_throws(experiment.optimization_config)
+    full_df = experiment.lookup_data().full_df
 
-    if optimization_config.is_moo_problem:
-        raise NotImplementedError(
-            "Cumulative epochs only supported for single objective problems."
-        )
-    if len(optimization_config.outcome_constraints) > 0:
-        raise NotImplementedError(
-            "Cumulative epochs not supported for problems with outcome constraints."
-        )
+    full_df["row_feasible"] = is_row_feasible(
+        df=full_df,
+        optimization_config=optimization_config,
+        # For the sake of this function, we only care about feasible trials. The
+        # distinction between infeasible and undetermined is not important.
+        undetermined_value=False,
+    )
 
-    objective_name = optimization_config.objective.metric.name
-    data = experiment.lookup_data()
-    full_df = data.full_df
+    # Pivot to wide format with feasibility
+    df_wide = _pivot_data_with_feasibility(
+        df=full_df,
+        index=["trial_index", "arm_name", MAP_KEY],
+        optimization_config=optimization_config,
+    )
 
-    # Has timestamps; needs to be merged with full_df because it contains
-    # data on epochs that didn't actually run due to early stopping, and we need
-    # to know which actually ran
-    def _get_df(trial: Trial) -> pd.DataFrame:
+    def _get_timestamps(experiment: Experiment) -> pd.Series:
         """
-        Get the (virtual) time each epoch finished at.
+        Get the (virtual) time at which each training progression finished.
         """
-        metadata = trial.run_metadata["benchmark_metadata"]
-        backend_simulator = none_throws(metadata.backend_simulator)
-        # Data for the first metric, which is the only metric
-        df = next(iter(metadata.dfs.values()))
-        start_time = backend_simulator.get_sim_trial_by_index(
-            trial.index
-        ).sim_start_time
-        df["time"] = df["virtual runtime"] + start_time
-        return df
-
-    with_timestamps = pd.concat(
-        (
-            _get_df(trial=assert_is_instance(trial, Trial))
-            for trial in experiment.trials.values()
-        ),
-        axis=0,
-        ignore_index=True,
-    )[["trial_index", MAP_KEY, "time"]]
-
-    df = (
-        full_df.loc[
-            full_df["metric_name"] == objective_name,
-            ["trial_index", "arm_name", "mean", MAP_KEY],
-        ]
-        .merge(with_timestamps, how="left")
-        .sort_values("time", ignore_index=True)
+        frames = []
+        for trial in experiment.trials.values():
+            trial = assert_is_instance(trial, Trial)
+            metadata = trial.run_metadata["benchmark_metadata"]
+            backend_simulator = none_throws(metadata.backend_simulator)
+            sim_trial = backend_simulator.get_sim_trial_by_index(
+                trial_index=trial.index
+            )
+            start_time = sim_trial.sim_start_time
+            # timestamps are identical across all metrics, so just use the first one
+            frame = next(iter(metadata.dfs.values())).copy()
+            frame["time"] = frame["virtual runtime"] + start_time
+            frames.append(frame)
+        df = pd.concat(frames, axis=0, ignore_index=True).set_index(
+            ["trial_index", "arm_name", MAP_KEY]
+        )
+        return df["time"]
+
+    # Compute timestamps and join with df_wide *before* cumulative computations.
+    # This is critical because cumulative HV/objective calculations depend on
+    # the temporal ordering of observations.
+    timestamps = _get_timestamps(experiment=experiment)
+
+    # Merge timestamps and sort by time before cumulative computations
+    df_wide = df_wide.join(
+        timestamps, on=["trial_index", "arm_name", MAP_KEY], how="left"
+    ).sort_values(by="time", ascending=True, ignore_index=True)
+
+    # Compute per-observation cumulative values (hypervolume for MOO,
+    # objective value for SOO) over the timestamp-sorted rows
+    df_wide["value"], maximize = _compute_trace_values(
+        df_wide=df_wide,
+        optimization_config=optimization_config,
+        use_cumulative_best=True,
     )
-    return (
-        df["mean"].cummin()
-        if optimization_config.objective.minimize
-        else df["mean"].cummax()
+    # Get a value for each (trial_index, arm_name, MAP_KEY) tuple
+    value_by_arm_pull = df_wide[["trial_index", "arm_name", MAP_KEY, "value"]]
+
+    # Aggregate by trial and step; keep_order=True preserves the time sort
+    return _aggregate_and_cumulate_trace(
+        df=value_by_arm_pull,
+        by=["trial_index", MAP_KEY],
+        maximize=maximize,
+        keep_order=True,
    ).to_numpy()
 
 
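The time-based sort in the hunk above is what makes the running best chronological. A toy, self-contained pandas example (not Ax code) of how the cumulative maximum differs when two trials run concurrently and their steps interleave in time:

import pandas as pd

df = pd.DataFrame(
    {
        "trial_index": [0, 0, 1, 1],
        "step": [0, 1, 0, 1],
        # trial 1 finishes both of its steps before trial 0's second step
        "time": [1.0, 4.0, 2.0, 3.0],
        "value": [1.0, 3.0, 2.0, 0.5],
    }
)

print(df["value"].cummax().tolist())                      # [1.0, 3.0, 3.0, 3.0]
print(df.sort_values("time")["value"].cummax().tolist())  # [1.0, 2.0, 2.0, 3.0]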
@@ -867,14 +887,15 @@ def get_benchmark_result_with_cumulative_steps(
     opt_trace = get_opt_trace_by_steps(experiment=experiment)
     return replace(
         result,
-        optimization_trace=opt_trace,
-        cost_trace=np.arange(1, len(opt_trace) + 1, dtype=int),
+        optimization_trace=opt_trace.tolist(),
+        cost_trace=np.arange(1, len(opt_trace) + 1, dtype=int).tolist(),
         # Empty
-        oracle_trace=np.full(len(opt_trace), np.nan),
-        inference_trace=np.full(len(opt_trace), np.nan),
+        oracle_trace=np.full_like(opt_trace, np.nan).tolist(),
+        inference_trace=np.full_like(opt_trace, np.nan).tolist(),
+        is_feasible_trace=None,
         score_trace=compute_score_trace(
             optimization_trace=opt_trace,
             baseline_value=baseline_value,
             optimal_value=optimal_value,
-        ),
+        ).tolist(),
     )
ax/benchmark/testing/benchmark_stubs.py

Lines changed: 45 additions & 7 deletions
@@ -312,15 +312,52 @@ def get_async_benchmark_problem(
     n_steps: int = 1,
     lower_is_better: bool = False,
     report_inference_value_as_trace: bool = False,
+    num_objectives: int = 1,
+    num_constraints: int = 0,
 ) -> BenchmarkProblem:
+    """
+    Create an early-stopping benchmark problem with MAP_KEY data.
+
+    Args:
+        map_data: Whether to use map metrics (required for early stopping).
+        step_runtime_fn: Optional runtime function for steps.
+        n_steps: Number of steps per trial.
+        lower_is_better: Whether lower values are better (for SOO).
+        report_inference_value_as_trace: Whether to report inference trace.
+        num_objectives: Number of objectives (1 for SOO, >1 for MOO).
+        num_constraints: Number of outcome constraints to add.
+
+    Returns:
+        A BenchmarkProblem suitable for early-stopping evaluation.
+    """
     search_space = get_discrete_search_space()
-    test_function = IdentityTestFunction(n_steps=n_steps)
-    optimization_config = get_soo_opt_config(
-        outcome_names=["objective"],
-        use_map_metric=map_data,
-        observe_noise_sd=True,
-        lower_is_better=lower_is_better,
-    )
+
+    # Create outcome names for objectives and constraints
+    objective_names = [f"objective_{i}" for i in range(num_objectives)]
+    constraint_names = [f"constraint_{i}" for i in range(num_constraints)]
+    outcome_names = [*objective_names, *constraint_names]
+
+    test_function = IdentityTestFunction(n_steps=n_steps, outcome_names=outcome_names)
+
+    if num_objectives == 1:
+        # Single-objective: first outcome is objective, rest are constraints
+        optimization_config = get_soo_opt_config(
+            outcome_names=outcome_names,
+            lower_is_better=lower_is_better,
+            observe_noise_sd=True,
+            use_map_metric=map_data,
+        )
+    else:
+        # Multi-objective: pass all outcomes (objectives + constraints);
+        # get_moo_opt_config will use the last num_constraints as constraints
+        optimization_config = get_moo_opt_config(
+            outcome_names=outcome_names,
+            ref_point=[1.0] * num_objectives,
+            num_constraints=num_constraints,
+            lower_is_better=lower_is_better,
+            observe_noise_sd=True,
+            use_map_metric=map_data,
+        )
 
     return BenchmarkProblem(
         name="test",
@@ -330,6 +367,7 @@ def get_async_benchmark_problem(
         num_trials=4,
         baseline_value=19 if lower_is_better else 0,
         optimal_value=0 if lower_is_better else 19,
+        worst_feasible_value=5.0 if num_constraints > 0 else None,
         step_runtime_function=step_runtime_fn,
         report_inference_value_as_trace=report_inference_value_as_trace,
     )
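Based on the extended signature above, a multi-objective constrained variant of the stub can be requested as the tests below do; the argument values here are illustrative.

problem = get_async_benchmark_problem(
    map_data=True,
    n_steps=5,
    num_objectives=2,   # MOO config over objective_0 and objective_1
    num_constraints=1,  # adds constraint_0 as an outcome constraint
)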

ax/benchmark/tests/test_benchmark.py

Lines changed: 71 additions & 14 deletions
@@ -1195,28 +1195,85 @@ def test_get_opt_trace_by_cumulative_epochs(self) -> None:
         new_opt_trace = get_opt_trace_by_steps(experiment=experiment)
         self.assertEqual(list(new_opt_trace), [0.0, 0.0, 1.0, 1.0, 2.0, 3.0])
 
-        method = get_sobol_benchmark_method()
-        with self.subTest("MOO"):
-            problem = get_multi_objective_benchmark_problem()
-
+        with self.subTest("Multi-objective"):
+            # Multi-objective problem with step data
+            problem = get_async_benchmark_problem(
+                map_data=True,
+                n_steps=5,
+                num_objectives=2,
+                # Ensure we don't have two finishing at the same time, for
+                # determinism
+                step_runtime_fn=lambda params: params["x0"] * (1 - 0.01 * params["x0"]),
+            )
             experiment = self.run_optimization_with_orchestrator(
                 problem=problem, method=method, seed=0
             )
-            with self.assertRaisesRegex(
-                NotImplementedError, "only supported for single objective"
-            ):
-                get_opt_trace_by_steps(experiment=experiment)
+            new_opt_trace = get_opt_trace_by_steps(experiment=experiment)
+            self.assertListEqual(
+                new_opt_trace.tolist(),
+                [
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    1.0,
+                    1.0,
+                    1.0,
+                    1.0,
+                    1.0,
+                    1.0,
+                    4.0,
+                    4.0,
+                    4.0,
+                    4.0,
+                    4.0,
+                    4.0,
+                    4.0,
+                ],
+            )
 
         with self.subTest("Constrained"):
-            problem = get_benchmark_problem("constrained_gramacy_observed_noise")
+            # Constrained problem with step data.
+            problem = get_async_benchmark_problem(
+                map_data=True,
+                n_steps=5,
+                num_constraints=1,
+                # Ensure we don't have two finishing at the same time, for
+                # determinism
+                step_runtime_fn=lambda params: params["x0"] * (1 - 0.01 * params["x0"]),
+            )
             experiment = self.run_optimization_with_orchestrator(
                 problem=problem, method=method, seed=0
            )
-            with self.assertRaisesRegex(
-                NotImplementedError,
-                "not supported for problems with outcome constraints",
-            ):
-                get_opt_trace_by_steps(experiment=experiment)
+            new_opt_trace = get_opt_trace_by_steps(experiment=experiment)
+            self.assertListEqual(
+                new_opt_trace.tolist(),
+                [
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    0.0,
+                    1.0,
+                    1.0,
+                    2.0,
+                    2.0,
+                    2.0,
+                    2.0,
+                    2.0,
+                    2.0,
+                    3.0,
+                    3.0,
+                    3.0,
+                    3.0,
+                    3.0,
+                    3.0,
+                    3.0,
+                ],
+            )
 
     def test_get_benchmark_result_with_cumulative_steps(self) -> None:
         """See test_get_opt_trace_by_cumulative_epochs for more info."""