From ae2405cb27e5854042b99b8298c371f9a42fcefa Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 29 Mar 2026 10:43:52 -0400 Subject: [PATCH] Update safe_eval config to gigaflow-matching evaluation settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Safe eval now uses: - 50 agents (Na=50), 9000-step episodes (≈600s at dt=0.066) - δgoal=10m, vgoal=3 m/s - αcollision=3.0, αboundary=3.0, αcomfort=0.05 - αl-align=0.025, αvel-align=1.0, αl-center=0.0038 - αcenter-bias=0.0, αvelocity=0.0025 - αreverse=0.005, αstop-line=1.0, αtimestep=2.5e-5 Also fix generate_safe_eval_ini to forward goal_radius, dt, min_goal_speed, max_goal_speed to the env config (previously only episode_length, num_agents, distances, map settings were forwarded). --- pufferlib/config/ocean/drive.ini | 46 +++++++++++++++++++++----------- pufferlib/utils.py | 13 ++++++++- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 77e474f86..e9ab9c3ed 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -228,36 +228,50 @@ human_replay_num_agents = 16 enabled = True ; How often to run safe eval (in training epochs). Defaults to render_interval. interval = 250 -; Number of agents to run in the eval environment -num_agents = 64 +; Number of agents to run in the eval environment (Na=50) +num_agents = 50 ; Number of episodes to collect metrics over num_episodes = 100 -; episode length -episode_length = 1000 +; Episode length: 9000 steps ≈ 600s at dt=0.066 +episode_length = 9000 ; Map directory and count for safe eval (independent of training maps) map_dir = "resources/drive/binaries/carla_2D" num_maps = 8 min_goal_distance = 0.5 max_goal_distance = 1000.0 +; dt=0.066 so 9000 steps ≈ 600s +dt = 0.066 +; vgoal = 3 m/s +min_goal_speed = -0.01 +max_goal_speed = 3.0 -; Reward conditioning values (min=max to fix the value). 
-; Names match the env reward_bound_* keys. -; High penalties for unsafe behavior +; Gigaflow-matching reward conditioning values (min=max to fix the value). +; αcollision = 3.0 collision = -3.0 +; αboundary = 3.0 offroad = -3.0 -overspeed = -1.0 +; αstop-line = 1.0 traffic_light = -1.0 -reverse = -0.0075 -comfort = -0.1 - -; Standard driving rewards -goal_radius = 2.0 +overspeed = -1.0 +; αreverse = 5.0e-3 +reverse = -0.005 +; αcomfort = 0.05 +comfort = -0.05 + +; δgoal = 10m +goal_radius = 10.0 +; αl-align = 2.5e-2 lane_align = 0.025 -lane_center = -0.00075 -velocity = 0.005 +; αl-center = 3.8e-3 +lane_center = -0.0038 +; αvelocity = 2.5e-3 +velocity = 0.0025 +; αcenter-bias = 0.0 center_bias = 0.0 +; αvel-align = 1.0 vel_align = 1.0 -timestep = -0.00005 +; αtimestep = 2.5e-5 +timestep = -0.000025 ; Neutral scaling factors throttle = 1.0 diff --git a/pufferlib/utils.py b/pufferlib/utils.py index 9eda5827e..2ec4f2a87 100644 --- a/pufferlib/utils.py +++ b/pufferlib/utils.py @@ -42,7 +42,18 @@ def generate_safe_eval_ini(safe_eval_config, base_ini_path="pufferlib/config/oce "reward_conditioning": 1, "resample_frequency": 0, } - for key in ["episode_length", "num_agents", "min_goal_distance", "max_goal_distance", "map_dir", "num_maps"]: + for key in [ + "episode_length", + "num_agents", + "min_goal_distance", + "max_goal_distance", + "map_dir", + "num_maps", + "goal_radius", + "dt", + "min_goal_speed", + "max_goal_speed", + ]: if key in safe_eval_config: env_overrides[key] = safe_eval_config[key]