From ae2405cb27e5854042b99b8298c371f9a42fcefa Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 29 Mar 2026 10:43:52 -0400 Subject: [PATCH] Update safe_eval config to gigaflow-matching evaluation settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Safe eval now uses: - 50 agents (Na=50), 9000-step episodes (≈600s at dt=0.066) - δgoal=10m, vgoal=3 m/s - αcollision=3.0, αboundary=3.0, αcomfort=0.05 - αl-align=0.025, αvel-align=1.0, αl-center=0.0038 - αcenter-bias=0.0, αvelocity=0.0025 - αreverse=0.005, αstop-line=1.0, αtimestep=2.5e-5 Also fix generate_safe_eval_ini to forward goal_radius, dt, min_goal_speed, max_goal_speed to the env config (previously only episode_length, num_agents, distances, map settings were forwarded). --- pufferlib/config/ocean/drive.ini | 46 +++++++++++++++++++++----------- pufferlib/utils.py | 13 ++++++++- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 77e474f86..e9ab9c3ed 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -228,36 +228,50 @@ human_replay_num_agents = 16 enabled = True ; How often to run safe eval (in training epochs). Defaults to render_interval. interval = 250 -; Number of agents to run in the eval environment -num_agents = 64 +; Number of agents to run in the eval environment (Na=50) +num_agents = 50 ; Number of episodes to collect metrics over num_episodes = 100 -; episode length -episode_length = 1000 +; Episode length: 9000 steps ≈ 600s at dt=0.066 +episode_length = 9000 ; Map directory and count for safe eval (independent of training maps) map_dir = "resources/drive/binaries/carla_2D" num_maps = 8 min_goal_distance = 0.5 max_goal_distance = 1000.0 +; dt=0.066 so 9000 steps ≈ 600s +dt = 0.066 +; vgoal = 3 m/s +min_goal_speed = -0.01 +max_goal_speed = 3.0 -; Reward conditioning values (min=max to fix the value). 
-; Names match the env reward_bound_* keys. -; High penalties for unsafe behavior +; Gigaflow-matching reward conditioning values (min=max to fix the value). +; αcollision = 3.0 collision = -3.0 +; αboundary = 3.0 offroad = -3.0 -overspeed = -1.0 +; αstop-line = 1.0 traffic_light = -1.0 -reverse = -0.0075 -comfort = -0.1 - -; Standard driving rewards -goal_radius = 2.0 +overspeed = -1.0 +; αreverse = 5.0e-3 +reverse = -0.005 +; αcomfort = 0.05 +comfort = -0.05 + +; δgoal = 10m +goal_radius = 10.0 +; αl-align = 2.5e-2 lane_align = 0.025 -lane_center = -0.00075 -velocity = 0.005 +; αl-center = 3.8e-3 +lane_center = -0.0038 +; αvelocity = 2.5e-3 +velocity = 0.0025 +; αcenter-bias = 0.0 center_bias = 0.0 +; αvel-align = 1.0 vel_align = 1.0 -timestep = -0.00005 +; αtimestep = 2.5e-5 +timestep = -0.000025 ; Neutral scaling factors throttle = 1.0 diff --git a/pufferlib/utils.py b/pufferlib/utils.py index 9eda5827e..2ec4f2a87 100644 --- a/pufferlib/utils.py +++ b/pufferlib/utils.py @@ -42,7 +42,18 @@ def generate_safe_eval_ini(safe_eval_config, base_ini_path="pufferlib/config/oce "reward_conditioning": 1, "resample_frequency": 0, } - for key in ["episode_length", "num_agents", "min_goal_distance", "max_goal_distance", "map_dir", "num_maps"]: + for key in [ + "episode_length", + "num_agents", + "min_goal_distance", + "max_goal_distance", + "map_dir", + "num_maps", + "goal_radius", + "dt", + "min_goal_speed", + "max_goal_speed", + ]: if key in safe_eval_config: env_overrides[key] = safe_eval_config[key]