Emerge-Lab · m2kulkarni · Feb 24, 2026 · Feb 24, 2026 · Feb 26, 2026 · Feb 26, 2026
diff --git a/pufferlib/config/ocean/adaptive.ini b/pufferlib/config/ocean/adaptive.ini
@@ -2,18 +2,20 @@
 package = ocean
 env_name = puffer_adaptive_drive
 policy_name = Drive
-transformer_name = Transformer
- ; Changed from rnn_name
+rnn_name = Recurrent
 
 [vec]
 num_workers = 16
 num_envs = 16
-batch_size = 2
+batch_size = 1
 ; backend = Serial
 
 [policy]
-input_size = 128
-; Increased from 64 for richer representations
+input_size = 64
+hidden_size = 256
+
+[rnn]
+input_size = 256
 hidden_size = 256
 
 [transformer]
@@ -29,8 +31,8 @@ dropout = 0.0
 ; Dropout (keep at 0 for RL stability initially)
 
 [env]
-num_agents = 1512
-num_ego_agents = 756
+num_agents = 1024
+num_ego_agents = 512
 ; Options: discrete, continuous
 action_type = discrete
 ; Options: classic, jerk
@@ -120,7 +122,7 @@ minibatch_size = 36400
 ; 400 * 91
 max_minibatch_size = 36400
 minibatch_multiplier = 400
-policy_architecture = Transformer
+policy_architecture = Recurrent
 ; Matches scenario_length for buffer organization
 bptt_horizon = 32
 ; Keep for backward compatibility
@@ -193,6 +195,8 @@ human_replay_num_agents = 32
 human_replay_num_rollouts = 100
 ; Number of maps to use for human replay evaluation
 human_replay_num_maps = 100
+; Number of maps to render for human replay (subset of eval maps)
+human_replay_render_num_maps = 3
 
 [sweep.train.learning_rate]
 distribution = log_normal

diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c
@@ -34,20 +34,18 @@ void test_drivenet() {
 void demo() {
 
     // Note: The settings below are hardcoded for demo purposes. Since the policy was
-    // trained with these exact settings, that changing them may lead to
-    // weird behavior.
+    // trained with these exact settings, changing them may lead to weird behavior.
     Drive env = {
         .human_agent_idx = 0,
-        .dynamics_model = conf.dynamics_model,
-        .reward_vehicle_collision = conf.reward_vehicle_collision,
-        .reward_offroad_collision = conf.reward_offroad_collision,
-        .reward_ade = conf.reward_ade,
-        .goal_radius = conf.goal_radius,
-        .dt = conf.dt,
+        .dynamics_model = CLASSIC,
+        .reward_vehicle_collision = -1.0f,
+        .reward_offroad_collision = -1.0f,
+        .goal_radius = 2.0f,
+        .dt = 0.1f,
         .map_name = "resources/drive/binaries/training/map_000.bin",
-        .init_steps = conf.init_steps,
-        .collision_behavior = conf.collision_behavior,
-        .offroad_behavior = conf.offroad_behavior,
+        .init_steps = 0,
+        .collision_behavior = 0,
+        .offroad_behavior = 0,
     };
     allocate(&env);
     c_reset(&env);