From f41a84d59d6fcb605e472c9a6f3c326253a9124a Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugene@percepta.ai>
Date: Mon, 16 Mar 2026 23:42:57 +0000
Subject: [PATCH 1/8] Randomize agent positions on every respawn in variable
 agent mode

Previously agents always reset to their initial spawn position. Now in
INIT_VARIABLE_AGENT_NUMBER mode, both mid-episode respawns and full
episode resets pick a new random collision-free position on a drivable
lane via length-weighted sampling.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 pufferlib/ocean/drive/drive.h | 120 +++++++++++++++++++++++++++++++---
 1 file changed, 111 insertions(+), 9 deletions(-)

diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index fc4aa2bfc1..2ead0deb73 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -2824,16 +2824,110 @@ void compute_observations(Drive *env) {
     }
 }
 
+// Find a random collision-free position on a drivable lane for an existing agent.
+// Returns true if a valid position was found and updates the agent's sim_x/y/z/heading.
+static bool randomize_agent_position(Drive *env, int agent_idx) {
+    Agent *agent = &env->agents[agent_idx];
+
+    // Pre-compute drivable lanes
+    int drivable_lanes[env->num_roads];
+    float lane_lengths[env->num_roads];
+    int num_drivable = 0;
+    float total_lane_length = 0.0f;
+    for (int i = 0; i < env->num_roads; i++) {
+        if (env->road_elements[i].type == ROAD_LANE && env->road_elements[i].polyline_length > 0.0f) {
+            drivable_lanes[num_drivable] = i;
+            lane_lengths[num_drivable] = env->road_elements[i].polyline_length;
+            total_lane_length += lane_lengths[num_drivable];
+            num_drivable++;
+        }
+    }
+
+    if (num_drivable == 0) return false;
+
+    for (int attempt = 0; attempt < MAX_SPAWN_ATTEMPTS; attempt++) {
+        // Length-weighted lane selection
+        float r = ((float)rand() / (float)RAND_MAX) * total_lane_length;
+        float cumulative = 0.0f;
+        int selected = num_drivable - 1;
+        for (int k = 0; k < num_drivable; k++) {
+            cumulative += lane_lengths[k];
+            if (r < cumulative) {
+                selected = k;
+                break;
+            }
+        }
+        RoadMapElement *lane = &env->road_elements[drivable_lanes[selected]];
+
+        float spawn_x, spawn_y, spawn_z, spawn_heading;
+        get_random_point_on_lane(lane, &spawn_x, &spawn_y, &spawn_z, &spawn_heading);
+        spawn_z += agent->sim_height / 2.0f;
+
+        // Check collision with all other active agents (excluding this one)
+        bool collision = false;
+        for (int j = 0; j < env->active_agent_count; j++) {
+            int other_idx = env->active_agent_indices[j];
+            if (other_idx == agent_idx) continue;
+            Agent *other = &env->agents[other_idx];
+            if (other->sim_x == INVALID_POSITION || other->removed) continue;
+            float dx = spawn_x - other->sim_x;
+            float dy = spawn_y - other->sim_y;
+            float dist = sqrtf(dx * dx + dy * dy);
+            float min_dist = (agent->sim_length + other->sim_length) / 2.0f;
+            if (dist < min_dist) {
+                collision = true;
+                break;
+            }
+        }
+        if (collision) continue;
+
+        // Check offroad
+        if (check_spawn_offroad(env, spawn_x, spawn_y, spawn_z, spawn_heading,
+                                agent->sim_length, agent->sim_width, agent->sim_height))
+            continue;
+
+        agent->sim_x = spawn_x;
+        agent->sim_y = spawn_y;
+        agent->sim_z = spawn_z;
+        agent->sim_heading = spawn_heading;
+        agent->heading_x = cosf(spawn_heading);
+        agent->heading_y = sinf(spawn_heading);
+        // Update stored initial position so future non-random resets are consistent
+        agent->log_trajectory_x[0] = spawn_x;
+        agent->log_trajectory_y[0] = spawn_y;
+        agent->log_trajectory_z[0] = spawn_z;
+        agent->log_heading[0] = spawn_heading;
+        return true;
+    }
+    return false;
+}
+
 void respawn_agent(Drive *env, int agent_idx) {
     Agent *agent = &env->agents[agent_idx];
-    agent->sim_x = agent->log_trajectory_x[0];
-    agent->sim_y = agent->log_trajectory_y[0];
-    agent->sim_z = agent->log_trajectory_z[0];
-    agent->sim_heading = agent->log_heading[0];
-    agent->heading_x = cosf(agent->sim_heading);
-    agent->heading_y = sinf(agent->sim_heading);
-    agent->sim_vx = agent->log_velocity_x[0];
-    agent->sim_vy = agent->log_velocity_y[0];
+
+    if (env->init_mode == INIT_VARIABLE_AGENT_NUMBER) {
+        if (!randomize_agent_position(env, agent_idx)) {
+            // Fallback to original position if no valid spawn found
+            agent->sim_x = agent->log_trajectory_x[0];
+            agent->sim_y = agent->log_trajectory_y[0];
+            agent->sim_z = agent->log_trajectory_z[0];
+            agent->sim_heading = agent->log_heading[0];
+            agent->heading_x = cosf(agent->sim_heading);
+            agent->heading_y = sinf(agent->sim_heading);
+        }
+    } else {
+        agent->sim_x = agent->log_trajectory_x[0];
+        agent->sim_y = agent->log_trajectory_y[0];
+        agent->sim_z = agent->log_trajectory_z[0];
+        agent->sim_heading = agent->log_heading[0];
+        agent->heading_x = cosf(agent->sim_heading);
+        agent->heading_y = sinf(agent->sim_heading);
+    }
+
+    agent->sim_vx = 0.0f;
+    agent->sim_vy = 0.0f;
+    agent->sim_speed = 0.0f;
+    agent->sim_speed_signed = 0.0f;
     agent->metrics_array[COLLISION_IDX] = 0.0f;
     agent->metrics_array[OFFROAD_IDX] = 0.0f;
     agent->metrics_array[REACHED_GOAL_IDX] = 0.0f;
@@ -3097,7 +3191,15 @@ void move_dynamics(Drive *env, int action_idx, int agent_idx) {
 
 void c_reset(Drive *env) {
     env->timestep = env->init_steps;
-    set_start_position(env);
+    if (env->init_mode == INIT_VARIABLE_AGENT_NUMBER) {
+        // Randomize all agent positions on reset
+        for (int x = 0; x < env->active_agent_count; x++) {
+            int agent_idx = env->active_agent_indices[x];
+            randomize_agent_position(env, agent_idx);
+        }
+    } else {
+        set_start_position(env);
+    }
     reset_goal_positions(env);
     for (int x = 0; x < env->active_agent_count; x++) {
         env->logs[x] = (Log){0};

From 4ddc477cdea607b85e50e2028d6713622eec00ca Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugene@percepta.ai>
Date: Tue, 17 Mar 2026 00:03:19 +0000
Subject: [PATCH 2/8] Fix: sample new goals after randomizing agent positions

After moving an agent to a new random position, we must also sample a
new goal relative to that position. Previously, reset_goal_positions
would restore the original init goal, which could be far from the new
spawn.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 pufferlib/ocean/drive/drive.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index 2ead0deb73..0088652ec1 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -2915,6 +2915,8 @@ void respawn_agent(Drive *env, int agent_idx) {
             agent->heading_x = cosf(agent->sim_heading);
             agent->heading_y = sinf(agent->sim_heading);
         }
+        // Sample a new goal relative to the new position
+        sample_new_goal(env, agent_idx);
     } else {
         agent->sim_x = agent->log_trajectory_x[0];
         agent->sim_y = agent->log_trajectory_y[0];
@@ -3197,10 +3199,15 @@ void c_reset(Drive *env) {
             int agent_idx = env->active_agent_indices[x];
             randomize_agent_position(env, agent_idx);
         }
+        // Sample new goals relative to new positions
+        for (int x = 0; x < env->active_agent_count; x++) {
+            int agent_idx = env->active_agent_indices[x];
+            sample_new_goal(env, agent_idx);
+        }
     } else {
         set_start_position(env);
+        reset_goal_positions(env);
     }
-    reset_goal_positions(env);
     for (int x = 0; x < env->active_agent_count; x++) {
         env->logs[x] = (Log){0};
         int agent_idx = env->active_agent_indices[x];

From 7dd64315e0ac7fc96ecfadf9bd5f16793e79d2f9 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugene@percepta.ai>
Date: Tue, 17 Mar 2026 00:14:52 +0000
Subject: [PATCH 3/8] Fix: don't overwrite sampled goals with stale init_goal
 in variable agent mode

c_reset's GOAL_GENERATE_NEW block was restoring init_goal_x/y/z after
sample_new_goal had already set fresh goals relative to the new position.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 pufferlib/ocean/drive/drive.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index 0088652ec1..caec2999ae 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -3237,7 +3237,7 @@ void c_reset(Drive *env) {
         agent->prev_goal_z = agent->sim_z;
         generate_reward_coefs(env, agent);
 
-        if (env->goal_behavior == GOAL_GENERATE_NEW) {
+        if (env->goal_behavior == GOAL_GENERATE_NEW && env->init_mode != INIT_VARIABLE_AGENT_NUMBER) {
             agent->goal_position_x = agent->init_goal_x;
             agent->goal_position_y = agent->init_goal_y;
             agent->goal_position_z = agent->init_goal_z;

From 2fb1217cf184eeeb817682b3b9a8a00ddba4a933 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugene@percepta.ai>
Date: Tue, 17 Mar 2026 00:54:14 +0000
Subject: [PATCH 4/8] Fix collision check and velocity restoration in respawn

- Use proper OBB collision check (check_spawn_collision) instead of
  rough distance approximation in randomize_agent_position
- Restore original log_velocity on respawn for non-variable-agent modes
  instead of zeroing it (preserves data-driven replay behavior)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 pufferlib/ocean/drive/drive.h | 33 ++++++++++++---------------------
 1 file changed, 12 insertions(+), 21 deletions(-)

diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index caec2999ae..a0e519ceff 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -2863,22 +2863,12 @@ static bool randomize_agent_position(Drive *env, int agent_idx) {
         get_random_point_on_lane(lane, &spawn_x, &spawn_y, &spawn_z, &spawn_heading);
         spawn_z += agent->sim_height / 2.0f;
 
-        // Check collision with all other active agents (excluding this one)
-        bool collision = false;
-        for (int j = 0; j < env->active_agent_count; j++) {
-            int other_idx = env->active_agent_indices[j];
-            if (other_idx == agent_idx) continue;
-            Agent *other = &env->agents[other_idx];
-            if (other->sim_x == INVALID_POSITION || other->removed) continue;
-            float dx = spawn_x - other->sim_x;
-            float dy = spawn_y - other->sim_y;
-            float dist = sqrtf(dx * dx + dy * dy);
-            float min_dist = (agent->sim_length + other->sim_length) / 2.0f;
-            if (dist < min_dist) {
-                collision = true;
-                break;
-            }
-        }
+        // Temporarily invalidate this agent so check_spawn_collision skips it
+        float saved_x = agent->sim_x;
+        agent->sim_x = INVALID_POSITION;
+        bool collision = check_spawn_collision(env, env->active_agent_count, spawn_x, spawn_y, spawn_z,
+                                              spawn_heading, agent->sim_length, agent->sim_width, agent->sim_height);
+        agent->sim_x = saved_x;
         if (collision) continue;
 
         // Check offroad
@@ -2917,6 +2907,10 @@ void respawn_agent(Drive *env, int agent_idx) {
         }
         // Sample a new goal relative to the new position
         sample_new_goal(env, agent_idx);
+        agent->sim_vx = 0.0f;
+        agent->sim_vy = 0.0f;
+        agent->sim_speed = 0.0f;
+        agent->sim_speed_signed = 0.0f;
     } else {
         agent->sim_x = agent->log_trajectory_x[0];
         agent->sim_y = agent->log_trajectory_y[0];
@@ -2924,12 +2918,9 @@ void respawn_agent(Drive *env, int agent_idx) {
         agent->sim_heading = agent->log_heading[0];
         agent->heading_x = cosf(agent->sim_heading);
         agent->heading_y = sinf(agent->sim_heading);
+        agent->sim_vx = agent->log_velocity_x[0];
+        agent->sim_vy = agent->log_velocity_y[0];
     }
-
-    agent->sim_vx = 0.0f;
-    agent->sim_vy = 0.0f;
-    agent->sim_speed = 0.0f;
-    agent->sim_speed_signed = 0.0f;
     agent->metrics_array[COLLISION_IDX] = 0.0f;
     agent->metrics_array[OFFROAD_IDX] = 0.0f;
     agent->metrics_array[REACHED_GOAL_IDX] = 0.0f;

From 4d291240a103e22278bbcefcbab981eadf0e5733 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <vinitsky.eugene@gmail.com>
Date: Sat, 21 Mar 2026 14:28:48 +0000
Subject: [PATCH 5/8] Fix clang-format on randomize_agent_position

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 pufferlib/ocean/drive/drive.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index a0e519ceff..395bfb30b0 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -2843,7 +2843,8 @@ static bool randomize_agent_position(Drive *env, int agent_idx) {
         }
     }
 
-    if (num_drivable == 0) return false;
+    if (num_drivable == 0)
+        return false;
 
     for (int attempt = 0; attempt < MAX_SPAWN_ATTEMPTS; attempt++) {
         // Length-weighted lane selection
@@ -2866,14 +2867,15 @@ static bool randomize_agent_position(Drive *env, int agent_idx) {
         // Temporarily invalidate this agent so check_spawn_collision skips it
         float saved_x = agent->sim_x;
         agent->sim_x = INVALID_POSITION;
-        bool collision = check_spawn_collision(env, env->active_agent_count, spawn_x, spawn_y, spawn_z,
-                                              spawn_heading, agent->sim_length, agent->sim_width, agent->sim_height);
+        bool collision = check_spawn_collision(env, env->active_agent_count, spawn_x, spawn_y, spawn_z, spawn_heading,
+                                               agent->sim_length, agent->sim_width, agent->sim_height);
         agent->sim_x = saved_x;
-        if (collision) continue;
+        if (collision)
+            continue;
 
         // Check offroad
-        if (check_spawn_offroad(env, spawn_x, spawn_y, spawn_z, spawn_heading,
-                                agent->sim_length, agent->sim_width, agent->sim_height))
+        if (check_spawn_offroad(env, spawn_x, spawn_y, spawn_z, spawn_heading, agent->sim_length, agent->sim_width,
+                                agent->sim_height))
             continue;
 
         agent->sim_x = spawn_x;

From b2eb9def9c761dc6a690e04046c728b9c9b50204 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <vinitsky.eugene@gmail.com>
Date: Sat, 28 Mar 2026 23:10:59 -0400
Subject: [PATCH 6/8] Add randomize_respawn config flag to gate respawn
 randomization

Previously randomize-on-respawn was gated on init_mode ==
INIT_VARIABLE_AGENT_NUMBER. Now it's a separate config flag
(randomize_respawn, default 0) so it can be toggled independently.

Includes test in tests/test_randomize_respawn.py (requires cluster).
---
 pufferlib/ocean/drive/binding.c |   1 +
 pufferlib/ocean/drive/drive.h   |   5 +-
 pufferlib/ocean/drive/drive.py  |   4 ++
 tests/test_randomize_respawn.py | 118 ++++++++++++++++++++++++++++++++
 4 files changed, 126 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_randomize_respawn.py

diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c
index f178fedfc3..2fe4ab096c 100644
--- a/pufferlib/ocean/drive/binding.c
+++ b/pufferlib/ocean/drive/binding.c
@@ -335,6 +335,7 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
     env->collision_behavior = conf.collision_behavior;
     env->offroad_behavior = conf.offroad_behavior;
     env->dt = conf.dt;
+    env->randomize_respawn = (int)unpack(kwargs, "randomize_respawn");
     env->init_mode = (int)unpack(kwargs, "init_mode");
     env->control_mode = (int)unpack(kwargs, "control_mode");
     env->goal_behavior = (int)unpack(kwargs, "goal_behavior");
diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index 395bfb30b0..796008ad9a 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -347,6 +347,7 @@ struct Drive {
     char scenario_id[SCENARIO_ID_STR_LENGTH];
     int collision_behavior;
     int offroad_behavior;
+    int randomize_respawn;
     float observation_window_size;
     float polyline_reduction_threshold;
     float polyline_max_segment_length;
@@ -2897,7 +2898,7 @@ static bool randomize_agent_position(Drive *env, int agent_idx) {
 void respawn_agent(Drive *env, int agent_idx) {
     Agent *agent = &env->agents[agent_idx];
 
-    if (env->init_mode == INIT_VARIABLE_AGENT_NUMBER) {
+    if (env->randomize_respawn) {
         if (!randomize_agent_position(env, agent_idx)) {
             // Fallback to original position if no valid spawn found
             agent->sim_x = agent->log_trajectory_x[0];
@@ -3186,7 +3187,7 @@ void move_dynamics(Drive *env, int action_idx, int agent_idx) {
 
 void c_reset(Drive *env) {
     env->timestep = env->init_steps;
-    if (env->init_mode == INIT_VARIABLE_AGENT_NUMBER) {
+    if (env->randomize_respawn) {
         // Randomize all agent positions on reset
         for (int x = 0; x < env->active_agent_count; x++) {
             int agent_idx = env->active_agent_indices[x];
diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py
index ec13480daa..c6b61f22a9 100644
--- a/pufferlib/ocean/drive/drive.py
+++ b/pufferlib/ocean/drive/drive.py
@@ -95,6 +95,7 @@ def __init__(
         spawn_length_min=2.0,
         spawn_length_max=5.5,
         spawn_height=1.5,
+        randomize_respawn=0,
     ):
         # env
         self.dt = dt
@@ -125,6 +126,7 @@ def __init__(
         self.episode_length = episode_length
         self.termination_mode = termination_mode
         self.resample_frequency = resample_frequency
+        self.randomize_respawn = randomize_respawn
         self.dynamics_model = dynamics_model
         # reward randomization bounds
         self.reward_bound_goal_radius_min = reward_bound_goal_radius_min
@@ -416,6 +418,7 @@ def __init__(
                 spawn_length_min=self.spawn_length_min,
                 spawn_length_max=self.spawn_length_max,
                 spawn_height=self.spawn_height,
+                randomize_respawn=self.randomize_respawn,
             )
             env_ids.append(env_id)
 
@@ -573,6 +576,7 @@ def resample_maps(self):
                 spawn_length_min=self.spawn_length_min,
                 spawn_length_max=self.spawn_length_max,
                 spawn_height=self.spawn_height,
+                randomize_respawn=self.randomize_respawn,
             )
             env_ids.append(env_id)
         self.c_envs = binding.vectorize(*env_ids)
diff --git a/tests/test_randomize_respawn.py b/tests/test_randomize_respawn.py
new file mode 100644
index 0000000000..d7b569bf13
--- /dev/null
+++ b/tests/test_randomize_respawn.py
@@ -0,0 +1,118 @@
+"""Test that randomize_respawn produces different agent positions across resets.
+
+Run on the cluster with:
+    srun ... python -m pytest tests/test_randomize_respawn.py -v
+"""
+
+import numpy as np
+import pytest
+from pufferlib.ocean.drive.drive import Drive
+
+
+MAP_DIR = "pufferlib/resources/drive/binaries/carla_data"
+
+
+def get_agent_positions(env):
+    """Extract current agent positions from observations."""
+    # Observations are ego-frame and normalized, so raw sim_x/sim_y cannot be
+    # read back from them here. As a simple proxy for "where the agents are",
+    # return a copy of the full observation array for callers to compare.
+    return env.observations.copy()
+
+
+@pytest.fixture
+def env_randomize():
+    e = Drive(
+        num_agents=8,
+        num_maps=2,
+        map_dir=MAP_DIR,
+        dynamics_model="classic",
+        min_agents_per_env=1,
+        max_agents_per_env=8,
+        init_mode="init_variable_agent_number",
+        control_mode="control_vehicles",
+        episode_length=300,
+        resample_frequency=0,
+        randomize_respawn=1,
+    )
+    e.reset()
+    yield e
+    e.close()
+
+
+@pytest.fixture
+def env_no_randomize():
+    e = Drive(
+        num_agents=8,
+        num_maps=2,
+        map_dir=MAP_DIR,
+        dynamics_model="classic",
+        min_agents_per_env=1,
+        max_agents_per_env=8,
+        init_mode="init_variable_agent_number",
+        control_mode="control_vehicles",
+        episode_length=10,
+        resample_frequency=0,
+        randomize_respawn=0,
+    )
+    e.reset()
+    yield e
+    e.close()
+
+
+def test_randomize_respawn_produces_different_positions(env_randomize):
+    """With randomize_respawn=1, positions should differ after episode reset."""
+    env = env_randomize
+    actions = np.zeros(env.action_space.shape, dtype=env.action_space.dtype)
+
+    # Get initial observations
+    obs_before = env.observations.copy()
+
+    # Force fresh envs via resample_maps() instead of stepping out the 300-step episode
+    env.resample_maps()
+    obs_after = env.observations.copy()
+
+    # Observations should differ (agents at different positions)
+    assert not np.allclose(obs_before, obs_after, atol=1e-6), (
+        "Observations should differ after reset with randomize_respawn=1"
+    )
+
+
+def test_no_randomize_same_positions(env_no_randomize):
+    """With randomize_respawn=0, positions should be the same after episode reset."""
+    env = env_no_randomize
+    actions = np.zeros(env.action_space.shape, dtype=env.action_space.dtype)
+
+    # Get initial observations
+    obs_before = env.observations.copy()
+
+    # Step through the full episode to trigger c_reset
+    for _ in range(15):
+        env.step(actions)
+
+    # After reset, positions should return to initial state
+    # Note: obs won't be exactly the same due to metrics/counters,
+    # but the position-related features should match
+    obs_after = env.observations.copy()
+
+    # With no randomization, the first few ego features (position-related)
+    # should be identical after reset
+    # ego features: speed, heading components, goal direction, etc.
+    # After a full reset with no randomization, agents return to log_trajectory[0]
+    assert np.allclose(obs_before[:, :5], obs_after[:, :5], atol=0.1), (
+        "Position features should be similar after reset with randomize_respawn=0"
+    )
+
+
+def test_multiple_resets_produce_variety(env_randomize):
+    """Multiple resets with randomize_respawn should produce different positions each time."""
+    env = env_randomize
+    observations = []
+
+    for _ in range(5):
+        env.resample_maps()
+        observations.append(env.observations[:, :10].copy())
+
+    # Check that not all resets produce the same observations
+    all_same = all(np.allclose(observations[0], obs, atol=1e-6) for obs in observations[1:])
+    assert not all_same, "5 resets with randomize_respawn should not all produce identical observations"

From 900b0401b26a0b383a314496207db811cb2a2b55 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <vinitsky.eugene@gmail.com>
Date: Sat, 28 Mar 2026 17:03:23 -0400
Subject: [PATCH 7/8] Fix: save_map_binary wrote scenario_id prefix that C
 never reads

save_map_binary wrote 16 bytes of scenario_id before sdc_track_index,
but load_map_binary in C starts reading at sdc_track_index with no
scenario_id. This misaligned all subsequent reads, causing segfaults
when loading converted maps.

Also fixes Town10HD conversion failure (scenario_id was an int, not str).
---
 pufferlib/ocean/drive/drive.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py
index c6b61f22a9..a463caf3ca 100644
--- a/pufferlib/ocean/drive/drive.py
+++ b/pufferlib/ocean/drive/drive.py
@@ -803,9 +803,8 @@ def save_map_binary(map_data, output_file, unique_map_id):
         sdc_track_index = metadata.get("sdc_track_index", -1)  # -1 as default if not found
         tracks_to_predict = metadata.get("tracks_to_predict", [])
 
-        # Write original scenario_id with fallback to placeholder
-        scenario_id = map_data.get("scenario_id", f"map_{unique_map_id:03d}")
-        f.write(struct.pack("16s", scenario_id.encode("utf-8")))
+        # Note: C load_map_binary does NOT read a scenario_id prefix.
+        # Do not write one here or the binary will be misaligned.
 
         # Write sdc_track_index
         f.write(struct.pack("i", sdc_track_index))

From b1bd6b48e4b07ff9d54983c4fa13ee7d6e0b1cc8 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <vinitsky.eugene@gmail.com>
Date: Sat, 28 Mar 2026 23:26:40 -0400
Subject: [PATCH 8/8] Default randomize_respawn to 1 (enabled)

---
 pufferlib/ocean/drive/drive.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py
index a463caf3ca..cef4e2117f 100644
--- a/pufferlib/ocean/drive/drive.py
+++ b/pufferlib/ocean/drive/drive.py
@@ -95,7 +95,7 @@ def __init__(
         spawn_length_min=2.0,
         spawn_length_max=5.5,
         spawn_height=1.5,
-        randomize_respawn=0,
+        randomize_respawn=1,
     ):
         # env
         self.dt = dt