Skip to content
1 change: 1 addition & 0 deletions pufferlib/ocean/drive/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
env->collision_behavior = conf.collision_behavior;
env->offroad_behavior = conf.offroad_behavior;
env->dt = conf.dt;
env->randomize_respawn = (int)unpack(kwargs, "randomize_respawn");
env->init_mode = (int)unpack(kwargs, "init_mode");
env->control_mode = (int)unpack(kwargs, "control_mode");
env->goal_behavior = (int)unpack(kwargs, "goal_behavior");
Expand Down
125 changes: 114 additions & 11 deletions pufferlib/ocean/drive/drive.h
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ struct Drive {
char scenario_id[SCENARIO_ID_STR_LENGTH];
int collision_behavior;
int offroad_behavior;
int randomize_respawn;
float observation_window_size;
float polyline_reduction_threshold;
float polyline_max_segment_length;
Expand Down Expand Up @@ -2824,16 +2825,105 @@ void compute_observations(Drive *env) {
}
}

// Find a random collision-free position on a drivable lane for an existing agent.
// Returns true if a valid position was found and updates the agent's sim_x/y/z/heading.
// Find a random collision-free pose on a drivable lane for an existing agent.
// On success, writes the agent's sim_x/y/z/heading (and the stored initial
// pose in log_trajectory_*[0]/log_heading[0]) and returns true; returns false
// if no valid pose is found within MAX_SPAWN_ATTEMPTS attempts.
static bool randomize_agent_position(Drive *env, int agent_idx) {
    Agent *agent = &env->agents[agent_idx];

    // Reuse the drivable-lane tables precomputed elsewhere instead of
    // rebuilding them here in per-call VLAs sized by env->num_roads: this
    // function runs on every respawn/reset, and large road counts would
    // cost stack space and redundant work on each call.
    // NOTE(review): assumes num_drivable / drivable_lane_indices /
    // drivable_lane_lengths / total_drivable_lane_length are populated by
    // compute_drivable_lane_points() — confirm before merging.
    int num_drivable = env->num_drivable;
    float total_lane_length = env->total_drivable_lane_length;
    int *drivable_lanes = env->drivable_lane_indices;
    float *lane_lengths = env->drivable_lane_lengths;
    if (num_drivable <= 0 || total_lane_length <= 0.0f || drivable_lanes == NULL ||
        lane_lengths == NULL)
        return false;

    for (int attempt = 0; attempt < MAX_SPAWN_ATTEMPTS; attempt++) {
        // Length-weighted lane selection: longer lanes are proportionally
        // more likely, giving a roughly uniform spatial distribution.
        float r = ((float)rand() / (float)RAND_MAX) * total_lane_length;
        float cumulative = 0.0f;
        int selected = num_drivable - 1; // fallback when r == total_lane_length
        for (int k = 0; k < num_drivable; k++) {
            cumulative += lane_lengths[k];
            if (r < cumulative) {
                selected = k;
                break;
            }
        }
        RoadMapElement *lane = &env->road_elements[drivable_lanes[selected]];

        float spawn_x, spawn_y, spawn_z, spawn_heading;
        get_random_point_on_lane(lane, &spawn_x, &spawn_y, &spawn_z, &spawn_heading);
        // Lift by half the height so the agent's box sits on the lane surface.
        spawn_z += agent->sim_height / 2.0f;

        // Temporarily invalidate this agent so check_spawn_collision skips it.
        float saved_x = agent->sim_x;
        agent->sim_x = INVALID_POSITION;
        bool collision = check_spawn_collision(env, env->active_agent_count, spawn_x, spawn_y,
                                               spawn_z, spawn_heading, agent->sim_length,
                                               agent->sim_width, agent->sim_height);
        agent->sim_x = saved_x;
        if (collision)
            continue;

        // Reject poses whose bounding box leaves the drivable surface.
        if (check_spawn_offroad(env, spawn_x, spawn_y, spawn_z, spawn_heading,
                                agent->sim_length, agent->sim_width, agent->sim_height))
            continue;

        agent->sim_x = spawn_x;
        agent->sim_y = spawn_y;
        agent->sim_z = spawn_z;
        agent->sim_heading = spawn_heading;
        agent->heading_x = cosf(spawn_heading);
        agent->heading_y = sinf(spawn_heading);
        // Update stored initial position so future non-random resets are consistent.
        agent->log_trajectory_x[0] = spawn_x;
        agent->log_trajectory_y[0] = spawn_y;
        agent->log_trajectory_z[0] = spawn_z;
        agent->log_heading[0] = spawn_heading;
        return true;
    }
    return false;
}

void respawn_agent(Drive *env, int agent_idx) {
Agent *agent = &env->agents[agent_idx];
agent->sim_x = agent->log_trajectory_x[0];
agent->sim_y = agent->log_trajectory_y[0];
agent->sim_z = agent->log_trajectory_z[0];
agent->sim_heading = agent->log_heading[0];
agent->heading_x = cosf(agent->sim_heading);
agent->heading_y = sinf(agent->sim_heading);
agent->sim_vx = agent->log_velocity_x[0];
agent->sim_vy = agent->log_velocity_y[0];

if (env->randomize_respawn) {
if (!randomize_agent_position(env, agent_idx)) {
// Fallback to original position if no valid spawn found
agent->sim_x = agent->log_trajectory_x[0];
agent->sim_y = agent->log_trajectory_y[0];
agent->sim_z = agent->log_trajectory_z[0];
agent->sim_heading = agent->log_heading[0];
agent->heading_x = cosf(agent->sim_heading);
agent->heading_y = sinf(agent->sim_heading);
}
// Sample a new goal relative to the new position
sample_new_goal(env, agent_idx);
agent->sim_vx = 0.0f;
agent->sim_vy = 0.0f;
agent->sim_speed = 0.0f;
agent->sim_speed_signed = 0.0f;
} else {
agent->sim_x = agent->log_trajectory_x[0];
agent->sim_y = agent->log_trajectory_y[0];
agent->sim_z = agent->log_trajectory_z[0];
agent->sim_heading = agent->log_heading[0];
agent->heading_x = cosf(agent->sim_heading);
agent->heading_y = sinf(agent->sim_heading);
agent->sim_vx = agent->log_velocity_x[0];
agent->sim_vy = agent->log_velocity_y[0];
}
agent->metrics_array[COLLISION_IDX] = 0.0f;
agent->metrics_array[OFFROAD_IDX] = 0.0f;
agent->metrics_array[REACHED_GOAL_IDX] = 0.0f;
Expand Down Expand Up @@ -3097,8 +3187,21 @@ void move_dynamics(Drive *env, int action_idx, int agent_idx) {

void c_reset(Drive *env) {
env->timestep = env->init_steps;
set_start_position(env);
reset_goal_positions(env);
if (env->randomize_respawn) {
// Randomize all agent positions on reset
for (int x = 0; x < env->active_agent_count; x++) {
int agent_idx = env->active_agent_indices[x];
randomize_agent_position(env, agent_idx);
}
Comment on lines +3192 to +3195
Copy link

Copilot AI Mar 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In c_reset() variable-agent mode, the return value of randomize_agent_position() is ignored. If it fails to find a valid spawn, the agent will keep its prior episode position (which could be offroad/colliding), and the subsequent sample_new_goal() will be based on that stale state. Please check the return value and apply a deterministic fallback (e.g., retry with different constraints, respawn to log_trajectory_*[0], or mark/remove the agent) so resets are reliable.

Copilot uses AI. Check for mistakes.
Copy link

Copilot AI Mar 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In c_reset() variable-agent mode, positions are randomized but per-agent dynamics state (e.g., sim_vx/sim_vy, speed, accelerations, steering) is never reset. This means agents can start a new episode with leftover velocities from the previous episode even though their pose changed. Reset the dynamics fields after randomize_agent_position() (similar to respawn_agent()’s variable-mode branch) to avoid carry-over behavior and immediate post-reset collisions.

Suggested change
}
}
// Reset dynamics state after positions have been randomized to avoid carry-over
for (int x = 0; x < env->active_agent_count; x++) {
int agent_idx = env->active_agent_indices[x];
Agent *agent = &env->agents[agent_idx];
agent->sim_vx = 0.0f;
agent->sim_vy = 0.0f;
agent->sim_speed = 0.0f;
agent->sim_ax = 0.0f;
agent->sim_ay = 0.0f;
agent->steering = 0.0f;
}

Copilot uses AI. Check for mistakes.
// Sample new goals relative to new positions
for (int x = 0; x < env->active_agent_count; x++) {
int agent_idx = env->active_agent_indices[x];
sample_new_goal(env, agent_idx);
}
Comment on lines +3196 to +3200
Copy link

Copilot AI Mar 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sample_new_goal() increments agent->goals_sampled_this_episode, but c_reset() later unconditionally sets goals_sampled_this_episode = 1.0f for all agents. In variable-agent mode this means the initial goal sampling done here will be double-counted or discarded depending on ordering. Consider moving goal sampling to after the per-agent reset loop (or resetting goals_sampled_this_episode appropriately before sampling) so the counter reflects the actual number of sampled goals.

Copilot uses AI. Check for mistakes.
} else {
set_start_position(env);
reset_goal_positions(env);
}
for (int x = 0; x < env->active_agent_count; x++) {
env->logs[x] = (Log){0};
int agent_idx = env->active_agent_indices[x];
Expand Down Expand Up @@ -3128,7 +3231,7 @@ void c_reset(Drive *env) {
agent->prev_goal_z = agent->sim_z;
generate_reward_coefs(env, agent);

if (env->goal_behavior == GOAL_GENERATE_NEW) {
if (env->goal_behavior == GOAL_GENERATE_NEW && env->init_mode != INIT_VARIABLE_AGENT_NUMBER) {
agent->goal_position_x = agent->init_goal_x;
agent->goal_position_y = agent->init_goal_y;
agent->goal_position_z = agent->init_goal_z;
Expand Down
9 changes: 6 additions & 3 deletions pufferlib/ocean/drive/drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def __init__(
spawn_length_min=2.0,
spawn_length_max=5.5,
spawn_height=1.5,
randomize_respawn=1,
):
# env
self.dt = dt
Expand Down Expand Up @@ -125,6 +126,7 @@ def __init__(
self.episode_length = episode_length
self.termination_mode = termination_mode
self.resample_frequency = resample_frequency
self.randomize_respawn = randomize_respawn
self.dynamics_model = dynamics_model
# reward randomization bounds
self.reward_bound_goal_radius_min = reward_bound_goal_radius_min
Expand Down Expand Up @@ -416,6 +418,7 @@ def __init__(
spawn_length_min=self.spawn_length_min,
spawn_length_max=self.spawn_length_max,
spawn_height=self.spawn_height,
randomize_respawn=self.randomize_respawn,
)
env_ids.append(env_id)

Expand Down Expand Up @@ -573,6 +576,7 @@ def resample_maps(self):
spawn_length_min=self.spawn_length_min,
spawn_length_max=self.spawn_length_max,
spawn_height=self.spawn_height,
randomize_respawn=self.randomize_respawn,
)
env_ids.append(env_id)
self.c_envs = binding.vectorize(*env_ids)
Expand Down Expand Up @@ -799,9 +803,8 @@ def save_map_binary(map_data, output_file, unique_map_id):
sdc_track_index = metadata.get("sdc_track_index", -1) # -1 as default if not found
tracks_to_predict = metadata.get("tracks_to_predict", [])

# Write original scenario_id with fallback to placeholder
scenario_id = map_data.get("scenario_id", f"map_{unique_map_id:03d}")
f.write(struct.pack("16s", scenario_id.encode("utf-8")))
# Note: C load_map_binary does NOT read a scenario_id prefix.
# Do not write one here or the binary will be misaligned.

# Write sdc_track_index
f.write(struct.pack("i", sdc_track_index))
Expand Down
118 changes: 118 additions & 0 deletions tests/test_randomize_respawn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Test that randomize_respawn produces different agent positions across resets.

Run on the cluster with:
srun ... python -m pytest tests/test_randomize_respawn.py -v
"""

import numpy as np
import pytest
from pufferlib.ocean.drive.drive import Drive


MAP_DIR = "pufferlib/resources/drive/binaries/carla_data"


def get_agent_positions(env):
    """Return a snapshot of the env's observation buffer.

    Observations are expressed in the (normalized) ego frame, so raw world
    positions cannot be read off directly; a copy of the full observation
    vector serves as a proxy for comparing agent state across resets.
    """
    snapshot = env.observations.copy()
    return snapshot


@pytest.fixture
def env_randomize():
    """Yield a reset Drive env with randomized respawning enabled."""
    config = dict(
        num_agents=8,
        num_maps=2,
        map_dir=MAP_DIR,
        dynamics_model="classic",
        min_agents_per_env=1,
        max_agents_per_env=8,
        init_mode="init_variable_agent_number",
        control_mode="control_vehicles",
        episode_length=300,
        resample_frequency=0,
        randomize_respawn=1,
    )
    env = Drive(**config)
    env.reset()
    yield env
    env.close()


@pytest.fixture
def env_no_randomize():
    """Yield a reset Drive env with randomized respawning disabled."""
    config = dict(
        num_agents=8,
        num_maps=2,
        map_dir=MAP_DIR,
        dynamics_model="classic",
        min_agents_per_env=1,
        max_agents_per_env=8,
        init_mode="init_variable_agent_number",
        control_mode="control_vehicles",
        episode_length=10,
        resample_frequency=0,
        randomize_respawn=0,
    )
    env = Drive(**config)
    env.reset()
    yield env
    env.close()


def test_randomize_respawn_produces_different_positions(env_randomize):
    """With randomize_respawn=1, positions should differ after a reset.

    resample_maps() forces a full environment reset, which re-randomizes
    agent spawn positions when randomize_respawn is enabled.
    """
    env = env_randomize

    # Snapshot observations before and after a forced reset.
    # (No stepping is needed: resample_maps() triggers the reset directly,
    # so the unused zero-action buffer from the original draft is removed.)
    obs_before = env.observations.copy()
    env.resample_maps()
    obs_after = env.observations.copy()

    # Agents should now be at different positions, so observations must differ.
    assert not np.allclose(obs_before, obs_after, atol=1e-6), (
        "Observations should differ after reset with randomize_respawn=1"
    )


def test_no_randomize_same_positions(env_no_randomize):
    """With randomize_respawn=0, positions should be the same after episode reset."""
    env = env_no_randomize
    no_op = np.zeros(env.action_space.shape, dtype=env.action_space.dtype)

    initial_obs = env.observations.copy()

    # episode_length is 10 for this fixture, so 15 no-op steps guarantee
    # that c_reset has fired at least once.
    for _ in range(15):
        env.step(no_op)

    post_reset_obs = env.observations.copy()

    # Without randomization, agents return to log_trajectory[0] after reset.
    # The full observation vector may still differ (metrics/counters), so we
    # only compare the leading ego features — speed, heading components,
    # goal direction, etc. — which are position-derived.
    assert np.allclose(initial_obs[:, :5], post_reset_obs[:, :5], atol=0.1), (
        "Position features should be similar after reset with randomize_respawn=0"
    )


def test_multiple_resets_produce_variety(env_randomize):
    """Repeated randomized resets should not all yield identical positions."""
    env = env_randomize

    snapshots = []
    for _ in range(5):
        env.resample_maps()
        snapshots.append(env.observations[:, :10].copy())

    # If every later snapshot matches the first, randomization is not working.
    identical = all(np.allclose(snapshots[0], s, atol=1e-6) for s in snapshots[1:])
    assert not identical, "5 resets with randomize_respawn should not all produce identical observations"
Loading