Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pufferlib/ocean/drive/drive.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ void test_drivenet() {
Weights *weights = load_weights("puffer_drive_weights.bin");
DriveNet *net = init_drivenet(weights, num_agents, CLASSIC, 0);

forward(net, observations, actions);
forward(net, NULL, observations, actions);
for (int i = 0; i < num_agents * num_actions; i++) {
printf("idx: %d, action: %d, logits:", i, actions[i]);
for (int j = 0; j < num_actions; j++) {
Expand Down Expand Up @@ -112,7 +112,7 @@ void demo() {
int *actions = (int *)env.actions; // Single integer per agent

if (!IsKeyDown(KEY_LEFT_SHIFT)) {
forward(net, env.observations, actions);
forward(net, &env, env.observations, actions);
} else {
if (env.dynamics_model == CLASSIC) {
// Classic dynamics: acceleration and steering
Expand Down
187 changes: 123 additions & 64 deletions pufferlib/ocean/drive/drive.h
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,16 @@ struct Drive {
int turn_off_normalization;
RewardBound reward_bounds[NUM_REWARD_COEFS];
float min_avg_speed_to_consider_goal_attempt;

// Predicted trajectory visualization (set from Python, drawn by c_render)
float *predicted_traj_x; // [num_agents * traj_len]
float *predicted_traj_y; // [num_agents * traj_len]
int predicted_traj_len; // steps per agent (0 = no trajectory to draw)
Comment on lines +355 to +358
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

predicted_traj_x/predicted_traj_y are heap-allocated from Python (vec_set_trajectory) but are not freed anywhere in Drive teardown (c_close/free_allocated). This introduces a persistent leak across env lifetimes and can accumulate if trajectories are updated/reset frequently. Please free these buffers (and reset predicted_traj_len/pointers) in c_close/free_allocated (and/or when resetting the env).

Copilot uses AI. Check for mistakes.

// Headless rendering (ffmpeg pipe)
int render_mode; // 0=window, 1=headless
int render_pipe_fd; // write end of pipe to ffmpeg
pid_t render_pid; // ffmpeg child process
};

// ========================================
Expand Down Expand Up @@ -2103,6 +2113,8 @@ void c_close(Drive *env) {
free(env->expert_static_agent_indices);
free(env->tracks_to_predict_indices);
free(env->ini_file);
free(env->predicted_traj_x);
free(env->predicted_traj_y);
}

void allocate(Drive *env) {
Expand Down Expand Up @@ -3914,88 +3926,135 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers,
}
}

// Start headless ffmpeg pipe for recording
void start_render_pipe(Drive *env, int width, int height) {
int pipefd[2];
if (pipe(pipefd) == -1) {
fprintf(stderr, "Failed to create render pipe\n");
return;
}
pid_t pid = fork();
if (pid == 0) { // child: run ffmpeg
close(pipefd[1]);
dup2(pipefd[0], STDIN_FILENO);
close(pipefd[0]);
for (int fd = 3; fd < 256; fd++)
close(fd);
char size_str[64];
snprintf(size_str, sizeof(size_str), "%dx%d", width, height);
execlp("ffmpeg", "ffmpeg", "-y", "-f", "rawvideo", "-pix_fmt", "rgba", "-s", size_str, "-r", "30", "-i", "-",
"-c:v", "libx264", "-pix_fmt", "yuv420p", "-preset", "ultrafast", "-crf", "23", "-loglevel", "error",
"/tmp/pufferdrive_render.mp4", NULL);
fprintf(stderr, "execlp ffmpeg failed\n");
_exit(1);
}
close(pipefd[0]);
env->render_pid = pid;
env->render_pipe_fd = pipefd[1];
}

void stop_render_pipe(Drive *env) {
if (env->render_pipe_fd > 0) {
close(env->render_pipe_fd);
env->render_pipe_fd = 0;
}
if (env->render_pid > 0) {
waitpid(env->render_pid, NULL, 0);
env->render_pid = 0;
Comment on lines +3936 to +3963
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

start_render_pipe/stop_render_pipe use fork()/waitpid() but (1) fork() failure (pid == -1) isn’t handled, and (2) waitpid() requires <sys/wait.h>, which isn’t included in drive.h today (only <unistd.h>). This can cause build failures or leaving zombie ffmpeg processes on fork/exec errors. Please handle pid==-1 and add the required include(s) (or move these helpers to a .c file that includes them).

Copilot uses AI. Check for mistakes.
}
}

void write_render_frame(Drive *env, int width, int height) {
if (env->render_pipe_fd <= 0)
return;
unsigned char *pixels = rlReadScreenPixels(width, height);
if (pixels) {
write(env->render_pipe_fd, pixels, width * height * 4);
RL_FREE(pixels);
}
}

void c_render(Drive *env) {
if (env->client == NULL) {
env->client = make_client(env);
// Initialize pan offset to map center
float cx = (env->grid_map->top_left_x + env->grid_map->bottom_right_x) / 2.0f;
float cy = (env->grid_map->top_left_y + env->grid_map->bottom_right_y) / 2.0f;
env->client->camera_target = (Vector3){cx, cy, 0.0f};
}
Client *client = env->client;
BeginDrawing();
Color road = (Color){35, 35, 37, 255};
ClearBackground(road);
BeginMode3D(client->camera);
handle_camera_controls(env->client);
draw_scene(env, client, 0, 0, 0, 0);
ClearBackground((Color){35, 35, 37, 255});

if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) {
env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count;
float map_height = env->grid_map->top_left_y - env->grid_map->bottom_right_y;

// Mouse wheel zoom
float wheel = GetMouseWheelMove();
if (wheel != 0) {
client->camera_zoom -= wheel * 0.1f;
if (client->camera_zoom < 0.05f)
client->camera_zoom = 0.05f;
if (client->camera_zoom > 3.0f)
client->camera_zoom = 3.0f;
}

// Draw debug info
DrawText(TextFormat("Camera Position: (%.2f, %.2f, %.2f)", client->camera.position.x, client->camera.position.y,
client->camera.position.z),
10, 10, 20, PUFF_WHITE);
DrawText(TextFormat("Camera Target: (%.2f, %.2f, %.2f)", client->camera.target.x, client->camera.target.y,
client->camera.target.z),
10, 30, 20, PUFF_WHITE);
DrawText(TextFormat("Timestep: %d", env->timestep), 10, 50, 20, PUFF_WHITE);
// Mouse drag pan (left button)
if (IsMouseButtonDown(MOUSE_BUTTON_LEFT)) {
Vector2 delta = GetMouseDelta();
// Scale mouse pixels to world units based on current zoom
float scale = (map_height * 1.1f * client->camera_zoom) / client->height;
client->camera_target.x -= delta.x * scale;
client->camera_target.y += delta.y * scale;
}

int human_idx = env->active_agent_indices[env->human_agent_idx];
DrawText(TextFormat("Controlling Agent: %d", env->human_agent_idx), 10, 70, 20, PUFF_WHITE);
DrawText(TextFormat("Agent Index: %d", human_idx), 10, 90, 20, PUFF_WHITE);
// Build top-down camera from pan/zoom state
Camera3D topdown_cam = {0};
topdown_cam.position = (Vector3){client->camera_target.x, client->camera_target.y, 500.0f};
topdown_cam.target = client->camera_target;
topdown_cam.up = (Vector3){0.0f, -1.0f, 0.0f};
topdown_cam.fovy = map_height * 1.1f * client->camera_zoom;
topdown_cam.projection = CAMERA_ORTHOGRAPHIC;

// Display current action values - yellow when controlling, white otherwise
Color action_color = IsKeyDown(KEY_LEFT_SHIFT) ? YELLOW : PUFF_WHITE;
BeginMode3D(topdown_cam);

if (env->action_type == 0) { // discrete
int *action_array = (int *)env->actions;
int action_val = action_array[env->human_agent_idx];
// Draw scene (mode=1 for top-down style) — NOTE: draw_scene calls EndMode3D() internally
draw_scene(env, client, 1, 0, 0, 0);

if (env->dynamics_model == CLASSIC) {
int num_steer = 13;
int accel_idx = action_val / num_steer;
int steer_idx = action_val % num_steer;
float accel_value = ACCELERATION_VALUES[accel_idx];
float steer_value = STEERING_VALUES[steer_idx];

DrawText(TextFormat("Acceleration: %.2f m/s^2", accel_value), 10, 110, 20, action_color);
DrawText(TextFormat("Steering: %.3f", steer_value), 10, 130, 20, action_color);
} else if (env->dynamics_model == JERK) {
int num_lat = 3;
int jerk_long_idx = action_val / num_lat;
int jerk_lat_idx = action_val % num_lat;
float jerk_long_value = JERK_LONG[jerk_long_idx];
float jerk_lat_value = JERK_LAT[jerk_lat_idx];

DrawText(TextFormat("Longitudinal Jerk: %.2f m/s^3", jerk_long_value), 10, 110, 20, action_color);
DrawText(TextFormat("Lateral Jerk: %.2f m/s^3", jerk_lat_value), 10, 130, 20, action_color);
// Re-enter 3D mode for trajectory drawing (draw_scene ended it)
BeginMode3D(topdown_cam);
rlDisableDepthTest();
if (env->predicted_traj_x != NULL && env->predicted_traj_len > 0) {
for (int i = 0; i < env->active_agent_count; i++) {
int agent_idx = env->active_agent_indices[i];
Agent *agent = &env->agents[agent_idx];
int tlen = env->predicted_traj_len;
float z = agent->sim_z + 0.1f;

Vector3 prev = {agent->sim_x, agent->sim_y, z};
for (int t = 0; t < tlen; t++) {
float tx = env->predicted_traj_x[i * tlen + t];
float ty = env->predicted_traj_y[i * tlen + t];
Vector3 curr = {tx, ty, z};
rlSetLineWidth(3.0f);
DrawLine3D(prev, curr, RED);
DrawCircle3D(curr, 1.5f, (Vector3){0, 0, 1}, 0.0f, RED);
prev = curr;
}
}
} else { // continuous
float (*action_array_f)[2] = (float (*)[2])env->actions;
DrawText(TextFormat("Acceleration: %.2f", action_array_f[env->human_agent_idx][0]), 10, 110, 20, action_color);
DrawText(TextFormat("Steering: %.2f", action_array_f[env->human_agent_idx][1]), 10, 130, 20, action_color);
}
rlEnableDepthTest();
EndMode3D();

// Show key press status
int status_y = 150;
if (IsKeyDown(KEY_LEFT_SHIFT)) {
DrawText("[shift pressed]", 10, status_y, 20, YELLOW);
status_y += 20;
}
if (IsKeyDown(KEY_SPACE)) {
DrawText("[space pressed]", 10, status_y, 20, YELLOW);
status_y += 20;
}
if (IsKeyDown(KEY_LEFT_CONTROL)) {
DrawText("[ctrl pressed]", 10, status_y, 20, YELLOW);
status_y += 20;
}
// 2D HUD overlay
DrawText(TextFormat("Timestep: %d Agents: %d Zoom: %.1fx", env->timestep, env->active_agent_count,
1.0f / client->camera_zoom),
10, 10, 20, PUFF_WHITE);
DrawText("Scroll=zoom, Drag=pan, TAB=cycle agent", 10, client->height - 25, 18, PUFF_WHITE);

// Controls help
DrawText("Controls: SHIFT + W/S - Accelerate/Brake, SHIFT + A/D - Steer, TAB - Switch Agent", 10,
client->height - 30, 20, PUFF_WHITE);
if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) {
env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count;
}

DrawText(TextFormat("Grid Rows: %d", env->grid_map->grid_rows), 10, status_y, 20, PUFF_WHITE);
DrawText(TextFormat("Grid Cols: %d", env->grid_map->grid_cols), 10, status_y + 20, 20, PUFF_WHITE);
EndDrawing();
}

Expand Down
25 changes: 23 additions & 2 deletions pufferlib/ocean/drive/drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,8 +703,29 @@ def get_road_edge_polylines(self):

return polylines

def render(self):
binding.vec_render(self.c_envs, 0)
def render(self, env_id=0):
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

render(env_id) forwards env_id straight into binding.vec_render, which does not bounds-check env_id before indexing vec->envs[env_id]. Passing an invalid env_id can segfault the process. Please add a Python-side check (0 <= env_id < self.num_envs) and raise a ValueError early (or add the bounds check in vec_render for defense-in-depth).

Suggested change
def render(self, env_id=0):
def render(self, env_id=0):
if not (0 <= env_id < self.num_envs):
raise ValueError(f"env_id ({env_id}) must be in range [0, {self.num_envs})")

Copilot uses AI. Check for mistakes.
binding.vec_render(self.c_envs, env_id)

def set_predicted_trajectories(self, action_trajectories):
"""Roll out action trajectories through dynamics and set for rendering.

Args:
action_trajectories: numpy array of shape [num_agents, traj_len]
containing discrete action indices (e.g. classic: accel_idx * 13 + steer_idx).
num_agents is the total across all sub-envs.
"""
_, traj_len = action_trajectories.shape
# Set trajectories per sub-env using agent_offsets to slice correctly
for i in range(self.num_envs):
start = self.agent_offsets[i]
end = self.agent_offsets[i + 1]
sub_actions = action_trajectories[start:end]
binding.vec_set_trajectory(
self.c_envs,
i,
sub_actions.flatten().astype(np.int32),
traj_len,
)

def close(self):
binding.vec_close(self.c_envs)
Expand Down
Loading
Loading