From c8e827be211d37590ab45c9bca957257f5b5884b Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 23 Mar 2026 22:06:40 -0400 Subject: [PATCH 1/5] Add trajectory prediction to C network and renderer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Update DriveNet to match Ricky's actions_trajectory policy: - Add past_actions_encoder (Linear→ReLU→Linear, 40→64→64) - 4-way concat (ego, road, partner, past_actions) → shared_embedding - Actor outputs trajectory_len × action_size logits - Roll out predicted trajectory using classic dynamics model after each forward pass, store in predicted_trajectory_x/y - Draw predicted trajectories as sky-blue lines in renderTopDownView Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/drive/drivenet.h | 123 ++++++++++++++++++++++++++++-- pufferlib/ocean/drive/visualize.c | 26 ++++++- 2 files changed, 138 insertions(+), 11 deletions(-) diff --git a/pufferlib/ocean/drive/drivenet.h b/pufferlib/ocean/drive/drivenet.h index 50b6a987a9..a1042adf03 100644 --- a/pufferlib/ocean/drive/drivenet.h +++ b/pufferlib/ocean/drive/drivenet.h @@ -10,14 +10,19 @@ #define NN_INPUT_SIZE 64 #define NN_HIDDEN_SIZE 256 +#define ACTIONS_TRAJECTORY_LENGTH 20 +#define PAST_ACTIONS_DIM (ACTIONS_TRAJECTORY_LENGTH * 2) // 20 steps × 2 action dims typedef struct DriveNet DriveNet; struct DriveNet { int num_agents; int ego_dim; + int action_size; // per-step action size (e.g. 91 for classic) + int trajectory_len; // number of trajectory steps float *obs_self; float *obs_partner; float *obs_road; + float *obs_past_actions; float *partner_linear_output; float *road_linear_output; float *partner_layernorm_output; @@ -27,16 +32,20 @@ struct DriveNet { Linear *ego_encoder; Linear *road_encoder; Linear *partner_encoder; + Linear *past_actions_encoder; LayerNorm *ego_layernorm; LayerNorm *road_layernorm; LayerNorm *partner_layernorm; Linear *ego_encoder_two; Linear *road_encoder_two; Linear *partner_encoder_two; + Linear *past_actions_encoder_two; + ReLU *past_actions_relu; MaxDim1 *partner_max; MaxDim1 *road_max; CatDim1 *cat1; CatDim1 *cat2; + CatDim1 *cat3; GELU *gelu; Linear *shared_embedding; ReLU *relu; @@ -44,6 +53,9 @@ struct DriveNet { Linear *actor; Linear *value_fn; Multidiscrete *multidiscrete; + // Predicted trajectory (rolled out from actor output) + float *predicted_trajectory_x; + float *predicted_trajectory_y; }; DriveNet *init_drivenet(Weights *weights, int num_agents, int dynamics_model, int reward_conditioning) { @@ -74,11 +86,17 @@ DriveNet *init_drivenet(Weights *weights, int num_agents, int dynamics_model, in action_dim = 1; } + int traj_len = ACTIONS_TRAJECTORY_LENGTH; + int total_actor_output = traj_len * action_size; + net->num_agents = num_agents; net->ego_dim = ego_dim; + net->action_size = action_size; + net->trajectory_len = traj_len; net->obs_self = calloc(num_agents * ego_dim, sizeof(float)); net->obs_partner = calloc(num_agents * max_partners * partner_features, sizeof(float)); net->obs_road = calloc(num_agents * max_road_obs * road_feat_onehot, sizeof(float)); + net->obs_past_actions = calloc(num_agents * PAST_ACTIONS_DIM, sizeof(float)); net->partner_linear_output = calloc(num_agents * max_partners * input_size, sizeof(float)); net->road_linear_output = calloc(num_agents * max_road_obs * input_size, sizeof(float)); net->partner_linear_output_two = calloc(num_agents * max_partners * input_size, sizeof(float)); @@ -95,19 +113,27 @@ DriveNet *init_drivenet(Weights *weights, int num_agents, int dynamics_model, in net->partner_encoder = make_linear(weights, num_agents, partner_features, input_size); net->partner_layernorm = make_layernorm(weights, num_agents, input_size); net->partner_encoder_two = make_linear(weights, num_agents, input_size, input_size); + // Past actions encoder: Linear(PAST_ACTIONS_DIM → input_size) + ReLU + Linear(input_size → input_size) + net->past_actions_encoder = make_linear(weights, num_agents, PAST_ACTIONS_DIM, input_size); + net->past_actions_relu = make_relu(num_agents, input_size); + net->past_actions_encoder_two = make_linear(weights, num_agents, input_size, input_size); net->partner_max = make_max_dim1(num_agents, max_partners, input_size); net->road_max = make_max_dim1(num_agents, max_road_obs, input_size); net->cat1 = make_cat_dim1(num_agents, input_size, input_size); net->cat2 = make_cat_dim1(num_agents, input_size + input_size, input_size); - net->gelu = make_gelu(num_agents, 3 * input_size); - net->shared_embedding = make_linear(weights, num_agents, input_size * 3, hidden_size); + net->cat3 = make_cat_dim1(num_agents, input_size * 3, input_size); + net->gelu = make_gelu(num_agents, 4 * input_size); + net->shared_embedding = make_linear(weights, num_agents, input_size * 4, hidden_size); net->relu = make_relu(num_agents, hidden_size); - net->actor = make_linear(weights, num_agents, hidden_size, action_size); + net->actor = make_linear(weights, num_agents, hidden_size, total_actor_output); net->value_fn = make_linear(weights, num_agents, hidden_size, 1); net->lstm = make_lstm(weights, num_agents, hidden_size, NN_HIDDEN_SIZE); memset(net->lstm->state_h, 0, num_agents * NN_HIDDEN_SIZE * sizeof(float)); memset(net->lstm->state_c, 0, num_agents * NN_HIDDEN_SIZE * sizeof(float)); net->multidiscrete = make_multidiscrete(num_agents, logit_sizes, action_dim); + // Trajectory rollout buffers + net->predicted_trajectory_x = calloc(num_agents * traj_len, sizeof(float)); + net->predicted_trajectory_y = calloc(num_agents * traj_len, sizeof(float)); return net; } @@ -115,6 +141,7 @@ void free_drivenet(DriveNet *net) { free(net->obs_self); free(net->obs_partner); free(net->obs_road); + free(net->obs_past_actions); free(net->partner_linear_output); free(net->road_linear_output); free(net->partner_linear_output_two); @@ -124,16 +151,20 @@ void free_drivenet(DriveNet *net) { free(net->ego_encoder); free(net->road_encoder); free(net->partner_encoder); + free(net->past_actions_encoder); free(net->ego_layernorm); free(net->road_layernorm); free(net->partner_layernorm); free(net->ego_encoder_two); free(net->road_encoder_two); free(net->partner_encoder_two); + free(net->past_actions_encoder_two); + free(net->past_actions_relu); free(net->partner_max); free(net->road_max); free(net->cat1); free(net->cat2); + free(net->cat3); free(net->gelu); free(net->shared_embedding); free(net->relu); @@ -141,26 +172,31 @@ void free_drivenet(DriveNet *net) { free(net->actor); free(net->value_fn); free(net->lstm); + free(net->predicted_trajectory_x); + free(net->predicted_trajectory_y); free(net); } -void forward(DriveNet *net, float *observations, int *actions) { +void forward(DriveNet *net, Drive *env, float *observations, int *actions) { int ego_dim = net->ego_dim; int max_partners = MAX_AGENTS - 1; int max_road_obs = MAX_ROAD_SEGMENT_OBSERVATIONS; int partner_features = PARTNER_FEATURES; int road_features = ROAD_FEATURES; int road_feat_onehot = road_features + 6; // one-hot extra 6 features for road + int obs_stride = ego_dim + max_partners * partner_features + max_road_obs * road_features + PAST_ACTIONS_DIM; // Clear previous observations memset(net->obs_self, 0, net->num_agents * ego_dim * sizeof(float)); memset(net->obs_partner, 0, net->num_agents * max_partners * partner_features * sizeof(float)); memset(net->obs_road, 0, net->num_agents * max_road_obs * road_feat_onehot * sizeof(float)); + memset(net->obs_past_actions, 0, net->num_agents * PAST_ACTIONS_DIM * sizeof(float)); for (int b = 0; b < net->num_agents; b++) { - int b_offset = b * (ego_dim + max_partners * partner_features + max_road_obs * road_features); + int b_offset = b * obs_stride; int partner_offset = b_offset + ego_dim; int road_offset = b_offset + ego_dim + max_partners * partner_features; + int past_actions_offset = road_offset + max_road_obs * road_features; // Process self observation for (int i = 0; i < ego_dim; i++) { @@ -191,6 +227,11 @@ void forward(DriveNet *net, float *observations, int *actions) { } } } + + // Process past actions trajectory + for (int i = 0; i < PAST_ACTIONS_DIM; i++) { + net->obs_past_actions[b * PAST_ACTIONS_DIM + i] = observations[past_actions_offset + i]; + } } // Forward pass through the network @@ -258,17 +299,85 @@ void forward(DriveNet *net, float *observations, int *actions) { } } + // Past actions encoder: Linear → ReLU → Linear + linear(net->past_actions_encoder, net->obs_past_actions); + relu(net->past_actions_relu, net->past_actions_encoder->output); + linear(net->past_actions_encoder_two, net->past_actions_relu->output); + max_dim1(net->partner_max, net->partner_linear_output_two); max_dim1(net->road_max, net->road_linear_output_two); + // Concatenate: [ego, road, partner, past_actions] (4 × input_size) cat_dim1(net->cat1, net->ego_encoder_two->output, net->road_max->output); cat_dim1(net->cat2, net->cat1->output, net->partner_max->output); - gelu(net->gelu, net->cat2->output); + cat_dim1(net->cat3, net->cat2->output, net->past_actions_encoder_two->output); + gelu(net->gelu, net->cat3->output); linear(net->shared_embedding, net->gelu->output); relu(net->relu, net->shared_embedding->output); lstm(net->lstm, net->relu->output); linear(net->actor, net->lstm->state_h); linear(net->value_fn, net->lstm->state_h); - // Get action by taking argmax of actor output + // Actor output is [num_agents × (trajectory_len × action_size)] + // Take first timestep's action via softmax_multidiscrete softmax_multidiscrete(net->multidiscrete, net->actor->output, actions); + + // Roll out predicted trajectory using classic dynamics + if (env != NULL) { + int traj_len = net->trajectory_len; + int action_size = net->action_size; + int num_steer = sizeof(STEERING_VALUES) / sizeof(STEERING_VALUES[0]); + + for (int b = 0; b < net->num_agents; b++) { + int agent_idx = env->active_agent_indices[b]; + Agent *agent = &env->agents[agent_idx]; + + // Start from current agent state + float x = agent->sim_x; + float y = agent->sim_y; + float heading = agent->sim_heading; + float vx = agent->sim_vx; + float vy = agent->sim_vy; + + float *actor_out = &net->actor->output[b * traj_len * action_size]; + + for (int t = 0; t < traj_len; t++) { + // Find argmax action for this timestep + float *logits = &actor_out[t * action_size]; + int best_action = 0; + float best_val = logits[0]; + for (int a = 1; a < action_size; a++) { + if (logits[a] > best_val) { + best_val = logits[a]; + best_action = a; + } + } + + // Decode action into acceleration + steering (classic model) + int accel_idx = best_action / num_steer; + int steer_idx = best_action % num_steer; + float acceleration = ACCELERATION_VALUES[accel_idx]; + float steering = STEERING_VALUES[steer_idx]; + + // Classic dynamics step + float speed_mag = sqrtf(vx * vx + vy * vy); + float v_dot = vx * cosf(heading) + vy * sinf(heading); + float signed_speed = copysignf(speed_mag, v_dot); + signed_speed = signed_speed + acceleration * env->dt; + signed_speed = clipSpeed(signed_speed); + float beta = tanhf(0.5f * tanf(steering)); + float new_vx = signed_speed * cosf(heading + beta); + float new_vy = signed_speed * sinf(heading + beta); + float yaw_rate = (signed_speed * cosf(beta) * tanf(steering)) / agent->sim_length; + + x += new_vx * env->dt; + y += new_vy * env->dt; + heading += yaw_rate * env->dt; + vx = new_vx; + vy = new_vy; + + net->predicted_trajectory_x[b * traj_len + t] = x; + net->predicted_trajectory_y[b * traj_len + t] = y; + } + } + } } diff --git a/pufferlib/ocean/drive/visualize.c b/pufferlib/ocean/drive/visualize.c index f98430875d..8830579bbc 100644 --- a/pufferlib/ocean/drive/visualize.c +++ b/pufferlib/ocean/drive/visualize.c @@ -67,7 +67,7 @@ void CloseVideo(VideoRecorder *recorder) { void renderTopDownView(Drive *env, Client *client, int map_height, int obs, int lasers, int trajectories, int frame_count, float *path, int show_human_logs, int show_grid, int img_width, int img_height, - int zoom_in) { + int zoom_in, DriveNet *net) { BeginDrawing(); // Top-down orthographic camera @@ -124,6 +124,24 @@ void renderTopDownView(Drive *env, Client *client, int map_height, int obs, int } } + // Draw predicted trajectories from policy network + if (net != NULL && net->predicted_trajectory_x != NULL) { + for (int i = 0; i < env->active_agent_count && i < net->num_agents; i++) { + int traj_len = net->trajectory_len; + int agent_idx = env->active_agent_indices[i]; + Agent *agent = &env->agents[agent_idx]; + Vector3 prev = {agent->sim_x, agent->sim_y, 0.6f}; + for (int t = 0; t < traj_len; t++) { + float tx = net->predicted_trajectory_x[i * traj_len + t]; + float ty = net->predicted_trajectory_y[i * traj_len + t]; + Vector3 curr = {tx, ty, 0.6f}; + DrawLine3D(prev, curr, Fade(SKYBLUE, 0.8f)); + DrawSphere(curr, 0.3f, Fade(SKYBLUE, 0.6f)); + prev = curr; + } + } + } + // Draw agent trajs if (trajectories) { for (int i = 0; i < frame_count; i++) { @@ -397,11 +415,11 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o for (int i = 0; i < frame_count; i++) { if (i % frame_skip == 0) { renderTopDownView(&env, client, map_height, 0, 0, 0, frame_count, NULL, show_human_logs, show_grid, - img_width, img_height, zoom_in); + img_width, img_height, zoom_in, net); WriteFrame(&topdown_recorder, img_width, img_height); rendered_frames++; } - forward(net, env.observations, (int *)env.actions); + forward(net, &env, env.observations, (int *)env.actions); c_step(&env); } } @@ -419,7 +437,7 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o WriteFrame(&agent_recorder, img_width, img_height); rendered_frames++; } - forward(net, env.observations, (int *)env.actions); + forward(net, &env, env.observations, (int *)env.actions); c_step(&env); } } From a59bccb35c1ad6b2bffbc64cd7dcb8b9ef19a929 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 23 Mar 2026 22:32:39 -0400 Subject: [PATCH 2/5] Add trajectory rollout and rendering infrastructure - Add predicted_traj_x/y fields to Drive struct - Add vec_set_trajectory binding: takes action indices, rolls out through classic dynamics in C, stores x/y positions for rendering - c_render draws predicted trajectories as sky-blue lines/spheres - Add headless ffmpeg rendering functions (start/stop/write_render_pipe) - Python set_predicted_trajectories() method on Drive class Usage: after getting actor output from torch policy, extract argmax actions for each trajectory step, pass to env.set_predicted_trajectories(), then call env.render(). Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/drive/drive.h | 76 ++++++++++++++++++++++++++++++++++ pufferlib/ocean/drive/drive.py | 15 +++++++ pufferlib/ocean/env_binding.h | 67 ++++++++++++++++++++++++++++++ 3 files changed, 158 insertions(+) diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index c21159be93..f984d298dc 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -351,6 +351,16 @@ struct Drive { int turn_off_normalization; RewardBound reward_bounds[NUM_REWARD_COEFS]; float min_avg_speed_to_consider_goal_attempt; + + // Predicted trajectory visualization (set from Python, drawn by c_render) + float *predicted_traj_x; // [num_agents * traj_len] + float *predicted_traj_y; // [num_agents * traj_len] + int predicted_traj_len; // steps per agent (0 = no trajectory to draw) + + // Headless rendering (ffmpeg pipe) + int render_mode; // 0=window, 1=headless + int render_pipe_fd; // write end of pipe to ffmpeg + pid_t render_pid; // ffmpeg child process }; // ======================================== @@ -3914,6 +3924,54 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers, } } +// Start headless ffmpeg pipe for recording +void start_render_pipe(Drive *env, int width, int height) { + int pipefd[2]; + if (pipe(pipefd) == -1) { + fprintf(stderr, "Failed to create render pipe\n"); + return; + } + pid_t pid = fork(); + if (pid == 0) { // child: run ffmpeg + close(pipefd[1]); + dup2(pipefd[0], STDIN_FILENO); + close(pipefd[0]); + for (int fd = 3; fd < 256; fd++) + close(fd); + char size_str[64]; + snprintf(size_str, sizeof(size_str), "%dx%d", width, height); + execlp("ffmpeg", "ffmpeg", "-y", "-f", "rawvideo", "-pix_fmt", "rgba", "-s", size_str, "-r", "30", "-i", "-", + "-c:v", "libx264", "-pix_fmt", "yuv420p", "-preset", "ultrafast", "-crf", "23", "-loglevel", "error", + "/tmp/pufferdrive_render.mp4", NULL); + fprintf(stderr, "execlp ffmpeg failed\n"); + _exit(1); + } + close(pipefd[0]); + env->render_pid = pid; + env->render_pipe_fd = pipefd[1]; +} + +void stop_render_pipe(Drive *env) { + if (env->render_pipe_fd > 0) { + close(env->render_pipe_fd); + env->render_pipe_fd = 0; + } + if (env->render_pid > 0) { + waitpid(env->render_pid, NULL, 0); + env->render_pid = 0; + } +} + +void write_render_frame(Drive *env, int width, int height) { + if (env->render_pipe_fd <= 0) + return; + unsigned char *pixels = rlReadScreenPixels(width, height); + if (pixels) { + write(env->render_pipe_fd, pixels, width * height * 4); + RL_FREE(pixels); + } +} + void c_render(Drive *env) { if (env->client == NULL) { env->client = make_client(env); @@ -3926,6 +3984,24 @@ void c_render(Drive *env) { handle_camera_controls(env->client); draw_scene(env, client, 0, 0, 0, 0); + // Draw predicted trajectories (set from Python) + if (env->predicted_traj_x != NULL && env->predicted_traj_len > 0) { + for (int i = 0; i < env->active_agent_count; i++) { + int agent_idx = env->active_agent_indices[i]; + Agent *agent = &env->agents[agent_idx]; + int tlen = env->predicted_traj_len; + Vector3 prev = {agent->sim_x, agent->sim_y, 0.6f}; + for (int t = 0; t < tlen; t++) { + float tx = env->predicted_traj_x[i * tlen + t]; + float ty = env->predicted_traj_y[i * tlen + t]; + Vector3 curr = {tx, ty, 0.6f}; + DrawLine3D(prev, curr, Fade(SKYBLUE, 0.8f)); + DrawSphere(curr, 0.3f, Fade(SKYBLUE, 0.6f)); + prev = curr; + } + } + } + if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) { env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count; } diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 5a2d5d29ec..1444495c53 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -706,6 +706,21 @@ def get_road_edge_polylines(self): def render(self): binding.vec_render(self.c_envs, 0) + def set_predicted_trajectories(self, action_trajectories): + """Roll out action trajectories through dynamics and set for rendering. + + Args: + action_trajectories: numpy array of shape [num_agents, traj_len] + containing discrete action indices (e.g. classic: accel_idx * 13 + steer_idx). + """ + num_agents, traj_len = action_trajectories.shape + binding.vec_set_trajectory( + self.c_envs, + 0, + action_trajectories.flatten().astype(np.int32), + traj_len, + ) + def close(self): binding.vec_close(self.c_envs) diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h index 2c8ac3192d..cec3abd26e 100644 --- a/pufferlib/ocean/env_binding.h +++ b/pufferlib/ocean/env_binding.h @@ -541,6 +541,72 @@ static PyObject *vec_render(PyObject *self, PyObject *args) { Py_RETURN_NONE; } +// Roll out action trajectories through dynamics and store for rendering. +// action_traj: [num_agents * traj_len] array of discrete action indices. +static PyObject *vec_set_trajectory(PyObject *self, PyObject *args) { + if (PyTuple_Size(args) != 4) { + PyErr_SetString(PyExc_TypeError, "vec_set_trajectory requires 4 args: vec_env, env_id, action_traj, traj_len"); + return NULL; + } + VecEnv *vec = (VecEnv *)PyLong_AsVoidPtr(PyTuple_GetItem(args, 0)); + int env_id = PyLong_AsLong(PyTuple_GetItem(args, 1)); + PyArrayObject *action_arr = (PyArrayObject *)PyTuple_GetItem(args, 2); + int traj_len = PyLong_AsLong(PyTuple_GetItem(args, 3)); + + Env *env = vec->envs[env_id]; + int num_agents = env->active_agent_count; + int *actions = (int *)PyArray_DATA(action_arr); + int num_steer = sizeof(STEERING_VALUES) / sizeof(STEERING_VALUES[0]); + + // Allocate/reallocate trajectory buffers + free(env->predicted_traj_x); + free(env->predicted_traj_y); + int total = num_agents * traj_len; + env->predicted_traj_x = (float *)calloc(total, sizeof(float)); + env->predicted_traj_y = (float *)calloc(total, sizeof(float)); + env->predicted_traj_len = traj_len; + + // Roll out each agent's trajectory using classic dynamics + for (int i = 0; i < num_agents; i++) { + int agent_idx = env->active_agent_indices[i]; + Agent *agent = &env->agents[agent_idx]; + float x = agent->sim_x; + float y = agent->sim_y; + float heading = agent->sim_heading; + float vx = agent->sim_vx; + float vy = agent->sim_vy; + + for (int t = 0; t < traj_len; t++) { + int action_val = actions[i * traj_len + t]; + int accel_idx = action_val / num_steer; + int steer_idx = action_val % num_steer; + float acceleration = ACCELERATION_VALUES[accel_idx]; + float steering = STEERING_VALUES[steer_idx]; + + float speed_mag = sqrtf(vx * vx + vy * vy); + float v_dot = vx * cosf(heading) + vy * sinf(heading); + float signed_speed = copysignf(speed_mag, v_dot); + signed_speed += acceleration * env->dt; + signed_speed = clipSpeed(signed_speed); + float beta = tanhf(0.5f * tanf(steering)); + float new_vx = signed_speed * cosf(heading + beta); + float new_vy = signed_speed * sinf(heading + beta); + float yaw_rate = (signed_speed * cosf(beta) * tanf(steering)) / agent->sim_length; + + x += new_vx * env->dt; + y += new_vy * env->dt; + heading += yaw_rate * env->dt; + vx = new_vx; + vy = new_vy; + + env->predicted_traj_x[i * traj_len + t] = x; + env->predicted_traj_y[i * traj_len + t] = y; + } + } + + Py_RETURN_NONE; +} + static int assign_to_dict(PyObject *dict, char *key, float value) { PyObject *v = PyFloat_FromDouble(value); if (v == NULL) { @@ -977,6 +1043,7 @@ static PyMethodDef methods[] = { {"vec_step", vec_step, METH_VARARGS, "Step the vector of environments"}, {"vec_log", vec_log, METH_VARARGS, "Log the vector of environments"}, {"vec_render", vec_render, METH_VARARGS, "Render the vector of environments"}, + {"vec_set_trajectory", vec_set_trajectory, METH_VARARGS, "Set predicted trajectory for rendering"}, {"vec_close", vec_close, METH_VARARGS, "Close the vector of environments"}, {"shared", (PyCFunction)my_shared, METH_VARARGS | METH_KEYWORDS, "Shared state"}, {"get_global_agent_state", get_global_agent_state, METH_VARARGS, "Get global agent state"}, From e70ddb116159052cbf5d25406f6122a5129f95f0 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Tue, 24 Mar 2026 02:18:54 -0400 Subject: [PATCH 3/5] Fix trajectory rendering: re-enter 3D mode after draw_scene draw_scene() calls EndMode3D() internally, so trajectory drawing was happening in 2D screen space instead of world coordinates. Now re-enters BeginMode3D with the same camera after draw_scene returns. Also: - Top-down orthographic camera centered on map - Mouse drag to pan, scroll to zoom - Per-sub-env trajectory slicing via agent_offsets - rlDisableDepthTest so trajectories render on top Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/drive/drive.h | 141 +++++++++++++++------------------ pufferlib/ocean/drive/drive.py | 24 +++--- 2 files changed, 80 insertions(+), 85 deletions(-) diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index f984d298dc..de85e4a177 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -3975,103 +3975,92 @@ void write_render_frame(Drive *env, int width, int height) { void c_render(Drive *env) { if (env->client == NULL) { env->client = make_client(env); + // Initialize pan offset to map center + float cx = (env->grid_map->top_left_x + env->grid_map->bottom_right_x) / 2.0f; + float cy = (env->grid_map->top_left_y + env->grid_map->bottom_right_y) / 2.0f; + env->client->camera_target = (Vector3){cx, cy, 0.0f}; } Client *client = env->client; BeginDrawing(); - Color road = (Color){35, 35, 37, 255}; - ClearBackground(road); - BeginMode3D(client->camera); - handle_camera_controls(env->client); - draw_scene(env, client, 0, 0, 0, 0); + ClearBackground((Color){35, 35, 37, 255}); - // Draw predicted trajectories (set from Python) + float map_height = env->grid_map->top_left_y - env->grid_map->bottom_right_y; + + // Mouse wheel zoom + float wheel = GetMouseWheelMove(); + if (wheel != 0) { + client->camera_zoom -= wheel * 0.1f; + if (client->camera_zoom < 0.05f) + client->camera_zoom = 0.05f; + if (client->camera_zoom > 3.0f) + client->camera_zoom = 3.0f; + } + + // Mouse drag pan (right button or middle button) + if (IsMouseButtonDown(MOUSE_BUTTON_LEFT)) { + Vector2 delta = GetMouseDelta(); + // Scale mouse pixels to world units based on current zoom + float scale = (map_height * 1.1f * client->camera_zoom) / client->height; + client->camera_target.x -= delta.x * scale; + client->camera_target.y += delta.y * scale; + } + + // Build top-down camera from pan/zoom state + Camera3D topdown_cam = {0}; + topdown_cam.position = (Vector3){client->camera_target.x, client->camera_target.y, 500.0f}; + topdown_cam.target = client->camera_target; + topdown_cam.up = (Vector3){0.0f, -1.0f, 0.0f}; + topdown_cam.fovy = map_height * 1.1f * client->camera_zoom; + topdown_cam.projection = CAMERA_ORTHOGRAPHIC; + + BeginMode3D(topdown_cam); + + // Draw scene (mode=1 for top-down style) — NOTE: draw_scene calls EndMode3D() internally + draw_scene(env, client, 1, 0, 0, 0); + + // Re-enter 3D mode for trajectory drawing (draw_scene ended it) + BeginMode3D(topdown_cam); + rlDisableDepthTest(); + // First: draw GREEN circles at every agent position to verify coordinate alignment + for (int i = 0; i < env->active_agent_count; i++) { + int agent_idx = env->active_agent_indices[i]; + Agent *agent = &env->agents[agent_idx]; + DrawCircle3D((Vector3){agent->sim_x, agent->sim_y, agent->sim_z + 0.1f}, + 5.0f, (Vector3){0, 0, 1}, 0.0f, GREEN); + } + // Then: draw trajectory lines and points if (env->predicted_traj_x != NULL && env->predicted_traj_len > 0) { for (int i = 0; i < env->active_agent_count; i++) { int agent_idx = env->active_agent_indices[i]; Agent *agent = &env->agents[agent_idx]; int tlen = env->predicted_traj_len; - Vector3 prev = {agent->sim_x, agent->sim_y, 0.6f}; + float z = agent->sim_z + 0.1f; + + Vector3 prev = {agent->sim_x, agent->sim_y, z}; for (int t = 0; t < tlen; t++) { float tx = env->predicted_traj_x[i * tlen + t]; float ty = env->predicted_traj_y[i * tlen + t]; - Vector3 curr = {tx, ty, 0.6f}; - DrawLine3D(prev, curr, Fade(SKYBLUE, 0.8f)); - DrawSphere(curr, 0.3f, Fade(SKYBLUE, 0.6f)); + Vector3 curr = {tx, ty, z}; + rlSetLineWidth(3.0f); + DrawLine3D(prev, curr, RED); + DrawCircle3D(curr, 1.5f, (Vector3){0, 0, 1}, 0.0f, RED); prev = curr; } } } + rlEnableDepthTest(); + EndMode3D(); - if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) { - env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count; - } - - // Draw debug info - DrawText(TextFormat("Camera Position: (%.2f, %.2f, %.2f)", client->camera.position.x, client->camera.position.y, - client->camera.position.z), + // 2D HUD overlay + DrawText(TextFormat("Timestep: %d Agents: %d Zoom: %.1fx", env->timestep, env->active_agent_count, + 1.0f / client->camera_zoom), 10, 10, 20, PUFF_WHITE); - DrawText(TextFormat("Camera Target: (%.2f, %.2f, %.2f)", client->camera.target.x, client->camera.target.y, - client->camera.target.z), - 10, 30, 20, PUFF_WHITE); - DrawText(TextFormat("Timestep: %d", env->timestep), 10, 50, 20, PUFF_WHITE); - - int human_idx = env->active_agent_indices[env->human_agent_idx]; - DrawText(TextFormat("Controlling Agent: %d", env->human_agent_idx), 10, 70, 20, PUFF_WHITE); - DrawText(TextFormat("Agent Index: %d", human_idx), 10, 90, 20, PUFF_WHITE); - - // Display current action values - yellow when controlling, white otherwise - Color action_color = IsKeyDown(KEY_LEFT_SHIFT) ? YELLOW : PUFF_WHITE; - - if (env->action_type == 0) { // discrete - int *action_array = (int *)env->actions; - int action_val = action_array[env->human_agent_idx]; + DrawText("Scroll=zoom, Drag=pan, TAB=cycle agent", 10, client->height - 25, 18, PUFF_WHITE); - if (env->dynamics_model == CLASSIC) { - int num_steer = 13; - int accel_idx = action_val / num_steer; - int steer_idx = action_val % num_steer; - float accel_value = ACCELERATION_VALUES[accel_idx]; - float steer_value = STEERING_VALUES[steer_idx]; - - DrawText(TextFormat("Acceleration: %.2f m/s^2", accel_value), 10, 110, 20, action_color); - DrawText(TextFormat("Steering: %.3f", steer_value), 10, 130, 20, action_color); - } else if (env->dynamics_model == JERK) { - int num_lat = 3; - int jerk_long_idx = action_val / num_lat; - int jerk_lat_idx = action_val % num_lat; - float jerk_long_value = JERK_LONG[jerk_long_idx]; - float jerk_lat_value = JERK_LAT[jerk_lat_idx]; - - DrawText(TextFormat("Longitudinal Jerk: %.2f m/s^3", jerk_long_value), 10, 110, 20, action_color); - DrawText(TextFormat("Lateral Jerk: %.2f m/s^3", jerk_lat_value), 10, 130, 20, action_color); - } - } else { // continuous - float (*action_array_f)[2] = (float (*)[2])env->actions; - DrawText(TextFormat("Acceleration: %.2f", action_array_f[env->human_agent_idx][0]), 10, 110, 20, action_color); - DrawText(TextFormat("Steering: %.2f", action_array_f[env->human_agent_idx][1]), 10, 130, 20, action_color); - } - - // Show key press status - int status_y = 150; - if (IsKeyDown(KEY_LEFT_SHIFT)) { - DrawText("[shift pressed]", 10, status_y, 20, YELLOW); - status_y += 20; - } - if (IsKeyDown(KEY_SPACE)) { - DrawText("[space pressed]", 10, status_y, 20, YELLOW); - status_y += 20; - } - if (IsKeyDown(KEY_LEFT_CONTROL)) { - DrawText("[ctrl pressed]", 10, status_y, 20, YELLOW); - status_y += 20; + if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) { + env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count; } - // Controls help - DrawText("Controls: SHIFT + W/S - Accelerate/Brake, SHIFT + A/D - Steer, TAB - Switch Agent", 10, - client->height - 30, 20, PUFF_WHITE); - - DrawText(TextFormat("Grid Rows: %d", env->grid_map->grid_rows), 10, status_y, 20, PUFF_WHITE); - DrawText(TextFormat("Grid Cols: %d", env->grid_map->grid_cols), 10, status_y + 20, 20, PUFF_WHITE); EndDrawing(); } diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 1444495c53..7a9d9531f9 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -703,8 +703,8 @@ def get_road_edge_polylines(self): return polylines - def render(self): - binding.vec_render(self.c_envs, 0) + def render(self, env_id=0): + binding.vec_render(self.c_envs, env_id) def set_predicted_trajectories(self, action_trajectories): """Roll out action trajectories through dynamics and set for rendering. @@ -712,14 +712,20 @@ def set_predicted_trajectories(self, action_trajectories): Args: action_trajectories: numpy array of shape [num_agents, traj_len] containing discrete action indices (e.g. classic: accel_idx * 13 + steer_idx). + num_agents is the total across all sub-envs. """ - num_agents, traj_len = action_trajectories.shape - binding.vec_set_trajectory( - self.c_envs, - 0, - action_trajectories.flatten().astype(np.int32), - traj_len, - ) + _, traj_len = action_trajectories.shape + # Set trajectories per sub-env using agent_offsets to slice correctly + for i in range(self.num_envs): + start = self.agent_offsets[i] + end = self.agent_offsets[i + 1] + sub_actions = action_trajectories[start:end] + binding.vec_set_trajectory( + self.c_envs, + i, + sub_actions.flatten().astype(np.int32), + traj_len, + ) def close(self): binding.vec_close(self.c_envs) From 2f4e23b6f36b06031ecc7c1548a0fffe5b2f59dc Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Tue, 24 Mar 2026 02:40:21 -0400 Subject: [PATCH 4/5] Address PR review: remove debug circles, fix memory leak, fix compat - Remove green debug circles from c_render (leftover instrumentation) - Fix comment: mouse drag uses left button, not right - Free predicted_traj_x/y in c_close (memory leak) - Fix forward() call sites in drive.c to use new 4-arg signature - Remove double EndMode3D in visualize.c renderTopDownView Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/drive/drive.c | 4 ++-- pufferlib/ocean/drive/drive.h | 12 +++--------- pufferlib/ocean/drive/visualize.c | 3 +-- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c index 82c4b94a75..19087227b8 100644 --- a/pufferlib/ocean/drive/drive.c +++ b/pufferlib/ocean/drive/drive.c @@ -23,7 +23,7 @@ void test_drivenet() { Weights *weights = load_weights("puffer_drive_weights.bin"); DriveNet *net = init_drivenet(weights, num_agents, CLASSIC, 0); - forward(net, observations, actions); + forward(net, NULL, observations, actions); for (int i = 0; i < num_agents * num_actions; i++) { printf("idx: %d, action: %d, logits:", i, actions[i]); for (int j = 0; j < num_actions; j++) { @@ -112,7 +112,7 @@ void demo() { int *actions = (int *)env.actions; // Single integer per agent if (!IsKeyDown(KEY_LEFT_SHIFT)) { - forward(net, env.observations, actions); + forward(net, &env, env.observations, actions); } else { if (env.dynamics_model == CLASSIC) { // Classic dynamics: acceleration and steering diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index de85e4a177..a67fbf33bb 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -2113,6 +2113,8 @@ void c_close(Drive *env) { free(env->expert_static_agent_indices); free(env->tracks_to_predict_indices); free(env->ini_file); + free(env->predicted_traj_x); + free(env->predicted_traj_y); } void allocate(Drive *env) { @@ -3996,7 +3998,7 @@ void c_render(Drive *env) { client->camera_zoom = 3.0f; } - // Mouse drag pan (right button or middle button) + // Mouse drag pan (left button) if (IsMouseButtonDown(MOUSE_BUTTON_LEFT)) { Vector2 delta = GetMouseDelta(); // Scale mouse pixels to world units based on current zoom @@ -4021,14 +4023,6 @@ void c_render(Drive *env) { // Re-enter 3D mode for trajectory drawing (draw_scene ended it) BeginMode3D(topdown_cam); rlDisableDepthTest(); - // First: draw GREEN circles at every agent position to verify coordinate alignment - for (int i = 0; i < env->active_agent_count; i++) { - int agent_idx = env->active_agent_indices[i]; - Agent *agent = &env->agents[agent_idx]; - DrawCircle3D((Vector3){agent->sim_x, agent->sim_y, agent->sim_z + 0.1f}, - 5.0f, (Vector3){0, 0, 1}, 0.0f, GREEN); - } - // Then: draw trajectory lines and points if (env->predicted_traj_x != NULL && env->predicted_traj_len > 0) { for (int i = 0; i < env->active_agent_count; i++) { int agent_idx = env->active_agent_indices[i]; diff --git a/pufferlib/ocean/drive/visualize.c b/pufferlib/ocean/drive/visualize.c index 8830579bbc..5c47926473 100644 --- a/pufferlib/ocean/drive/visualize.c +++ b/pufferlib/ocean/drive/visualize.c @@ -149,9 +149,8 @@ void renderTopDownView(Drive *env, Client *client, int map_height, int obs, int } } - // Draw scene + // Draw scene (note: draw_scene calls EndMode3D internally) draw_scene(env, client, 1, obs, lasers, show_grid); - EndMode3D(); EndDrawing(); } From 648181e22d57db25a1de2fa1eae9085583e232af Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Tue, 24 Mar 2026 23:12:27 -0400 Subject: [PATCH 5/5] Add trajectory eval support: augment obs with past actions, render trajectories - Eval loop now handles trajectory policies: concatenates past actions buffer to observations, uses sample_logits_action_sequence, and calls set_predicted_trajectories for visualization - Default actions_trajectory_length changed from 80 to 20 in torch.py Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/torch.py | 2 +- pufferlib/pufferl.py | 35 +++++++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py index a03fb9a92a..4bc41c9222 100644 --- a/pufferlib/ocean/torch.py +++ b/pufferlib/ocean/torch.py @@ -24,7 +24,7 @@ def __init__(self, env, input_size=128, hidden_size=128, **kwargs): self.max_road_objects = env.max_road_objects self.road_features = env.road_features self.road_features_after_onehot = env.road_features + 6 # 6 is the number of one-hot encoded categories - self.actions_trajectory_length = kwargs.get("actions_trajectory_length", 80) + self.actions_trajectory_length = kwargs.get("actions_trajectory_length", 20) # Determine ego dimension from environment's feature layout self.ego_dim = env.ego_features diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index ac949a8d52..2d669417e4 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -1468,6 +1468,13 @@ def eval(env_name, args=None, vecenv=None, policy=None): num_agents = vecenv.observation_space.shape[0] device = args["train"]["device"] + inner_policy = getattr(policy, 'policy', policy) + traj_len = getattr(inner_policy, 'actions_trajectory_length', 1) + # Past actions buffer for trajectory models (actions decomposed into accel + steer) + prev_actions_traj = torch.full( + (num_agents, traj_len, 2), -1.0, device=device + ) if traj_len > 1 else None + state = {} if args["train"]["use_rnn"]: state = dict( @@ -1494,15 +1501,31 @@ def eval(env_name, args=None, vecenv=None, policy=None): # time.sleep(1/args['fps']) with torch.no_grad(): - ob = torch.as_tensor(ob).to(device) - logits, value = policy.forward_eval(ob, state) - action, logprob, _ = pufferlib.pytorch.sample_logits(logits) - action = action.cpu().numpy().reshape(vecenv.action_space.shape) + ob_t = torch.as_tensor(ob).to(device) + if prev_actions_traj is not None: + prev_traj_flat = prev_actions_traj.reshape(num_agents, -1) + ob_aug = torch.cat([ob_t, prev_traj_flat], dim=-1) + else: + ob_aug = ob_t + + logits, value = policy.forward_eval(ob_aug, state) + if traj_len > 1: + action, logprob, _ = pufferlib.pytorch.sample_logits_action_sequence(logits) + action_np = action.cpu().numpy() + if hasattr(driver, 'set_predicted_trajectories'): + driver.set_predicted_trajectories(action_np) + # Update past actions buffer: decompose action into accel + steer + action_n2 = convert_single_action_integers_to_r2(action) + prev_actions_traj[:] = action_n2 + step_action = action_np[:, 0:1].reshape(vecenv.action_space.shape) + else: + action, logprob, _ = pufferlib.pytorch.sample_logits(logits) + step_action = action.cpu().numpy().reshape(vecenv.action_space.shape) if isinstance(logits, torch.distributions.Normal): - action = np.clip(action, vecenv.action_space.low, vecenv.action_space.high) + step_action = np.clip(step_action, vecenv.action_space.low, vecenv.action_space.high) - ob = vecenv.step(action)[0] + ob = vecenv.step(step_action)[0] if len(frames) > 0 and len(frames) == args["save_frames"]: import imageio