From c8e827be211d37590ab45c9bca957257f5b5884b Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <vinitsky.eugene@gmail.com>
Date: Mon, 23 Mar 2026 22:06:40 -0400
Subject: [PATCH 1/5] Add trajectory prediction to C network and renderer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Update DriveNet to match Ricky's actions_trajectory policy:
  - Add past_actions_encoder (Linear→ReLU→Linear, 40→64→64)
  - 4-way concat (ego, road, partner, past_actions) → shared_embedding
  - Actor outputs trajectory_len × action_size logits
- Roll out predicted trajectory using classic dynamics model
  after each forward pass, store in predicted_trajectory_x/y
- Draw predicted trajectories as sky-blue lines in renderTopDownView

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 pufferlib/ocean/drive/drivenet.h  | 123 ++++++++++++++++++++++++++++--
 pufferlib/ocean/drive/visualize.c |  26 ++++++-
 2 files changed, 138 insertions(+), 11 deletions(-)

diff --git a/pufferlib/ocean/drive/drivenet.h b/pufferlib/ocean/drive/drivenet.h
index 50b6a987a9..a1042adf03 100644
--- a/pufferlib/ocean/drive/drivenet.h
+++ b/pufferlib/ocean/drive/drivenet.h
@@ -10,14 +10,19 @@
 
 #define NN_INPUT_SIZE 64
 #define NN_HIDDEN_SIZE 256
+#define ACTIONS_TRAJECTORY_LENGTH 20
+#define PAST_ACTIONS_DIM (ACTIONS_TRAJECTORY_LENGTH * 2) // 20 steps × 2 action dims
 
 typedef struct DriveNet DriveNet;
 struct DriveNet {
     int num_agents;
     int ego_dim;
+    int action_size;     // per-step action size (e.g. 91 for classic)
+    int trajectory_len;  // number of trajectory steps
     float *obs_self;
     float *obs_partner;
     float *obs_road;
+    float *obs_past_actions;
     float *partner_linear_output;
     float *road_linear_output;
     float *partner_layernorm_output;
@@ -27,16 +32,20 @@ struct DriveNet {
     Linear *ego_encoder;
     Linear *road_encoder;
     Linear *partner_encoder;
+    Linear *past_actions_encoder;
     LayerNorm *ego_layernorm;
     LayerNorm *road_layernorm;
     LayerNorm *partner_layernorm;
     Linear *ego_encoder_two;
     Linear *road_encoder_two;
     Linear *partner_encoder_two;
+    Linear *past_actions_encoder_two;
+    ReLU *past_actions_relu;
     MaxDim1 *partner_max;
     MaxDim1 *road_max;
     CatDim1 *cat1;
     CatDim1 *cat2;
+    CatDim1 *cat3;
     GELU *gelu;
     Linear *shared_embedding;
     ReLU *relu;
@@ -44,6 +53,9 @@ struct DriveNet {
     Linear *actor;
     Linear *value_fn;
     Multidiscrete *multidiscrete;
+    // Predicted trajectory (rolled out from actor output)
+    float *predicted_trajectory_x;
+    float *predicted_trajectory_y;
 };
 
 DriveNet *init_drivenet(Weights *weights, int num_agents, int dynamics_model, int reward_conditioning) {
@@ -74,11 +86,17 @@ DriveNet *init_drivenet(Weights *weights, int num_agents, int dynamics_model, in
         action_dim = 1;
     }
 
+    int traj_len = ACTIONS_TRAJECTORY_LENGTH;
+    int total_actor_output = traj_len * action_size;
+
     net->num_agents = num_agents;
     net->ego_dim = ego_dim;
+    net->action_size = action_size;
+    net->trajectory_len = traj_len;
     net->obs_self = calloc(num_agents * ego_dim, sizeof(float));
     net->obs_partner = calloc(num_agents * max_partners * partner_features, sizeof(float));
     net->obs_road = calloc(num_agents * max_road_obs * road_feat_onehot, sizeof(float));
+    net->obs_past_actions = calloc(num_agents * PAST_ACTIONS_DIM, sizeof(float));
     net->partner_linear_output = calloc(num_agents * max_partners * input_size, sizeof(float));
     net->road_linear_output = calloc(num_agents * max_road_obs * input_size, sizeof(float));
     net->partner_linear_output_two = calloc(num_agents * max_partners * input_size, sizeof(float));
@@ -95,19 +113,27 @@ DriveNet *init_drivenet(Weights *weights, int num_agents, int dynamics_model, in
     net->partner_encoder = make_linear(weights, num_agents, partner_features, input_size);
     net->partner_layernorm = make_layernorm(weights, num_agents, input_size);
     net->partner_encoder_two = make_linear(weights, num_agents, input_size, input_size);
+    // Past actions encoder: Linear(PAST_ACTIONS_DIM → input_size) + ReLU + Linear(input_size → input_size)
+    net->past_actions_encoder = make_linear(weights, num_agents, PAST_ACTIONS_DIM, input_size);
+    net->past_actions_relu = make_relu(num_agents, input_size);
+    net->past_actions_encoder_two = make_linear(weights, num_agents, input_size, input_size);
     net->partner_max = make_max_dim1(num_agents, max_partners, input_size);
     net->road_max = make_max_dim1(num_agents, max_road_obs, input_size);
     net->cat1 = make_cat_dim1(num_agents, input_size, input_size);
     net->cat2 = make_cat_dim1(num_agents, input_size + input_size, input_size);
-    net->gelu = make_gelu(num_agents, 3 * input_size);
-    net->shared_embedding = make_linear(weights, num_agents, input_size * 3, hidden_size);
+    net->cat3 = make_cat_dim1(num_agents, input_size * 3, input_size);
+    net->gelu = make_gelu(num_agents, 4 * input_size);
+    net->shared_embedding = make_linear(weights, num_agents, input_size * 4, hidden_size);
     net->relu = make_relu(num_agents, hidden_size);
-    net->actor = make_linear(weights, num_agents, hidden_size, action_size);
+    net->actor = make_linear(weights, num_agents, hidden_size, total_actor_output);
     net->value_fn = make_linear(weights, num_agents, hidden_size, 1);
     net->lstm = make_lstm(weights, num_agents, hidden_size, NN_HIDDEN_SIZE);
     memset(net->lstm->state_h, 0, num_agents * NN_HIDDEN_SIZE * sizeof(float));
     memset(net->lstm->state_c, 0, num_agents * NN_HIDDEN_SIZE * sizeof(float));
     net->multidiscrete = make_multidiscrete(num_agents, logit_sizes, action_dim);
+    // Trajectory rollout buffers
+    net->predicted_trajectory_x = calloc(num_agents * traj_len, sizeof(float));
+    net->predicted_trajectory_y = calloc(num_agents * traj_len, sizeof(float));
     return net;
 }
 
@@ -115,6 +141,7 @@ void free_drivenet(DriveNet *net) {
     free(net->obs_self);
     free(net->obs_partner);
     free(net->obs_road);
+    free(net->obs_past_actions);
     free(net->partner_linear_output);
     free(net->road_linear_output);
     free(net->partner_linear_output_two);
@@ -124,16 +151,20 @@ void free_drivenet(DriveNet *net) {
     free(net->ego_encoder);
     free(net->road_encoder);
     free(net->partner_encoder);
+    free(net->past_actions_encoder);
     free(net->ego_layernorm);
     free(net->road_layernorm);
     free(net->partner_layernorm);
     free(net->ego_encoder_two);
     free(net->road_encoder_two);
     free(net->partner_encoder_two);
+    free(net->past_actions_encoder_two);
+    free(net->past_actions_relu);
     free(net->partner_max);
     free(net->road_max);
     free(net->cat1);
     free(net->cat2);
+    free(net->cat3);
     free(net->gelu);
     free(net->shared_embedding);
     free(net->relu);
@@ -141,26 +172,31 @@ void free_drivenet(DriveNet *net) {
     free(net->actor);
     free(net->value_fn);
     free(net->lstm);
+    free(net->predicted_trajectory_x);
+    free(net->predicted_trajectory_y);
     free(net);
 }
 
-void forward(DriveNet *net, float *observations, int *actions) {
+void forward(DriveNet *net, Drive *env, float *observations, int *actions) {
     int ego_dim = net->ego_dim;
     int max_partners = MAX_AGENTS - 1;
     int max_road_obs = MAX_ROAD_SEGMENT_OBSERVATIONS;
     int partner_features = PARTNER_FEATURES;
     int road_features = ROAD_FEATURES;
     int road_feat_onehot = road_features + 6; // one-hot extra 6 features for road
+    int obs_stride = ego_dim + max_partners * partner_features + max_road_obs * road_features + PAST_ACTIONS_DIM;
 
     // Clear previous observations
     memset(net->obs_self, 0, net->num_agents * ego_dim * sizeof(float));
     memset(net->obs_partner, 0, net->num_agents * max_partners * partner_features * sizeof(float));
     memset(net->obs_road, 0, net->num_agents * max_road_obs * road_feat_onehot * sizeof(float));
+    memset(net->obs_past_actions, 0, net->num_agents * PAST_ACTIONS_DIM * sizeof(float));
 
     for (int b = 0; b < net->num_agents; b++) {
-        int b_offset = b * (ego_dim + max_partners * partner_features + max_road_obs * road_features);
+        int b_offset = b * obs_stride;
         int partner_offset = b_offset + ego_dim;
         int road_offset = b_offset + ego_dim + max_partners * partner_features;
+        int past_actions_offset = road_offset + max_road_obs * road_features;
 
         // Process self observation
         for (int i = 0; i < ego_dim; i++) {
@@ -191,6 +227,11 @@ void forward(DriveNet *net, float *observations, int *actions) {
                 }
             }
         }
+
+        // Process past actions trajectory
+        for (int i = 0; i < PAST_ACTIONS_DIM; i++) {
+            net->obs_past_actions[b * PAST_ACTIONS_DIM + i] = observations[past_actions_offset + i];
+        }
     }
 
     // Forward pass through the network
@@ -258,17 +299,85 @@ void forward(DriveNet *net, float *observations, int *actions) {
         }
     }
 
+    // Past actions encoder: Linear → ReLU → Linear
+    linear(net->past_actions_encoder, net->obs_past_actions);
+    relu(net->past_actions_relu, net->past_actions_encoder->output);
+    linear(net->past_actions_encoder_two, net->past_actions_relu->output);
+
     max_dim1(net->partner_max, net->partner_linear_output_two);
     max_dim1(net->road_max, net->road_linear_output_two);
+    // Concatenate: [ego, road, partner, past_actions] (4 × input_size)
     cat_dim1(net->cat1, net->ego_encoder_two->output, net->road_max->output);
     cat_dim1(net->cat2, net->cat1->output, net->partner_max->output);
-    gelu(net->gelu, net->cat2->output);
+    cat_dim1(net->cat3, net->cat2->output, net->past_actions_encoder_two->output);
+    gelu(net->gelu, net->cat3->output);
     linear(net->shared_embedding, net->gelu->output);
     relu(net->relu, net->shared_embedding->output);
     lstm(net->lstm, net->relu->output);
     linear(net->actor, net->lstm->state_h);
     linear(net->value_fn, net->lstm->state_h);
 
-    // Get action by taking argmax of actor output
+    // Actor output is [num_agents × (trajectory_len × action_size)]
+    // Take first timestep's action via softmax_multidiscrete
     softmax_multidiscrete(net->multidiscrete, net->actor->output, actions);
+
+    // Roll out predicted trajectory using classic dynamics
+    if (env != NULL) {
+        int traj_len = net->trajectory_len;
+        int action_size = net->action_size;
+        int num_steer = sizeof(STEERING_VALUES) / sizeof(STEERING_VALUES[0]);
+
+        for (int b = 0; b < net->num_agents; b++) {
+            int agent_idx = env->active_agent_indices[b];
+            Agent *agent = &env->agents[agent_idx];
+
+            // Start from current agent state
+            float x = agent->sim_x;
+            float y = agent->sim_y;
+            float heading = agent->sim_heading;
+            float vx = agent->sim_vx;
+            float vy = agent->sim_vy;
+
+            float *actor_out = &net->actor->output[b * traj_len * action_size];
+
+            for (int t = 0; t < traj_len; t++) {
+                // Find argmax action for this timestep
+                float *logits = &actor_out[t * action_size];
+                int best_action = 0;
+                float best_val = logits[0];
+                for (int a = 1; a < action_size; a++) {
+                    if (logits[a] > best_val) {
+                        best_val = logits[a];
+                        best_action = a;
+                    }
+                }
+
+                // Decode action into acceleration + steering (classic model)
+                int accel_idx = best_action / num_steer;
+                int steer_idx = best_action % num_steer;
+                float acceleration = ACCELERATION_VALUES[accel_idx];
+                float steering = STEERING_VALUES[steer_idx];
+
+                // Classic dynamics step
+                float speed_mag = sqrtf(vx * vx + vy * vy);
+                float v_dot = vx * cosf(heading) + vy * sinf(heading);
+                float signed_speed = copysignf(speed_mag, v_dot);
+                signed_speed = signed_speed + acceleration * env->dt;
+                signed_speed = clipSpeed(signed_speed);
+                float beta = tanhf(0.5f * tanf(steering));
+                float new_vx = signed_speed * cosf(heading + beta);
+                float new_vy = signed_speed * sinf(heading + beta);
+                float yaw_rate = (signed_speed * cosf(beta) * tanf(steering)) / agent->sim_length;
+
+                x += new_vx * env->dt;
+                y += new_vy * env->dt;
+                heading += yaw_rate * env->dt;
+                vx = new_vx;
+                vy = new_vy;
+
+                net->predicted_trajectory_x[b * traj_len + t] = x;
+                net->predicted_trajectory_y[b * traj_len + t] = y;
+            }
+        }
+    }
 }
diff --git a/pufferlib/ocean/drive/visualize.c b/pufferlib/ocean/drive/visualize.c
index f98430875d..8830579bbc 100644
--- a/pufferlib/ocean/drive/visualize.c
+++ b/pufferlib/ocean/drive/visualize.c
@@ -67,7 +67,7 @@ void CloseVideo(VideoRecorder *recorder) {
 
 void renderTopDownView(Drive *env, Client *client, int map_height, int obs, int lasers, int trajectories,
                        int frame_count, float *path, int show_human_logs, int show_grid, int img_width, int img_height,
-                       int zoom_in) {
+                       int zoom_in, DriveNet *net) {
     BeginDrawing();
 
     // Top-down orthographic camera
@@ -124,6 +124,24 @@ void renderTopDownView(Drive *env, Client *client, int map_height, int obs, int
         }
     }
 
+    // Draw predicted trajectories from policy network
+    if (net != NULL && net->predicted_trajectory_x != NULL) {
+        for (int i = 0; i < env->active_agent_count && i < net->num_agents; i++) {
+            int traj_len = net->trajectory_len;
+            int agent_idx = env->active_agent_indices[i];
+            Agent *agent = &env->agents[agent_idx];
+            Vector3 prev = {agent->sim_x, agent->sim_y, 0.6f};
+            for (int t = 0; t < traj_len; t++) {
+                float tx = net->predicted_trajectory_x[i * traj_len + t];
+                float ty = net->predicted_trajectory_y[i * traj_len + t];
+                Vector3 curr = {tx, ty, 0.6f};
+                DrawLine3D(prev, curr, Fade(SKYBLUE, 0.8f));
+                DrawSphere(curr, 0.3f, Fade(SKYBLUE, 0.6f));
+                prev = curr;
+            }
+        }
+    }
+
     // Draw agent trajs
     if (trajectories) {
         for (int i = 0; i < frame_count; i++) {
@@ -397,11 +415,11 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
         for (int i = 0; i < frame_count; i++) {
             if (i % frame_skip == 0) {
                 renderTopDownView(&env, client, map_height, 0, 0, 0, frame_count, NULL, show_human_logs, show_grid,
-                                  img_width, img_height, zoom_in);
+                                  img_width, img_height, zoom_in, net);
                 WriteFrame(&topdown_recorder, img_width, img_height);
                 rendered_frames++;
             }
-            forward(net, env.observations, (int *)env.actions);
+            forward(net, &env, env.observations, (int *)env.actions);
             c_step(&env);
         }
     }
@@ -419,7 +437,7 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
                 WriteFrame(&agent_recorder, img_width, img_height);
                 rendered_frames++;
             }
-            forward(net, env.observations, (int *)env.actions);
+            forward(net, &env, env.observations, (int *)env.actions);
             c_step(&env);
         }
     }

From a59bccb35c1ad6b2bffbc64cd7dcb8b9ef19a929 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <vinitsky.eugene@gmail.com>
Date: Mon, 23 Mar 2026 22:32:39 -0400
Subject: [PATCH 2/5] Add trajectory rollout and rendering infrastructure

- Add predicted_traj_x/y fields to Drive struct
- Add vec_set_trajectory binding: takes action indices, rolls out
  through classic dynamics in C, stores x/y positions for rendering
- c_render draws predicted trajectories as sky-blue lines/spheres
- Add headless ffmpeg rendering functions (start/stop/write_render_pipe)
- Python set_predicted_trajectories() method on Drive class

Usage: after getting actor output from torch policy, extract argmax
actions for each trajectory step, pass to env.set_predicted_trajectories(),
then call env.render().

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 pufferlib/ocean/drive/drive.h  | 76 ++++++++++++++++++++++++++++++++++
 pufferlib/ocean/drive/drive.py | 15 +++++++
 pufferlib/ocean/env_binding.h  | 67 ++++++++++++++++++++++++++++++
 3 files changed, 158 insertions(+)

diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index c21159be93..f984d298dc 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -351,6 +351,16 @@ struct Drive {
     int turn_off_normalization;
     RewardBound reward_bounds[NUM_REWARD_COEFS];
     float min_avg_speed_to_consider_goal_attempt;
+
+    // Predicted trajectory visualization (set from Python, drawn by c_render)
+    float *predicted_traj_x;  // [num_agents * traj_len]
+    float *predicted_traj_y;  // [num_agents * traj_len]
+    int predicted_traj_len;   // steps per agent (0 = no trajectory to draw)
+
+    // Headless rendering (ffmpeg pipe)
+    int render_mode;      // 0=window, 1=headless
+    int render_pipe_fd;   // write end of pipe to ffmpeg
+    pid_t render_pid;     // ffmpeg child process
 };
 
 // ========================================
@@ -3914,6 +3924,54 @@ void draw_scene(Drive *env, Client *client, int mode, int obs_only, int lasers,
     }
 }
 
+// Start headless ffmpeg pipe for recording
+void start_render_pipe(Drive *env, int width, int height) {
+    int pipefd[2];
+    if (pipe(pipefd) == -1) {
+        fprintf(stderr, "Failed to create render pipe\n");
+        return;
+    }
+    pid_t pid = fork();
+    if (pid == 0) { // child: run ffmpeg
+        close(pipefd[1]);
+        dup2(pipefd[0], STDIN_FILENO);
+        close(pipefd[0]);
+        for (int fd = 3; fd < 256; fd++)
+            close(fd);
+        char size_str[64];
+        snprintf(size_str, sizeof(size_str), "%dx%d", width, height);
+        execlp("ffmpeg", "ffmpeg", "-y", "-f", "rawvideo", "-pix_fmt", "rgba", "-s", size_str, "-r", "30", "-i", "-",
+               "-c:v", "libx264", "-pix_fmt", "yuv420p", "-preset", "ultrafast", "-crf", "23", "-loglevel", "error",
+               "/tmp/pufferdrive_render.mp4", NULL);
+        fprintf(stderr, "execlp ffmpeg failed\n");
+        _exit(1);
+    }
+    close(pipefd[0]);
+    env->render_pid = pid;
+    env->render_pipe_fd = pipefd[1];
+}
+
+void stop_render_pipe(Drive *env) {
+    if (env->render_pipe_fd > 0) {
+        close(env->render_pipe_fd);
+        env->render_pipe_fd = 0;
+    }
+    if (env->render_pid > 0) {
+        waitpid(env->render_pid, NULL, 0);
+        env->render_pid = 0;
+    }
+}
+
+void write_render_frame(Drive *env, int width, int height) {
+    if (env->render_pipe_fd <= 0)
+        return;
+    unsigned char *pixels = rlReadScreenPixels(width, height);
+    if (pixels) {
+        write(env->render_pipe_fd, pixels, width * height * 4);
+        RL_FREE(pixels);
+    }
+}
+
 void c_render(Drive *env) {
     if (env->client == NULL) {
         env->client = make_client(env);
@@ -3926,6 +3984,24 @@ void c_render(Drive *env) {
     handle_camera_controls(env->client);
     draw_scene(env, client, 0, 0, 0, 0);
 
+    // Draw predicted trajectories (set from Python)
+    if (env->predicted_traj_x != NULL && env->predicted_traj_len > 0) {
+        for (int i = 0; i < env->active_agent_count; i++) {
+            int agent_idx = env->active_agent_indices[i];
+            Agent *agent = &env->agents[agent_idx];
+            int tlen = env->predicted_traj_len;
+            Vector3 prev = {agent->sim_x, agent->sim_y, 0.6f};
+            for (int t = 0; t < tlen; t++) {
+                float tx = env->predicted_traj_x[i * tlen + t];
+                float ty = env->predicted_traj_y[i * tlen + t];
+                Vector3 curr = {tx, ty, 0.6f};
+                DrawLine3D(prev, curr, Fade(SKYBLUE, 0.8f));
+                DrawSphere(curr, 0.3f, Fade(SKYBLUE, 0.6f));
+                prev = curr;
+            }
+        }
+    }
+
     if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) {
         env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count;
     }
diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py
index 5a2d5d29ec..1444495c53 100644
--- a/pufferlib/ocean/drive/drive.py
+++ b/pufferlib/ocean/drive/drive.py
@@ -706,6 +706,21 @@ def get_road_edge_polylines(self):
     def render(self):
         binding.vec_render(self.c_envs, 0)
 
+    def set_predicted_trajectories(self, action_trajectories):
+        """Roll out action trajectories through dynamics and set for rendering.
+
+        Args:
+            action_trajectories: numpy array of shape [num_agents, traj_len]
+                containing discrete action indices (e.g. classic: accel_idx * 13 + steer_idx).
+        """
+        num_agents, traj_len = action_trajectories.shape
+        binding.vec_set_trajectory(
+            self.c_envs,
+            0,
+            action_trajectories.flatten().astype(np.int32),
+            traj_len,
+        )
+
     def close(self):
         binding.vec_close(self.c_envs)
 
diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h
index 2c8ac3192d..cec3abd26e 100644
--- a/pufferlib/ocean/env_binding.h
+++ b/pufferlib/ocean/env_binding.h
@@ -541,6 +541,72 @@ static PyObject *vec_render(PyObject *self, PyObject *args) {
     Py_RETURN_NONE;
 }
 
+// Roll out action trajectories through dynamics and store for rendering.
+// action_traj: [num_agents * traj_len] array of discrete action indices.
+static PyObject *vec_set_trajectory(PyObject *self, PyObject *args) {
+    if (PyTuple_Size(args) != 4) {
+        PyErr_SetString(PyExc_TypeError, "vec_set_trajectory requires 4 args: vec_env, env_id, action_traj, traj_len");
+        return NULL;
+    }
+    VecEnv *vec = (VecEnv *)PyLong_AsVoidPtr(PyTuple_GetItem(args, 0));
+    int env_id = PyLong_AsLong(PyTuple_GetItem(args, 1));
+    PyArrayObject *action_arr = (PyArrayObject *)PyTuple_GetItem(args, 2);
+    int traj_len = PyLong_AsLong(PyTuple_GetItem(args, 3));
+
+    Env *env = vec->envs[env_id];
+    int num_agents = env->active_agent_count;
+    int *actions = (int *)PyArray_DATA(action_arr);
+    int num_steer = sizeof(STEERING_VALUES) / sizeof(STEERING_VALUES[0]);
+
+    // Allocate/reallocate trajectory buffers
+    free(env->predicted_traj_x);
+    free(env->predicted_traj_y);
+    int total = num_agents * traj_len;
+    env->predicted_traj_x = (float *)calloc(total, sizeof(float));
+    env->predicted_traj_y = (float *)calloc(total, sizeof(float));
+    env->predicted_traj_len = traj_len;
+
+    // Roll out each agent's trajectory using classic dynamics
+    for (int i = 0; i < num_agents; i++) {
+        int agent_idx = env->active_agent_indices[i];
+        Agent *agent = &env->agents[agent_idx];
+        float x = agent->sim_x;
+        float y = agent->sim_y;
+        float heading = agent->sim_heading;
+        float vx = agent->sim_vx;
+        float vy = agent->sim_vy;
+
+        for (int t = 0; t < traj_len; t++) {
+            int action_val = actions[i * traj_len + t];
+            int accel_idx = action_val / num_steer;
+            int steer_idx = action_val % num_steer;
+            float acceleration = ACCELERATION_VALUES[accel_idx];
+            float steering = STEERING_VALUES[steer_idx];
+
+            float speed_mag = sqrtf(vx * vx + vy * vy);
+            float v_dot = vx * cosf(heading) + vy * sinf(heading);
+            float signed_speed = copysignf(speed_mag, v_dot);
+            signed_speed += acceleration * env->dt;
+            signed_speed = clipSpeed(signed_speed);
+            float beta = tanhf(0.5f * tanf(steering));
+            float new_vx = signed_speed * cosf(heading + beta);
+            float new_vy = signed_speed * sinf(heading + beta);
+            float yaw_rate = (signed_speed * cosf(beta) * tanf(steering)) / agent->sim_length;
+
+            x += new_vx * env->dt;
+            y += new_vy * env->dt;
+            heading += yaw_rate * env->dt;
+            vx = new_vx;
+            vy = new_vy;
+
+            env->predicted_traj_x[i * traj_len + t] = x;
+            env->predicted_traj_y[i * traj_len + t] = y;
+        }
+    }
+
+    Py_RETURN_NONE;
+}
+
 static int assign_to_dict(PyObject *dict, char *key, float value) {
     PyObject *v = PyFloat_FromDouble(value);
     if (v == NULL) {
@@ -977,6 +1043,7 @@ static PyMethodDef methods[] = {
     {"vec_step", vec_step, METH_VARARGS, "Step the vector of environments"},
     {"vec_log", vec_log, METH_VARARGS, "Log the vector of environments"},
     {"vec_render", vec_render, METH_VARARGS, "Render the vector of environments"},
+    {"vec_set_trajectory", vec_set_trajectory, METH_VARARGS, "Set predicted trajectory for rendering"},
     {"vec_close", vec_close, METH_VARARGS, "Close the vector of environments"},
     {"shared", (PyCFunction)my_shared, METH_VARARGS | METH_KEYWORDS, "Shared state"},
     {"get_global_agent_state", get_global_agent_state, METH_VARARGS, "Get global agent state"},

From e70ddb116159052cbf5d25406f6122a5129f95f0 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <vinitsky.eugene@gmail.com>
Date: Tue, 24 Mar 2026 02:18:54 -0400
Subject: [PATCH 3/5] Fix trajectory rendering: re-enter 3D mode after
 draw_scene

draw_scene() calls EndMode3D() internally, so trajectory drawing was
happening in 2D screen space instead of world coordinates. Now re-enters
BeginMode3D with the same camera after draw_scene returns.

Also:
- Top-down orthographic camera centered on map
- Mouse drag to pan, scroll to zoom
- Per-sub-env trajectory slicing via agent_offsets
- rlDisableDepthTest so trajectories render on top

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 pufferlib/ocean/drive/drive.h  | 141 +++++++++++++++------------------
 pufferlib/ocean/drive/drive.py |  24 +++---
 2 files changed, 80 insertions(+), 85 deletions(-)

diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index f984d298dc..de85e4a177 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -3975,103 +3975,92 @@ void write_render_frame(Drive *env, int width, int height) {
 void c_render(Drive *env) {
     if (env->client == NULL) {
         env->client = make_client(env);
+        // Initialize pan offset to map center
+        float cx = (env->grid_map->top_left_x + env->grid_map->bottom_right_x) / 2.0f;
+        float cy = (env->grid_map->top_left_y + env->grid_map->bottom_right_y) / 2.0f;
+        env->client->camera_target = (Vector3){cx, cy, 0.0f};
     }
     Client *client = env->client;
     BeginDrawing();
-    Color road = (Color){35, 35, 37, 255};
-    ClearBackground(road);
-    BeginMode3D(client->camera);
-    handle_camera_controls(env->client);
-    draw_scene(env, client, 0, 0, 0, 0);
+    ClearBackground((Color){35, 35, 37, 255});
 
-    // Draw predicted trajectories (set from Python)
+    float map_height = env->grid_map->top_left_y - env->grid_map->bottom_right_y;
+
+    // Mouse wheel zoom
+    float wheel = GetMouseWheelMove();
+    if (wheel != 0) {
+        client->camera_zoom -= wheel * 0.1f;
+        if (client->camera_zoom < 0.05f)
+            client->camera_zoom = 0.05f;
+        if (client->camera_zoom > 3.0f)
+            client->camera_zoom = 3.0f;
+    }
+
+    // Mouse drag pan (right button or middle button)
+    if (IsMouseButtonDown(MOUSE_BUTTON_LEFT)) {
+        Vector2 delta = GetMouseDelta();
+        // Scale mouse pixels to world units based on current zoom
+        float scale = (map_height * 1.1f * client->camera_zoom) / client->height;
+        client->camera_target.x -= delta.x * scale;
+        client->camera_target.y += delta.y * scale;
+    }
+
+    // Build top-down camera from pan/zoom state
+    Camera3D topdown_cam = {0};
+    topdown_cam.position = (Vector3){client->camera_target.x, client->camera_target.y, 500.0f};
+    topdown_cam.target = client->camera_target;
+    topdown_cam.up = (Vector3){0.0f, -1.0f, 0.0f};
+    topdown_cam.fovy = map_height * 1.1f * client->camera_zoom;
+    topdown_cam.projection = CAMERA_ORTHOGRAPHIC;
+
+    BeginMode3D(topdown_cam);
+
+    // Draw scene (mode=1 for top-down style) — NOTE: draw_scene calls EndMode3D() internally
+    draw_scene(env, client, 1, 0, 0, 0);
+
+    // Re-enter 3D mode for trajectory drawing (draw_scene ended it)
+    BeginMode3D(topdown_cam);
+    rlDisableDepthTest();
+    // First: draw GREEN circles at every agent position to verify coordinate alignment
+    for (int i = 0; i < env->active_agent_count; i++) {
+        int agent_idx = env->active_agent_indices[i];
+        Agent *agent = &env->agents[agent_idx];
+        DrawCircle3D((Vector3){agent->sim_x, agent->sim_y, agent->sim_z + 0.1f},
+                     5.0f, (Vector3){0, 0, 1}, 0.0f, GREEN);
+    }
+    // Then: draw trajectory lines and points
     if (env->predicted_traj_x != NULL && env->predicted_traj_len > 0) {
         for (int i = 0; i < env->active_agent_count; i++) {
             int agent_idx = env->active_agent_indices[i];
             Agent *agent = &env->agents[agent_idx];
             int tlen = env->predicted_traj_len;
-            Vector3 prev = {agent->sim_x, agent->sim_y, 0.6f};
+            float z = agent->sim_z + 0.1f;
+
+            Vector3 prev = {agent->sim_x, agent->sim_y, z};
             for (int t = 0; t < tlen; t++) {
                 float tx = env->predicted_traj_x[i * tlen + t];
                 float ty = env->predicted_traj_y[i * tlen + t];
-                Vector3 curr = {tx, ty, 0.6f};
-                DrawLine3D(prev, curr, Fade(SKYBLUE, 0.8f));
-                DrawSphere(curr, 0.3f, Fade(SKYBLUE, 0.6f));
+                Vector3 curr = {tx, ty, z};
+                rlSetLineWidth(3.0f);
+                DrawLine3D(prev, curr, RED);
+                DrawCircle3D(curr, 1.5f, (Vector3){0, 0, 1}, 0.0f, RED);
                 prev = curr;
             }
         }
     }
+    rlEnableDepthTest();
+    EndMode3D();
 
-    if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) {
-        env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count;
-    }
-
-    // Draw debug info
-    DrawText(TextFormat("Camera Position: (%.2f, %.2f, %.2f)", client->camera.position.x, client->camera.position.y,
-                        client->camera.position.z),
+    // 2D HUD overlay
+    DrawText(TextFormat("Timestep: %d  Agents: %d  Zoom: %.1fx", env->timestep, env->active_agent_count,
+                        1.0f / client->camera_zoom),
              10, 10, 20, PUFF_WHITE);
-    DrawText(TextFormat("Camera Target: (%.2f, %.2f, %.2f)", client->camera.target.x, client->camera.target.y,
-                        client->camera.target.z),
-             10, 30, 20, PUFF_WHITE);
-    DrawText(TextFormat("Timestep: %d", env->timestep), 10, 50, 20, PUFF_WHITE);
-
-    int human_idx = env->active_agent_indices[env->human_agent_idx];
-    DrawText(TextFormat("Controlling Agent: %d", env->human_agent_idx), 10, 70, 20, PUFF_WHITE);
-    DrawText(TextFormat("Agent Index: %d", human_idx), 10, 90, 20, PUFF_WHITE);
-
-    // Display current action values - yellow when controlling, white otherwise
-    Color action_color = IsKeyDown(KEY_LEFT_SHIFT) ? YELLOW : PUFF_WHITE;
-
-    if (env->action_type == 0) { // discrete
-        int *action_array = (int *)env->actions;
-        int action_val = action_array[env->human_agent_idx];
+    DrawText("Scroll=zoom, Drag=pan, TAB=cycle agent", 10, client->height - 25, 18, PUFF_WHITE);
 
-        if (env->dynamics_model == CLASSIC) {
-            int num_steer = 13;
-            int accel_idx = action_val / num_steer;
-            int steer_idx = action_val % num_steer;
-            float accel_value = ACCELERATION_VALUES[accel_idx];
-            float steer_value = STEERING_VALUES[steer_idx];
-
-            DrawText(TextFormat("Acceleration: %.2f m/s^2", accel_value), 10, 110, 20, action_color);
-            DrawText(TextFormat("Steering: %.3f", steer_value), 10, 130, 20, action_color);
-        } else if (env->dynamics_model == JERK) {
-            int num_lat = 3;
-            int jerk_long_idx = action_val / num_lat;
-            int jerk_lat_idx = action_val % num_lat;
-            float jerk_long_value = JERK_LONG[jerk_long_idx];
-            float jerk_lat_value = JERK_LAT[jerk_lat_idx];
-
-            DrawText(TextFormat("Longitudinal Jerk: %.2f m/s^3", jerk_long_value), 10, 110, 20, action_color);
-            DrawText(TextFormat("Lateral Jerk: %.2f m/s^3", jerk_lat_value), 10, 130, 20, action_color);
-        }
-    } else { // continuous
-        float (*action_array_f)[2] = (float (*)[2])env->actions;
-        DrawText(TextFormat("Acceleration: %.2f", action_array_f[env->human_agent_idx][0]), 10, 110, 20, action_color);
-        DrawText(TextFormat("Steering: %.2f", action_array_f[env->human_agent_idx][1]), 10, 130, 20, action_color);
-    }
-
-    // Show key press status
-    int status_y = 150;
-    if (IsKeyDown(KEY_LEFT_SHIFT)) {
-        DrawText("[shift pressed]", 10, status_y, 20, YELLOW);
-        status_y += 20;
-    }
-    if (IsKeyDown(KEY_SPACE)) {
-        DrawText("[space pressed]", 10, status_y, 20, YELLOW);
-        status_y += 20;
-    }
-    if (IsKeyDown(KEY_LEFT_CONTROL)) {
-        DrawText("[ctrl pressed]", 10, status_y, 20, YELLOW);
-        status_y += 20;
+    if (IsKeyPressed(KEY_TAB) && env->active_agent_count > 0) {
+        env->human_agent_idx = (env->human_agent_idx + 1) % env->active_agent_count;
     }
 
-    // Controls help
-    DrawText("Controls: SHIFT + W/S - Accelerate/Brake, SHIFT + A/D - Steer, TAB - Switch Agent", 10,
-             client->height - 30, 20, PUFF_WHITE);
-
-    DrawText(TextFormat("Grid Rows: %d", env->grid_map->grid_rows), 10, status_y, 20, PUFF_WHITE);
-    DrawText(TextFormat("Grid Cols: %d", env->grid_map->grid_cols), 10, status_y + 20, 20, PUFF_WHITE);
     EndDrawing();
 }
 
diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py
index 1444495c53..7a9d9531f9 100644
--- a/pufferlib/ocean/drive/drive.py
+++ b/pufferlib/ocean/drive/drive.py
@@ -703,8 +703,8 @@ def get_road_edge_polylines(self):
 
         return polylines
 
-    def render(self):
-        binding.vec_render(self.c_envs, 0)
+    def render(self, env_id=0):
+        binding.vec_render(self.c_envs, env_id)
 
     def set_predicted_trajectories(self, action_trajectories):
         """Roll out action trajectories through dynamics and set for rendering.
@@ -712,14 +712,20 @@ def set_predicted_trajectories(self, action_trajectories):
         Args:
             action_trajectories: numpy array of shape [num_agents, traj_len]
                 containing discrete action indices (e.g. classic: accel_idx * 13 + steer_idx).
+                num_agents is the total across all sub-envs.
         """
-        num_agents, traj_len = action_trajectories.shape
-        binding.vec_set_trajectory(
-            self.c_envs,
-            0,
-            action_trajectories.flatten().astype(np.int32),
-            traj_len,
-        )
+        _, traj_len = action_trajectories.shape
+        # Set trajectories per sub-env using agent_offsets to slice correctly
+        for i in range(self.num_envs):
+            start = self.agent_offsets[i]
+            end = self.agent_offsets[i + 1]
+            sub_actions = action_trajectories[start:end]
+            binding.vec_set_trajectory(
+                self.c_envs,
+                i,
+                sub_actions.flatten().astype(np.int32),
+                traj_len,
+            )
 
     def close(self):
         binding.vec_close(self.c_envs)

From 2f4e23b6f36b06031ecc7c1548a0fffe5b2f59dc Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <vinitsky.eugene@gmail.com>
Date: Tue, 24 Mar 2026 02:40:21 -0400
Subject: [PATCH 4/5] Address PR review: remove debug circles, fix memory leak,
 fix compat

- Remove green debug circles from c_render (leftover instrumentation)
- Fix comment: mouse drag uses left button, not right
- Free predicted_traj_x/y in c_close (memory leak)
- Fix forward() call sites in drive.c to use new 4-arg signature
- Remove double EndMode3D in visualize.c renderTopDownView

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 pufferlib/ocean/drive/drive.c     |  4 ++--
 pufferlib/ocean/drive/drive.h     | 12 +++---------
 pufferlib/ocean/drive/visualize.c |  3 +--
 3 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c
index 82c4b94a75..19087227b8 100644
--- a/pufferlib/ocean/drive/drive.c
+++ b/pufferlib/ocean/drive/drive.c
@@ -23,7 +23,7 @@ void test_drivenet() {
     Weights *weights = load_weights("puffer_drive_weights.bin");
     DriveNet *net = init_drivenet(weights, num_agents, CLASSIC, 0);
 
-    forward(net, observations, actions);
+    forward(net, NULL, observations, actions);
     for (int i = 0; i < num_agents * num_actions; i++) {
         printf("idx: %d, action: %d, logits:", i, actions[i]);
         for (int j = 0; j < num_actions; j++) {
@@ -112,7 +112,7 @@ void demo() {
         int *actions = (int *)env.actions; // Single integer per agent
 
         if (!IsKeyDown(KEY_LEFT_SHIFT)) {
-            forward(net, env.observations, actions);
+            forward(net, &env, env.observations, actions);
         } else {
             if (env.dynamics_model == CLASSIC) {
                 // Classic dynamics: acceleration and steering
diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index de85e4a177..a67fbf33bb 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -2113,6 +2113,8 @@ void c_close(Drive *env) {
     free(env->expert_static_agent_indices);
     free(env->tracks_to_predict_indices);
     free(env->ini_file);
+    free(env->predicted_traj_x);
+    free(env->predicted_traj_y);
 }
 
 void allocate(Drive *env) {
@@ -3996,7 +3998,7 @@ void c_render(Drive *env) {
             client->camera_zoom = 3.0f;
     }
 
-    // Mouse drag pan (right button or middle button)
+    // Mouse drag pan (left button)
     if (IsMouseButtonDown(MOUSE_BUTTON_LEFT)) {
         Vector2 delta = GetMouseDelta();
         // Scale mouse pixels to world units based on current zoom
@@ -4021,14 +4023,6 @@ void c_render(Drive *env) {
     // Re-enter 3D mode for trajectory drawing (draw_scene ended it)
     BeginMode3D(topdown_cam);
     rlDisableDepthTest();
-    // First: draw GREEN circles at every agent position to verify coordinate alignment
-    for (int i = 0; i < env->active_agent_count; i++) {
-        int agent_idx = env->active_agent_indices[i];
-        Agent *agent = &env->agents[agent_idx];
-        DrawCircle3D((Vector3){agent->sim_x, agent->sim_y, agent->sim_z + 0.1f},
-                     5.0f, (Vector3){0, 0, 1}, 0.0f, GREEN);
-    }
-    // Then: draw trajectory lines and points
     if (env->predicted_traj_x != NULL && env->predicted_traj_len > 0) {
         for (int i = 0; i < env->active_agent_count; i++) {
             int agent_idx = env->active_agent_indices[i];
diff --git a/pufferlib/ocean/drive/visualize.c b/pufferlib/ocean/drive/visualize.c
index 8830579bbc..5c47926473 100644
--- a/pufferlib/ocean/drive/visualize.c
+++ b/pufferlib/ocean/drive/visualize.c
@@ -149,9 +149,8 @@ void renderTopDownView(Drive *env, Client *client, int map_height, int obs, int
         }
     }
 
-    // Draw scene
+    // Draw scene (note: draw_scene calls EndMode3D internally)
     draw_scene(env, client, 1, obs, lasers, show_grid);
-    EndMode3D();
     EndDrawing();
 }
 

From 648181e22d57db25a1de2fa1eae9085583e232af Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <vinitsky.eugene@gmail.com>
Date: Tue, 24 Mar 2026 23:12:27 -0400
Subject: [PATCH 5/5] Add trajectory eval support: augment obs with past
 actions, render trajectories

- Eval loop now handles trajectory policies: concatenates past actions buffer
  to observations, uses sample_logits_action_sequence, and calls
  set_predicted_trajectories for visualization
- Default actions_trajectory_length changed from 80 to 20 in torch.py

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 pufferlib/ocean/torch.py |  2 +-
 pufferlib/pufferl.py     | 35 +++++++++++++++++++++++++++++------
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py
index a03fb9a92a..4bc41c9222 100644
--- a/pufferlib/ocean/torch.py
+++ b/pufferlib/ocean/torch.py
@@ -24,7 +24,7 @@ def __init__(self, env, input_size=128, hidden_size=128, **kwargs):
         self.max_road_objects = env.max_road_objects
         self.road_features = env.road_features
         self.road_features_after_onehot = env.road_features + 6  # 6 is the number of one-hot encoded categories
-        self.actions_trajectory_length = kwargs.get("actions_trajectory_length", 80)
+        self.actions_trajectory_length = kwargs.get("actions_trajectory_length", 20)
         # Determine ego dimension from environment's feature layout
         self.ego_dim = env.ego_features
 
diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py
index ac949a8d52..2d669417e4 100644
--- a/pufferlib/pufferl.py
+++ b/pufferlib/pufferl.py
@@ -1468,6 +1468,13 @@ def eval(env_name, args=None, vecenv=None, policy=None):
         num_agents = vecenv.observation_space.shape[0]
         device = args["train"]["device"]
 
+        inner_policy = getattr(policy, 'policy', policy)
+        traj_len = getattr(inner_policy, 'actions_trajectory_length', 1)
+        # Past actions buffer for trajectory models (actions decomposed into accel + steer)
+        prev_actions_traj = torch.full(
+            (num_agents, traj_len, 2), -1.0, device=device
+        ) if traj_len > 1 else None
+
         state = {}
         if args["train"]["use_rnn"]:
             state = dict(
@@ -1494,15 +1501,31 @@ def eval(env_name, args=None, vecenv=None, policy=None):
                 # time.sleep(1/args['fps'])
 
             with torch.no_grad():
-                ob = torch.as_tensor(ob).to(device)
-                logits, value = policy.forward_eval(ob, state)
-                action, logprob, _ = pufferlib.pytorch.sample_logits(logits)
-                action = action.cpu().numpy().reshape(vecenv.action_space.shape)
+                ob_t = torch.as_tensor(ob).to(device)
+                if prev_actions_traj is not None:
+                    prev_traj_flat = prev_actions_traj.reshape(num_agents, -1)
+                    ob_aug = torch.cat([ob_t, prev_traj_flat], dim=-1)
+                else:
+                    ob_aug = ob_t
+
+                logits, value = policy.forward_eval(ob_aug, state)
+                if traj_len > 1:
+                    action, logprob, _ = pufferlib.pytorch.sample_logits_action_sequence(logits)
+                    action_np = action.cpu().numpy()
+                    if hasattr(driver, 'set_predicted_trajectories'):
+                        driver.set_predicted_trajectories(action_np)
+                    # Update past actions buffer: decompose action into accel + steer
+                    action_n2 = convert_single_action_integers_to_r2(action)
+                    prev_actions_traj[:] = action_n2
+                    step_action = action_np[:, 0:1].reshape(vecenv.action_space.shape)
+                else:
+                    action, logprob, _ = pufferlib.pytorch.sample_logits(logits)
+                    step_action = action.cpu().numpy().reshape(vecenv.action_space.shape)
 
             if isinstance(logits, torch.distributions.Normal):
-                action = np.clip(action, vecenv.action_space.low, vecenv.action_space.high)
+                step_action = np.clip(step_action, vecenv.action_space.low, vecenv.action_space.high)
 
-            ob = vecenv.step(action)[0]
+            ob = vecenv.step(step_action)[0]
 
             if len(frames) > 0 and len(frames) == args["save_frames"]:
                 import imageio