diff --git a/.gitignore b/.gitignore index 66e4b066be..71497ea416 100644 --- a/.gitignore +++ b/.gitignore @@ -194,3 +194,4 @@ pufferlib/resources/drive/output*.mp4 # Local TODO tracking TODO.md +*.mp4 diff --git a/pufferlib/config/ocean/adaptive.ini b/pufferlib/config/ocean/adaptive.ini index 598faf50ff..8abea752a9 100644 --- a/pufferlib/config/ocean/adaptive.ini +++ b/pufferlib/config/ocean/adaptive.ini @@ -2,18 +2,20 @@ package = ocean env_name = puffer_adaptive_drive policy_name = Drive -transformer_name = Transformer - ; Changed from rnn_name +rnn_name = Recurrent [vec] num_workers = 16 num_envs = 16 -batch_size = 2 +batch_size = 1 ; backend = Serial [policy] -input_size = 128 -; Increased from 64 for richer representations +input_size = 64 +hidden_size = 256 + +[rnn] +input_size = 256 hidden_size = 256 [transformer] @@ -29,8 +31,8 @@ dropout = 0.0 ; Dropout (keep at 0 for RL stability initially) [env] -num_agents = 1512 -num_ego_agents = 756 +num_agents = 1024 +num_ego_agents = 512 ; Options: discrete, continuous action_type = discrete ; Options: classic, jerk @@ -120,7 +122,7 @@ minibatch_size = 36400 ; 400 * 91 max_minibatch_size = 36400 minibatch_multiplier = 400 -policy_architecture = Transformer +policy_architecture = Recurrent ; Matches scenario_length for buffer organization bptt_horizon = 32 ; Keep for backward compatibility @@ -131,7 +133,7 @@ clip_coef = 0.2 ent_coef = 0.005 gae_lambda = 0.95 gamma = 0.98 -learning_rate = 0.0003 +learning_rate = 0.003 -; Reduced from 0.003 (transformers often need lower LR) +; Raised back to 0.003 for the Recurrent policy max_grad_norm = 1.0 prio_alpha = 0.85 @@ -193,6 +195,8 @@ human_replay_num_agents = 32 human_replay_num_rollouts = 100 ; Number of maps to use for human replay evaluation human_replay_num_maps = 100 +; Number of maps to render for human replay (subset of eval maps) +human_replay_render_num_maps = 3 [sweep.train.learning_rate] distribution = log_normal diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 
7d5efd43be..c9a9eaf5c5 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -2,7 +2,7 @@ package = ocean env_name = puffer_drive policy_name = Drive -rnn_name = Transformer +rnn_name = Recurrent [vec] num_workers = 16 @@ -14,9 +14,9 @@ batch_size = 2 input_size = 64 hidden_size = 256 -; [rnn] -; input_size = 256 -; hidden_size = 256 +[rnn] +input_size = 256 +hidden_size = 256 [transformer] input_size = 256 @@ -112,15 +112,13 @@ discount_weight_ub = 0.80 [train] seed=42 total_timesteps = 2_000_000_000 -# learning_rate = 0.02 -# gamma = 0.985 anneal_lr = True ; Needs to be: num_agents * num_workers * BPTT horizon batch_size = auto minibatch_size = 32768 max_minibatch_size = 32768 -; minibatch_size = 256 -; max_minibatch_size = 256 +minibatch_multiplier = 400 +policy_architecture = Recurrent bptt_horizon = 32 adam_beta1 = 0.9 adam_beta2 = 0.999 @@ -130,17 +128,15 @@ ent_coef = 0.005 gae_lambda = 0.95 gamma = 0.98 learning_rate = 0.003 -max_grad_norm = 1 -prio_alpha = 0.8499999999999999 -prio_beta0 = 0.8499999999999999 +max_grad_norm = 1.0 +prio_alpha = 0.85 +prio_beta0 = 0.85 update_epochs = 1 -vf_clip_coef = 0.1999999999999999 -vf_coef = 2 +vf_clip_coef = 0.2 +vf_coef = 2.0 vtrace_c_clip = 1 vtrace_rho_clip = 1 -checkpoint_interval = 100 -use_transformer = True -context_window = 32 +checkpoint_interval = 10 # Rendering options render = True render_interval = 100 @@ -184,8 +180,16 @@ wosac_sanity_check = False wosac_aggregate_results = True ; If True, enable human replay evaluation (pair policy-controlled agent with human replays) human_replay_eval = False -; Control only the self-driving car -human_replay_control_mode = "control_sdc_only" +; Control mode for human replay (control_vehicles with max_controlled_agents=1 controls one agent) +human_replay_control_mode = "control_vehicles" +; Number of agents in human replay evaluation environment +human_replay_num_agents = 32 +; 
Number of rollouts for human replay evaluation +human_replay_num_rollouts = 100 +; Number of maps to use for human replay evaluation +human_replay_num_maps = 100 +; Number of maps to render for human replay (subset of eval maps) +human_replay_render_num_maps = 3 [sweep.train.learning_rate] distribution = log_normal diff --git a/pufferlib/ocean/drive/drive.c b/pufferlib/ocean/drive/drive.c index 9f6337051c..bae514790c 100644 --- a/pufferlib/ocean/drive/drive.c +++ b/pufferlib/ocean/drive/drive.c @@ -34,20 +34,18 @@ void test_drivenet() { void demo() { // Note: The settings below are hardcoded for demo purposes. Since the policy was - // trained with these exact settings, that changing them may lead to - // weird behavior. + // trained with these exact settings, changing them may lead to weird behavior. Drive env = { .human_agent_idx = 0, - .dynamics_model = conf.dynamics_model, - .reward_vehicle_collision = conf.reward_vehicle_collision, - .reward_offroad_collision = conf.reward_offroad_collision, - .reward_ade = conf.reward_ade, - .goal_radius = conf.goal_radius, - .dt = conf.dt, + .dynamics_model = CLASSIC, + .reward_vehicle_collision = -1.0f, + .reward_offroad_collision = -1.0f, + .goal_radius = 2.0f, + .dt = 0.1f, .map_name = "resources/drive/binaries/training/map_000.bin", - .init_steps = conf.init_steps, - .collision_behavior = conf.collision_behavior, - .offroad_behavior = conf.offroad_behavior, + .init_steps = 0, + .collision_behavior = 0, + .offroad_behavior = 0, }; allocate(&env); c_reset(&env); diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 420e726b5e..f4938cd58d 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -1084,7 +1084,8 @@ def test_performance(timeout=10, atn_cache=1024, num_agents=1024): if __name__ == "__main__": # test_performance() # Process the train dataset - process_all_maps(data_folder="/data/processed/training") + # 
process_all_maps(data_folder="/data/processed/training") + process_all_maps(data_folder="/data/nuplan_gpudrive/nuplan") # Process the validation/test dataset # process_all_maps(data_folder="data/processed/validation") # # Process the validation_interactive dataset diff --git a/pufferlib/ocean/drive/visualize.c b/pufferlib/ocean/drive/visualize.c index 4820bd9be0..6fcc26a73e 100644 --- a/pufferlib/ocean/drive/visualize.c +++ b/pufferlib/ocean/drive/visualize.c @@ -65,11 +65,15 @@ void CloseVideo(VideoRecorder *recorder) { waitpid(recorder->pid, NULL, 0); } -void renderTopDownView(Drive *env, Client *client, int map_height, int obs, int lasers, int trajectories, - int frame_count, float *path, int show_human_logs, int show_grid, int img_width, int img_height, - int zoom_in) { +void renderTopDownView(Drive *env, Client *client, float map_width, float map_height, int obs, int lasers, + int trajectories, int frame_count, float *path, int show_human_logs, int show_grid, + int img_width, int img_height, int zoom_in, int current_scenario, int total_scenarios) { BeginDrawing(); + // Calculate map center + float center_x = (env->grid_map->top_left_x + env->grid_map->bottom_right_x) / 2.0f; + float center_y = (env->grid_map->top_left_y + env->grid_map->bottom_right_y) / 2.0f; + // Top-down orthographic camera Camera3D camera = {0}; @@ -77,10 +81,11 @@ void renderTopDownView(Drive *env, Client *client, int map_height, int obs, int camera.position = (Vector3){0.0f, 0.0f, 500.0f}; // above the scene camera.target = (Vector3){0.0f, 0.0f, 0.0f}; // look at origin camera.fovy = map_height; - } else { // Show full map - camera.position = (Vector3){env->grid_map->top_left_x, env->grid_map->bottom_right_y, 500.0f}; - camera.target = (Vector3){env->grid_map->top_left_x, env->grid_map->bottom_right_y, 0.0f}; - camera.fovy = 2 * map_height; + } else { // Show full map - center camera on map + camera.position = (Vector3){center_x, center_y, 500.0f}; + camera.target = (Vector3){center_x, 
center_y, 0.0f}; + // Use the larger dimension to ensure full map is visible + camera.fovy = (map_height > map_width) ? map_height * 1.1f : map_width * 1.1f; } camera.up = (Vector3){0.0f, -1.0f, 0.0f}; @@ -133,6 +138,12 @@ void renderTopDownView(Drive *env, Client *client, int map_height, int obs, int // Draw scene draw_scene(env, client, 1, obs, lasers, show_grid); EndMode3D(); + + // Draw scenario counter overlay (2D text on top of 3D scene) + char scenario_text[64]; + snprintf(scenario_text, sizeof(scenario_text), "Scenario %d / %d", current_scenario, total_scenarios); + DrawText(scenario_text, 20, 20, 30, WHITE); + EndDrawing(); } @@ -189,13 +200,114 @@ static int make_gif_from_frames(const char *pattern, int fps, const char *palett return 0; } +// Transform observations from ego format to co-player format by inserting conditioning values +// src_obs: Source observations (may include ego conditioning) +// dst_obs: Destination buffer for co-player format (with co-player conditioning) +// num_agents: Number of agents to transform +// ego_base_dim: Base ego features (7 for CLASSIC, 10 for JERK) +// co_use_rc/ec/dc: Co-player conditioning flags (determines which features to insert) +void transform_obs_for_coplayer(float *src_obs, float *dst_obs, int num_agents, int ego_obs_size, int coplayer_obs_size, + int ego_base_dim, int co_use_rc, int co_use_ec, int co_use_dc, float collision_lb, + float collision_ub, float offroad_lb, float offroad_ub, float goal_lb, float goal_ub, + float entropy_lb, float entropy_ub, float discount_lb, float discount_ub) { + // Fixed sizes for partner and road features (from drive.h constants) + int partner_features = (MAX_AGENTS - 1) * PARTNER_FEATURES; + int road_features = MAX_ROAD_SEGMENT_OBSERVATIONS * ROAD_FEATURES; + int partner_road_features = partner_features + road_features; + + // Derive source conditioning size from ego_obs_size + // This handles cases where ego policy has conditioning (type != "none") + int src_conditioning = 
ego_obs_size - ego_base_dim - partner_road_features; + + // Calculate destination conditioning size from flags + int dst_conditioning = (co_use_rc ? 3 : 0) + (co_use_ec ? 1 : 0) + (co_use_dc ? 1 : 0); + + for (int i = 0; i < num_agents; i++) { + float *src = src_obs + i * ego_obs_size; + float *dst = dst_obs + i * coplayer_obs_size; + + // Copy ego base features (without conditioning) + memcpy(dst, src, ego_base_dim * sizeof(float)); + + // Sample and insert conditioning values based on flags + // Order must match: reward (3), entropy (1), discount (1) + int cond_idx = ego_base_dim; + if (co_use_rc) { + // Reward conditioning (3 features: collision, offroad, goal) + dst[cond_idx++] = collision_lb + (float)rand() / RAND_MAX * (collision_ub - collision_lb); + dst[cond_idx++] = offroad_lb + (float)rand() / RAND_MAX * (offroad_ub - offroad_lb); + dst[cond_idx++] = goal_lb + (float)rand() / RAND_MAX * (goal_ub - goal_lb); + } + if (co_use_ec) { + // Entropy conditioning (1 feature) + dst[cond_idx++] = entropy_lb + (float)rand() / RAND_MAX * (entropy_ub - entropy_lb); + } + if (co_use_dc) { + // Discount conditioning (1 feature) + dst[cond_idx++] = discount_lb + (float)rand() / RAND_MAX * (discount_ub - discount_lb); + } + + // Copy partner + road features, skipping over any source conditioning + memcpy(dst + ego_base_dim + dst_conditioning, src + ego_base_dim + src_conditioning, + partner_road_features * sizeof(float)); + } +} + +// Helper function for dual-policy forward pass +// Runs ego policy on first num_ego_agents, co-player policy on the rest +// Handles different observation sizes between ego and co-player policies +void forward_population(DriveNet *ego_net, DriveNet *co_player_net, float *observations, int *actions, + int num_ego_agents, int num_co_players, int ego_obs_size, int coplayer_obs_size, + int ego_base_dim, int co_use_rc, int co_use_ec, int co_use_dc, float co_collision_lb, + float co_collision_ub, float co_offroad_lb, float co_offroad_ub, float 
co_goal_lb, + float co_goal_ub, float co_entropy_lb, float co_entropy_ub, float co_discount_lb, + float co_discount_ub) { + if (co_player_net == NULL || num_co_players == 0) { + // Single policy mode - use ego net for all agents + forward(ego_net, observations, actions); + return; + } + + // Allocate temporary buffers for ego observations/actions + float *ego_obs = (float *)malloc(num_ego_agents * ego_obs_size * sizeof(float)); + int *ego_actions = (int *)malloc(num_ego_agents * sizeof(int)); + + // Allocate temporary buffers for co-player observations/actions + float *co_obs_raw = observations + num_ego_agents * ego_obs_size; + float *co_obs_transformed = (float *)malloc(num_co_players * coplayer_obs_size * sizeof(float)); + int *co_actions = (int *)malloc(num_co_players * sizeof(int)); + + // Copy ego observations (already correct format) + memcpy(ego_obs, observations, num_ego_agents * ego_obs_size * sizeof(float)); + + // Transform co-player observations (add conditioning features) + transform_obs_for_coplayer(co_obs_raw, co_obs_transformed, num_co_players, ego_obs_size, coplayer_obs_size, + ego_base_dim, co_use_rc, co_use_ec, co_use_dc, co_collision_lb, co_collision_ub, + co_offroad_lb, co_offroad_ub, co_goal_lb, co_goal_ub, co_entropy_lb, co_entropy_ub, + co_discount_lb, co_discount_ub); + + // Run forward on each network + forward(ego_net, ego_obs, ego_actions); + forward(co_player_net, co_obs_transformed, co_actions); + + // Combine actions back + memcpy(actions, ego_actions, num_ego_agents * sizeof(int)); + memcpy(actions + num_ego_agents, co_actions, num_co_players * sizeof(int)); + + // Cleanup + free(ego_obs); + free(ego_actions); + free(co_obs_transformed); + free(co_actions); +} + int eval_gif(const char *map_name, const char *policy_name, int show_grid, int obs_only, int lasers, int show_human_logs, int frame_skip, const char *view_mode, const char *output_topdown, - const char *output_agent, int num_maps, int zoom_in) { + const char *output_agent, 
int num_maps, int zoom_in, const char *ini_file, int k_scenarios_cli, + int max_controlled_agents_cli, const char *co_player_policy_name) { // Parse configuration from INI file env_init_config conf = {0}; - const char *ini_file = "pufferlib/config/ocean/drive.ini"; if (ini_parse(ini_file, handler, &conf) < 0) { fprintf(stderr, "Error: Could not load %s. Cannot determine environment configuration.\n", ini_file); return -1; @@ -269,7 +381,7 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o .entropy_weight_ub = (conf.conditioning != NULL) ? conf.conditioning->entropy_weight_ub : 0.0f, .discount_weight_lb = (conf.conditioning != NULL) ? conf.conditioning->discount_weight_lb : 0.0f, .discount_weight_ub = (conf.conditioning != NULL) ? conf.conditioning->discount_weight_ub : 0.0f, - .max_controlled_agents = 32, + .max_controlled_agents = (max_controlled_agents_cli > 0) ? max_controlled_agents_cli : 32, }; allocate(&env); @@ -317,11 +429,92 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o client->cyclist = LoadModel("resources/drive/cyclist.glb"); client->pedestrian = LoadModel("resources/drive/pedestrian.glb"); + // Determine number of ego agents vs co-players + int num_ego_agents = env.active_agent_count; + int num_co_players = 0; + DriveNet *co_player_net = NULL; + + // Co-player conditioning flags (hoisted to outer scope for later use) + int co_use_rc = 0, co_use_ec = 0, co_use_dc = 0; + + // Check if co-player policy is provided (either via CLI or INI) + const char *actual_co_player_policy = co_player_policy_name; + if (actual_co_player_policy == NULL && conf.co_player_enabled && strlen(conf.co_player_policy_path) > 0) { + actual_co_player_policy = conf.co_player_policy_path; + } + + if (actual_co_player_policy != NULL) { + // Population play mode - split agents between ego and co-player + // Use num_ego_agents from config, or default to half + if (conf.num_ego_agents > 0 && conf.num_ego_agents < 
env.active_agent_count) { + num_ego_agents = conf.num_ego_agents; + } else { + num_ego_agents = env.active_agent_count / 2; + } + num_co_players = env.active_agent_count - num_ego_agents; + + printf("Population play: %d ego agents, %d co-players\n", num_ego_agents, num_co_players); + + // Load co-player policy + FILE *co_policy_file = fopen(actual_co_player_policy, "rb"); + if (co_policy_file != NULL) { + fclose(co_policy_file); + Weights *co_weights = load_weights(actual_co_player_policy); + + // Determine co-player conditioning from config + if (conf.co_player_conditioning != NULL) { + co_use_rc = (strcmp(conf.co_player_conditioning->type, "reward") == 0 || + strcmp(conf.co_player_conditioning->type, "all") == 0); + co_use_ec = (strcmp(conf.co_player_conditioning->type, "entropy") == 0 || + strcmp(conf.co_player_conditioning->type, "all") == 0); + co_use_dc = (strcmp(conf.co_player_conditioning->type, "discount") == 0 || + strcmp(conf.co_player_conditioning->type, "all") == 0); + } + + co_player_net = + init_drivenet(co_weights, num_co_players, env.dynamics_model, co_use_rc, co_use_ec, co_use_dc); + printf("Co-player policy loaded with conditioning: rc=%d, ec=%d, dc=%d\n", co_use_rc, co_use_ec, co_use_dc); + } else { + printf("Warning: Could not load co-player policy from %s. Using main policy for all agents.\n", + actual_co_player_policy); + num_ego_agents = env.active_agent_count; + num_co_players = 0; + } + } + + // Extract co-player conditioning bounds from config + float co_collision_lb = 0, co_collision_ub = 0; + float co_offroad_lb = 0, co_offroad_ub = 0; + float co_goal_lb = 0, co_goal_ub = 0; + float co_entropy_lb = 0, co_entropy_ub = 0; + float co_discount_lb = 0, co_discount_ub = 0; + + // Get conditioning dims directly from co_player_net to ensure consistency + int coplayer_num_conditioning = (co_player_net != NULL) ? 
co_player_net->conditioning_dims : 0; + + if (conf.co_player_conditioning != NULL) { + co_collision_lb = conf.co_player_conditioning->reward_collision_weight_lb; + co_collision_ub = conf.co_player_conditioning->reward_collision_weight_ub; + co_offroad_lb = conf.co_player_conditioning->reward_offroad_weight_lb; + co_offroad_ub = conf.co_player_conditioning->reward_offroad_weight_ub; + co_goal_lb = conf.co_player_conditioning->reward_goal_weight_lb; + co_goal_ub = conf.co_player_conditioning->reward_goal_weight_ub; + co_entropy_lb = conf.co_player_conditioning->entropy_weight_lb; + co_entropy_ub = conf.co_player_conditioning->entropy_weight_ub; + co_discount_lb = conf.co_player_conditioning->discount_weight_lb; + co_discount_ub = conf.co_player_conditioning->discount_weight_ub; + } + + // Load main (ego) policy Weights *weights = load_weights(policy_name); printf("Active agents in map: %d\n", env.active_agent_count); - DriveNet *net = init_drivenet(weights, env.active_agent_count, env.dynamics_model, use_rc, use_ec, use_dc); + DriveNet *net = init_drivenet(weights, num_ego_agents, env.dynamics_model, use_rc, use_ec, use_dc); - int frame_count = env.scenario_length > 0 ? env.scenario_length : TRAJECTORY_LENGTH_DEFAULT; + // Calculate frame count: k_scenarios * scenario_length for adaptive agents + int scenario_length = env.scenario_length > 0 ? env.scenario_length : TRAJECTORY_LENGTH_DEFAULT; + int k_scenarios = (k_scenarios_cli > 0) ? k_scenarios_cli : (conf.k_scenarios > 0 ? conf.k_scenarios : 1); + int frame_count = k_scenarios * scenario_length; + printf("Rendering %d scenarios x %d steps = %d total frames\n", k_scenarios, scenario_length, frame_count); char filename_topdown[256]; char filename_agent[256]; @@ -373,16 +566,39 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o } } + // Calculate observation sizes per agent + // ego_base_dim: 7 for CLASSIC dynamics, 10 for JERK dynamics + int ego_base_dim = (env.dynamics_model == 1) ? 
10 : 7; // 1 = JERK + + // Ego observation size (environment generates observations without conditioning for ego) + int ego_obs_size = + net->ego_dim + (MAX_AGENTS - 1) * PARTNER_FEATURES + MAX_ROAD_SEGMENT_OBSERVATIONS * ROAD_FEATURES; + + // Co-player observation size (includes conditioning features) + int coplayer_obs_size = ego_obs_size; + if (co_player_net != NULL) { + coplayer_obs_size = co_player_net->ego_dim + (MAX_AGENTS - 1) * PARTNER_FEATURES + + MAX_ROAD_SEGMENT_OBSERVATIONS * ROAD_FEATURES; + } + + printf("Observation sizes: ego=%d, coplayer=%d, ego_base_dim=%d, coplayer_conditioning=%d\n", ego_obs_size, + coplayer_obs_size, ego_base_dim, coplayer_num_conditioning); + if (render_topdown) { printf("Recording topdown view...\n"); for (int i = 0; i < frame_count; i++) { + // Calculate current scenario (1-indexed for display) + int current_scenario = (i / scenario_length) + 1; if (i % frame_skip == 0) { - renderTopDownView(&env, client, map_height, 0, 0, 0, frame_count, NULL, show_human_logs, show_grid, - img_width, img_height, zoom_in); + renderTopDownView(&env, client, map_width, map_height, 0, 0, 0, frame_count, NULL, show_human_logs, + show_grid, img_width, img_height, zoom_in, current_scenario, k_scenarios); WriteFrame(&topdown_recorder, img_width, img_height); rendered_frames++; } - forward(net, env.observations, (int *)env.actions); + forward_population(net, co_player_net, env.observations, (int *)env.actions, num_ego_agents, num_co_players, + ego_obs_size, coplayer_obs_size, ego_base_dim, co_use_rc, co_use_ec, co_use_dc, + co_collision_lb, co_collision_ub, co_offroad_lb, co_offroad_ub, co_goal_lb, co_goal_ub, + co_entropy_lb, co_entropy_ub, co_discount_lb, co_discount_ub); c_step(&env); } } @@ -400,7 +616,10 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o WriteFrame(&agent_recorder, img_width, img_height); rendered_frames++; } - forward(net, env.observations, (int *)env.actions); + forward_population(net, 
co_player_net, env.observations, (int *)env.actions, num_ego_agents, num_co_players, + ego_obs_size, coplayer_obs_size, ego_base_dim, co_use_rc, co_use_ec, co_use_dc, + co_collision_lb, co_collision_ub, co_offroad_lb, co_offroad_ub, co_goal_lb, co_goal_ub, + co_entropy_lb, co_entropy_ub, co_discount_lb, co_discount_ub); c_step(&env); } } @@ -424,6 +643,9 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o free_allocated(&env); free_drivenet(net); free(weights); + if (co_player_net != NULL) { + free_drivenet(co_player_net); + } return 0; } @@ -440,10 +662,14 @@ int main(int argc, char *argv[]) { // File paths and num_maps (not in [env] section) const char *map_name = NULL; const char *policy_name = "resources/drive/puffer_drive_weights.bin"; + const char *co_player_policy_name = NULL; const char *output_topdown = NULL; const char *output_agent = NULL; + const char *ini_file = "pufferlib/config/ocean/drive.ini"; int num_maps = 1; int scenario_length_cli = -1; + int k_scenarios_cli = -1; + int max_controlled_agents_cli = -1; int use_rc = 0; int use_ec = 0; int use_dc = 0; @@ -515,10 +741,43 @@ int main(int argc, char *argv[]) { num_maps = atoi(argv[i + 1]); i++; } + } else if (strcmp(argv[i], "--ini-file") == 0) { + if (i + 1 < argc) { + ini_file = argv[i + 1]; + i++; + } else { + fprintf(stderr, "Error: --ini-file option requires a file path\n"); + return 1; + } + } else if (strcmp(argv[i], "--k-scenarios") == 0) { + if (i + 1 < argc) { + k_scenarios_cli = atoi(argv[i + 1]); + i++; + } else { + fprintf(stderr, "Error: --k-scenarios option requires a number\n"); + return 1; + } + } else if (strcmp(argv[i], "--max-controlled-agents") == 0) { + if (i + 1 < argc) { + max_controlled_agents_cli = atoi(argv[i + 1]); + i++; + } else { + fprintf(stderr, "Error: --max-controlled-agents option requires a number\n"); + return 1; + } + } else if (strcmp(argv[i], "--co-player-policy") == 0) { + if (i + 1 < argc) { + co_player_policy_name = argv[i + 
1]; + i++; + } else { + fprintf(stderr, "Error: --co-player-policy option requires a file path\n"); + return 1; + } } } eval_gif(map_name, policy_name, show_grid, obs_only, lasers, show_human_logs, frame_skip, view_mode, output_topdown, - output_agent, num_maps, zoom_in); + output_agent, num_maps, zoom_in, ini_file, k_scenarios_cli, max_controlled_agents_cli, + co_player_policy_name); return 0; } diff --git a/pufferlib/ocean/env_config.h b/pufferlib/ocean/env_config.h index 4f26a8b32e..35f2806806 100644 --- a/pufferlib/ocean/env_config.h +++ b/pufferlib/ocean/env_config.h @@ -38,12 +38,18 @@ typedef struct { int goal_behavior; float goal_target_distance; int scenario_length; + int k_scenarios; int termination_mode; int init_steps; int init_mode; int control_mode; char map_dir[256]; conditioning_config *conditioning; + // Population play settings + int co_player_enabled; + int num_ego_agents; + char co_player_policy_path[256]; + conditioning_config *co_player_conditioning; } env_init_config; // INI file parser handler - parses all environment configuration from drive.ini @@ -97,6 +103,8 @@ static int handler(void *config, const char *section, const char *name, const ch env_config->dt = atof(value); } else if (MATCH("env", "scenario_length")) { env_config->scenario_length = atoi(value); + } else if (MATCH("env", "k_scenarios")) { + env_config->k_scenarios = atoi(value); } else if (MATCH("env", "termination_mode")) { env_config->termination_mode = atoi(value); } else if (MATCH("env", "init_steps")) { @@ -175,6 +183,85 @@ static int handler(void *config, const char *section, const char *name, const ch } env_config->conditioning->discount_weight_ub = atof(value); } + // Population play settings + else if (MATCH("env", "co_player_enabled")) { + if (strcmp(value, "True") == 0 || strcmp(value, "true") == 0 || strcmp(value, "1") == 0) { + env_config->co_player_enabled = 1; + } else { + env_config->co_player_enabled = 0; + } + } else if (MATCH("env", "num_ego_agents")) { + 
env_config->num_ego_agents = atoi(value); + } + // Co-player policy settings + else if (MATCH("env.co_player_policy", "policy_path")) { + if (sscanf(value, "\"%255[^\"]\"", env_config->co_player_policy_path) != 1) { + strncpy(env_config->co_player_policy_path, value, sizeof(env_config->co_player_policy_path) - 1); + env_config->co_player_policy_path[sizeof(env_config->co_player_policy_path) - 1] = '\0'; + } + } else if (MATCH("env.co_player_policy.conditioning", "type")) { + if (env_config->co_player_conditioning == NULL) { + env_config->co_player_conditioning = (conditioning_config *)malloc(sizeof(conditioning_config)); + } + if (value[0] == '"') { + size_t len = strlen(value) - 2; + env_config->co_player_conditioning->type = (char *)malloc(len + 1); + strncpy(env_config->co_player_conditioning->type, value + 1, len); + env_config->co_player_conditioning->type[len] = '\0'; + } else { + env_config->co_player_conditioning->type = strdup(value); + } + } else if (MATCH("env.co_player_policy.conditioning", "collision_weight_lb")) { + if (env_config->co_player_conditioning == NULL) { + env_config->co_player_conditioning = (conditioning_config *)malloc(sizeof(conditioning_config)); + } + env_config->co_player_conditioning->reward_collision_weight_lb = atof(value); + } else if (MATCH("env.co_player_policy.conditioning", "collision_weight_ub")) { + if (env_config->co_player_conditioning == NULL) { + env_config->co_player_conditioning = (conditioning_config *)malloc(sizeof(conditioning_config)); + } + env_config->co_player_conditioning->reward_collision_weight_ub = atof(value); + } else if (MATCH("env.co_player_policy.conditioning", "offroad_weight_lb")) { + if (env_config->co_player_conditioning == NULL) { + env_config->co_player_conditioning = (conditioning_config *)malloc(sizeof(conditioning_config)); + } + env_config->co_player_conditioning->reward_offroad_weight_lb = atof(value); + } else if (MATCH("env.co_player_policy.conditioning", "offroad_weight_ub")) { + if 
(env_config->co_player_conditioning == NULL) { + env_config->co_player_conditioning = (conditioning_config *)malloc(sizeof(conditioning_config)); + } + env_config->co_player_conditioning->reward_offroad_weight_ub = atof(value); + } else if (MATCH("env.co_player_policy.conditioning", "goal_weight_lb")) { + if (env_config->co_player_conditioning == NULL) { + env_config->co_player_conditioning = (conditioning_config *)malloc(sizeof(conditioning_config)); + } + env_config->co_player_conditioning->reward_goal_weight_lb = atof(value); + } else if (MATCH("env.co_player_policy.conditioning", "goal_weight_ub")) { + if (env_config->co_player_conditioning == NULL) { + env_config->co_player_conditioning = (conditioning_config *)malloc(sizeof(conditioning_config)); + } + env_config->co_player_conditioning->reward_goal_weight_ub = atof(value); + } else if (MATCH("env.co_player_policy.conditioning", "entropy_weight_lb")) { + if (env_config->co_player_conditioning == NULL) { + env_config->co_player_conditioning = (conditioning_config *)malloc(sizeof(conditioning_config)); + } + env_config->co_player_conditioning->entropy_weight_lb = atof(value); + } else if (MATCH("env.co_player_policy.conditioning", "entropy_weight_ub")) { + if (env_config->co_player_conditioning == NULL) { + env_config->co_player_conditioning = (conditioning_config *)malloc(sizeof(conditioning_config)); + } + env_config->co_player_conditioning->entropy_weight_ub = atof(value); + } else if (MATCH("env.co_player_policy.conditioning", "discount_weight_lb")) { + if (env_config->co_player_conditioning == NULL) { + env_config->co_player_conditioning = (conditioning_config *)malloc(sizeof(conditioning_config)); + } + env_config->co_player_conditioning->discount_weight_lb = atof(value); + } else if (MATCH("env.co_player_policy.conditioning", "discount_weight_ub")) { + if (env_config->co_player_conditioning == NULL) { + env_config->co_player_conditioning = (conditioning_config *)malloc(sizeof(conditioning_config)); + } + 
env_config->co_player_conditioning->discount_weight_ub = atof(value); + } else { return 0; // Unknown section/name, indicate failure to handle diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py index 270205ed70..758a269dcd 100644 --- a/pufferlib/pufferl.py +++ b/pufferlib/pufferl.py @@ -789,6 +789,38 @@ def train(self): ): pufferlib.utils.run_human_replay_eval_in_subprocess(self.config, self.logger, self.global_step) + # Eval rendering for adaptive agents (ego vs human logs) + if self.adaptive_driving_agent and self.config["eval"].get("human_replay_eval", False): + if self.epoch % self.config["eval"]["eval_interval"] == 0 or done_training: + model_dir = os.path.join(self.config["data_dir"], f"{self.config['env']}_{self.logger.run_id}") + model_files = glob.glob(os.path.join(model_dir, "model_*.pt")) + + if model_files: + latest_cpt = max(model_files, key=os.path.getctime) + bin_path = f"{model_dir}.bin" + + try: + export_args = {"env_name": self.config["env"], "load_model_path": latest_cpt, **self.config} + export( + args=export_args, + env_name=self.config["env"], + vecenv=self.vecenv, + policy=self.uncompiled_policy, + path=bin_path, + silent=True, + ) + eval_video_dir = os.path.join(model_dir, "eval_videos") + pufferlib.utils.render_human_replay_videos( + config=self.config, + policy_bin_path=bin_path, + output_dir=eval_video_dir, + num_maps=self.config["eval"].get("human_replay_render_num_maps", 3), + logger=self.logger, + global_step=self.global_step, + ) + except Exception as e: + print(f"Failed to render eval videos: {e}") + def mean_and_log(self): config = self.config for k in list(self.stats.keys()): @@ -1673,17 +1705,16 @@ def load_policy(args, vecenv, env_name=""): policy_cls = getattr(env_module.torch, args["policy_name"]) policy = policy_cls(vecenv.driver_env, **args["policy"]) - # Handle both RNN and Transformer wrappers + # Handle both RNN and Transformer wrappers via rnn_name rnn_name = args.get("rnn_name") - transformer_name = 
args.get("transformer_name") - if transformer_name is not None: + if rnn_name == "Transformer": # Load transformer wrapper - transformer_cls = getattr(env_module.torch, transformer_name) + transformer_cls = getattr(env_module.torch, rnn_name) args["transformer"]["context_length"] = vecenv.driver_env.episode_length policy = transformer_cls(vecenv.driver_env, policy, **args["transformer"]) elif rnn_name is not None: - # Load RNN wrapper + # Load RNN wrapper (Recurrent) rnn_cls = getattr(env_module.torch, rnn_name) policy = rnn_cls(vecenv.driver_env, policy, **args["rnn"]) diff --git a/pufferlib/resources/drive/puffer_adaptive_drive_co_player.bin b/pufferlib/resources/drive/puffer_adaptive_drive_co_player.bin new file mode 100644 index 0000000000..cc7567a82e Binary files /dev/null and b/pufferlib/resources/drive/puffer_adaptive_drive_co_player.bin differ diff --git a/pufferlib/utils.py b/pufferlib/utils.py index 8ef3cb4034..2bb240e485 100644 --- a/pufferlib/utils.py +++ b/pufferlib/utils.py @@ -38,13 +38,19 @@ def run_human_replay_eval_in_subprocess(config, logger, global_step): if is_adaptive: # Use evaluate_human_logs.py for adaptive agents with human replay + # Get policy_architecture from train config section + train_config = config.get("train", {}) + policy_architecture = train_config.get( + "policy_architecture", config.get("policy_architecture", "Recurrent") + ) + cmd = [ sys.executable, "evaluate_human_logs.py", "--policy-path", latest_cpt, "--policy-architecture", - config.get("policy_architecture", "Transformer"), + policy_architecture, "--adaptive-driving-agent", "1", "--k-scenarios", @@ -302,9 +308,37 @@ def render_videos(config, vecenv, logger, epoch, global_step, bin_path): env_vars = os.environ.copy() env_vars["ASAN_OPTIONS"] = "exitcode=0" + # Detect if this is an adaptive agent + env_name = config.get("env", "") + is_adaptive = "adaptive" in env_name + + # Select correct INI file based on agent type + if is_adaptive: + ini_file = 
"pufferlib/config/ocean/adaptive.ini" + else: + ini_file = "pufferlib/config/ocean/drive.ini" + # Base command with only visualization flags (env config comes from INI) base_cmd = ["xvfb-run", "-a", "-s", "-screen 0 1280x720x24", "./visualize"] + # Pass the correct INI file + base_cmd.extend(["--ini-file", ini_file]) + + # Get env config for k_scenarios and co-player settings + env_config = config.get("env_config", {}) + + # Pass k_scenarios for adaptive agents (longer videos) + k_scenarios = env_config.get("k_scenarios", 1) + if k_scenarios > 1: + base_cmd.extend(["--k-scenarios", str(k_scenarios)]) + + # Pass co-player policy if population play is enabled + co_player_enabled = env_config.get("co_player_enabled", False) + if co_player_enabled: + co_player_path = f"resources/drive/{config['env']}_co_player.bin" + if os.path.exists(co_player_path): + base_cmd.extend(["--co-player-policy", co_player_path]) + # Visualization config flags only if config.get("show_grid", False): base_cmd.append("--show-grid") @@ -407,3 +441,122 @@ def render_videos(config, vecenv, logger, epoch, global_step, bin_path): # Clean up bin weights file if os.path.exists(expected_weights_path): os.remove(expected_weights_path) + + +def render_human_replay_videos(config, policy_bin_path, output_dir, num_maps=5, logger=None, global_step=0): + """ + Render videos for human replay evaluation (1 ego agent + human log trajectories). + + In this mode, only one agent is policy-controlled (the ego), while all other agents + follow their logged human trajectories (rendered in GOLD). 
+ + Args: + config: Configuration dictionary with env settings + policy_bin_path: Path to the policy weights .bin file + output_dir: Directory to save output videos + num_maps: Number of maps to render + logger: Optional logger with wandb attribute for logging + global_step: Current training step for wandb logging + + Returns: + List of output video paths + """ + if not os.path.exists(policy_bin_path): + print(f"Policy weights file does not exist: {policy_bin_path}") + return [] + + try: + os.makedirs(output_dir, exist_ok=True) + + # Copy the binary weights to the expected location + expected_weights_path = "resources/drive/puffer_drive_weights.bin" + os.makedirs(os.path.dirname(expected_weights_path), exist_ok=True) + shutil.copy2(policy_bin_path, expected_weights_path) + + env_vars = os.environ.copy() + env_vars["ASAN_OPTIONS"] = "exitcode=0" + + # Get env config + env_config = config.get("env_config", config.get("env", {})) + k_scenarios = env_config.get("k_scenarios", 2) + + # Build command for human replay rendering + cmd = [ + "xvfb-run", + "-a", + "-s", + "-screen 0 1280x720x24", + "./visualize", + "--ini-file", + "pufferlib/config/ocean/adaptive.ini", + "--policy-name", + expected_weights_path, + "--max-controlled-agents", + "1", # Only 1 ego agent + "--k-scenarios", + str(k_scenarios), + "--num-maps", + str(num_maps), + "--log-trajectories", # Show human trajectory logs + "--zoom-in", + "--view", + "both", + "--output-topdown", + "resources/drive/output_topdown.mp4", + "--output-agent", + "resources/drive/output_agent.mp4", + ] + + output_videos = [] + videos_to_log_world = [] + videos_to_log_agent = [] + + for map_idx in range(num_maps): + result = subprocess.run(cmd, cwd=os.getcwd(), capture_output=True, text=True, timeout=600, env=env_vars) + + vids_exist = os.path.exists("resources/drive/output_topdown.mp4") and os.path.exists( + "resources/drive/output_agent.mp4" + ) + + if result.returncode == 0 or (result.returncode == 1 and vids_exist): + videos = 
[ + ("resources/drive/output_topdown.mp4", f"human_replay_map{map_idx:02d}_topdown.mp4"), + ("resources/drive/output_agent.mp4", f"human_replay_map{map_idx:02d}_agent.mp4"), + ] + + for source_vid, target_filename in videos: + if os.path.exists(source_vid): + target_path = os.path.join(output_dir, target_filename) + shutil.move(source_vid, target_path) + output_videos.append(target_path) + + if logger and hasattr(logger, "wandb") and logger.wandb: + import wandb + + if "topdown" in target_filename: + videos_to_log_world.append(wandb.Video(target_path, format="mp4")) + else: + videos_to_log_agent.append(wandb.Video(target_path, format="mp4")) + else: + print(f"Human replay rendering failed for map {map_idx}: {result.stderr}") + + # Log to wandb + if logger and hasattr(logger, "wandb") and logger.wandb and (videos_to_log_world or videos_to_log_agent): + payload = {} + if videos_to_log_world: + payload["eval/human_replay_world_view"] = videos_to_log_world + if videos_to_log_agent: + payload["eval/human_replay_agent_view"] = videos_to_log_agent + logger.wandb.log(payload, step=global_step) + + return output_videos + + except subprocess.TimeoutExpired: + print("Human replay rendering timed out") + return [] + except Exception as e: + print(f"Failed to render human replay videos: {e}") + return [] + finally: + if os.path.exists(expected_weights_path): + os.remove(expected_weights_path) diff --git a/scripts/run_baseline.sh b/scripts/run_baseline.sh new file mode 100755 index 0000000000..7e2f5b5e60 --- /dev/null +++ b/scripts/run_baseline.sh @@ -0,0 +1,39 @@ +#!/bin/bash +#SBATCH --job-name=puffer_baseline +#SBATCH --output=/scratch/mmk9418/logs/%A_%x.out +#SBATCH --error=/scratch/mmk9418/logs/%A_%x.err +#SBATCH --mem=128GB +#SBATCH --time=24:00:00 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --account=torch_pr_355_tandon_priority +#SBATCH --cpus-per-task=48 +#SBATCH --gres=gpu:1 + +# Vanilla baseline training script +# No co-players - other vehicles follow recorded 
human trajectories from Waymo data +# For comparison with co-player experiments in run.sh + +singularity exec --nv \ + --overlay "$OVERLAY_FILE:ro" \ + "$SINGULARITY_IMAGE" \ + bash -c " + set -e + + source ~/.bashrc + cd /scratch/mmk9418/projects/Adaptive_Driving_Agent + source .venv/bin/activate + + # Start GPU heartbeat in background (for RL training which is CPU-bound) + nice -n 19 python scripts/gpu_heartbeat.py & + HEARTBEAT_PID=\$! + echo \"Started GPU Heartbeat with PID: \$HEARTBEAT_PID\" + + puffer train puffer_adaptive_drive --wandb --tag adaptive_baseline \ + --env.num-maps 1000 \ + --env.conditioning.type none \ + --env.co-player-enabled 0 \ + --train.seed 42 + + kill \$HEARTBEAT_PID + " diff --git a/scripts/run_nuplan_coplayers.sh b/scripts/run_nuplan_coplayers.sh new file mode 100755 index 0000000000..3c172eb280 --- /dev/null +++ b/scripts/run_nuplan_coplayers.sh @@ -0,0 +1,84 @@ +#!/bin/bash +#SBATCH --job-name=nuplan_coplayer +#SBATCH --output=/scratch/mmk9418/logs/%A_%a_%x.out +#SBATCH --error=/scratch/mmk9418/logs/%A_%a_%x.err +#SBATCH --mem=128GB +#SBATCH --time=24:00:00 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --account=torch_pr_355_tandon_priority +#SBATCH --cpus-per-task=48 +#SBATCH --gres=gpu:1 +#SBATCH --array=0-15 + +# Train co-player policies on NuPlan data with varying entropy/discount conditioning +# These trained policies can later be used as co-players for adaptive agent training +# +# PREREQUISITE: Convert NuPlan JSON to binary format first: +# python -c "from pufferlib.ocean.drive.drive import process_all_maps; \ +# process_all_maps('data/nuplan_gpudrive/nuplan', max_maps=5000)" +# +# This will create: resources/drive/binaries/nuplan/map_*.bin (dataset_name derived from folder) + +# Define configurations for each array task ID +# Grid: 4 entropy levels × 4 discount levels = 16 configurations +# Each entry: "entropy_weight_ub discount_weight_lb" +ZIPPED_RUNS=( + "0.5 0.8" + "0.1 0.8" + "0.01 0.8" + "0 0.8" + + "0.5 0.6" + "0.1 
0.6" + "0.01 0.6" + "0 0.6" + + "0.5 0.4" + "0.1 0.4" + "0.01 0.4" + "0 0.4" + + "0.5 0.2" + "0.1 0.2" + "0.01 0.2" + "0 0.2" +) + +# Parse the values for this array task +read -r ENTROPY_UB DISCOUNT_LB <<< "${ZIPPED_RUNS[$SLURM_ARRAY_TASK_ID]}" + +# Fixed values +CONDITION_TYPE="all" +DISCOUNT_UB=1 +ENTROPY_LB=0 + +# NuPlan has ~5235 maps +NUPLAN_NUM_MAPS=5000 + +singularity exec --nv \ + --overlay "$OVERLAY_FILE:ro" \ + "$SINGULARITY_IMAGE" \ + bash -c " + set -e + + source ~/.bashrc + cd /scratch/mmk9418/projects/Adaptive_Driving_Agent + source .venv/bin/activate + + # Start GPU heartbeat in background (for RL training which is CPU-bound) + nice -n 19 python scripts/gpu_heartbeat.py & + HEARTBEAT_PID=\$! + echo \"Started GPU Heartbeat with PID: \$HEARTBEAT_PID\" + + puffer train puffer_drive --wandb --tag nuplan_coplayer_training \ + --env.map-dir resources/drive/binaries/nuplan \ + --env.num-maps $NUPLAN_NUM_MAPS \ + --env.conditioning.type $CONDITION_TYPE \ + --env.conditioning.entropy-weight-lb $ENTROPY_LB \ + --env.conditioning.entropy-weight-ub $ENTROPY_UB \ + --env.conditioning.discount-weight-lb $DISCOUNT_LB \ + --env.conditioning.discount-weight-ub $DISCOUNT_UB + + kill \$HEARTBEAT_PID + " diff --git a/scripts/run_transformer_adaptive.sh b/scripts/run_transformer_adaptive.sh new file mode 100755 index 0000000000..79b2a7caae --- /dev/null +++ b/scripts/run_transformer_adaptive.sh @@ -0,0 +1,78 @@ +#!/bin/bash +#SBATCH --job-name=puffer_transformer +#SBATCH --output=/scratch/mmk9418/logs/%A_%a_%x.out +#SBATCH --error=/scratch/mmk9418/logs/%A_%a_%x.err +#SBATCH --mem=128GB +#SBATCH --time=24:00:00 +#SBATCH --nodes=1 +#SBATCH --ntasks=1 +#SBATCH --account=torch_pr_355_tandon_priority +#SBATCH --cpus-per-task=48 +#SBATCH --gres=gpu:1 +#SBATCH --array=0-15 + +# Transformer-based adaptive agent training with co-players +# Same co-player configurations as run.sh, but using Transformer architecture + +# Define configurations for each array task ID +# Each entry: 
"path entropy_weight_ub discount_weight_lb" +ZIPPED_RUNS=( + "experiments/puffer_drive_8u92j3ts/model_puffer_drive_003000.pt 0.5 0.8" + "experiments/puffer_drive_x4xs711x.pt 0.1 0.8" + "experiments/puffer_drive_hhzdzhl8.pt 0.01 0.8" + "experiments/puffer_drive_3xd48djp.pt 0 0.8" + + "experiments/puffer_drive_fgglgofu.pt 0.5 0.6" + "experiments/puffer_drive_g3x9e5rn.pt 0.01 0.6" + "experiments/puffer_drive_gzuuzs0o.pt 0.1 0.6" + "experiments/puffer_drive_6nzf7xha.pt 0 0.6" + + "experiments/puffer_drive_3iefv59j.pt 0.5 0.4" + "experiments/puffer_drive_7h07nrxy.pt 0.1 0.4" + "experiments/puffer_drive_bot2wl0m.pt 0.01 0.4" + "experiments/puffer_drive_n7mx9f4b.pt 0 0.4" + + "experiments/puffer_drive_9jv4q77m.pt 0.5 0.2" + "experiments/puffer_drive_5p8gpw84.pt 0.1 0.2" + "experiments/puffer_drive_jskw659g.pt 0.01 0.2" + "experiments/puffer_drive_eeyizdrk.pt 0 0.2" +) + +# Parse the values for this array task +read -r COPLAYER_PATH ENTROPY_UB DISCOUNT_LB <<< "${ZIPPED_RUNS[$SLURM_ARRAY_TASK_ID]}" + +# Fixed values +CONDITION_TYPE="all" +DISCOUNT_UB=1 +ENTROPY_LB=0 + +singularity exec --nv \ + --overlay "$OVERLAY_FILE:ro" \ + "$SINGULARITY_IMAGE" \ + bash -c " + set -e + + source ~/.bashrc + cd /scratch/mmk9418/projects/Adaptive_Driving_Agent + source .venv/bin/activate + + # Start GPU heartbeat in background (for RL training which is CPU-bound) + nice -n 19 python scripts/gpu_heartbeat.py & + HEARTBEAT_PID=\$! 
+ echo \"Started GPU Heartbeat with PID: \$HEARTBEAT_PID\" + + puffer train puffer_adaptive_drive --wandb --tag adaptive_transformer \ + --env.num-maps 1000 \ + --env.conditioning.type none \ + --env.co-player-enabled 1 \ + --env.co-player-policy.policy-path $COPLAYER_PATH \ + --env.co-player-policy.conditioning.type $CONDITION_TYPE \ + --env.co-player-policy.conditioning.discount-weight-lb $DISCOUNT_LB \ + --env.co-player-policy.conditioning.discount-weight-ub $DISCOUNT_UB \ + --env.co-player-policy.conditioning.entropy-weight-lb $ENTROPY_LB \ + --env.co-player-policy.conditioning.entropy-weight-ub $ENTROPY_UB \ + --rnn-name Transformer \ + --train.policy-architecture Transformer + + kill \$HEARTBEAT_PID + "