diff --git a/examples/eval_pipeline.py b/examples/eval_pipeline.py
index 3a90687fac..94155e0a08 100644
--- a/examples/eval_pipeline.py
+++ b/examples/eval_pipeline.py
@@ -1,3 +1,4 @@
+import copy
 import numpy as np
 import pandas as pd
 import torch
@@ -11,6 +12,35 @@
 
 POLICY_DIR = "models"
 
+# Policy configurations: (path, dynamics_model, type)
+POLICY_CONFIGS = {
+    "bc_policy": {
+        "path": POLICY_DIR + "/bc_policy.pt",
+        "dynamics": "classic",
+        "type": "bc",
+    },
+    # "self_play_rl (classic)": {
+    #     "path": POLICY_DIR + "/self_play_rl_simple_policy.pt",
+    #     "dynamics": "classic",
+    #     "type": "rl",
+    # },
+    # "guided_self_play_rl (classic)": {
+    #     "path": POLICY_DIR + "/guided_self_play_classic_policy.pt",
+    #     "dynamics": "classic",
+    #     "type": "rl",
+    # },
+    # "self_play_rl (jerk)": {
+    #     "path": POLICY_DIR + "/self_play_jerk_policy.pt",
+    #     "dynamics": "jerk",
+    #     "type": "rl",
+    # },
+    # "guided_self_play_rl (jerk)": {
+    #     "path": POLICY_DIR + "/guided_self_play_jerk_policy.pt",
+    #     "dynamics": "jerk",
+    #     "type": "rl",
+    # },
+}
+
 COLUMN_ORDER = [
     "policy",
     "realism_meta_score",
@@ -222,13 +252,31 @@ def evaluate_bc_policy(config, vecenv, evaluator, policy_path):
 
 
 def evaluate_rl_policy(config, vecenv, evaluator, policy_path):
+    """Evaluate an RL policy using WOSAC metrics."""
+
+    # Use a copy to avoid mutating the original config
+    config = copy.deepcopy(config)
+
+    # Ensure evaluator is in RL mode
+    evaluator.mode = "rl"
+
+    # Enable RNN state initialization for LSTM-based policies
     config["train"]["use_rnn"] = True
-    evaluator.mode = "rl_policy"
 
+    config["load_model_path"] = policy_path
+
+    # Load policy
     policy = load_policy(config, vecenv, "puffer_drive")
+    policy.eval()
 
     gt_trajectories = evaluator.collect_ground_truth_trajectories(vecenv)
-    simulated_trajectories = evaluator.collect_simulated_trajectories(config, vecenv, policy)
+
+    # Roll out trained policy in the simulator
+    simulated_trajectories = evaluator.collect_simulated_trajectories(
+        args=config,
+        puffer_env=vecenv,
+        policy=policy,
+    )
 
     # Compute metrics
     agent_state = vecenv.driver_env.get_global_agent_state()
@@ -242,18 +290,16 @@ def evaluate_rl_policy(config, vecenv, evaluator, policy_path):
     return results
 
 
-def pipeline(env_name="puffer_drive"):
-    """Obtain WOSAC scores for various baselines and policies."""
-
+def create_config_and_env(env_name, dynamics_model="classic"):
+    """Create a config and vecenv for a specific dynamics model."""
     config = load_config(env_name)
 
-    # Dataset configuration
-    config["env"]["map_dir"] = "pufferlib/resources/drive/binaries/training"
+    # Common WOSAC evaluation settings
+    config["env"]["num_maps"] = 100
+    config["env"]["map_dir"] = "pufferlib/resources/drive/binaries/validation"
     config["eval"]["wosac_target_scenarios"] = 1000
     config["eval"]["wosac_batch_size"] = 100
-    config["eval"]["wosac_scenario_pool_size"] = 10_0000
-
-    # WOSAC settings
+    config["eval"]["wosac_scenario_pool_size"] = 10_000
     config["wosac"]["enabled"] = True
     config["vec"]["backend"] = "PufferEnv"
     config["vec"]["num_envs"] = 1
@@ -264,63 +310,100 @@ def pipeline(env_name="puffer_drive"):
     config["env"]["goal_radius"] = 1.0
     config["env"]["save_data_to_disk"] = False
 
-    # Make env
-    vecenv = load_env(env_name, config)
+    # Set dynamics model
+    config["env"]["dynamics_model"] = dynamics_model
+
+    # Disable human data preparation for jerk dynamics (not implemented)
+    if dynamics_model == "jerk":
+        config["env"]["prep_human_data"] = False
 
-    # Make evaluator
+    vecenv = load_env(env_name, config)
     evaluator = WOSACEvaluator(config)
 
-    # Baseline: Ground truth
-    evaluator.eval_mode = "ground_truth"
-    df_results_gt = evaluator.evaluate(config, vecenv, policy=None)
-    df_results_gt["policy"] = "ground_truth"
+    return config, vecenv, evaluator
 
-    # Baseline: Agent with inferred human actions (using classic bicycle dynamics model)
-    # df_results_inferred_human = evaluate_human_inferred_actions(config, vecenv, evaluator)
-    # df_results_inferred_human["policy"] = "inferred_human_actions"
-
-    # Baseline: Imitation learning policy
-    # df_results_bc = evaluate_bc_policy(config, vecenv, evaluator, POLICY_DIR + "/bc_policy.pt")
-    # df_results_bc["policy"] = "bc_policy"
-
-    # Baseline: Self-play RL policy
-    # run: https://wandb.ai/emerge_/gsp/runs/qld2z6tn?nw=nwuserdaphnecor
-    # df_results_self_play = evaluate_rl_policy(
-    #     config, vecenv, evaluator, "pufferlib/resources/drive/pufferdrive_weights.pt"
-    # )  # POLICY_DIR + "/puffer_drive_sp_qld2z6tn.pt")
-    # df_results_self_play["policy"] = "self_play_rl_base"
-
-    # TODO: Guided self-play policy (guidance in rewards)
-    # ...
-
-    # TODO: Guided self-play policy (regularization)
-    # ...
-
-    # Baseline: Random policy
-    # df_results_random = evaluate_random_policy(config, vecenv, evaluator)
-    # df_results_random["policy"] = "random"
-
-    # Combine
-    df = pd.concat(
-        [
-            df_results_gt,
-            # df_results_inferred_human,
-            # df_results_random,
-            # df_results_bc,
-            # df_results_self_play,
-        ],
-        ignore_index=True,
-    )
 
+def pipeline(env_name="puffer_drive"):
+    """Obtain WOSAC scores for various baselines and policies across dynamics models."""
+
+    all_results = []
+
+    config_classic, vecenv_classic, evaluator_classic = create_config_and_env(env_name, "classic")
+
+    # Ground truth (dynamics-agnostic, only need to run once)
+    print("Evaluating: ground_truth")
+    evaluator_classic.eval_mode = "ground_truth"
+    df_results_gt = evaluator_classic.evaluate(config_classic, vecenv_classic, policy=None)
+    df_results_gt["policy"] = "ground_truth"
+    all_results.append(df_results_gt)
+
+    # Inferred human actions (classic only - not implemented for jerk)
+    print("Evaluating: inferred_human_actions (classic)")
+    df_results_inferred_human = evaluate_human_inferred_actions(config_classic, vecenv_classic, evaluator_classic)
+    df_results_inferred_human["policy"] = "inferred_human (classic)"
+    all_results.append(df_results_inferred_human)
+
+    # --- Classic dynamics evaluations ---
+    print("=" * 60)
+    print("Running evaluations with CLASSIC dynamics model...")
+    print("=" * 60)
+
+    # Random baseline for classic
+    print("Evaluating: random (classic)")
+    df_results_random_classic = evaluate_random_policy(config_classic, vecenv_classic, evaluator_classic)
+    df_results_random_classic["policy"] = "random (classic)"
+    all_results.append(df_results_random_classic)
+
+    # Evaluate classic dynamics policies
+    evaluator_classic.eval_mode = "policy"
+    for policy_name, policy_cfg in POLICY_CONFIGS.items():
+        if policy_cfg["dynamics"] != "classic":
+            continue
+        print(f"Evaluating: {policy_name}")
+        if policy_cfg["type"] == "bc":
+            df_result = evaluate_bc_policy(config_classic, vecenv_classic, evaluator_classic, policy_cfg["path"])
+        else:
+            df_result = evaluate_rl_policy(config_classic, vecenv_classic, evaluator_classic, policy_cfg["path"])
+        df_result["policy"] = policy_name
+        all_results.append(df_result)
+
+    # --- Jerk dynamics evaluations ---
+    # Check if any jerk policies are configured
+    jerk_policies = {k: v for k, v in POLICY_CONFIGS.items() if v["dynamics"] == "jerk"}
+
+    if jerk_policies:
+        print("=" * 60)
+        print("Running evaluations with JERK dynamics model...")
+        print("=" * 60)
+
+        config_jerk, vecenv_jerk, evaluator_jerk = create_config_and_env(env_name, "jerk")
+
+        # Random baseline for jerk (different action space, so separate baseline)
+        print("Evaluating: random (jerk)")
+        df_results_random_jerk = evaluate_random_policy(config_jerk, vecenv_jerk, evaluator_jerk)
+        df_results_random_jerk["policy"] = "random (jerk)"
+        all_results.append(df_results_random_jerk)
+
+        # Evaluate jerk dynamics policies
+        for policy_name, policy_cfg in jerk_policies.items():
+            print(f"Evaluating: {policy_name}")
+            df_result = evaluate_rl_policy(config_jerk, vecenv_jerk, evaluator_jerk, policy_cfg["path"])
+            df_result["policy"] = policy_name
+            all_results.append(df_result)
+
+    # Combine all results
+    df = pd.concat(all_results, ignore_index=True)
     df = df[COLUMN_ORDER]
 
     # Visualize
     plot_wosac_results(df)
     plot_realism_score_distributions(df)
 
-    print(f"total agents: {df_results_gt['num_agents_per_scene'].sum().item()}")
-
-    print(df.groupby("policy")["realism_meta_score"].mean())
+    # Print summary
+    print("\n" + "=" * 60)
+    print("RESULTS SUMMARY")
+    print("=" * 60)
+    print(df.groupby("policy")["realism_meta_score"].mean().sort_values(ascending=False))
     print("---")
     print(df.groupby("policy")["kinematic_metrics"].mean())
     print("---")
diff --git a/greene/a100_baseline_sweep.sh b/greene/a100_baseline_sweep.sh
index ca34d82059..2ff845ca7b 100644
--- a/greene/a100_baseline_sweep.sh
+++ b/greene/a100_baseline_sweep.sh
@@ -78,4 +78,4 @@ puffer sweep puffer_drive \
 
 # Print completion info
 echo "Sweep completed"
-date
\ No newline at end of file
+date
diff --git a/greene/h100_gsp_rew_sweep.sh b/greene/h100_gsp_rew_sweep.sh
index dde3f2d040..71b219dd86 100644
--- a/greene/h100_gsp_rew_sweep.sh
+++ b/greene/h100_gsp_rew_sweep.sh
@@ -35,8 +35,8 @@ source .venv/bin/activate
 NUM_WORKERS=48
 NUM_ENVS=48
 VEC_BATCH_SIZE=6
-NUM_AGENTS=1024 
-BPTT_HORIZON=32 
+NUM_AGENTS=1024
+BPTT_HORIZON=32
 HUMAN_REG_COEF=0.0
 USE_GUIDANCE_REWARDS=0
 
@@ -79,4 +79,4 @@ puffer sweep puffer_drive \
 
 # Print completion info
 echo "Sweep completed"
-date
\ No newline at end of file
+date
diff --git a/greene/rtx8000_sweep.sh b/greene/rtx8000_sweep.sh
index 6b8ba2d42b..79e051f632 100644
--- a/greene/rtx8000_sweep.sh
+++ b/greene/rtx8000_sweep.sh
@@ -47,8 +47,8 @@ fi
 NUM_WORKERS=32
 NUM_ENVS=32
 VEC_BATCH_SIZE=4
-NUM_AGENTS=1024 
-BPTT_HORIZON=32 
+NUM_AGENTS=1024
+BPTT_HORIZON=32
 HUMAN_REG_COEF=0.0
 USE_GUIDANCE_REWARDS=1
 
@@ -91,4 +91,4 @@ puffer sweep puffer_drive \
 
 # Print completion info
 echo "Sweep completed"
-date
\ No newline at end of file
+date
diff --git a/models/guided_self_play_jerk_policy.pt b/models/guided_self_play_jerk_policy.pt
new file mode 100644
index 0000000000..fabdd5ce35
Binary files /dev/null and b/models/guided_self_play_jerk_policy.pt differ
diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
index f369faae17..2ae2b45e77 100644
--- a/pufferlib/config/ocean/drive.ini
+++ b/pufferlib/config/ocean/drive.ini
@@ -31,6 +31,7 @@ use_guided_autonomy = 0
 guidance_speed_weight = 0.0
 guidance_heading_weight = 0.0
 waypoint_reach_threshold = 2.0
+use_guidance_observations = 0
 dt = 0.1
 reward_goal = 1.0
 reward_goal_post_respawn = 0.25
@@ -161,12 +162,64 @@ human_replay_control_mode = "control_sdc_only"
 ; Number of scenarios for human replay evaluation equals the number of agents
 human_replay_num_agents = 128
 
-; General sweep settings:
+[render]
+; Mode to render a bunch of maps with a given policy
+; Path to dataset used for rendering
+map_dir = "resources/drive/binaries/training"
+; Directory to output rendered videos
+output_dir = "resources/drive/render_videos"
+; Evaluation will run on the first num_maps maps in the map_dir directory
+num_maps = 100
+; "both", "topdown", "agent"; Other args are passed from train confs
+view_mode = "both"
+; Policy bin file used for rendering videos
+policy_path = "resources/drive/puffer_drive_weights.bin"
+; Allows more than cpu cores workers for rendering
+overwork = False
+; If True, show exactly what the agent sees in agent observation
+obs_only = True
+; Show grid lines
+show_grid = False
+; Draws lines from ego agent observed ORUs and road elements to show detection range
+show_lasers = False
+; Display human xy logs in the background
+show_human_logs = False
+; If True, zoom in on a part of the map. Otherwise, show full map
+zoom_in = True
+; If True, render single PNG snapshots with trajectory overlays instead of videos
+snapshot_only = False
+
+; ============================================================================
+; SWEEP CONFIGURATION
+; ============================================================================
+; Run with: puffer sweep puffer_drive --wandb --wandb-project drive-sweep --max-runs 50
+
+[sweep]
+method = Random
+metric = score
+goal = maximize
+downsample = 5
+
+; === Core RL Hyperparameters ===
 [sweep.train.learning_rate]
 distribution = log_normal
-min = 0.001
+min = 0.0003
 mean = 0.003
-max = 0.005
+max = 0.01
+scale = auto
+
+[sweep.train.gamma]
+distribution = logit_normal
+min = 0.95
+mean = 0.98
+max = 0.995
+scale = auto
+
+[sweep.train.gae_lambda]
+distribution = logit_normal
+min = 0.9
+mean = 0.95
+max = 0.99
 scale = auto
 
 [sweep.train.ent_coef]
@@ -176,30 +229,61 @@ mean = 0.005
 max = 0.03
 scale = auto
 
-[sweep.train.gamma]
+[sweep.train.vf_coef]
 distribution = log_normal
-min = 0.97
-mean = 0.98
-max = 0.999
+min = 0.5
+mean = 2.0
+max = 5.0
 scale = auto
 
-; [sweep.env.guidance_heading_weight]
-; distribution = uniform
-; min = 0.1
-; max = 0.5
-; mean = 0.3
-; scale = auto
+[sweep.train.clip_coef]
+distribution = uniform
+min = 0.1
+mean = 0.2
+max = 0.3
+scale = auto
 
-; [sweep.env.guidance_speed_weight]
-; distribution = uniform
-; min = 0.1
-; max = 0.5
-; mean = 0.3
-; scale = auto
+; === Minibatch Size ===
+; Must be divisible by bptt_horizon (32) and <= batch_size (393216)
+; uniform_pow2 samples powers of 2: 8192, 16384, 32768, 65536
+[sweep.train.minibatch_size]
+distribution = uniform_pow2
+min = 8192
+mean = 24576
+max = 65536
+scale = auto
+
+; === Learning Rate Decay Ablation ===
+; 0 = no decay (constant LR), 1 = cosine annealing
+[sweep.train.anneal_lr]
+distribution = int_uniform
+min = 0
+mean = 1
+max = 1
+scale = auto
 
 ; Settings for controlled experiments
 [controlled_exp.train.human_ll_coef]
 values = [0.0, 0.1, 0.5, 0.9]
 
-; [controlled_exp.env.max_expert_sequences]
-; values = [512, 1024, 2048]
+; === Guidance Reward Parameters ===
+[sweep.env.guidance_speed_weight]
+distribution = uniform
+min = 0.05
+mean = 0.1
+max = 0.3
+scale = auto
+
+[sweep.env.guidance_heading_weight]
+distribution = uniform
+min = 0.05
+mean = 0.1
+max = 0.3
+scale = auto
+
+[sweep.env.waypoint_reach_threshold]
+distribution = uniform
+min = 1.0
+mean = 2.0
+max = 5.0
+scale = auto
diff --git a/pufferlib/ocean/benchmark/metrics_sanity_check.py b/pufferlib/ocean/benchmark/metrics_sanity_check.py
index 0983dfa963..9cabc42377 100644
--- a/pufferlib/ocean/benchmark/metrics_sanity_check.py
+++ b/pufferlib/ocean/benchmark/metrics_sanity_check.py
@@ -1,22 +1,14 @@
 """
-Comprehensive evaluation script for Drive environment checkpoints.
+Validation script for WOSAC log-likelihood metrics.
 
-Evaluates all .pt checkpoints in a folder using:
-1. WOSAC metrics (realism, ADE, likelihood metrics)
-2. Collision rates (SDC-only control mode)
-
-Includes baselines for ground truth and random policy.
+Idea is to check how the log-likelihood metrics change as we replace
+increasing numbers of random rollouts with ground-truth data.
 """
 
 import argparse
-import os
-import glob
-from pathlib import Path
 import numpy as np
-import pandas as pd
-import torch
 
-from pufferlib.pufferl import load_env, load_policy
+from pufferlib.pufferl import load_config, load_env, load_policy
 from pufferlib.ocean.benchmark.evaluator import WOSACEvaluator
 
 
@@ -37,7 +29,6 @@ def replace_rollouts_with_gt(simulated_traj, gt_traj, num_replacements):
 def run_validation_experiment(config, vecenv):
     evaluator = WOSACEvaluator(config)
 
-    # Collect ground truth trajectories
     gt_trajectories = evaluator.collect_ground_truth_trajectories(vecenv)
     simulated_trajectories = evaluator.collect_wosac_random_baseline(vecenv)
     agent_state = vecenv.driver_env.get_global_agent_state()
@@ -88,24 +79,9 @@ def format_results_table(results):
 
 
 def main():
-    parser = argparse.ArgumentParser(
-        description="Evaluate Drive environment checkpoints", formatter_class=argparse.RawDescriptionHelpFormatter
-    )
-    parser.add_argument("checkpoint_dir", type=str, help="Directory containing .pt checkpoint files")
-    parser.add_argument(
-        "--output",
-        type=str,
-        default="evaluation_results.csv",
-        help="Output CSV file path (default: evaluation_results.csv)",
-    )
-    parser.add_argument(
-        "--num-collision-episodes",
-        type=int,
-        default=100,
-        help="Number of episodes for collision rate evaluation (default: 100)",
-    )
-    parser.add_argument("--skip-wosac", action="store_true", help="Skip WOSAC evaluation")
-    parser.add_argument("--skip-collision", action="store_true", help="Skip collision rate evaluation")
+    parser = argparse.ArgumentParser(description="Validate WOSAC log-likelihood metrics")
+    parser.add_argument("--env", default="puffer_drive")
+    parser.add_argument("--config", default="config/ocean/drive.ini")
     args = parser.parse_args()
 
     config = load_config(args.env)
diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c
index 16ac2ca2a3..0e5b4056e2 100644
--- a/pufferlib/ocean/drive/binding.c
+++ b/pufferlib/ocean/drive/binding.c
@@ -191,29 +191,66 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
         PyErr_SetString(PyExc_ValueError, "episode_length must be > 0 (set in INI or kwargs)");
         return -1;
     }
+
+// Allow all settings to be overridden via kwargs (ini provides defaults)
+#define OVERRIDE_INT(field)                                                                                            \
+    if (kwargs && PyDict_GetItemString(kwargs, #field)) {                                                              \
+        conf.field = (int)unpack(kwargs, #field);                                                                      \
+    }
+#define OVERRIDE_FLOAT(field)                                                                                          \
+    if (kwargs && PyDict_GetItemString(kwargs, #field)) {                                                              \
+        conf.field = (float)unpack(kwargs, #field);                                                                    \
+    }
+
+    OVERRIDE_INT(action_type);
+    OVERRIDE_INT(dynamics_model);
+    OVERRIDE_FLOAT(reward_vehicle_collision);
+    OVERRIDE_FLOAT(reward_offroad_collision);
+    OVERRIDE_FLOAT(reward_goal);
+    OVERRIDE_FLOAT(reward_goal_post_respawn);
+    OVERRIDE_INT(use_guided_autonomy);
+    OVERRIDE_FLOAT(guidance_speed_weight);
+    OVERRIDE_FLOAT(guidance_heading_weight);
+    OVERRIDE_FLOAT(waypoint_reach_threshold);
+    OVERRIDE_INT(use_guidance_observations);
+    OVERRIDE_INT(collision_behavior);
+    OVERRIDE_INT(offroad_behavior);
+    OVERRIDE_FLOAT(dt);
+    OVERRIDE_INT(termination_mode);
+    OVERRIDE_INT(init_mode);
+    OVERRIDE_INT(control_mode);
+    OVERRIDE_INT(goal_behavior);
+    OVERRIDE_FLOAT(goal_target_distance);
+    OVERRIDE_FLOAT(goal_radius);
+    OVERRIDE_FLOAT(goal_speed);
+
+#undef OVERRIDE_INT
+#undef OVERRIDE_FLOAT
+
+    // Assign all settings from conf (which now has ini defaults + any kwargs overrides)
     env->action_type = conf.action_type;
     env->dynamics_model = conf.dynamics_model;
     env->reward_vehicle_collision = conf.reward_vehicle_collision;
     env->reward_offroad_collision = conf.reward_offroad_collision;
     env->reward_goal = conf.reward_goal;
     env->reward_goal_post_respawn = conf.reward_goal_post_respawn;
-    env->episode_length = conf.episode_length;
-    env->termination_mode = conf.termination_mode;
     env->use_guided_autonomy = conf.use_guided_autonomy;
     env->guidance_speed_weight = conf.guidance_speed_weight;
     env->guidance_heading_weight = conf.guidance_heading_weight;
     env->waypoint_reach_threshold = conf.waypoint_reach_threshold;
-    env->goal_radius = conf.goal_radius;
+    env->use_guidance_observations = conf.use_guidance_observations;
+    env->episode_length = conf.episode_length;
+    env->termination_mode = conf.termination_mode;
     env->collision_behavior = conf.collision_behavior;
     env->offroad_behavior = conf.offroad_behavior;
-    env->max_controlled_agents = unpack(kwargs, "max_controlled_agents");
     env->dt = conf.dt;
-    env->init_mode = (int)unpack(kwargs, "init_mode");
-    env->control_mode = (int)unpack(kwargs, "control_mode");
-    env->goal_behavior = (int)unpack(kwargs, "goal_behavior");
-    env->goal_target_distance = (float)unpack(kwargs, "goal_target_distance");
-    env->goal_radius = (float)unpack(kwargs, "goal_radius");
-    env->goal_speed = (float)unpack(kwargs, "goal_speed");
+    env->init_mode = conf.init_mode;
+    env->control_mode = conf.control_mode;
+    env->goal_behavior = conf.goal_behavior;
+    env->goal_target_distance = conf.goal_target_distance;
+    env->goal_radius = conf.goal_radius;
+    env->goal_speed = conf.goal_speed;
+    env->max_controlled_agents = unpack(kwargs, "max_controlled_agents");
     char *map_dir = unpack_str(kwargs, "map_dir");
     int map_id = unpack(kwargs, "map_id");
     int max_agents = unpack(kwargs, "max_agents");
diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index bbea158a98..c89a9b6e1d 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -82,8 +82,13 @@
 #define PARTNER_FEATURES 7
 
 // Ego features depend on dynamics model
-#define EGO_FEATURES_CLASSIC 7
-#define EGO_FEATURES_JERK 10
+#define EGO_FEATURES_CLASSIC 8
+#define EGO_FEATURES_JERK 11
+
+// Guidance waypoint observations
+#define GUIDANCE_WAYPOINT_FEATURES 2                                            // x, y per waypoint
+#define NUM_GUIDANCE_WAYPOINTS 91                                               // Full trajectory length
+#define GUIDANCE_OBS_SIZE (NUM_GUIDANCE_WAYPOINTS * GUIDANCE_WAYPOINT_FEATURES) // 182
 
 // Observation normalization constants
 #define MAX_SPEED 100.0f
@@ -340,6 +345,7 @@ struct Drive {
     float guidance_speed_weight;    // Weight for speed deviation penalty
     float guidance_heading_weight;  // Weight for heading deviation penalty
     float waypoint_reach_threshold; // Distance threshold for hitting waypoints (e.g., 2.0m)
+    int use_guidance_observations;  // Boolean: whether to include egocentric guidance waypoints in observations
     char *map_name;
     float world_mean_x;
     float world_mean_y;
@@ -582,7 +588,6 @@ Entity *load_map_binary(const char *filename, Drive *env) {
     fread(&env->num_roads, sizeof(int), 1, file);
     env->num_entities = env->num_objects + env->num_roads;
     Entity *entities = (Entity *)malloc(env->num_entities * sizeof(Entity));
-
     for (int i = 0; i < env->num_entities; i++) {
         // Read base entity data
         fread(&entities[i].scenario_id, sizeof(int), 1, file);
@@ -1320,7 +1325,7 @@ void compute_agent_metrics(Drive *env, int agent_idx) {
         entity = &env->entities[entity_list[i].entity_idx];
 
         // Check for offroad collision with road edges
-        if (entity->type == ROAD_EDGE) {
+        if (entity->type == ROAD_EDGE && agent->type == VEHICLE) {
             int geometry_idx = entity_list[i].geometry_idx;
             float start[2] = {entity->traj_x[geometry_idx], entity->traj_y[geometry_idx]};
             float end[2] = {entity->traj_x[geometry_idx + 1], entity->traj_y[geometry_idx + 1]};
@@ -1519,7 +1524,7 @@ void set_active_agents(Drive *env) {
 
         is_controlled = should_control_agent(env, i);
 
-        if (is_controlled) {
+        if (is_controlled && env->active_agent_count < env->num_agents) {
             active_agent_indices[env->active_agent_count] = i;
             env->active_agent_count++;
             env->entities[i].active_agent = 1;
@@ -1660,7 +1665,9 @@ void c_close(Drive *env) {
 
 void allocate(Drive *env) {
     init(env);
-    int ego_dim = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
+    int ego_dim_base = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
+    int guidance_size = env->use_guidance_observations ? GUIDANCE_OBS_SIZE : 0;
+    int ego_dim = ego_dim_base + guidance_size;
     int max_obs = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
     env->observations = (float *)calloc(env->active_agent_count * max_obs, sizeof(float));
     env->actions = (float *)calloc(env->active_agent_count * 2, sizeof(float));
@@ -1687,8 +1694,6 @@ float clipSpeed(float speed) {
     return speed;
 }
 
-float normalize_value(float value, float min, float max) { return (value - min) / (max - min); }
-
 void move_dynamics(Drive *env, int action_idx, int agent_idx) {
     Entity *agent = &env->entities[agent_idx];
     if (agent->removed)
@@ -1951,7 +1956,9 @@ void c_get_road_edge_polylines(Drive *env, float *x_out, float *y_out, int *leng
 }
 
 void compute_observations(Drive *env) {
-    int ego_dim = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
+    int ego_dim_base = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
+    int guidance_size = env->use_guidance_observations ? GUIDANCE_OBS_SIZE : 0;
+    int ego_dim = ego_dim_base + guidance_size;
     int max_obs = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
     memset(env->observations, 0, max_obs * env->active_agent_count * sizeof(float));
     float (*observations)[max_obs] = (float (*)[max_obs])env->observations;
@@ -1989,8 +1996,39 @@ void compute_observations(Drive *env) {
                 (ego_entity->a_long < 0) ? ego_entity->a_long / (-JERK_LONG[0]) : ego_entity->a_long / JERK_LONG[3];
             obs[8] = ego_entity->a_lat / JERK_LAT[2];
             obs[9] = (ego_entity->respawn_timestep != -1) ? 1 : 0;
+            obs[10] = ego_entity->type / 3.0f;
         } else {
             obs[6] = (ego_entity->respawn_timestep != -1) ? 1 : 0;
+            obs[7] = ego_entity->type / 3.0f;
+        }
+
+        // Egocentric guidance waypoint observations
+        if (env->use_guidance_observations) {
+            int guidance_start_idx = ego_dim_base;
+            for (int w = 0; w < NUM_GUIDANCE_WAYPOINTS; w++) {
+                int traj_idx = env->timestep + w;
+                float wp_ego_x = 0.0f;
+                float wp_ego_y = 0.0f;
+
+                // Check if trajectory index is valid and within bounds
+                if (traj_idx < ego_entity->array_size && ego_entity->traj_valid[traj_idx]) {
+                    float wp_world_x = ego_entity->traj_x[traj_idx];
+                    float wp_world_y = ego_entity->traj_y[traj_idx];
+
+                    // Skip invalid positions
+                    if (wp_world_x != INVALID_POSITION && wp_world_y != INVALID_POSITION) {
+                        // Transform to egocentric coordinates
+                        float dx = wp_world_x - ego_entity->x;
+                        float dy = wp_world_y - ego_entity->y;
+                        wp_ego_x = dx * cos_heading + dy * sin_heading;
+                        wp_ego_y = -dx * sin_heading + dy * cos_heading;
+                    }
+                }
+
+                // Store normalized egocentric waypoint (same scale as goal: 0.005)
+                obs[guidance_start_idx + w * 2] = wp_ego_x * 0.005f;
+                obs[guidance_start_idx + w * 2 + 1] = wp_ego_y * 0.005f;
+            }
         }
 
         // Relative Pos of other cars
@@ -2316,7 +2354,6 @@ void c_step(Drive *env) {
 
         int lane_aligned = env->entities[agent_idx].metrics_array[LANE_ALIGNED_IDX];
         env->logs[i].lane_alignment_rate = lane_aligned;
-
         // Apply guided autonomy reward
         if (env->use_guided_autonomy) {
             float ga_reward = compute_guided_autonomy_reward(env, agent_idx, i);
@@ -2383,8 +2420,10 @@ void c_step(Drive *env) {
 void c_collect_expert_data(Drive *env, float *expert_actions_discrete_out, float *expert_actions_continuous_out,
                            float *expert_obs_out) {
 
-    int ego_dim = (env->dynamics_model == JERK) ? 10 : 7;
-    int max_obs = ego_dim + 7 * (MAX_AGENTS - 1) + 7 * MAX_ROAD_SEGMENT_OBSERVATIONS;
+    int ego_dim_base = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
+    int guidance_size = env->use_guidance_observations ? GUIDANCE_OBS_SIZE : 0;
+    int ego_dim = ego_dim_base + guidance_size;
+    int max_obs = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
 
     int original_timestep = env->timestep;
 
@@ -2636,7 +2675,9 @@ void draw_agent_obs(Drive *env, int agent_index, int mode, int obs_only, int las
         return;
     }
 
-    int ego_dim = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
+    int ego_dim_base = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
+    int guidance_size = env->use_guidance_observations ? GUIDANCE_OBS_SIZE : 0;
+    int ego_dim = ego_dim_base + guidance_size;
     int max_obs = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
     float (*observations)[max_obs] = (float (*)[max_obs])env->observations;
     float *agent_obs = &observations[agent_index][0];
diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py
index 1d4e186e27..0d5a385519 100644
--- a/pufferlib/ocean/drive/drive.py
+++ b/pufferlib/ocean/drive/drive.py
@@ -29,6 +29,7 @@ def __init__(
         guidance_speed_weight=0.0,
         guidance_heading_weight=0.0,
         waypoint_reach_threshold=2.0,
+        use_guidance_observations=0,
         goal_behavior=0,
         goal_target_distance=10.0,
         goal_radius=2.0,
@@ -76,6 +77,7 @@ def __init__(
         self.guidance_speed_weight = guidance_speed_weight
         self.guidance_heading_weight = guidance_heading_weight
         self.waypoint_reach_threshold = waypoint_reach_threshold
+        self.use_guidance_observations = use_guidance_observations
         self.human_agent_idx = human_agent_idx
         self.episode_length = episode_length
         self.termination_mode = termination_mode
@@ -85,9 +87,11 @@ def __init__(
         self.save_data_to_disk = save_data_to_disk
 
         # Observation space calculation
-        self.ego_features = {"classic": binding.EGO_FEATURES_CLASSIC, "jerk": binding.EGO_FEATURES_JERK}.get(
+        ego_features_base = {"classic": binding.EGO_FEATURES_CLASSIC, "jerk": binding.EGO_FEATURES_JERK}.get(
             dynamics_model
         )
+        guidance_obs_size = binding.GUIDANCE_OBS_SIZE if use_guidance_observations else 0
+        self.ego_features = ego_features_base + guidance_obs_size
 
         # Extract observation shapes from constants
         # These need to be defined in C, since they determine the shape of the arrays
@@ -149,6 +153,7 @@ def __init__(
             raise ValueError(f"action_space must be 'discrete' or 'continuous'. Got: {action_type}")
 
         self._action_type_flag = 0 if action_type == "discrete" else 1
+        self._dynamics_model_flag = 0 if dynamics_model == "classic" else 1
 
         # Check if resources directory exists
         binary_path = f"{map_dir}/map_000.bin"
@@ -195,6 +200,7 @@ def __init__(
                 self.truncations[cur:nxt],
                 seed,
                 action_type=self._action_type_flag,
+                dynamics_model=self._dynamics_model_flag,
                 human_agent_idx=human_agent_idx,
                 reward_vehicle_collision=reward_vehicle_collision,
                 reward_offroad_collision=reward_offroad_collision,
@@ -204,6 +210,7 @@ def __init__(
                 guidance_speed_weight=guidance_speed_weight,
                 guidance_heading_weight=guidance_heading_weight,
                 waypoint_reach_threshold=waypoint_reach_threshold,
+                use_guidance_observations=use_guidance_observations,
                 goal_radius=goal_radius,
                 goal_speed=goal_speed,
                 goal_behavior=self.goal_behavior,
diff --git a/pufferlib/ocean/drive/drivenet.h b/pufferlib/ocean/drive/drivenet.h
index 714cc0fc3b..7e503bfd4a 100644
--- a/pufferlib/ocean/drive/drivenet.h
+++ b/pufferlib/ocean/drive/drivenet.h
@@ -46,10 +46,12 @@ struct DriveNet {
     Multidiscrete *multidiscrete;
 };
 
-DriveNet *init_drivenet(Weights *weights, int num_agents, int dynamics_model) {
+DriveNet *init_drivenet(Weights *weights, int num_agents, int dynamics_model, int use_guidance_observations) {
     DriveNet *net = calloc(1, sizeof(DriveNet));
     // Use constants directly from drive.h
-    int ego_dim = (dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
+    int ego_dim_base = (dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
+    int guidance_size = use_guidance_observations ? GUIDANCE_OBS_SIZE : 0;
+    int ego_dim = ego_dim_base + guidance_size;
     int max_partners = MAX_AGENTS - 1;
     int max_road_obs = MAX_ROAD_SEGMENT_OBSERVATIONS;
     int partner_features = PARTNER_FEATURES;
diff --git a/pufferlib/ocean/drive/visualize.c b/pufferlib/ocean/drive/visualize.c
index 3a511a2624..73554425dd 100644
--- a/pufferlib/ocean/drive/visualize.c
+++ b/pufferlib/ocean/drive/visualize.c
@@ -281,6 +281,134 @@ static void set_actions(Drive *env, int timestep, int control, DriveNet *net) {
     }
 }
 
+void render_snapshot(Drive *env, Client *client, DriveNet *net, int frame_count, float map_height, int show_human_logs,
+                     int show_grid, int lasers, int zoom_in, int img_width, int img_height,
+                     const char *output_filename) {
+
+    // Allocate trajectory storage
+    int num_agents = env->active_agent_count;
+    float *traj_x = (float *)calloc(num_agents * frame_count, sizeof(float));
+    float *traj_y = (float *)calloc(num_agents * frame_count, sizeof(float));
+    int *traj_valid = (int *)calloc(num_agents * frame_count, sizeof(int));
+
+    // Track whether each agent is done (respawned, stopped, or removed)
+    int *agent_done = (int *)calloc(num_agents, sizeof(int));
+
+    // Run simulation, record positions
+    printf("Running simulation for %d steps...\n", frame_count);
+    for (int t = 0; t < frame_count; t++) {
+        // Record positions before stepping
+        for (int i = 0; i < num_agents; i++) {
+            int idx = env->active_agent_indices[i];
+            traj_x[i * frame_count + t] = env->entities[idx].x;
+            traj_y[i * frame_count + t] = env->entities[idx].y;
+            traj_valid[i * frame_count + t] = !agent_done[i];
+        }
+        forward(net, env->observations, (int *)env->actions);
+        c_step(env);
+        // Check for done agents after stepping (respawn/stop/remove happens inside c_step)
+        for (int i = 0; i < num_agents; i++) {
+            int idx = env->active_agent_indices[i];
+            if (env->entities[idx].respawn_timestep != -1 || env->entities[idx].stopped || env->entities[idx].removed) {
+                agent_done[i] = 1;
+            }
+        }
+    }
+    free(agent_done);
+
+    // Reset env to get initial scene for the background
+    c_reset(env);
+
+    // Render one top-down frame
+    BeginDrawing();
+
+    Camera3D camera = {0};
+    if (zoom_in) {
+        camera.position = (Vector3){0.0f, 0.0f, 500.0f};
+        camera.target = (Vector3){0.0f, 0.0f, 0.0f};
+        camera.fovy = map_height;
+    } else {
+        camera.position = (Vector3){env->grid_map->top_left_x, env->grid_map->bottom_right_y, 500.0f};
+        camera.target = (Vector3){env->grid_map->top_left_x, env->grid_map->bottom_right_y, 0.0f};
+        camera.fovy = 2 * map_height;
+    }
+    camera.up = (Vector3){0.0f, -1.0f, 0.0f};
+    camera.projection = CAMERA_ORTHOGRAPHIC;
+
+    client->width = img_width;
+    client->height = img_height;
+
+    Color road = (Color){35, 35, 37, 255};
+    ClearBackground(road);
+    BeginMode3D(camera);
+    rlEnableDepthTest();
+
+    // Draw human replay trajectories if enabled
+    if (show_human_logs) {
+        for (int i = 0; i < num_agents; i++) {
+            int idx = env->active_agent_indices[i];
+            Vector3 prev_point = {0};
+            bool has_prev = false;
+
+            for (int j = 0; j < env->entities[idx].array_size; j++) {
+                float x = env->entities[idx].traj_x[j];
+                float y = env->entities[idx].traj_y[j];
+                float valid = env->entities[idx].traj_valid[j];
+
+                if (!valid) {
+                    has_prev = false;
+                    continue;
+                }
+
+                Vector3 curr_point = {x, y, 0.5f};
+                if (has_prev) {
+                    DrawLine3D(prev_point, curr_point, Fade(LIGHTGREEN, 0.6f));
+                }
+                prev_point = curr_point;
+                has_prev = true;
+            }
+        }
+    }
+
+    // Draw policy trajectories (blue for all vehicles)
+    for (int i = 0; i < num_agents; i++) {
+        // Draw trajectory lines
+        for (int t = 1; t < frame_count; t++) {
+            if (traj_valid[i * frame_count + t] && traj_valid[i * frame_count + t - 1]) {
+                Vector3 start = {traj_x[i * frame_count + t - 1], traj_y[i * frame_count + t - 1], 1.0f};
+                Vector3 end = {traj_x[i * frame_count + t], traj_y[i * frame_count + t], 1.0f};
+                DrawLine3D(start, end, BLUE);
+            }
+        }
+
+        // Draw start marker
+        if (traj_valid[i * frame_count]) {
+            DrawSphere((Vector3){traj_x[i * frame_count], traj_y[i * frame_count], 1.0f}, 0.8f, BLUE);
+        }
+
+        // Draw end marker (last valid position)
+        for (int t = frame_count - 1; t >= 0; t--) {
+            if (traj_valid[i * frame_count + t]) {
+                DrawSphere((Vector3){traj_x[i * frame_count + t], traj_y[i * frame_count + t], 1.0f}, 0.8f,
+                           Fade(BLUE, 0.5f));
+                break;
+            }
+        }
+    }
+
+    // Draw the scene (roads, vehicles at t=0, curbs, grid)
+    draw_scene(env, client, 1, 0, lasers, show_grid);
+    EndMode3D();
+    EndDrawing();
+
+    TakeScreenshot(output_filename);
+    printf("Snapshot saved to %s\n", output_filename);
+
+    free(traj_x);
+    free(traj_y);
+    free(traj_valid);
+}
+
 int eval_gif(const char *map_name, const char *policy_name, int show_grid, int obs_only, int lasers,
              int show_human_logs, int frame_skip, const char *view_mode, const char *output_topdown,
              const char *output_agent, int num_maps, int zoom_in, int control) {
@@ -338,6 +466,7 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
         .init_steps = conf.init_steps,
         .init_mode = conf.init_mode,
         .control_mode = conf.control_mode,
+        .use_guidance_observations = conf.use_guidance_observations,
         .map_name = (char *)map_name,
     };
 
@@ -392,10 +521,50 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
     if (control == ACTIONS_FROM_POLICY) {
         weights = load_weights(policy_name);
         printf("Active agents in map: %d\n", env.active_agent_count);
-        net = init_drivenet(weights, env.active_agent_count, env.dynamics_model);
+        net = init_drivenet(weights, env.active_agent_count, env.dynamics_model, env.use_guidance_observations);
     }
 
     int frame_count = env.episode_length > 0 ? env.episode_length : TRAJECTORY_LENGTH_DEFAULT;
+
+    // Snapshot mode: render single PNG with trajectory overlays
+    if (conf.snapshot_only) {
+        char snapshot_filename[256];
+        if (output_topdown != NULL) {
+            strcpy(snapshot_filename, output_topdown);
+            char *ext = strrchr(snapshot_filename, '.');
+            if (ext != NULL) {
+                strcpy(ext, ".png");
+            }
+        } else {
+            char policy_base[256];
+            strcpy(policy_base, policy_name);
+            *strrchr(policy_base, '.') = '\0';
+
+            char map[256];
+            strcpy(map, basename((char *)map_name));
+            *strrchr(map, '.') = '\0';
+
+            char video_dir[256];
+            sprintf(video_dir, "%s/video", policy_base);
+            char mkdir_cmd[512];
+            snprintf(mkdir_cmd, sizeof(mkdir_cmd), "mkdir -p \"%s\"", video_dir);
+            system(mkdir_cmd);
+
+            sprintf(snapshot_filename, "%s/video/%s_snapshot.png", policy_base, map);
+        }
+
+        render_snapshot(&env, client, net, frame_count, map_height, show_human_logs, show_grid, lasers, zoom_in,
+                        img_width, img_height, snapshot_filename);
+
+        CloseWindow();
+        free(client);
+        free_allocated(&env);
+        free_drivenet(net);
+        free(weights);
+        return 0;
+    }
+
+    // Video mode (existing behavior)
     char filename_topdown[256];
     char filename_agent[256];
 
@@ -508,6 +677,13 @@ int eval_gif(const char *map_name, const char *policy_name, int show_grid, int o
 }
 
 int main(int argc, char *argv[]) {
+    // Parse configuration from INI file
+    env_init_config conf = {0};
+    const char *ini_file = "pufferlib/config/ocean/drive.ini";
+    if (ini_parse(ini_file, handler, &conf) < 0) {
+        fprintf(stderr, "Error: Could not load %s. Cannot determine environment configuration.\n", ini_file);
+        return -1;
+    }
     // Visualization-only parameters (not in [env] section)
     int show_grid = 0;
     int obs_only = 0;
@@ -522,7 +698,7 @@ int main(int argc, char *argv[]) {
     const char *policy_name = "resources/drive/puffer_drive_weights.bin";
     const char *output_topdown = NULL;
     const char *output_agent = NULL;
-    int num_maps = 100;
+    int num_maps = conf.num_maps;
     int control = ACTIONS_FROM_POLICY; // ACTIONS_INFERRED_FROM_HUMAN;
 
     // Parse command line arguments
diff --git a/pufferlib/ocean/env_binding.h b/pufferlib/ocean/env_binding.h
index 803dcdfc82..3672ab24a0 100644
--- a/pufferlib/ocean/env_binding.h
+++ b/pufferlib/ocean/env_binding.h
@@ -1006,7 +1006,6 @@ static PyObject *vec_collect_expert_data(PyObject *self, PyObject *args) {
     }
 
     int trajectory_length = PyArray_DIM(expert_actions_discrete, 0);
-    int max_obs = 7 + 7 * (MAX_AGENTS - 1) + 7 * MAX_ROAD_SEGMENT_OBSERVATIONS;
 
     // Process each environment
     int agent_offset = 0;
@@ -1014,6 +1013,12 @@ static PyObject *vec_collect_expert_data(PyObject *self, PyObject *args) {
         Env *env = vec->envs[i];
         int num_agents = env->active_agent_count;
 
+        // Compute max_obs dynamically based on environment config
+        int ego_dim_base = (env->dynamics_model == JERK) ? EGO_FEATURES_JERK : EGO_FEATURES_CLASSIC;
+        int guidance_size = env->use_guidance_observations ? GUIDANCE_OBS_SIZE : 0;
+        int ego_dim = ego_dim_base + guidance_size;
+        int max_obs = ego_dim + PARTNER_FEATURES * (MAX_AGENTS - 1) + ROAD_FEATURES * MAX_ROAD_SEGMENT_OBSERVATIONS;
+
         // Allocate temporary buffers for this environment
         float *env_actions_discrete = (float *)malloc(trajectory_length * num_agents * 1 * sizeof(float));
         float *env_actions_continuous = (float *)malloc(trajectory_length * num_agents * 2 * sizeof(float));
@@ -1115,6 +1120,7 @@ PyMODINIT_FUNC PyInit_binding(void) {
     PyModule_AddIntConstant(m, "PARTNER_FEATURES", PARTNER_FEATURES);
     PyModule_AddIntConstant(m, "EGO_FEATURES_CLASSIC", EGO_FEATURES_CLASSIC);
     PyModule_AddIntConstant(m, "EGO_FEATURES_JERK", EGO_FEATURES_JERK);
+    PyModule_AddIntConstant(m, "GUIDANCE_OBS_SIZE", GUIDANCE_OBS_SIZE);
 
     return m;
 }
diff --git a/pufferlib/ocean/env_config.h b/pufferlib/ocean/env_config.h
index f4dd7acdf5..391d0a1c98 100644
--- a/pufferlib/ocean/env_config.h
+++ b/pufferlib/ocean/env_config.h
@@ -19,6 +19,7 @@ typedef struct {
     float guidance_speed_weight;    // Weight for speed deviation penalty
     float guidance_heading_weight;  // Weight for heading deviation penalty
     float waypoint_reach_threshold; // Distance threshold for hitting waypoints
+    int use_guidance_observations;  // Boolean: whether to include egocentric guidance waypoints in observations
     float goal_radius;
     float goal_speed;
     int collision_behavior;
@@ -32,7 +33,9 @@ typedef struct {
     int init_steps;
     int init_mode;
     int control_mode;
+    int num_maps;
     char map_dir[256];
+    int snapshot_only;
 } env_init_config;
 
 // INI file parser handler - parses all environment configuration from drive.ini
@@ -80,6 +83,8 @@ static int handler(void *config, const char *section, const char *name, const ch
         env_config->guidance_heading_weight = atof(value);
     } else if (MATCH("env", "waypoint_reach_threshold")) {
         env_config->waypoint_reach_threshold = atof(value);
+    } else if (MATCH("env", "use_guidance_observations")) {
+        env_config->use_guidance_observations = atoi(value);
     } else if (MATCH("env", "goal_radius")) {
         env_config->goal_radius = atof(value);
     } else if (MATCH("env", "goal_speed")) {
@@ -126,6 +131,10 @@ static int handler(void *config, const char *section, const char *name, const ch
             env_config->map_dir[sizeof(env_config->map_dir) - 1] = '\0';
         }
         // printf("Parsed map_dir: '%s'\n", env_config->map_dir);
+    } else if (MATCH("env", "num_maps")) {
+        env_config->num_maps = atoi(value);
+    } else if (MATCH("render", "snapshot_only")) {
+        env_config->snapshot_only = (strcmp(value, "True") == 0 || strcmp(value, "true") == 0 || atoi(value) == 1);
     } else {
         return 0; // Unknown section/name, indicate failure to handle
     }
diff --git a/pufferlib/ocean/torch.py b/pufferlib/ocean/torch.py
index 98cc11a4ea..57899227b7 100644
--- a/pufferlib/ocean/torch.py
+++ b/pufferlib/ocean/torch.py
@@ -22,8 +22,8 @@ def __init__(self, env, input_size=128, hidden_size=128, **kwargs):
         self.road_features = env.road_features
         self.road_features_after_onehot = env.road_features + 6  # 6 is the number of one-hot encoded categories
 
-        # Determine ego dimension from environment's dynamics model
-        self.ego_dim = 10 if env.dynamics_model == "jerk" else 7
+        # Determine ego dimension from environment (includes guidance observations if enabled)
+        self.ego_dim = env.ego_features
 
         self.ego_encoder = nn.Sequential(
             pufferlib.pytorch.layer_init(nn.Linear(self.ego_dim, input_size)),
diff --git a/pufferlib/pufferl.py b/pufferlib/pufferl.py
index af6e56e66e..0cd116ae47 100644
--- a/pufferlib/pufferl.py
+++ b/pufferlib/pufferl.py
@@ -24,6 +24,7 @@
 
 import numpy as np
 import psutil
+from multiprocessing.pool import ThreadPool
 
 import torch
 import torch.distributed
@@ -430,31 +431,33 @@ def train(self):
                 approx_kl = ((ratio - 1) - logratio).mean()
                 clipfrac = ((ratio - 1.0).abs() > config["clip_coef"]).float().mean()
 
-            # Compute log likelihood loss of human actions under current policy.
-            # 1: Sample a batch of human actions and observations from dataset
-            # Shape: [n_sequences, bptt_horizon, feature_dim]
-            discrete_human_actions, continuous_human_actions, human_observations = (
-                self.vecenv.driver_env.sample_expert_data(n_samples=config["human_sequences"], return_both=True)
-            )
-
-            use_continuous = self.vecenv.driver_env._action_type_flag == 1
-            human_actions = continuous_human_actions if use_continuous else discrete_human_actions
-            human_actions = human_actions.reshape(-1, 1).to(device)
-            human_observations = human_observations.to(device)
+            # Compute log likelihood loss of human actions under current policy (if enabled)
+            if config["human_sequences"] > 0:
+                # 1: Sample a batch of human actions and observations from dataset
+                # Shape: [n_sequences, bptt_horizon, feature_dim]
+                discrete_human_actions, continuous_human_actions, human_observations = (
+                    self.vecenv.driver_env.sample_expert_data(n_samples=config["human_sequences"], return_both=True)
+                )
 
-            # 2: Compute the log-likelihood of human actions under the current policy,
-            # given the corresponding human observations. A higher likelihood indicates
-            # that the policy behaves more like a human under the same observations.
-            human_state = dict(
-                lstm_h=None,
-                lstm_c=None,
-            )
+                # Select appropriate action type for training
+                use_continuous = self.vecenv.driver_env._action_type_flag == 1
+                human_actions = continuous_human_actions if use_continuous else discrete_human_actions
+                human_actions = human_actions.to(device)
+                human_observations = human_observations.to(device)
+
+                # 2: Compute the log-likelihood of human actions under the current policy,
+                # given the corresponding human observations. A higher likelihood indicates
+                # that the policy behaves more like a human under the same observations.
+                human_state = dict(
+                    lstm_h=None,
+                    lstm_c=None,
+                )
 
-            human_logits, _ = self.policy(human_observations, human_state)
+                human_logits, _ = self.policy(human_observations, human_state)
 
-            _, human_log_prob, human_entropy = pufferlib.pytorch.sample_logits(
-                logits=human_logits, action=human_actions
-            )
+                _, human_log_prob, human_entropy = pufferlib.pytorch.sample_logits(
+                    logits=human_logits, action=human_actions
+                )
 
             adv = advantages[idx]
             adv = compute_puff_advantage(
@@ -482,7 +485,10 @@ def train(self):
             v_loss_clipped = (v_clipped - mb_returns) ** 2
             v_loss = 0.5 * torch.max(v_loss_unclipped, v_loss_clipped).mean()
 
-            human_nll = human_log_prob.mean()
+            if config["human_sequences"] > 0:
+                human_nll = human_log_prob.mean()
+            else:
+                human_nll = torch.tensor(0.0, device=device)
 
             entropy_loss = entropy.mean()
 
@@ -507,7 +513,8 @@ def train(self):
             losses["clipfrac"] += clipfrac.item() / self.total_minibatches
             losses["importance"] += ratio.mean().item() / self.total_minibatches
             losses["human_nll"] += human_nll / self.total_minibatches
-            self.realism["human_log_prob"] = human_log_prob.mean().item()
+            if config["human_sequences"] > 0:
+                self.realism["human_log_prob"] = human_log_prob.mean().item()
 
             # Learn on accumulated minibatches
             profile("learn", epoch)
@@ -1077,7 +1084,7 @@ def train(env_name, args=None, vecenv=None, policy=None, logger=None):
     elif args["wandb"]:
         logger = WandbLogger(args)
 
-    train_config = dict(**args["train"], env=env_name, eval=args.get("eval", {}))
+    train_config = dict(**args["train"], env=env_name, eval=args.get("eval", {}), env_args=args.get("env", {}))
     pufferl = PuffeRL(train_config, vecenv, policy, logger)
 
     all_logs = []
@@ -1630,8 +1637,96 @@ def puffer_type(value):
     return args
 
 
+def render(env_name, args=None):
+    args = args or load_config(env_name)
+    render_configs = args.get("render", {})
+
+    # Renders first num_maps from map_dir using visualize binary
+    try:
+        map_dir = render_configs["map_dir"]
+        num_maps = render_configs.get("num_maps", 1)
+        view_mode = render_configs["view_mode"]
+        render_policy_path = render_configs["policy_path"]
+        overwork = render_configs.get("overwork", False)
+        num_workers = args["vec"]["num_workers"]
+        output_dir = render_configs["output_dir"]
+    except KeyError as e:
+        raise pufferlib.APIUsageError(f"Missing render config: {e}")
+
+    cpu_cores = psutil.cpu_count(logical=False)
+    if num_workers > cpu_cores and not overwork:
+        raise pufferlib.APIUsageError(
+            " ".join(
+                [
+                    f"num_workers ({num_workers}) > hardware cores ({cpu_cores}) is disallowed by default.",
+                    "PufferLib multiprocessing is heavily optimized for 1 process per hardware core.",
+                    "If you really want to do this, set overwork=True (--vec-overwork in our demo.py).",
+                ]
+            )
+        )
+
+    if num_maps > len(os.listdir(map_dir)):
+        num_maps = len(os.listdir(map_dir))
+
+    render_maps = [os.path.join(map_dir, f) for f in sorted(os.listdir(map_dir)) if f.endswith(".bin")][:num_maps]
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Rebuild visualize binary
+    ensure_drive_binary()
+
+    snapshot_only = render_configs.get("snapshot_only", False)
+
+    def render_task(map_path):
+        base_cmd = (
+            ["./visualize"]
+            if sys.platform == "darwin"
+            else ["xvfb-run", "-a", "-s", "-screen 0 1280x720x24", "./visualize"]
+        )
+        cmd = base_cmd.copy()
+        cmd.extend(["--map-name", map_path])
+        if render_configs.get("show_grid", False):
+            cmd.append("--show-grid")
+        if render_configs.get("obs_only", False):
+            cmd.append("--obs-only")
+        if render_configs.get("show_lasers", False):
+            cmd.append("--lasers")
+        if render_configs.get("show_human_logs", False):
+            cmd.append("--show-human-logs")
+        if render_configs.get("zoom_in", False):
+            cmd.append("--zoom-in")
+        cmd.extend(["--view", view_mode])
+        if render_policy_path is not None:
+            cmd.extend(["--policy-name", render_policy_path])
+
+        map_name = os.path.basename(map_path).replace(".bin", "")
+
+        if snapshot_only:
+            cmd.extend(["--output-topdown", os.path.join(output_dir, f"topdown_{map_name}.png")])
+        else:
+            if view_mode == "topdown" or view_mode == "both":
+                cmd.extend(["--output-topdown", os.path.join(output_dir, f"topdown_{map_name}.mp4")])
+            if view_mode == "agent" or view_mode == "both":
+                cmd.extend(["--output-agent", os.path.join(output_dir, f"agent_{map_name}.mp4")])
+
+        env_vars = os.environ.copy()
+        env_vars["ASAN_OPTIONS"] = "exitcode=0"
+        try:
+            result = subprocess.run(cmd, cwd=os.getcwd(), capture_output=True, text=True, timeout=600, env=env_vars)
+            if result.returncode != 0:
+                print(f"Error rendering {map_name}: {result.stderr}")
+        except subprocess.TimeoutExpired:
+            print(f"Timeout rendering {map_name}: exceeded 600 seconds")
+
+    if render_maps:
+        output_type = "snapshots" if snapshot_only else "videos"
+        print(f"Rendering {len(render_maps)} {output_type} from {map_dir} with {num_workers} workers...")
+        with ThreadPool(num_workers) as pool:
+            pool.map(render_task, render_maps)
+        print(f"Finished rendering {output_type} to {output_dir}")
+
+
 def main():
-    err = "Usage: puffer [train, eval, sweep, controlled_exp, autotune, profile, export, sanity] [env_name] [optional args]. --help for more info"
+    err = "Usage: puffer [train, eval, sweep, controlled_exp, autotune, profile, export, sanity, render] [env_name] [optional args]. --help for more info"
     if len(sys.argv) < 3:
         raise pufferlib.APIUsageError(err)
 
@@ -1653,6 +1748,8 @@ def main():
         export(env_name=env_name)
     elif mode == "sanity":
         sanity(env_name=env_name)
+    elif mode == "render":
+        render(env_name=env_name)
     else:
         raise pufferlib.APIUsageError(err)
 
diff --git a/pufferlib/utils.py b/pufferlib/utils.py
index e82c1cd620..12611fba47 100644
--- a/pufferlib/utils.py
+++ b/pufferlib/utils.py
@@ -24,6 +24,7 @@ def run_human_replay_eval_in_subprocess(config, logger, global_step):
 
         # Prepare evaluation command
         eval_config = config["eval"]
+        env_config = config.get("env_args", {})
         cmd = [
             sys.executable,
             "-m",
@@ -40,6 +41,11 @@ def run_human_replay_eval_in_subprocess(config, logger, global_step):
             str(eval_config["human_replay_num_agents"]),
             "--eval.human-replay-control-mode",
             str(eval_config["human_replay_control_mode"]),
+            # Pass through environment config that affects observation size
+            "--env.use-guidance-observations",
+            str(env_config.get("use_guidance_observations", 0)),
+            "--env.dynamics-model",
+            str(env_config.get("dynamics_model", "classic")),
         ]
 
         # Run human replay evaluation in subprocess
@@ -100,6 +106,7 @@ def run_wosac_eval_in_subprocess(config, logger, global_step):
 
         # Prepare evaluation command
         eval_config = config.get("eval", {})
+        env_config = config.get("env_args", {})
         cmd = [
             sys.executable,
             "-m",
@@ -126,6 +133,13 @@ def run_wosac_eval_in_subprocess(config, logger, global_step):
             str(eval_config.get("wosac_goal_radius", 1.0)),
             "--eval.wosac-sanity-check",
             str(eval_config.get("wosac_sanity_check", False)),
+            "--eval.wosac-aggregate-results",
+            str(eval_config.get("wosac_aggregate_results", True)),
+            # Pass through environment config that affects observation size
+            "--env.use-guidance-observations",
+            str(env_config.get("use_guidance_observations", 0)),
+            "--env.dynamics-model",
+            str(env_config.get("dynamics_model", "classic")),
         ]
 
         if not model_files: