Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -194,3 +194,4 @@ pufferlib/resources/drive/output*.mp4

# Local TODO tracking
TODO.md
*.mp4
22 changes: 13 additions & 9 deletions pufferlib/config/ocean/adaptive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,20 @@
package = ocean
env_name = puffer_adaptive_drive
policy_name = Drive
transformer_name = Transformer
; Changed from rnn_name
rnn_name = Recurrent

[vec]
num_workers = 16
num_envs = 16
batch_size = 2
batch_size = 1
; backend = Serial

[policy]
input_size = 128
; Increased from 64 for richer representations
input_size = 64
hidden_size = 256

[rnn]
input_size = 256
hidden_size = 256

[transformer]
Expand All @@ -29,8 +31,8 @@ dropout = 0.0
; Dropout (keep at 0 for RL stability initially)

[env]
num_agents = 1512
num_ego_agents = 756
num_agents = 1024
num_ego_agents = 512
; Options: discrete, continuous
action_type = discrete
; Options: classic, jerk
Expand Down Expand Up @@ -120,7 +122,7 @@ minibatch_size = 36400
; 400 * 91
max_minibatch_size = 36400
minibatch_multiplier = 400
policy_architecture = Transformer
policy_architecture = Recurrent
; Matches scenario_length for buffer organization
bptt_horizon = 32
; Keep for backward compatibility
Expand All @@ -131,7 +133,7 @@ clip_coef = 0.2
ent_coef = 0.005
gae_lambda = 0.95
gamma = 0.98
learning_rate = 0.0003
learning_rate = 0.003
; Recurrent policy uses 0.003; the transformer variant used the lower 0.0003 (transformers often need lower LR)
max_grad_norm = 1.0
prio_alpha = 0.85
Expand Down Expand Up @@ -193,6 +195,8 @@ human_replay_num_agents = 32
human_replay_num_rollouts = 100
; Number of maps to use for human replay evaluation
human_replay_num_maps = 100
; Number of maps to render for human replay (subset of eval maps)
human_replay_render_num_maps = 3

[sweep.train.learning_rate]
distribution = log_normal
Expand Down
42 changes: 23 additions & 19 deletions pufferlib/config/ocean/drive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
package = ocean
env_name = puffer_drive
policy_name = Drive
rnn_name = Transformer
rnn_name = Recurrent

[vec]
num_workers = 16
Expand All @@ -14,9 +14,9 @@ batch_size = 2
input_size = 64
hidden_size = 256

; [rnn]
; input_size = 256
; hidden_size = 256
[rnn]
input_size = 256
hidden_size = 256

[transformer]
input_size = 256
Expand Down Expand Up @@ -112,15 +112,13 @@ discount_weight_ub = 0.80
[train]
seed=42
total_timesteps = 2_000_000_000
# learning_rate = 0.02
# gamma = 0.985
anneal_lr = True
; Needs to be: num_agents * num_workers * BPTT horizon
; Needs to be: num_agents * num_workers * context_window
batch_size = auto
minibatch_size = 32768
max_minibatch_size = 32768
; minibatch_size = 256
; max_minibatch_size = 256
minibatch_multiplier = 400
policy_architecture = Recurrent
bptt_horizon = 32
adam_beta1 = 0.9
adam_beta2 = 0.999
Expand All @@ -130,17 +128,15 @@ ent_coef = 0.005
gae_lambda = 0.95
gamma = 0.98
learning_rate = 0.003
max_grad_norm = 1
prio_alpha = 0.8499999999999999
prio_beta0 = 0.8499999999999999
max_grad_norm = 1.0
prio_alpha = 0.85
prio_beta0 = 0.85
update_epochs = 1
vf_clip_coef = 0.1999999999999999
vf_coef = 2
vf_clip_coef = 0.2
vf_coef = 2.0
vtrace_c_clip = 1
vtrace_rho_clip = 1
checkpoint_interval = 100
use_transformer = True
context_window = 32
checkpoint_interval = 10
# Rendering options
render = True
render_interval = 100
Expand Down Expand Up @@ -184,8 +180,16 @@ wosac_sanity_check = False
wosac_aggregate_results = True
; If True, enable human replay evaluation (pair policy-controlled agent with human replays)
human_replay_eval = False
; Control only the self-driving car
human_replay_control_mode = "control_sdc_only"
; Control mode for human replay (control_vehicles with max_controlled_agents=1 controls one agent)
human_replay_control_mode = "control_vehicles"
; Number of agents in human replay evaluation environment
human_replay_num_agents = 32
; Number of rollouts for human replay evaluation
human_replay_num_rollouts = 100
; Number of maps to use for human replay evaluation
human_replay_num_maps = 100
; Number of maps to render for human replay (subset of eval maps)
human_replay_render_num_maps = 3

[sweep.train.learning_rate]
distribution = log_normal
Expand Down
20 changes: 9 additions & 11 deletions pufferlib/ocean/drive/drive.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,18 @@ void test_drivenet() {
void demo() {

// Note: The settings below are hardcoded for demo purposes. Since the policy was
// trained with these exact settings, that changing them may lead to
// weird behavior.
// trained with these exact settings, changing them may lead to weird behavior.
Drive env = {
.human_agent_idx = 0,
.dynamics_model = conf.dynamics_model,
.reward_vehicle_collision = conf.reward_vehicle_collision,
.reward_offroad_collision = conf.reward_offroad_collision,
.reward_ade = conf.reward_ade,
.goal_radius = conf.goal_radius,
.dt = conf.dt,
.dynamics_model = CLASSIC,
.reward_vehicle_collision = -1.0f,
.reward_offroad_collision = -1.0f,
.goal_radius = 2.0f,
.dt = 0.1f,
.map_name = "resources/drive/binaries/training/map_000.bin",
.init_steps = conf.init_steps,
.collision_behavior = conf.collision_behavior,
.offroad_behavior = conf.offroad_behavior,
.init_steps = 0,
.collision_behavior = 0,
.offroad_behavior = 0,
};
allocate(&env);
c_reset(&env);
Expand Down
3 changes: 2 additions & 1 deletion pufferlib/ocean/drive/drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -1084,7 +1084,8 @@ def test_performance(timeout=10, atn_cache=1024, num_agents=1024):
if __name__ == "__main__":
# test_performance()
# Process the train dataset
process_all_maps(data_folder="/data/processed/training")
# process_all_maps(data_folder="/data/processed/training")
process_all_maps(data_folder="/data/nuplan_gpudrive/nuplan")
# Process the validation/test dataset
# process_all_maps(data_folder="data/processed/validation")
# # Process the validation_interactive dataset
Expand Down
Loading