Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions pufferlib/config/ocean/adaptive.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,20 @@
package = ocean
env_name = puffer_adaptive_drive
policy_name = Drive
transformer_name = Transformer
; Changed from rnn_name
rnn_name = Recurrent

[vec]
num_workers = 16
num_envs = 16
batch_size = 2
batch_size = 1
; backend = Serial

[policy]
input_size = 128
; Increased from 64 for richer representations
input_size = 64
hidden_size = 256

[rnn]
input_size = 256
hidden_size = 256

[transformer]
Expand All @@ -29,8 +31,8 @@ dropout = 0.0
; Dropout (keep at 0 for RL stability initially)

[env]
num_agents = 1512
num_ego_agents = 756
num_agents = 1024
num_ego_agents = 512
; Options: discrete, continuous
action_type = discrete
; Options: classic, jerk
Expand Down Expand Up @@ -120,7 +122,7 @@ minibatch_size = 36400
; 400 * 91
max_minibatch_size = 36400
minibatch_multiplier = 400
policy_architecture = Transformer
policy_architecture = Recurrent
; Matches scenario_length for buffer organization
bptt_horizon = 32
; Keep for backward compatibility
Expand Down Expand Up @@ -193,6 +195,8 @@ human_replay_num_agents = 32
human_replay_num_rollouts = 100
; Number of maps to use for human replay evaluation
human_replay_num_maps = 100
; Number of maps to render for human replay (subset of eval maps)
human_replay_render_num_maps = 3

[sweep.train.learning_rate]
distribution = log_normal
Expand Down
20 changes: 9 additions & 11 deletions pufferlib/ocean/drive/drive.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,18 @@ void test_drivenet() {
void demo() {

// Note: The settings below are hardcoded for demo purposes. Since the policy was
// trained with these exact settings, that changing them may lead to
// weird behavior.
// trained with these exact settings, changing them may lead to weird behavior.
Drive env = {
.human_agent_idx = 0,
.dynamics_model = conf.dynamics_model,
.reward_vehicle_collision = conf.reward_vehicle_collision,
.reward_offroad_collision = conf.reward_offroad_collision,
.reward_ade = conf.reward_ade,
.goal_radius = conf.goal_radius,
.dt = conf.dt,
.dynamics_model = CLASSIC,
.reward_vehicle_collision = -1.0f,
.reward_offroad_collision = -1.0f,
.goal_radius = 2.0f,
.dt = 0.1f,
.map_name = "resources/drive/binaries/training/map_000.bin",
.init_steps = conf.init_steps,
.collision_behavior = conf.collision_behavior,
.offroad_behavior = conf.offroad_behavior,
.init_steps = 0,
.collision_behavior = 0,
.offroad_behavior = 0,
};
allocate(&env);
c_reset(&env);
Expand Down
Loading
Loading