From ced67e257e41fe54bed445195f67281d3bbf294e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Wa=C3=ABl=20Doulazmi?= Date: Fri, 16 Jan 2026 18:23:39 +0000 Subject: [PATCH 01/17] Add a Flag to build_ocean so Raylib can work on Debian 11 --- scripts/build_ocean.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/build_ocean.sh b/scripts/build_ocean.sh index f090291120..a737f5654d 100755 --- a/scripts/build_ocean.sh +++ b/scripts/build_ocean.sh @@ -101,6 +101,7 @@ FLAGS=( $LINK_ARCHIVES -lm -lpthread + -ldl $ERROR_LIMIT_FLAG -DPLATFORM_DESKTOP ) From 2dff5d2e87bf4def9dcaeff1a86a568b7535cef4 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Tue, 17 Mar 2026 22:23:54 +0000 Subject: [PATCH 02/17] Add shared map infrastructure to reduce memory usage Environments sharing the same map binary now share read-only road elements, grid maps, and neighbor caches via a reference-counted SharedMapData cache. This eliminates duplicate allocations (~36-73 MB per CARLA map) across envs. Co-Authored-By: Claude Opus 4.6 --- pufferlib/ocean/drive/binding.c | 202 +++++++++++++++----------- pufferlib/ocean/drive/drive.h | 245 +++++++++++++++++++++++++++++--- pufferlib/ocean/drive/drive.py | 3 + 3 files changed, 349 insertions(+), 101 deletions(-) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index c51942f61c..d367452685 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -1,7 +1,33 @@ +#include #include "drive.h" #define Env Drive #define MY_SHARED #define MY_PUT + +// Module-level map cache: indexed by map_id, populated by my_shared(), used by my_init(). +static SharedMapData **g_map_cache = NULL; +static int g_map_cache_size = 0; + +static void release_map_cache_internal(void) { + if (g_map_cache == NULL) return; + for (int i = 0; i < g_map_cache_size; i++) { + if (g_map_cache[i] != NULL) { + free_shared_map_data(g_map_cache[i]); + g_map_cache[i] = NULL; + } + } + free(g_map_cache); + g_map_cache = NULL; + g_map_cache_size = 0; +} + +static PyObject *release_map_cache_py(PyObject *self, PyObject *args) { + release_map_cache_internal(); + Py_RETURN_NONE; +} + +#define MY_METHODS {"release_map_cache", release_map_cache_py, METH_VARARGS, "Release the shared map data cache"} + #include "../env_binding.h" static int my_put(Env *env, PyObject *args, PyObject *kwargs) { @@ -134,6 +160,33 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { clock_gettime(CLOCK_REALTIME, &ts); srand(ts.tv_nsec); + // Release any existing map cache from a previous call + release_map_cache_internal(); + + // Build reward_bounds array for create_shared_map_data + RewardBound reward_bounds[NUM_REWARD_COEFS]; + reward_bounds[REWARD_COEF_GOAL_RADIUS] = (RewardBound){reward_bound_goal_radius_min, reward_bound_goal_radius_max}; + reward_bounds[REWARD_COEF_COLLISION] = (RewardBound){reward_bound_collision_min, reward_bound_collision_max}; + reward_bounds[REWARD_COEF_OFFROAD] = (RewardBound){reward_bound_offroad_min, reward_bound_offroad_max}; + reward_bounds[REWARD_COEF_COMFORT] = (RewardBound){reward_bound_comfort_min, reward_bound_comfort_max}; + reward_bounds[REWARD_COEF_LANE_ALIGN] = (RewardBound){reward_bound_lane_align_min, reward_bound_lane_align_max}; + reward_bounds[REWARD_COEF_LANE_CENTER] = (RewardBound){reward_bound_lane_center_min, reward_bound_lane_center_max}; + reward_bounds[REWARD_COEF_VELOCITY] = (RewardBound){reward_bound_velocity_min, reward_bound_velocity_max}; + reward_bounds[REWARD_COEF_TRAFFIC_LIGHT] = + (RewardBound){reward_bound_traffic_light_min, reward_bound_traffic_light_max}; + reward_bounds[REWARD_COEF_CENTER_BIAS] = (RewardBound){reward_bound_center_bias_min, reward_bound_center_bias_max}; + reward_bounds[REWARD_COEF_VEL_ALIGN] = (RewardBound){reward_bound_vel_align_min, reward_bound_vel_align_max}; + reward_bounds[REWARD_COEF_OVERSPEED] = (RewardBound){reward_bound_overspeed_min, reward_bound_overspeed_max}; + reward_bounds[REWARD_COEF_TIMESTEP] = (RewardBound){reward_bound_timestep_min, reward_bound_timestep_max}; + reward_bounds[REWARD_COEF_REVERSE] = (RewardBound){reward_bound_reverse_min, reward_bound_reverse_max}; + reward_bounds[REWARD_COEF_THROTTLE] = (RewardBound){reward_bound_throttle_min, reward_bound_throttle_max}; + reward_bounds[REWARD_COEF_STEER] = (RewardBound){reward_bound_steer_min, reward_bound_steer_max}; + reward_bounds[REWARD_COEF_ACC] = (RewardBound){reward_bound_acc_min, reward_bound_acc_max}; + + // Allocate map cache indexed by map_id (0..num_maps-1) + g_map_cache_size = num_maps; + g_map_cache = (SharedMapData **)calloc(num_maps, sizeof(SharedMapData *)); + int max_envs = use_all_maps ? num_maps : num_agents; if (init_mode == INIT_VARIABLE_AGENT_NUMBER) { @@ -165,9 +218,21 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { int offset = 0; for (int i = 0; i < env_count; i++) { + int map_id = rand() % num_maps; PyList_SetItem(agent_offsets, i, PyLong_FromLong(offset)); - PyList_SetItem(map_ids_list, i, PyLong_FromLong(rand() % num_maps)); + PyList_SetItem(map_ids_list, i, PyLong_FromLong(map_id)); offset += agent_counts[i]; + + // Lazily populate map cache for assigned maps + if (g_map_cache[map_id] == NULL) { + PyObject *map_file_obj = PyList_GetItem(map_files_list, map_id); + const char *map_file_path = PyUnicode_AsUTF8(map_file_obj); + g_map_cache[map_id] = create_shared_map_data( + map_file_path, init_mode, control_mode, init_steps, goal_behavior, + reward_randomization, turn_off_normalization, reward_conditioning, + min_goal_distance, max_goal_distance, min_avg_speed_to_consider_goal_attempt, + reward_bounds); + } } PyList_SetItem(agent_offsets, env_count, PyLong_FromLong(num_agents)); // In random mode, we guarantee num_agents accross all envs @@ -186,71 +251,48 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { PyObject *agent_offsets = PyList_New(max_envs + 1); PyObject *map_ids = PyList_New(max_envs); - // getting env count + // getting env count — use cached SharedMapData to count agents instead of loading per-env while (use_all_maps ? map_idx < max_envs : total_agent_count < num_agents && env_count < max_envs) { int map_id = use_all_maps ? map_idx++ : rand() % num_maps; - Drive *env = calloc(1, sizeof(Drive)); - env->init_mode = init_mode; - env->control_mode = control_mode; - env->init_steps = init_steps; - env->goal_behavior = goal_behavior; - env->reward_randomization = reward_randomization; - env->turn_off_normalization = turn_off_normalization; - env->reward_conditioning = reward_conditioning; - env->min_goal_distance = min_goal_distance; - env->max_goal_distance = max_goal_distance; - env->min_avg_speed_to_consider_goal_attempt = min_avg_speed_to_consider_goal_attempt; - // reward randomization bounds - env->reward_bounds[REWARD_COEF_GOAL_RADIUS] = - (RewardBound){reward_bound_goal_radius_min, reward_bound_goal_radius_max}; - env->reward_bounds[REWARD_COEF_COLLISION] = - (RewardBound){reward_bound_collision_min, reward_bound_collision_max}; - env->reward_bounds[REWARD_COEF_OFFROAD] = (RewardBound){reward_bound_offroad_min, reward_bound_offroad_max}; - env->reward_bounds[REWARD_COEF_COMFORT] = (RewardBound){reward_bound_comfort_min, reward_bound_comfort_max}; - env->reward_bounds[REWARD_COEF_LANE_ALIGN] = - (RewardBound){reward_bound_lane_align_min, reward_bound_lane_align_max}; - env->reward_bounds[REWARD_COEF_LANE_CENTER] = - (RewardBound){reward_bound_lane_center_min, reward_bound_lane_center_max}; - env->reward_bounds[REWARD_COEF_VELOCITY] = (RewardBound){reward_bound_velocity_min, reward_bound_velocity_max}; - env->reward_bounds[REWARD_COEF_TRAFFIC_LIGHT] = - (RewardBound){reward_bound_traffic_light_min, reward_bound_traffic_light_max}; - env->reward_bounds[REWARD_COEF_CENTER_BIAS] = - (RewardBound){reward_bound_center_bias_min, reward_bound_center_bias_max}; - env->reward_bounds[REWARD_COEF_VEL_ALIGN] = - (RewardBound){reward_bound_vel_align_min, reward_bound_vel_align_max}; - env->reward_bounds[REWARD_COEF_OVERSPEED] = - (RewardBound){reward_bound_overspeed_min, reward_bound_overspeed_max}; - env->reward_bounds[REWARD_COEF_TIMESTEP] = (RewardBound){reward_bound_timestep_min, reward_bound_timestep_max}; - env->reward_bounds[REWARD_COEF_REVERSE] = (RewardBound){reward_bound_reverse_min, reward_bound_reverse_max}; - env->reward_bounds[REWARD_COEF_THROTTLE] = (RewardBound){reward_bound_throttle_min, reward_bound_throttle_max}; - env->reward_bounds[REWARD_COEF_STEER] = (RewardBound){reward_bound_steer_min, reward_bound_steer_max}; - env->reward_bounds[REWARD_COEF_ACC] = (RewardBound){reward_bound_acc_min, reward_bound_acc_max}; - - // Get map file path from Python list - PyObject *map_file_obj = PyList_GetItem(map_files_list, map_id); - const char *map_file_path = PyUnicode_AsUTF8(map_file_obj); - load_map_binary(map_file_path, env); - set_active_agents(env); + + // Lazily populate map cache + if (g_map_cache[map_id] == NULL) { + PyObject *map_file_obj = PyList_GetItem(map_files_list, map_id); + const char *map_file_path = PyUnicode_AsUTF8(map_file_obj); + g_map_cache[map_id] = create_shared_map_data( + map_file_path, init_mode, control_mode, init_steps, goal_behavior, + reward_randomization, turn_off_normalization, reward_conditioning, + min_goal_distance, max_goal_distance, min_avg_speed_to_consider_goal_attempt, + reward_bounds); + } + SharedMapData *shared = g_map_cache[map_id]; + + // Count active agents using a temporary Drive (doesn't allocate map data) + Drive temp_env = {0}; + temp_env.init_mode = init_mode; + temp_env.control_mode = control_mode; + temp_env.init_steps = init_steps; + temp_env.goal_behavior = goal_behavior; + temp_env.reward_randomization = reward_randomization; + temp_env.turn_off_normalization = turn_off_normalization; + temp_env.reward_conditioning = reward_conditioning; + temp_env.agents = shared->template_agents; + temp_env.num_objects = shared->num_objects; + temp_env.sdc_track_index = shared->sdc_track_index; + temp_env.num_tracks_to_predict = shared->num_tracks_to_predict; + temp_env.tracks_to_predict_indices = shared->tracks_to_predict_indices; + set_active_agents(&temp_env); + int active_count = temp_env.active_agent_count; + + // Free the index arrays that set_active_agents allocated + free(temp_env.active_agent_indices); + free(temp_env.static_agent_indices); + free(temp_env.expert_static_agent_indices); // Skip map if it doesn't contain any controllable agents - if (env->active_agent_count == 0) { + if (active_count == 0) { maps_checked++; - - // Safeguard: if we've checked all available maps and found no active agents, raise an error if (maps_checked >= num_maps) { - for (int j = 0; j < env->num_objects; j++) { - free_agent(&env->agents[j]); - } - for (int j = 0; j < env->num_roads; j++) { - free_road_element(&env->road_elements[j]); - } - free(env->agents); - free(env->road_elements); - free(env->road_scenario_ids); - free(env->active_agent_indices); - free(env->static_agent_indices); - free(env->expert_static_agent_indices); - free(env); Py_DECREF(agent_offsets); Py_DECREF(map_ids); char error_msg[256]; @@ -258,30 +300,14 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { PyErr_SetString(PyExc_ValueError, error_msg); return NULL; } - - // Store map_id - PyObject *map_id_obj = PyLong_FromLong(map_id); - PyList_SetItem(map_ids, env_count, map_id_obj); - // Store agent offset - PyObject *offset = PyLong_FromLong(total_agent_count); - PyList_SetItem(agent_offsets, env_count, offset); - total_agent_count += env->active_agent_count; - env_count++; - for (int j = 0; j < env->num_objects; j++) { - free_agent(&env->agents[j]); - } - for (int j = 0; j < env->num_roads; j++) { - free_road_element(&env->road_elements[j]); - } - free(env->agents); - free(env->road_elements); - free(env->road_scenario_ids); - free(env->active_agent_indices); - free(env->static_agent_indices); - free(env->expert_static_agent_indices); - free(env); continue; } + + // Store map_id and agent offset + PyList_SetItem(map_ids, env_count, PyLong_FromLong(map_id)); + PyList_SetItem(agent_offsets, env_count, PyLong_FromLong(total_agent_count)); + total_agent_count += active_count; + env_count++; } if (total_agent_count >= num_agents) { @@ -399,6 +425,18 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { env->map_name = map_path; env->init_steps = init_steps; env->timestep = init_steps; + + // Check if map_id was provided and the map cache is populated + PyObject *map_id_obj = kwargs ? PyDict_GetItemString(kwargs, "map_id") : NULL; + if (map_id_obj != NULL && g_map_cache != NULL) { + int map_id = (int)PyLong_AsLong(map_id_obj); + if (map_id >= 0 && map_id < g_map_cache_size && g_map_cache[map_id] != NULL) { + init_from_shared(env, g_map_cache[map_id]); + return 0; + } + } + + // Fallback: load from disk (standalone use, tests, rendering, etc.) init(env); return 0; } diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index cabd5b0817..181391732e 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -279,6 +279,36 @@ struct AgentSpawnSettings { float h; }; +// Shared map data that can be reused across multiple environments using the same map. +// Holds road infrastructure (road_elements, grid_map, neighbor_cache) and agent templates. +// Reference-counted: freed when no env references it and the cache is released. +typedef struct SharedMapData SharedMapData; +struct SharedMapData { + // Road infrastructure (shared read-only across envs) + RoadMapElement *road_elements; + int *road_scenario_ids; + int num_roads; + + // Spatial index + GridMap *grid_map; + int *neighbor_offsets; + + // World coordinate means + float world_mean_x; + float world_mean_y; + float world_mean_z; + + // Agent templates from binary (for cloning into per-env agents) + Agent *template_agents; + int num_objects; + int sdc_track_index; + int num_tracks_to_predict; + int *tracks_to_predict_indices; + + // Reference counting + int ref_count; +}; + struct Drive { Client *client; float *observations; @@ -351,6 +381,9 @@ struct Drive { int turn_off_normalization; RewardBound reward_bounds[NUM_REWARD_COEFS]; float min_avg_speed_to_consider_goal_attempt; + + // Shared map data (NULL if map data is owned by this env) + SharedMapData *shared_map; }; // ======================================== @@ -2058,6 +2091,7 @@ void remove_bad_trajectories(Drive *env) { void init(Drive *env) { env->human_agent_idx = 0; env->timestep = 0; + env->shared_map = NULL; load_map_binary(env->map_name, env); set_means(env); init_grid_map(env); @@ -2075,34 +2109,207 @@ void init(Drive *env) { env->logs = (Log *)calloc(env->active_agent_count, sizeof(Log)); } -void c_close(Drive *env) { - free_agents(env->agents, env->num_objects); - for (int i = 0; i < env->num_roads; i++) { - free_road_element(&env->road_elements[i]); +// Initialize an environment using shared map data instead of loading from disk. +// Road elements, grid_map, and neighbor cache are shared (pointer copy, not duplicated). +// Agents are cloned per-env since they have mutable sim state. +void init_from_shared(Drive *env, SharedMapData *shared) { + env->human_agent_idx = 0; + env->timestep = 0; + env->shared_map = shared; + shared->ref_count++; + + // Point to shared road/grid data (read-only, not duplicated) + env->road_elements = shared->road_elements; + env->road_scenario_ids = shared->road_scenario_ids; + env->num_roads = shared->num_roads; + env->grid_map = shared->grid_map; + env->neighbor_offsets = shared->neighbor_offsets; + env->world_mean_x = shared->world_mean_x; + env->world_mean_y = shared->world_mean_y; + env->world_mean_z = shared->world_mean_z; + env->sdc_track_index = shared->sdc_track_index; + env->num_tracks_to_predict = shared->num_tracks_to_predict; + + // Copy tracks_to_predict_indices (small, per-env) + if (shared->num_tracks_to_predict > 0) { + env->tracks_to_predict_indices = (int *)malloc(shared->num_tracks_to_predict * sizeof(int)); + memcpy(env->tracks_to_predict_indices, shared->tracks_to_predict_indices, + shared->num_tracks_to_predict * sizeof(int)); + } else { + env->tracks_to_predict_indices = NULL; } - free(env->road_elements); - free(env->road_scenario_ids); - free(env->active_agent_indices); - free(env->logs); - // GridMap cleanup - int grid_cell_count = env->grid_map->grid_cols * env->grid_map->grid_rows; - for (int grid_index = 0; grid_index < grid_cell_count; grid_index++) { - free(env->grid_map->cells[grid_index]); + + // Generate collision/z offsets (static globals, idempotent) + generate_offsets(collision_offsets, COLLISION_RANGE); + generate_offsets(z_offsets, Z_RANGE); + + if (env->init_mode == INIT_VARIABLE_AGENT_NUMBER) { + // Variable agent mode: agents are spawned fresh, not cloned from template. + // Set num_objects=0 so spawn_active_agents' free_agents() call is a no-op. + env->agents = NULL; + env->num_objects = 0; + } else { + // Clone agents from template (each env needs its own mutable copy) + env->num_objects = shared->num_objects; + env->agents = (Agent *)calloc(shared->num_objects, sizeof(Agent)); + for (int i = 0; i < shared->num_objects; i++) { + Agent *src = &shared->template_agents[i]; + Agent *dst = &env->agents[i]; + // Copy all scalar fields + *dst = *src; + // Deep copy trajectory arrays + allocate_agent_trajectories(dst); + memcpy(dst->log_trajectory_x, src->log_trajectory_x, src->trajectory_length * sizeof(float)); + memcpy(dst->log_trajectory_y, src->log_trajectory_y, src->trajectory_length * sizeof(float)); + memcpy(dst->log_trajectory_z, src->log_trajectory_z, src->trajectory_length * sizeof(float)); + memcpy(dst->log_velocity_x, src->log_velocity_x, src->trajectory_length * sizeof(float)); + memcpy(dst->log_velocity_y, src->log_velocity_y, src->trajectory_length * sizeof(float)); + memcpy(dst->log_heading, src->log_heading, src->trajectory_length * sizeof(float)); + memcpy(dst->log_valid, src->log_valid, src->trajectory_length * sizeof(int)); + // Route and path are per-env, start NULL + dst->route = NULL; + dst->path = NULL; + } } - free(env->grid_map->cells); - free(env->grid_map->cell_entities_count); - free(env->neighbor_offsets); + env->logs_capacity = 0; + set_active_agents(env); + env->logs_capacity = env->active_agent_count; + remove_bad_trajectories(env); + set_start_position(env); + init_goal_positions(env); + env->logs = (Log *)calloc(env->active_agent_count, sizeof(Log)); +} + +// Free a SharedMapData and all data it owns. +void free_shared_map_data(SharedMapData *shared) { + if (shared == NULL) return; + + // Free road elements + for (int i = 0; i < shared->num_roads; i++) { + free_road_element(&shared->road_elements[i]); + } + free(shared->road_elements); + free(shared->road_scenario_ids); + + // Free grid map + int grid_cell_count = shared->grid_map->grid_cols * shared->grid_map->grid_rows; + for (int grid_index = 0; grid_index < grid_cell_count; grid_index++) { + free(shared->grid_map->cells[grid_index]); + } + free(shared->grid_map->cells); + free(shared->grid_map->cell_entities_count); + free(shared->neighbor_offsets); for (int i = 0; i < grid_cell_count; i++) { - free(env->grid_map->neighbor_cache_entities[i]); + free(shared->grid_map->neighbor_cache_entities[i]); } - free(env->grid_map->neighbor_cache_entities); - free(env->grid_map->neighbor_cache_count); - free(env->grid_map); + free(shared->grid_map->neighbor_cache_entities); + free(shared->grid_map->neighbor_cache_count); + free(shared->grid_map); + + // Free template agents + free_agents(shared->template_agents, shared->num_objects); + + // Free tracks_to_predict + free(shared->tracks_to_predict_indices); + + free(shared); +} + +// Create a SharedMapData by loading a map binary and building all spatial structures. +SharedMapData *create_shared_map_data(const char *map_file_path, int init_mode, int control_mode, + int init_steps, int goal_behavior, int reward_randomization, + int turn_off_normalization, int reward_conditioning, + float min_goal_distance, float max_goal_distance, + float min_avg_speed_to_consider_goal_attempt, + RewardBound *reward_bounds) { + SharedMapData *shared = (SharedMapData *)calloc(1, sizeof(SharedMapData)); + + // Create a temporary Drive to use existing loading functions + Drive temp = {0}; + temp.init_mode = init_mode; + temp.control_mode = control_mode; + temp.init_steps = init_steps; + temp.goal_behavior = goal_behavior; + temp.reward_randomization = reward_randomization; + temp.turn_off_normalization = turn_off_normalization; + temp.reward_conditioning = reward_conditioning; + temp.min_goal_distance = min_goal_distance; + temp.max_goal_distance = max_goal_distance; + temp.min_avg_speed_to_consider_goal_attempt = min_avg_speed_to_consider_goal_attempt; + memcpy(temp.reward_bounds, reward_bounds, NUM_REWARD_COEFS * sizeof(RewardBound)); + + // Load map binary (allocates agents, road_elements, etc.) + load_map_binary(map_file_path, &temp); + + // Transfer ownership to shared struct + shared->template_agents = temp.agents; + shared->num_objects = temp.num_objects; + shared->num_roads = temp.num_roads; + shared->road_elements = temp.road_elements; + shared->road_scenario_ids = temp.road_scenario_ids; + shared->sdc_track_index = temp.sdc_track_index; + shared->num_tracks_to_predict = temp.num_tracks_to_predict; + shared->tracks_to_predict_indices = temp.tracks_to_predict_indices; + + // Compute world means + set_means(&temp); + shared->world_mean_x = temp.world_mean_x; + shared->world_mean_y = temp.world_mean_y; + shared->world_mean_z = temp.world_mean_z; + + // Build grid map and neighbor cache + init_grid_map(&temp); + generate_offsets(collision_offsets, COLLISION_RANGE); + generate_offsets(z_offsets, Z_RANGE); + temp.grid_map->vision_range = 21; + init_neighbor_offsets(&temp); + cache_neighbor_offsets(&temp); + + // Transfer grid data ownership to shared struct + shared->grid_map = temp.grid_map; + shared->neighbor_offsets = temp.neighbor_offsets; + + shared->ref_count = 0; + return shared; +} + +void c_close(Drive *env) { + // Always free per-env agent data + free_agents(env->agents, env->num_objects); + free(env->active_agent_indices); + free(env->logs); free(env->static_agent_indices); free(env->expert_static_agent_indices); free(env->tracks_to_predict_indices); free(env->ini_file); + + if (env->shared_map != NULL) { + // Map data is shared — just decrement ref count, don't free + env->shared_map->ref_count--; + env->shared_map = NULL; + } else { + // Map data is owned by this env — free it + for (int i = 0; i < env->num_roads; i++) { + free_road_element(&env->road_elements[i]); + } + free(env->road_elements); + free(env->road_scenario_ids); + + int grid_cell_count = env->grid_map->grid_cols * env->grid_map->grid_rows; + for (int grid_index = 0; grid_index < grid_cell_count; grid_index++) { + free(env->grid_map->cells[grid_index]); + } + free(env->grid_map->cells); + free(env->grid_map->cell_entities_count); + free(env->neighbor_offsets); + for (int i = 0; i < grid_cell_count; i++) { + free(env->grid_map->neighbor_cache_entities[i]); + } + free(env->grid_map->neighbor_cache_entities); + free(env->grid_map->neighbor_cache_count); + free(env->grid_map); + } } void allocate(Drive *env) { diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 2868d03f3b..8f10ba99b6 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -393,6 +393,7 @@ def __init__( episode_length=(int(episode_length) if episode_length is not None else None), termination_mode=(int(self.termination_mode) if self.termination_mode is not None else 0), map_path=self.map_files[map_ids[i]], + map_id=map_ids[i], max_agents=nxt - cur, ini_file="pufferlib/config/ocean/drive.ini", init_steps=init_steps, @@ -544,6 +545,7 @@ def resample_maps(self): dt=self.dt, episode_length=(int(self.episode_length) if self.episode_length is not None else None), map_path=self.map_files[map_ids[i]], + map_id=map_ids[i], max_agents=nxt - cur, ini_file="pufferlib/config/ocean/drive.ini", init_steps=self.init_steps, @@ -708,6 +710,7 @@ def render(self): def close(self): binding.vec_close(self.c_envs) + binding.release_map_cache() def calculate_area(p1, p2, p3): From 5962ac590ecd8e0e7a9a0cf6fbde3df48908e0ea Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Tue, 17 Mar 2026 23:19:17 +0000 Subject: [PATCH 03/17] Skip binaries dir in code isolation to avoid slow NFS walks Walk pufferlib/ in Python and create per-file symlinks, skipping resources/drive/binaries (60K+ map files). Symlink that dir as a single entry instead. Removes dependency on rsync/cp -rs. Co-Authored-By: Claude Opus 4.6 --- scripts/submit_cluster.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/scripts/submit_cluster.py b/scripts/submit_cluster.py index 5d35bb32a0..ef77df96b9 100644 --- a/scripts/submit_cluster.py +++ b/scripts/submit_cluster.py @@ -260,13 +260,27 @@ def launch_training(args, from_config, cmd, save_dir, project_root, container_co else: os.remove(dst) os.symlink(src, dst) - # Copy pufferlib/ as a real dir tree so we can replace .so files + # Create symlink tree of pufferlib/, excluding resources/drive/binaries + # (read-only data, 60K+ map files) which is symlinked as a single dir. pufferlib_dst = os.path.join(isolated_root, "pufferlib") if os.path.islink(pufferlib_dst): os.remove(pufferlib_dst) pufferlib_src = os.path.join(project_root, "pufferlib") - subprocess.run(["cp", "-rs", pufferlib_src, pufferlib_dst], check=True) - # Copy .so files over their symlinks so they survive rebuilds + skip_dir = os.path.join(pufferlib_src, "resources", "drive", "binaries") + for dirpath, dirnames, filenames in os.walk(pufferlib_src): + if os.path.abspath(dirpath) == os.path.abspath(skip_dir): + dirnames.clear() + continue + rel = os.path.relpath(dirpath, pufferlib_src) + dst_dir = os.path.join(pufferlib_dst, rel) + os.makedirs(dst_dir, exist_ok=True) + for fname in filenames: + src_file = os.path.join(dirpath, fname) + dst_file = os.path.join(dst_dir, fname) + os.symlink(src_file, dst_file) + # Symlink binaries dir back to original (read-only, shared across runs) + os.symlink(skip_dir, os.path.join(pufferlib_dst, "resources", "drive", "binaries")) + # Copy .so files so they survive rebuilds on other branches for so_link in glob.glob(os.path.join(pufferlib_dst, "**", "*.so"), recursive=True): if os.path.islink(so_link): real_path = os.path.realpath(so_link) From c1340c1b83fa165df43621b539e9d39f8d8917d5 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Wed, 18 Mar 2026 00:06:19 +0000 Subject: [PATCH 04/17] Fix segfault: don't free inherited map cache after fork After fork, child processes inherit g_map_cache pointers from the parent. Calling free_shared_map_data on these corrupts the heap since the memory belongs to the parent's address space. Track the creating PID and skip freeing if PID doesn't match. Co-Authored-By: Claude Opus 4.6 --- pufferlib/ocean/drive/binding.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index d367452685..d869829f93 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -7,9 +7,18 @@ // Module-level map cache: indexed by map_id, populated by my_shared(), used by my_init(). static SharedMapData **g_map_cache = NULL; static int g_map_cache_size = 0; +static pid_t g_map_cache_pid = 0; // PID of the process that created the cache static void release_map_cache_internal(void) { if (g_map_cache == NULL) return; + // After fork, child inherits g_map_cache but must not free parent's memory. + // Just discard the inherited pointers without freeing. + if (g_map_cache_pid != 0 && g_map_cache_pid != getpid()) { + g_map_cache = NULL; + g_map_cache_size = 0; + g_map_cache_pid = 0; + return; + } for (int i = 0; i < g_map_cache_size; i++) { if (g_map_cache[i] != NULL) { free_shared_map_data(g_map_cache[i]); @@ -19,6 +28,7 @@ static void release_map_cache_internal(void) { free(g_map_cache); g_map_cache = NULL; g_map_cache_size = 0; + g_map_cache_pid = 0; } static PyObject *release_map_cache_py(PyObject *self, PyObject *args) { @@ -186,6 +196,7 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { // Allocate map cache indexed by map_id (0..num_maps-1) g_map_cache_size = num_maps; g_map_cache = (SharedMapData **)calloc(num_maps, sizeof(SharedMapData *)); + g_map_cache_pid = getpid(); int max_envs = use_all_maps ? num_maps : num_agents; From 4d78b8256e4c329ee23a36e32be61c9edb652a49 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Wed, 18 Mar 2026 00:35:05 +0000 Subject: [PATCH 05/17] Add debug prints to trace segfault in move_dynamics/c_step Adds NULL checks and bounds checks with stderr output to identify the root cause of worker segfaults after fork. Co-Authored-By: Claude Opus 4.6 --- pufferlib/ocean/drive/binding.c | 7 +++++++ pufferlib/ocean/drive/drive.h | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index d869829f93..eee83fb2d8 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -441,6 +441,10 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { PyObject *map_id_obj = kwargs ? PyDict_GetItemString(kwargs, "map_id") : NULL; if (map_id_obj != NULL && g_map_cache != NULL) { int map_id = (int)PyLong_AsLong(map_id_obj); + fprintf(stderr, "DEBUG: my_init pid=%d map_id=%d cache_size=%d cache_pid=%d cache[map_id]=%p\n", + getpid(), map_id, g_map_cache_size, g_map_cache_pid, + (map_id >= 0 && map_id < g_map_cache_size) ? (void*)g_map_cache[map_id] : NULL); + fflush(stderr); if (map_id >= 0 && map_id < g_map_cache_size && g_map_cache[map_id] != NULL) { init_from_shared(env, g_map_cache[map_id]); return 0; @@ -448,6 +452,9 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { } // Fallback: load from disk (standalone use, tests, rendering, etc.) + fprintf(stderr, "DEBUG: my_init pid=%d FALLBACK to disk load (map_id_obj=%p, g_map_cache=%p)\n", + getpid(), (void*)map_id_obj, (void*)g_map_cache); + fflush(stderr); init(env); return 0; } diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 181391732e..95db13cada 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -2113,6 +2113,9 @@ void init(Drive *env) { // Road elements, grid_map, and neighbor cache are shared (pointer copy, not duplicated). // Agents are cloned per-env since they have mutable sim state. void init_from_shared(Drive *env, SharedMapData *shared) { + fprintf(stderr, "DEBUG: init_from_shared pid=%d shared=%p num_objects=%d num_roads=%d\n", + getpid(), (void*)shared, shared->num_objects, shared->num_roads); + fflush(stderr); env->human_agent_idx = 0; env->timestep = 0; env->shared_map = shared; @@ -2907,6 +2910,17 @@ void move_expert(Drive *env, float *actions, int agent_idx) { } void move_dynamics(Drive *env, int action_idx, int agent_idx) { + if (env->agents == NULL) { + fprintf(stderr, "FATAL: env->agents is NULL in move_dynamics (pid=%d, agent_idx=%d)\n", getpid(), agent_idx); + fflush(stderr); + abort(); + } + if (agent_idx < 0 || agent_idx >= env->num_objects) { + fprintf(stderr, "FATAL: agent_idx=%d out of range [0, %d) in move_dynamics (pid=%d)\n", + agent_idx, env->num_objects, getpid()); + fflush(stderr); + abort(); + } Agent *agent = &env->agents[agent_idx]; if (agent->removed) return; @@ -3163,6 +3177,17 @@ void c_reset(Drive *env) { } void c_step(Drive *env) { + if (env->agents == NULL) { + fprintf(stderr, "FATAL: env->agents is NULL at start of c_step (pid=%d)\n", getpid()); + fflush(stderr); + abort(); + } + if (env->rewards == NULL || env->terminals == NULL || env->truncations == NULL) { + fprintf(stderr, "FATAL: NULL buffer in c_step (pid=%d, rewards=%p, terminals=%p, truncations=%p)\n", + getpid(), (void*)env->rewards, (void*)env->terminals, (void*)env->truncations); + fflush(stderr); + abort(); + } memset(env->rewards, 0, env->active_agent_count * sizeof(float)); memset(env->terminals, 0, env->active_agent_count * sizeof(unsigned char)); memset(env->truncations, 0, env->active_agent_count * sizeof(unsigned char)); From e7f83ece9023ff67c76af7096115fe2d40efa560 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Wed, 18 Mar 2026 00:55:06 +0000 Subject: [PATCH 06/17] Fix segfault: reuse map cache across PufferDrive instances in same process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bug: each PufferDrive.__init__ calls binding.shared() which freed and rebuilt g_map_cache. When multiple PufferDrive instances exist in one process (Serial workers), earlier instances' Drive structs had shared_map pointers to freed SharedMapData, causing use-after-free crashes in checkNeighbors/compute_agent_metrics. Fix: only rebuild the cache after fork (PID mismatch) or on first call. Same-process calls reuse the existing cache, which is correct since the map data doesn't change — only agent-to-map assignment varies. Co-Authored-By: Claude Opus 4.6 --- pufferlib/ocean/drive/binding.c | 28 +++++++++++++++------------- pufferlib/ocean/drive/drive.h | 25 ------------------------- 2 files changed, 15 insertions(+), 38 deletions(-) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index eee83fb2d8..cbff94a989 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -170,8 +170,15 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { clock_gettime(CLOCK_REALTIME, &ts); srand(ts.tv_nsec); - // Release any existing map cache from a previous call - release_map_cache_internal(); + // Reuse existing cache if it was created by this process and has the right size. + // Multiple PufferDrive instances in the same process (e.g. Serial workers) share one cache. + // Only rebuild after fork (PID mismatch) or on first call. + int reuse_cache = (g_map_cache != NULL && g_map_cache_pid == getpid() && g_map_cache_size == num_maps); + + if (!reuse_cache) { + // Release any inherited or stale cache + release_map_cache_internal(); + } // Build reward_bounds array for create_shared_map_data RewardBound reward_bounds[NUM_REWARD_COEFS]; @@ -193,10 +200,12 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { reward_bounds[REWARD_COEF_STEER] = (RewardBound){reward_bound_steer_min, reward_bound_steer_max}; reward_bounds[REWARD_COEF_ACC] = (RewardBound){reward_bound_acc_min, reward_bound_acc_max}; - // Allocate map cache indexed by map_id (0..num_maps-1) - g_map_cache_size = num_maps; - g_map_cache = (SharedMapData **)calloc(num_maps, sizeof(SharedMapData *)); - g_map_cache_pid = getpid(); + if (!reuse_cache) { + // Allocate map cache indexed by map_id (0..num_maps-1) + g_map_cache_size = num_maps; + g_map_cache = (SharedMapData **)calloc(num_maps, sizeof(SharedMapData *)); + g_map_cache_pid = getpid(); + } int max_envs = use_all_maps ? num_maps : num_agents; @@ -441,10 +450,6 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { PyObject *map_id_obj = kwargs ? PyDict_GetItemString(kwargs, "map_id") : NULL; if (map_id_obj != NULL && g_map_cache != NULL) { int map_id = (int)PyLong_AsLong(map_id_obj); - fprintf(stderr, "DEBUG: my_init pid=%d map_id=%d cache_size=%d cache_pid=%d cache[map_id]=%p\n", - getpid(), map_id, g_map_cache_size, g_map_cache_pid, - (map_id >= 0 && map_id < g_map_cache_size) ? (void*)g_map_cache[map_id] : NULL); - fflush(stderr); if (map_id >= 0 && map_id < g_map_cache_size && g_map_cache[map_id] != NULL) { init_from_shared(env, g_map_cache[map_id]); return 0; @@ -452,9 +457,6 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { } // Fallback: load from disk (standalone use, tests, rendering, etc.) - fprintf(stderr, "DEBUG: my_init pid=%d FALLBACK to disk load (map_id_obj=%p, g_map_cache=%p)\n", - getpid(), (void*)map_id_obj, (void*)g_map_cache); - fflush(stderr); init(env); return 0; } diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 95db13cada..181391732e 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -2113,9 +2113,6 @@ void init(Drive *env) { // Road elements, grid_map, and neighbor cache are shared (pointer copy, not duplicated). // Agents are cloned per-env since they have mutable sim state. void init_from_shared(Drive *env, SharedMapData *shared) { - fprintf(stderr, "DEBUG: init_from_shared pid=%d shared=%p num_objects=%d num_roads=%d\n", - getpid(), (void*)shared, shared->num_objects, shared->num_roads); - fflush(stderr); env->human_agent_idx = 0; env->timestep = 0; env->shared_map = shared; @@ -2910,17 +2907,6 @@ void move_expert(Drive *env, float *actions, int agent_idx) { } void move_dynamics(Drive *env, int action_idx, int agent_idx) { - if (env->agents == NULL) { - fprintf(stderr, "FATAL: env->agents is NULL in move_dynamics (pid=%d, agent_idx=%d)\n", getpid(), agent_idx); - fflush(stderr); - abort(); - } - if (agent_idx < 0 || agent_idx >= env->num_objects) { - fprintf(stderr, "FATAL: agent_idx=%d out of range [0, %d) in move_dynamics (pid=%d)\n", - agent_idx, env->num_objects, getpid()); - fflush(stderr); - abort(); - } Agent *agent = &env->agents[agent_idx]; if (agent->removed) return; @@ -3177,17 +3163,6 @@ void c_reset(Drive *env) { } void c_step(Drive *env) { - if (env->agents == NULL) { - fprintf(stderr, "FATAL: env->agents is NULL at start of c_step (pid=%d)\n", getpid()); - fflush(stderr); - abort(); - } - if (env->rewards == NULL || env->terminals == NULL || env->truncations == NULL) { - fprintf(stderr, "FATAL: NULL buffer in c_step (pid=%d, rewards=%p, terminals=%p, truncations=%p)\n", - getpid(), (void*)env->rewards, (void*)env->terminals, (void*)env->truncations); - fflush(stderr); - abort(); - } memset(env->rewards, 0, env->active_agent_count * sizeof(float)); memset(env->terminals, 0, env->active_agent_count * sizeof(unsigned char)); memset(env->truncations, 0, env->active_agent_count * sizeof(unsigned char)); From c2fa8ab7f0c268342843a6f2baea0888512ba5d3 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Wed, 18 Mar 2026 01:24:28 +0000 Subject: [PATCH 07/17] Apply clang-format to C files Co-Authored-By: Claude Opus 4.6 --- pufferlib/ocean/drive/binding.c | 21 ++++++++++----------- pufferlib/ocean/drive/drive.h | 13 ++++++------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index cbff94a989..f348b9d825 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -7,10 +7,11 @@ // Module-level map cache: indexed by map_id, populated by my_shared(), used by my_init(). static SharedMapData **g_map_cache = NULL; static int g_map_cache_size = 0; -static pid_t g_map_cache_pid = 0; // PID of the process that created the cache +static pid_t g_map_cache_pid = 0; // PID of the process that created the cache static void release_map_cache_internal(void) { - if (g_map_cache == NULL) return; + if (g_map_cache == NULL) + return; // After fork, child inherits g_map_cache but must not free parent's memory. // Just discard the inherited pointers without freeing. if (g_map_cache_pid != 0 && g_map_cache_pid != getpid()) { @@ -248,10 +249,9 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { PyObject *map_file_obj = PyList_GetItem(map_files_list, map_id); const char *map_file_path = PyUnicode_AsUTF8(map_file_obj); g_map_cache[map_id] = create_shared_map_data( - map_file_path, init_mode, control_mode, init_steps, goal_behavior, - reward_randomization, turn_off_normalization, reward_conditioning, - min_goal_distance, max_goal_distance, min_avg_speed_to_consider_goal_attempt, - reward_bounds); + map_file_path, init_mode, control_mode, init_steps, goal_behavior, reward_randomization, + turn_off_normalization, reward_conditioning, min_goal_distance, max_goal_distance, + min_avg_speed_to_consider_goal_attempt, reward_bounds); } } PyList_SetItem(agent_offsets, env_count, @@ -279,11 +279,10 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { if (g_map_cache[map_id] == NULL) { PyObject *map_file_obj = PyList_GetItem(map_files_list, map_id); const char *map_file_path = PyUnicode_AsUTF8(map_file_obj); - g_map_cache[map_id] = create_shared_map_data( - map_file_path, init_mode, control_mode, init_steps, goal_behavior, - reward_randomization, turn_off_normalization, reward_conditioning, - min_goal_distance, max_goal_distance, min_avg_speed_to_consider_goal_attempt, - reward_bounds); + g_map_cache[map_id] = create_shared_map_data(map_file_path, init_mode, control_mode, init_steps, + goal_behavior, reward_randomization, turn_off_normalization, + reward_conditioning, min_goal_distance, max_goal_distance, + min_avg_speed_to_consider_goal_attempt, reward_bounds); } SharedMapData *shared = g_map_cache[map_id]; diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 181391732e..b584ab0f18 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -2183,7 +2183,8 @@ void init_from_shared(Drive *env, SharedMapData *shared) { // Free a SharedMapData and all data it owns. void free_shared_map_data(SharedMapData *shared) { - if (shared == NULL) return; + if (shared == NULL) + return; // Free road elements for (int i = 0; i < shared->num_roads; i++) { @@ -2217,12 +2218,10 @@ void free_shared_map_data(SharedMapData *shared) { } // Create a SharedMapData by loading a map binary and building all spatial structures. -SharedMapData *create_shared_map_data(const char *map_file_path, int init_mode, int control_mode, - int init_steps, int goal_behavior, int reward_randomization, - int turn_off_normalization, int reward_conditioning, - float min_goal_distance, float max_goal_distance, - float min_avg_speed_to_consider_goal_attempt, - RewardBound *reward_bounds) { +SharedMapData *create_shared_map_data(const char *map_file_path, int init_mode, int control_mode, int init_steps, + int goal_behavior, int reward_randomization, int turn_off_normalization, + int reward_conditioning, float min_goal_distance, float max_goal_distance, + float min_avg_speed_to_consider_goal_attempt, RewardBound *reward_bounds) { SharedMapData *shared = (SharedMapData *)calloc(1, sizeof(SharedMapData)); // Create a temporary Drive to use existing loading functions From a6299bba343a4bd02e7346410d84363b1387abe3 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 22 Mar 2026 16:53:20 -0400 Subject: [PATCH 08/17] Remove unused reward_bounds from create_shared_map_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reward bounds are per-env config used during agent spawn, not map infrastructure. The shared map cache only stores roads, grids, and agent templates — it never reads reward bounds. Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/drive/binding.c | 30 ++---------------------------- pufferlib/ocean/drive/drive.h | 16 +++------------- 2 files changed, 5 insertions(+), 41 deletions(-) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index f348b9d825..e1ee89802f 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -181,26 +181,6 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { release_map_cache_internal(); } - // Build reward_bounds array for create_shared_map_data - RewardBound reward_bounds[NUM_REWARD_COEFS]; - reward_bounds[REWARD_COEF_GOAL_RADIUS] = (RewardBound){reward_bound_goal_radius_min, reward_bound_goal_radius_max}; - reward_bounds[REWARD_COEF_COLLISION] = (RewardBound){reward_bound_collision_min, reward_bound_collision_max}; - reward_bounds[REWARD_COEF_OFFROAD] = (RewardBound){reward_bound_offroad_min, reward_bound_offroad_max}; - reward_bounds[REWARD_COEF_COMFORT] = (RewardBound){reward_bound_comfort_min, reward_bound_comfort_max}; - reward_bounds[REWARD_COEF_LANE_ALIGN] = (RewardBound){reward_bound_lane_align_min, reward_bound_lane_align_max}; - reward_bounds[REWARD_COEF_LANE_CENTER] = (RewardBound){reward_bound_lane_center_min, reward_bound_lane_center_max}; - reward_bounds[REWARD_COEF_VELOCITY] = (RewardBound){reward_bound_velocity_min, reward_bound_velocity_max}; - reward_bounds[REWARD_COEF_TRAFFIC_LIGHT] = - (RewardBound){reward_bound_traffic_light_min, reward_bound_traffic_light_max}; - reward_bounds[REWARD_COEF_CENTER_BIAS] = (RewardBound){reward_bound_center_bias_min, reward_bound_center_bias_max}; - reward_bounds[REWARD_COEF_VEL_ALIGN] = (RewardBound){reward_bound_vel_align_min, reward_bound_vel_align_max}; - reward_bounds[REWARD_COEF_OVERSPEED] = (RewardBound){reward_bound_overspeed_min, reward_bound_overspeed_max}; - reward_bounds[REWARD_COEF_TIMESTEP] = (RewardBound){reward_bound_timestep_min, reward_bound_timestep_max}; - reward_bounds[REWARD_COEF_REVERSE] = (RewardBound){reward_bound_reverse_min, reward_bound_reverse_max}; - reward_bounds[REWARD_COEF_THROTTLE] = (RewardBound){reward_bound_throttle_min, reward_bound_throttle_max}; - reward_bounds[REWARD_COEF_STEER] = (RewardBound){reward_bound_steer_min, reward_bound_steer_max}; - reward_bounds[REWARD_COEF_ACC] = (RewardBound){reward_bound_acc_min, reward_bound_acc_max}; - if (!reuse_cache) { // Allocate map cache indexed by map_id (0..num_maps-1) g_map_cache_size = num_maps; @@ -248,10 +228,7 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { if (g_map_cache[map_id] == NULL) { PyObject *map_file_obj = PyList_GetItem(map_files_list, map_id); const char *map_file_path = PyUnicode_AsUTF8(map_file_obj); - g_map_cache[map_id] = create_shared_map_data( - map_file_path, init_mode, control_mode, init_steps, goal_behavior, reward_randomization, - turn_off_normalization, reward_conditioning, min_goal_distance, max_goal_distance, - min_avg_speed_to_consider_goal_attempt, reward_bounds); + g_map_cache[map_id] = create_shared_map_data(map_file_path, init_mode, control_mode, init_steps); } } PyList_SetItem(agent_offsets, env_count, @@ -279,10 +256,7 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { if (g_map_cache[map_id] == NULL) { PyObject *map_file_obj = PyList_GetItem(map_files_list, map_id); const char *map_file_path = PyUnicode_AsUTF8(map_file_obj); - g_map_cache[map_id] = create_shared_map_data(map_file_path, init_mode, control_mode, init_steps, - goal_behavior, reward_randomization, turn_off_normalization, - reward_conditioning, min_goal_distance, max_goal_distance, - min_avg_speed_to_consider_goal_attempt, reward_bounds); + g_map_cache[map_id] = create_shared_map_data(map_file_path, init_mode, control_mode, init_steps); } SharedMapData *shared = g_map_cache[map_id]; diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index b584ab0f18..f9ce02aafb 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -2218,25 +2218,15 @@ void free_shared_map_data(SharedMapData *shared) { } // Create a SharedMapData by loading a map binary and building all spatial structures. -SharedMapData *create_shared_map_data(const char *map_file_path, int init_mode, int control_mode, int init_steps, - int goal_behavior, int reward_randomization, int turn_off_normalization, - int reward_conditioning, float min_goal_distance, float max_goal_distance, - float min_avg_speed_to_consider_goal_attempt, RewardBound *reward_bounds) { +SharedMapData *create_shared_map_data(const char *map_file_path, int init_mode, int control_mode, int init_steps) { SharedMapData *shared = (SharedMapData *)calloc(1, sizeof(SharedMapData)); - // Create a temporary Drive to use existing loading functions + // Create a temporary Drive to use existing loading functions. + // Only fields needed by load_map_binary / set_means / init_grid_map are set. Drive temp = {0}; temp.init_mode = init_mode; temp.control_mode = control_mode; temp.init_steps = init_steps; - temp.goal_behavior = goal_behavior; - temp.reward_randomization = reward_randomization; - temp.turn_off_normalization = turn_off_normalization; - temp.reward_conditioning = reward_conditioning; - temp.min_goal_distance = min_goal_distance; - temp.max_goal_distance = max_goal_distance; - temp.min_avg_speed_to_consider_goal_attempt = min_avg_speed_to_consider_goal_attempt; - memcpy(temp.reward_bounds, reward_bounds, NUM_REWARD_COEFS * sizeof(RewardBound)); // Load map binary (allocates agents, road_elements, etc.) load_map_binary(map_file_path, &temp); From 8209718ca4d2ca3c3c6a21c337cf1bc52fffd640 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 22 Mar 2026 17:18:09 -0400 Subject: [PATCH 09/17] Fix: call compute_drivable_lane_points in init_from_shared spawn_agent reads env->drivable_lane_indices for length-weighted lane selection during agent spawning. init_from_shared was missing this call, causing a crash (num_drivable==0) when spawning agents with shared maps. Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/drive/drive.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 8bc156449e..eae562d17b 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -2312,6 +2312,10 @@ void init_from_shared(Drive *env, SharedMapData *shared) { generate_offsets(collision_offsets, COLLISION_RANGE); generate_offsets(z_offsets, Z_RANGE); + // Build per-env drivable lane index from shared road_elements + // (needed by spawn_agent for length-weighted lane selection) + compute_drivable_lane_points(env); + if (env->init_mode == INIT_VARIABLE_AGENT_NUMBER) { // Variable agent mode: agents are spawned fresh, not cloned from template. // Set num_objects=0 so spawn_active_agents' free_agents() call is a no-op. From 7e19d54ac79d5d9e1951b278f10bb50898b44990 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 22 Mar 2026 17:43:12 -0400 Subject: [PATCH 10/17] Improve shared map cache robustness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Complete cache key: include observation_window_size, polyline params in reuse check. Two Drive instances with different polyline settings in the same process now correctly rebuild the cache. - Factor out finalize_env(): shared tail of init() and init_from_shared() (set_active_agents → set_start_position → init_goal_positions → logs) is now a single function, eliminating semantic drift risk. - Document fork leak: explain why child discards inherited cache pointers without freeing, and that the leak is bounded. Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/drive/binding.c | 39 +++++++++++++++++++++++---------- pufferlib/ocean/drive/drive.h | 29 ++++++++++++------------ 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 91470eaab1..65a4853543 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -8,16 +8,27 @@ static SharedMapData **g_map_cache = NULL; static int g_map_cache_size = 0; static pid_t g_map_cache_pid = 0; // PID of the process that created the cache +// Cache key: params that affect SharedMapData contents (roads, grid, polylines) +static float g_cache_observation_window_size = 0; +static float g_cache_polyline_reduction_threshold = 0; +static float g_cache_polyline_max_segment_length = 0; static void release_map_cache_internal(void) { if (g_map_cache == NULL) return; - // After fork, child inherits g_map_cache but must not free parent's memory. - // Just discard the inherited pointers without freeing. + // After fork, child inherits g_map_cache pointers via copy-on-write. + // We must NOT free them — they belong to the parent's address space. + // Discarding the pointers "leaks" the CoW pages in the child, but this + // is bounded (one map cache worth of memory per worker) and unavoidable + // with fork-based multiprocessing. The child rebuilds its own cache on + // the next my_shared() call. if (g_map_cache_pid != 0 && g_map_cache_pid != getpid()) { g_map_cache = NULL; g_map_cache_size = 0; g_map_cache_pid = 0; + g_cache_observation_window_size = 0; + g_cache_polyline_reduction_threshold = 0; + g_cache_polyline_max_segment_length = 0; return; } int has_refs = 0; @@ -36,6 +47,9 @@ static void release_map_cache_internal(void) { g_map_cache = NULL; g_map_cache_size = 0; g_map_cache_pid = 0; + g_cache_observation_window_size = 0; + g_cache_polyline_reduction_threshold = 0; + g_cache_polyline_max_segment_length = 0; } static PyObject *release_map_cache_py(PyObject *self, PyObject *args) { @@ -180,21 +194,24 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { clock_gettime(CLOCK_REALTIME, &ts); srand(ts.tv_nsec); - // Reuse existing cache if it was created by this process and has the right size. - // Multiple PufferDrive instances in the same process (e.g. Serial workers) share one cache. - // Only rebuild after fork (PID mismatch) or on first call. - int reuse_cache = (g_map_cache != NULL && g_map_cache_pid == getpid() && g_map_cache_size == num_maps); + // Reuse existing cache if it was created by this process with matching config. + // The cache key includes all params that affect SharedMapData contents: + // num_maps (array size), observation_window_size (grid vision_range), + // polyline params (road simplification). Other params (init_mode, reward bounds, + // etc.) only affect per-env state and don't change the shared map data. + int reuse_cache = (g_map_cache != NULL && g_map_cache_pid == getpid() && g_map_cache_size == num_maps && + g_cache_observation_window_size == observation_window_size && + g_cache_polyline_reduction_threshold == polyline_reduction_threshold && + g_cache_polyline_max_segment_length == polyline_max_segment_length); if (!reuse_cache) { - // Release any inherited or stale cache release_map_cache_internal(); - } - - if (!reuse_cache) { - // Allocate map cache indexed by map_id (0..num_maps-1) g_map_cache_size = num_maps; g_map_cache = (SharedMapData **)calloc(num_maps, sizeof(SharedMapData *)); g_map_cache_pid = getpid(); + g_cache_observation_window_size = observation_window_size; + g_cache_polyline_reduction_threshold = polyline_reduction_threshold; + g_cache_polyline_max_segment_length = polyline_max_segment_length; } int max_envs = use_all_maps ? num_maps : num_agents; diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index eae562d17b..200d3d2dd3 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -2255,6 +2255,19 @@ void remove_bad_trajectories(Drive *env) { env->timestep = 0; } +// Common post-load initialization shared by both init() and init_from_shared(). +// Called after map data and drivable lanes are ready. Spawns/configures agents, +// sets start positions, and allocates per-env logs. +void finalize_env(Drive *env) { + env->logs_capacity = 0; + set_active_agents(env); + env->logs_capacity = env->active_agent_count; + remove_bad_trajectories(env); + set_start_position(env); + init_goal_positions(env); + env->logs = (Log *)calloc(env->active_agent_count, sizeof(Log)); +} + void init(Drive *env) { env->human_agent_idx = 0; env->timestep = 0; @@ -2269,13 +2282,7 @@ void init(Drive *env) { generate_offsets(z_offsets, Z_RANGE); init_neighbor_offsets(env); cache_neighbor_offsets(env); - env->logs_capacity = 0; - set_active_agents(env); - env->logs_capacity = env->active_agent_count; - remove_bad_trajectories(env); - set_start_position(env); - init_goal_positions(env); - env->logs = (Log *)calloc(env->active_agent_count, sizeof(Log)); + finalize_env(env); } // Initialize an environment using shared map data instead of loading from disk. @@ -2345,13 +2352,7 @@ void init_from_shared(Drive *env, SharedMapData *shared) { } } - env->logs_capacity = 0; - set_active_agents(env); - env->logs_capacity = env->active_agent_count; - remove_bad_trajectories(env); - set_start_position(env); - init_goal_positions(env); - env->logs = (Log *)calloc(env->active_agent_count, sizeof(Log)); + finalize_env(env); } // Free a SharedMapData and all data it owns. From 2ba9f2baeb3a9310b109dfd00bf57edc1c49382e Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 22 Mar 2026 17:56:31 -0400 Subject: [PATCH 11/17] Fix clang-format: wrap long create_shared_map_data calls Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/drive/binding.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 65a4853543..4044ede907 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -254,7 +254,9 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { if (g_map_cache[map_id] == NULL) { PyObject *map_file_obj = PyList_GetItem(map_files_list, map_id); const char *map_file_path = PyUnicode_AsUTF8(map_file_obj); - g_map_cache[map_id] = create_shared_map_data(map_file_path, init_mode, control_mode, init_steps, observation_window_size, polyline_reduction_threshold, polyline_max_segment_length); + g_map_cache[map_id] = create_shared_map_data(map_file_path, init_mode, control_mode, init_steps, + observation_window_size, polyline_reduction_threshold, + polyline_max_segment_length); } } PyList_SetItem(agent_offsets, env_count, @@ -282,7 +284,9 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { if (g_map_cache[map_id] == NULL) { PyObject *map_file_obj = PyList_GetItem(map_files_list, map_id); const char *map_file_path = PyUnicode_AsUTF8(map_file_obj); - g_map_cache[map_id] = create_shared_map_data(map_file_path, init_mode, control_mode, init_steps, observation_window_size, polyline_reduction_threshold, polyline_max_segment_length); + g_map_cache[map_id] = create_shared_map_data(map_file_path, init_mode, control_mode, init_steps, + observation_window_size, polyline_reduction_threshold, + polyline_max_segment_length); } SharedMapData *shared = g_map_cache[map_id]; From 64421769bac56b1480127316c7ff348c904b9ba9 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 22 Mar 2026 17:58:18 -0400 Subject: [PATCH 12/17] Fix clang-format: match exact indentation for wrapped function calls Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/drive/binding.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 4044ede907..5f6d97b5b3 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -254,9 +254,9 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { if (g_map_cache[map_id] == NULL) { PyObject *map_file_obj = PyList_GetItem(map_files_list, map_id); const char *map_file_path = PyUnicode_AsUTF8(map_file_obj); - g_map_cache[map_id] = create_shared_map_data(map_file_path, init_mode, control_mode, init_steps, - observation_window_size, polyline_reduction_threshold, - polyline_max_segment_length); + g_map_cache[map_id] = + create_shared_map_data(map_file_path, init_mode, control_mode, init_steps, observation_window_size, + polyline_reduction_threshold, polyline_max_segment_length); } } PyList_SetItem(agent_offsets, env_count, From 64c6deaecbd282ce673b03bf304581a29256110a Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 22 Mar 2026 18:00:37 -0400 Subject: [PATCH 13/17] Add map file paths to cache key, fix remaining clang-format The cache now validates that the actual map file paths match on reuse, not just num_maps. Prevents stale data if map_dir changes between calls in the same process. Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/drive/binding.c | 39 +++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 5f6d97b5b3..11e0527c46 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -12,6 +12,7 @@ static pid_t g_map_cache_pid = 0; // PID of the process that created the cache static float g_cache_observation_window_size = 0; static float g_cache_polyline_reduction_threshold = 0; static float g_cache_polyline_max_segment_length = 0; +static char **g_cache_map_paths = NULL; // strdup'd file paths, one per map_id static void release_map_cache_internal(void) { if (g_map_cache == NULL) @@ -29,6 +30,7 @@ static void release_map_cache_internal(void) { g_cache_observation_window_size = 0; g_cache_polyline_reduction_threshold = 0; g_cache_polyline_max_segment_length = 0; + g_cache_map_paths = NULL; // don't free — parent owns the strings return; } int has_refs = 0; @@ -44,12 +46,19 @@ static void release_map_cache_internal(void) { } } free(g_map_cache); + if (g_cache_map_paths != NULL) { + for (int i = 0; i < g_map_cache_size; i++) { + free(g_cache_map_paths[i]); + } + free(g_cache_map_paths); + } g_map_cache = NULL; g_map_cache_size = 0; g_map_cache_pid = 0; g_cache_observation_window_size = 0; g_cache_polyline_reduction_threshold = 0; g_cache_polyline_max_segment_length = 0; + g_cache_map_paths = NULL; } static PyObject *release_map_cache_py(PyObject *self, PyObject *args) { @@ -195,14 +204,24 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { srand(ts.tv_nsec); // Reuse existing cache if it was created by this process with matching config. - // The cache key includes all params that affect SharedMapData contents: - // num_maps (array size), observation_window_size (grid vision_range), - // polyline params (road simplification). Other params (init_mode, reward bounds, - // etc.) only affect per-env state and don't change the shared map data. + // The cache key includes: PID, num_maps, map file paths, and params that + // affect SharedMapData (observation_window_size for grid, polyline params for + // road simplification). Other params (init_mode, reward bounds, etc.) only + // affect per-env state and don't change the shared map data. int reuse_cache = (g_map_cache != NULL && g_map_cache_pid == getpid() && g_map_cache_size == num_maps && g_cache_observation_window_size == observation_window_size && g_cache_polyline_reduction_threshold == polyline_reduction_threshold && g_cache_polyline_max_segment_length == polyline_max_segment_length); + // Also check that map file paths haven't changed + if (reuse_cache && g_cache_map_paths != NULL) { + for (int i = 0; i < num_maps; i++) { + const char *path = PyUnicode_AsUTF8(PyList_GetItem(map_files_list, i)); + if (g_cache_map_paths[i] == NULL || strcmp(g_cache_map_paths[i], path) != 0) { + reuse_cache = 0; + break; + } + } + } if (!reuse_cache) { release_map_cache_internal(); @@ -212,6 +231,12 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { g_cache_observation_window_size = observation_window_size; g_cache_polyline_reduction_threshold = polyline_reduction_threshold; g_cache_polyline_max_segment_length = polyline_max_segment_length; + // Store map file paths for future reuse checks + g_cache_map_paths = (char **)calloc(num_maps, sizeof(char *)); + for (int i = 0; i < num_maps; i++) { + const char *path = PyUnicode_AsUTF8(PyList_GetItem(map_files_list, i)); + g_cache_map_paths[i] = strdup(path); + } } int max_envs = use_all_maps ? num_maps : num_agents; @@ -284,9 +309,9 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { if (g_map_cache[map_id] == NULL) { PyObject *map_file_obj = PyList_GetItem(map_files_list, map_id); const char *map_file_path = PyUnicode_AsUTF8(map_file_obj); - g_map_cache[map_id] = create_shared_map_data(map_file_path, init_mode, control_mode, init_steps, - observation_window_size, polyline_reduction_threshold, - polyline_max_segment_length); + g_map_cache[map_id] = + create_shared_map_data(map_file_path, init_mode, control_mode, init_steps, observation_window_size, + polyline_reduction_threshold, polyline_max_segment_length); } SharedMapData *shared = g_map_cache[map_id]; From 0e40487e3697588bafd82380f205f2941da798a8 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 22 Mar 2026 18:07:04 -0400 Subject: [PATCH 14/17] Extract reset_cache_globals to eliminate duplicated global reset Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/drive/binding.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 11e0527c46..3276e61e60 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -14,6 +14,16 @@ static float g_cache_polyline_reduction_threshold = 0; static float g_cache_polyline_max_segment_length = 0; static char **g_cache_map_paths = NULL; // strdup'd file paths, one per map_id +static void reset_cache_globals(void) { + g_map_cache = NULL; + g_map_cache_size = 0; + g_map_cache_pid = 0; + g_cache_observation_window_size = 0; + g_cache_polyline_reduction_threshold = 0; + g_cache_polyline_max_segment_length = 0; + g_cache_map_paths = NULL; +} + static void release_map_cache_internal(void) { if (g_map_cache == NULL) return; @@ -24,25 +34,16 @@ static void release_map_cache_internal(void) { // with fork-based multiprocessing. The child rebuilds its own cache on // the next my_shared() call. if (g_map_cache_pid != 0 && g_map_cache_pid != getpid()) { - g_map_cache = NULL; - g_map_cache_size = 0; - g_map_cache_pid = 0; - g_cache_observation_window_size = 0; - g_cache_polyline_reduction_threshold = 0; - g_cache_polyline_max_segment_length = 0; - g_cache_map_paths = NULL; // don't free — parent owns the strings + reset_cache_globals(); return; } - int has_refs = 0; for (int i = 0; i < g_map_cache_size; i++) { if (g_map_cache[i] != NULL) { if (g_map_cache[i]->ref_count > 0) { fprintf(stderr, "WARNING: releasing map cache entry %d with ref_count=%d\n", i, g_map_cache[i]->ref_count); - has_refs = 1; } free_shared_map_data(g_map_cache[i]); - g_map_cache[i] = NULL; } } free(g_map_cache); @@ -52,13 +53,7 @@ static void release_map_cache_internal(void) { } free(g_cache_map_paths); } - g_map_cache = NULL; - g_map_cache_size = 0; - g_map_cache_pid = 0; - g_cache_observation_window_size = 0; - g_cache_polyline_reduction_threshold = 0; - g_cache_polyline_max_segment_length = 0; - g_cache_map_paths = NULL; + reset_cache_globals(); } static PyObject *release_map_cache_py(PyObject *self, PyObject *args) { From 376dcd29b39f4115747a7a48fd7adb39ade3adad Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 22 Mar 2026 18:13:58 -0400 Subject: [PATCH 15/17] Move drivable lanes to SharedMapData (computed once, shared across envs) Drivable lane indices are derived from road_elements which are shared and immutable. No reason to recompute per-env. Now computed once in create_shared_map_data and pointed to from init_from_shared. Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/drive/drive.h | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 200d3d2dd3..fa192b8ecd 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -300,6 +300,12 @@ struct SharedMapData { float world_mean_y; float world_mean_z; + // Drivable lane index (computed from road_elements, used for agent spawning) + int *drivable_lane_indices; + float *drivable_lane_lengths; + int num_drivable; + float total_drivable_lane_length; + // Agent templates from binary (for cloning into per-env agents) Agent *template_agents; int num_objects; @@ -2319,9 +2325,11 @@ void init_from_shared(Drive *env, SharedMapData *shared) { generate_offsets(collision_offsets, COLLISION_RANGE); generate_offsets(z_offsets, Z_RANGE); - // Build per-env drivable lane index from shared road_elements - // (needed by spawn_agent for length-weighted lane selection) - compute_drivable_lane_points(env); + // Point to shared drivable lane data (read-only, used by spawn_agent) + env->drivable_lane_indices = shared->drivable_lane_indices; + env->drivable_lane_lengths = shared->drivable_lane_lengths; + env->num_drivable = shared->num_drivable; + env->total_drivable_lane_length = shared->total_drivable_lane_length; if (env->init_mode == INIT_VARIABLE_AGENT_NUMBER) { // Variable agent mode: agents are spawned fresh, not cloned from template. @@ -2382,6 +2390,10 @@ void free_shared_map_data(SharedMapData *shared) { free(shared->grid_map->neighbor_cache_count); free(shared->grid_map); + // Free drivable lane data + free(shared->drivable_lane_indices); + free(shared->drivable_lane_lengths); + // Free template agents free_agents(shared->template_agents, shared->num_objects); @@ -2410,6 +2422,7 @@ SharedMapData *create_shared_map_data(const char *map_file_path, int init_mode, load_map_binary(map_file_path, &temp); filter_road_elements(&temp); create_sparse_lane_points(&temp, polyline_reduction_threshold, polyline_max_segment_length); + compute_drivable_lane_points(&temp); // Transfer ownership to shared struct shared->template_agents = temp.agents; @@ -2420,6 +2433,10 @@ SharedMapData *create_shared_map_data(const char *map_file_path, int init_mode, shared->sdc_track_index = temp.sdc_track_index; shared->num_tracks_to_predict = temp.num_tracks_to_predict; shared->tracks_to_predict_indices = temp.tracks_to_predict_indices; + shared->drivable_lane_indices = temp.drivable_lane_indices; + shared->drivable_lane_lengths = temp.drivable_lane_lengths; + shared->num_drivable = temp.num_drivable; + shared->total_drivable_lane_length = temp.total_drivable_lane_length; // Compute world means set_means(&temp); @@ -2446,8 +2463,11 @@ void c_close(Drive *env) { // Always free per-env agent data free_agents(env->agents, env->num_objects); free(env->active_agent_indices); - free(env->drivable_lane_indices); - free(env->drivable_lane_lengths); + // Only free drivable lanes if not shared (shared envs point to SharedMapData's copy) + if (env->shared_map == NULL) { + free(env->drivable_lane_indices); + free(env->drivable_lane_lengths); + } free(env->logs); free(env->static_agent_indices); free(env->expert_static_agent_indices); From 204ea56cb84d5fe78c7c74b4bdd8082c2aa22917 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 22 Mar 2026 18:24:30 -0400 Subject: [PATCH 16/17] Fix lifetime bug: refuse to free cache with live refs release_map_cache_internal() was freeing SharedMapData even when ref_count > 0, causing use-after-free if a second Drive with different config triggered cache rebuild while the first was still alive. Now: first pass checks all entries for live refs, refuses to free if any exist. my_shared() raises RuntimeError if cache can't be released. Also: warn on cache miss fallback to disk when map_id was provided, since that likely indicates a cache invalidation bug. Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/drive/binding.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 3276e61e60..361e50b9b8 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -40,9 +40,16 @@ static void release_map_cache_internal(void) { for (int i = 0; i < g_map_cache_size; i++) { if (g_map_cache[i] != NULL) { if (g_map_cache[i]->ref_count > 0) { - fprintf(stderr, "WARNING: releasing map cache entry %d with ref_count=%d\n", i, - g_map_cache[i]->ref_count); + fprintf(stderr, + "ERROR: cannot release map cache — entry %d still has %d live env(s). " + "Close all Drive instances before changing map config.\n", + i, g_map_cache[i]->ref_count); + return; // Refuse to free — callers must close envs first } + } + } + for (int i = 0; i < g_map_cache_size; i++) { + if (g_map_cache[i] != NULL) { free_shared_map_data(g_map_cache[i]); } } @@ -220,6 +227,14 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) { if (!reuse_cache) { release_map_cache_internal(); + if (g_map_cache != NULL) { + // release_map_cache_internal refused to free because envs are still alive. + // Cannot change map config while Drive instances exist. + PyErr_SetString(PyExc_RuntimeError, + "Cannot change map cache config while Drive environments are still open. " + "Call close() on all Drive instances first."); + return NULL; + } g_map_cache_size = num_maps; g_map_cache = (SharedMapData **)calloc(num_maps, sizeof(SharedMapData *)); g_map_cache_pid = getpid(); @@ -483,6 +498,11 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { } // Fallback: load from disk (standalone use, tests, rendering, etc.) + // If map_id was provided but cache miss occurred, warn — this likely indicates a bug. + if (map_id_obj != NULL) { + fprintf(stderr, "WARNING: map_id=%d provided but cache miss — falling back to disk loading\n", + (int)PyLong_AsLong(map_id_obj)); + } init(env); return 0; } From 49e3c9e60880238b3bacb272f170824baee1e09c Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 22 Mar 2026 19:57:00 -0400 Subject: [PATCH 17/17] Fix: diversify seed per sub-env in env_init All sub-envs were getting srand(seed) with the same seed, causing identical spawn positions across all agents. Now passes seed+i so each sub-env gets a unique rand() sequence for spawn_agent. Co-Authored-By: Claude Opus 4.6 (1M context) --- pufferlib/ocean/drive/drive.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 6f970fa408..ecd0b00c47 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -344,7 +344,7 @@ def __init__( self.rewards[cur:nxt], self.terminals[cur:nxt], self.truncations[cur:nxt], - seed, + seed + i, # unique seed per sub-env so spawn positions differ action_type=self._action_type_flag, human_agent_idx=human_agent_idx, reward_vehicle_collision=reward_vehicle_collision, @@ -503,7 +503,7 @@ def resample_maps(self): self.rewards[cur:nxt], self.terminals[cur:nxt], self.truncations[cur:nxt], - seed, + seed + i, # unique seed per sub-env so spawn positions differ action_type=self._action_type_flag, human_agent_idx=self.human_agent_idx, reward_vehicle_collision=self.reward_vehicle_collision,