From edae76052611f9ba0dacee4a29110d331eacfb68 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 4 May 2026 13:38:29 -0400 Subject: [PATCH] Log avg_distance_per_infraction (port from emerge/temp_training) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the metric: avg_distance_per_infraction = total_fleet_distance / max(1, total_fleet_infractions) which tracks how far agents drive between offroad/collision/red-light events — a useful single-scalar driving-quality signal for wandb. The two underlying log fields already exist in puffer-4 and are already aggregated per-step in add_log; only the binding-side ratio was missing. my_log gains an `n` parameter and multiplies the per-agent-normalized log->total_* fields by n to recover raw fleet totals. The ratio itself is invariant to the 1/n scaling, but the un-normalization makes the fmaxf(1.0f, total_infractions) clamp behave correctly: it floors the denominator at "one infraction across the whole fleet", so a window with zero infractions reports total fleet distance instead of distance/epsilon. Both static_vec_log call sites in vecenv.h pass n. Co-Authored-By: Claude Opus 4.7 (1M context) --- sim/binding.c | 16 +++++++++++++++- src/vecenv.h | 6 +++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/sim/binding.c b/sim/binding.c index df5841ba11..83a354f3d7 100644 --- a/sim/binding.c +++ b/sim/binding.c @@ -182,7 +182,20 @@ void my_init(Env *env, Dict *kwargs) { } } -void my_log(Log *log, Dict *out) { +void my_log(Log *log, Dict *out, float n) { + // static_vec_aggregate_logs divides every Log field by total agent count n, + // so log->total_* are per-agent rates here. We want the fleet ratio + // sum_distance / sum_infractions; the n cancels in that ratio so we + // could compute it directly from the rates. We multiply by n anyway + // to recover the raw fleet totals: this makes the fmaxf(1.0f, ...) + // clamp meaningful (it floors the denominator at "1 infraction across + // the whole fleet"), so a zero-infraction window reports total fleet + // distance instead of distance / epsilon = absurd value. + float total_distance_travelled = log->total_distance_travelled * n; + float total_infractions = log->total_infractions * n; + float avg_distance_per_infraction = + total_distance_travelled / fmaxf(1.0f, total_infractions); + dict_set(out, "score", log->score); dict_set(out, "episode_return", log->episode_return); dict_set(out, "episode_length", log->episode_length); @@ -191,6 +204,7 @@ void my_log(Log *log, Dict *out) { dict_set(out, "num_goals_reached", log->num_goals_reached); dict_set(out, "avg_speed_per_agent", log->avg_speed_per_agent); dict_set(out, "dnf_rate", log->dnf_rate); + dict_set(out, "avg_distance_per_infraction", avg_distance_per_infraction); dict_set(out, "n", log->n); } diff --git a/src/vecenv.h b/src/vecenv.h index 067fda8027..c88fe79b28 100644 --- a/src/vecenv.h +++ b/src/vecenv.h @@ -238,7 +238,7 @@ extern const char* cudaGetErrorString(cudaError_t); // Forward declare env-provided functions (defined in binding.c after this include) void my_init(Env* env, Dict* kwargs); -void my_log(Log* log, Dict* out); +void my_log(Log* log, Dict* out, float n); void my_env_constants(void* env, Dict* out); struct StaticThreading { @@ -643,7 +643,7 @@ void static_vec_log(StaticVec* vec, Dict* out) { for (int i = 0; i < vec->size; i++) { memset(&envs[i].log, 0, sizeof(Log)); } - my_log(&aggregate, out); + my_log(&aggregate, out, n); dict_set(out, "n", n); } @@ -653,7 +653,7 @@ void static_vec_eval_log(StaticVec* vec, Dict* out) { if (n == 0) { return; } - my_log(&aggregate, out); + my_log(&aggregate, out, n); dict_set(out, "n", n); }