From 306d2238695f18d47bc00b76ce955cb3381bf9b7 Mon Sep 17 00:00:00 2001 From: Gmymymy Date: Fri, 22 May 2026 15:05:25 +0800 Subject: [PATCH] bvar: fix bad_weak_ptr crash in AgentCombiner::get_or_create_tls_agent When a bvar (Reducer/IntRecorder/Percentile) is destroyed while another thread is concurrently writing to it, the AgentCombiner may no longer be managed by any shared_ptr by the time get_or_create_tls_agent() calls shared_from_this(). This causes std::bad_weak_ptr to be thrown and the process to terminate. Fix this by wrapping shared_from_this() in a try-catch: if the combiner is no longer alive, silently return NULL and skip the recording. This is safe because the metric is being torn down anyway. Also remove the now-incorrect LOG(FATAL) in the three operator<< callers. For allocation failures, get_or_create_tls_agent() already calls LOG(FATAL) internally (and aborts); the outer LOG(FATAL) was unreachable in that case and would incorrectly abort the process for the combiner-expired case. Fixes #3288 --- src/bvar/detail/combiner.h | 11 ++++++++++- src/bvar/detail/percentile.cpp | 1 - src/bvar/recorder.h | 1 - src/bvar/reducer.h | 1 - 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/bvar/detail/combiner.h b/src/bvar/detail/combiner.h index 3007f50da8..8a03dab641 100644 --- a/src/bvar/detail/combiner.h +++ b/src/bvar/detail/combiner.h @@ -341,7 +341,16 @@ friend class GlobalValue; if (!agent->combiner.expired()) { return agent; } - agent->reset(_element_identity, this->shared_from_this()); + self_shared_type self; + try { + self = this->shared_from_this(); + } catch (const std::bad_weak_ptr&) { + // The combiner is no longer managed by any shared_ptr, which can + // happen when a bvar is being destroyed concurrently with a write. + // Silently skip this recording instead of crashing. + return NULL; + } + agent->reset(_element_identity, self); // TODO: Is uniqueness-checking necessary here? { butil::AutoLock guard(_lock); diff --git a/src/bvar/detail/percentile.cpp b/src/bvar/detail/percentile.cpp index 99de328c42..e0b24a7d1a 100644 --- a/src/bvar/detail/percentile.cpp +++ b/src/bvar/detail/percentile.cpp @@ -108,7 +108,6 @@ Percentile::value_type Percentile::get_value() const { Percentile &Percentile::operator<<(int64_t latency) { agent_type* agent = _combiner->get_or_create_tls_agent(); if (BAIDU_UNLIKELY(!agent)) { - LOG(FATAL) << "Fail to create agent"; return *this; } if (latency < 0) { diff --git a/src/bvar/recorder.h b/src/bvar/recorder.h index b28b6372f6..ce80278dfa 100644 --- a/src/bvar/recorder.h +++ b/src/bvar/recorder.h @@ -269,7 +269,6 @@ inline IntRecorder& IntRecorder::operator<<(int64_t sample) { } agent_type* agent = _combiner->get_or_create_tls_agent(); if (BAIDU_UNLIKELY(!agent)) { - LOG(FATAL) << "Fail to create agent"; return *this; } uint64_t n; diff --git a/src/bvar/reducer.h b/src/bvar/reducer.h index 543e77c8b0..203d5cde0e 100644 --- a/src/bvar/reducer.h +++ b/src/bvar/reducer.h @@ -303,7 +303,6 @@ inline Reducer& Reducer::operator<<( // It's wait-free for most time agent_type* agent = _combiner->get_or_create_tls_agent(); if (__builtin_expect(!agent, 0)) { - LOG(FATAL) << "Fail to create agent"; return *this; } agent->element.modify(_combiner->op(), value);