Skip to content

Commit 003a628

Browse files
committed
refactor: replace dual rotation timers with single wall-clock check
Remove the monotonic 20h rotation interval and the supplementary wall-clock check. Instead, use a single periodic check (every 5 min) that compares wall-clock time against the certificate's not_after. Also derive the expiry buffer from the certificate lifetime (1/6) so it scales if the lifetime ever changes, and add comments documenting the relationship between lifetime and check interval.
1 parent 476ce29 commit 003a628

File tree

1 file changed

+31
-38
lines changed
  • crates/stackable-webhook/src/tls

1 file changed

+31
-38
lines changed

crates/stackable-webhook/src/tls/mod.rs

Lines changed: 31 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,25 @@ use crate::{
3939
mod cert_resolver;
4040

4141
pub const WEBHOOK_CA_LIFETIME: Duration = Duration::from_hours_unchecked(24);
42-
pub const WEBHOOK_CERTIFICATE_LIFETIME: Duration = Duration::from_hours_unchecked(24);
43-
pub const WEBHOOK_CERTIFICATE_ROTATION_INTERVAL: Duration = Duration::from_hours_unchecked(20);
4442

45-
/// How often to check wall-clock time for certificate expiry (5 minutes).
46-
/// This catches clock drift from system hibernation, VM migration, etc.
47-
const WALL_CLOCK_CHECK_INTERVAL: Duration = Duration::from_minutes_unchecked(5);
48-
49-
/// Rotate the certificate when it is within this buffer of expiry according
50-
/// to wall-clock time (4 hours before the 24h certificate expires).
51-
const WALL_CLOCK_EXPIRY_BUFFER: Duration = Duration::from_hours_unchecked(4);
43+
/// The wall-clock lifetime of generated webhook certificates. If this is ever
44+
/// reduced, ensure it stays well above [`CERTIFICATE_ROTATION_CHECK_INTERVAL`]
45+
/// (currently 5 minutes), otherwise the certificate could expire between checks.
46+
const WEBHOOK_CERTIFICATE_LIFETIME_HOURS: u64 = 24;
47+
pub const WEBHOOK_CERTIFICATE_LIFETIME: Duration =
48+
Duration::from_hours_unchecked(WEBHOOK_CERTIFICATE_LIFETIME_HOURS);
49+
50+
/// How often to check whether the certificate needs rotation. This is
51+
/// intentionally independent of the certificate lifetime — it controls how
52+
/// quickly we detect wall-clock drift (from hibernation, VM migration, etc.),
53+
/// not how long the certificate lives.
54+
const CERTIFICATE_ROTATION_CHECK_INTERVAL: Duration = Duration::from_minutes_unchecked(5);
55+
56+
/// Rotate the certificate when less than 1/6 of its lifetime remains
57+
/// (4 hours for the current 24h lifetime). Derived from
58+
/// [`WEBHOOK_CERTIFICATE_LIFETIME`] so it scales if the lifetime changes.
59+
const CERTIFICATE_EXPIRY_BUFFER: Duration =
60+
Duration::from_minutes_unchecked(WEBHOOK_CERTIFICATE_LIFETIME_HOURS * 60 / 6);
5261

5362
pub type Result<T, E = TlsServerError> = std::result::Result<T, E>;
5463

@@ -161,15 +170,12 @@ impl TlsServer {
161170
router,
162171
} = self;
163172

164-
let start = tokio::time::Instant::now() + *WEBHOOK_CERTIFICATE_ROTATION_INTERVAL;
165-
let mut interval = tokio::time::interval_at(start, *WEBHOOK_CERTIFICATE_ROTATION_INTERVAL);
166-
167-
// A separate, shorter interval to check wall-clock time against the
168-
// certificate's not_after. This catches monotonic/wall-clock drift
169-
// caused by hibernation, VM migration, cgroup freezing, etc.
170-
let wall_clock_check_start = tokio::time::Instant::now() + *WALL_CLOCK_CHECK_INTERVAL;
171-
let mut wall_clock_check =
172-
tokio::time::interval_at(wall_clock_check_start, *WALL_CLOCK_CHECK_INTERVAL);
173+
// Periodically check whether the certificate needs rotation based on
174+
// wall-clock time. This avoids the monotonic vs wall-clock drift problem
175+
// that can occur during hibernation, VM migration, or cgroup freezing.
176+
let check_start = tokio::time::Instant::now() + *CERTIFICATE_ROTATION_CHECK_INTERVAL;
177+
let mut rotation_check =
178+
tokio::time::interval_at(check_start, *CERTIFICATE_ROTATION_CHECK_INTERVAL);
173179

174180
let tls_acceptor = TlsAcceptor::from(Arc::new(config));
175181
let tcp_listener = TcpListener::bind(socket_addr)
@@ -198,11 +204,10 @@ impl TlsServer {
198204
loop {
199205
let tls_acceptor = tls_acceptor.clone();
200206

201-
// Wait for either a new TCP connection or the certificate rotation interval tick
202207
tokio::select! {
203208
// We opt for a biased execution of arms to make sure we always check if a
204-
// shutdown signal was received or the certificate needs rotation based on the
205-
// interval. This ensures, we always use a valid certificate for the TLS connection.
209+
// shutdown signal was received or the certificate needs rotation before
210+
// accepting new connections.
206211
biased;
207212

208213
// Once a shutdown signal is received (this future becomes `Poll::Ready`), break out
@@ -213,27 +218,15 @@ impl TlsServer {
213218
break;
214219
}
215220

216-
// This is cancellation-safe. If this branch is cancelled, the tick is NOT consumed.
217-
// As such, we will not miss rotating the certificate.
218-
_ = interval.tick() => {
219-
cert_resolver
220-
.rotate_certificate()
221-
.await
222-
.context(RotateCertificateSnafu)?
223-
}
224-
225-
// Wall-clock check: detect if the certificate is near expiry
226-
// even though the monotonic rotation interval hasn't fired yet.
227-
// This handles clock drift from hibernation, VM migration, etc.
228-
_ = wall_clock_check.tick() => {
229-
if cert_resolver.needs_rotation(*WALL_CLOCK_EXPIRY_BUFFER) {
230-
tracing::info!("wall-clock check detected certificate near expiry, rotating early");
221+
// Check wall-clock time to decide if the certificate needs rotation.
222+
// This is cancellation-safe: if cancelled, the tick is NOT consumed.
223+
_ = rotation_check.tick() => {
224+
if cert_resolver.needs_rotation(*CERTIFICATE_EXPIRY_BUFFER) {
225+
tracing::info!("certificate approaching expiry, rotating");
231226
cert_resolver
232227
.rotate_certificate()
233228
.await
234229
.context(RotateCertificateSnafu)?;
235-
// Reset the monotonic interval so we don't double-rotate
236-
interval.reset();
237230
}
238231
}
239232

0 commit comments

Comments
 (0)