@@ -39,16 +39,25 @@ use crate::{
3939mod cert_resolver;
4040
4141pub const WEBHOOK_CA_LIFETIME : Duration = Duration :: from_hours_unchecked ( 24 ) ;
42- pub const WEBHOOK_CERTIFICATE_LIFETIME : Duration = Duration :: from_hours_unchecked ( 24 ) ;
43- pub const WEBHOOK_CERTIFICATE_ROTATION_INTERVAL : Duration = Duration :: from_hours_unchecked ( 20 ) ;
4442
45- /// How often to check wall-clock time for certificate expiry (5 minutes).
46- /// This catches clock drift from system hibernation, VM migration, etc.
47- const WALL_CLOCK_CHECK_INTERVAL : Duration = Duration :: from_minutes_unchecked ( 5 ) ;
48-
49- /// Rotate the certificate when it is within this buffer of expiry according
50- /// to wall-clock time (4 hours before the 24h certificate expires).
51- const WALL_CLOCK_EXPIRY_BUFFER : Duration = Duration :: from_hours_unchecked ( 4 ) ;
43+ /// The wall-clock lifetime of generated webhook certificates. If this is ever
44+ /// reduced, ensure it stays well above [`CERTIFICATE_ROTATION_CHECK_INTERVAL`]
45+ /// (currently 5 minutes), otherwise the certificate could expire between checks.
46+ const WEBHOOK_CERTIFICATE_LIFETIME_HOURS : u64 = 24 ;
47+ pub const WEBHOOK_CERTIFICATE_LIFETIME : Duration =
48+ Duration :: from_hours_unchecked ( WEBHOOK_CERTIFICATE_LIFETIME_HOURS ) ;
49+
50+ /// How often to check whether the certificate needs rotation. This is
51+ /// intentionally independent of the certificate lifetime — it controls how
52+ /// quickly we detect wall-clock drift (from hibernation, VM migration, etc.),
53+ /// not how long the certificate lives.
54+ const CERTIFICATE_ROTATION_CHECK_INTERVAL : Duration = Duration :: from_minutes_unchecked ( 5 ) ;
55+
56+ /// Rotate the certificate when less than 1/6 of its lifetime remains
57+ /// (4 hours for the current 24h lifetime). Derived from
58+ /// [`WEBHOOK_CERTIFICATE_LIFETIME`] so it scales if the lifetime changes.
59+ const CERTIFICATE_EXPIRY_BUFFER : Duration =
60+ Duration :: from_minutes_unchecked ( WEBHOOK_CERTIFICATE_LIFETIME_HOURS * 60 / 6 ) ;
5261
5362pub type Result < T , E = TlsServerError > = std:: result:: Result < T , E > ;
5463
@@ -161,15 +170,12 @@ impl TlsServer {
161170 router,
162171 } = self ;
163172
164- let start = tokio:: time:: Instant :: now ( ) + * WEBHOOK_CERTIFICATE_ROTATION_INTERVAL ;
165- let mut interval = tokio:: time:: interval_at ( start, * WEBHOOK_CERTIFICATE_ROTATION_INTERVAL ) ;
166-
167- // A separate, shorter interval to check wall-clock time against the
168- // certificate's not_after. This catches monotonic/wall-clock drift
169- // caused by hibernation, VM migration, cgroup freezing, etc.
170- let wall_clock_check_start = tokio:: time:: Instant :: now ( ) + * WALL_CLOCK_CHECK_INTERVAL ;
171- let mut wall_clock_check =
172- tokio:: time:: interval_at ( wall_clock_check_start, * WALL_CLOCK_CHECK_INTERVAL ) ;
173+ // Periodically check whether the certificate needs rotation based on
174+ // wall-clock time. This avoids the monotonic vs wall-clock drift problem
175+ // that can occur during hibernation, VM migration, or cgroup freezing.
176+ let check_start = tokio:: time:: Instant :: now ( ) + * CERTIFICATE_ROTATION_CHECK_INTERVAL ;
177+ let mut rotation_check =
178+ tokio:: time:: interval_at ( check_start, * CERTIFICATE_ROTATION_CHECK_INTERVAL ) ;
173179
174180 let tls_acceptor = TlsAcceptor :: from ( Arc :: new ( config) ) ;
175181 let tcp_listener = TcpListener :: bind ( socket_addr)
@@ -198,11 +204,10 @@ impl TlsServer {
198204 loop {
199205 let tls_acceptor = tls_acceptor. clone ( ) ;
200206
201- // Wait for either a new TCP connection or the certificate rotation interval tick
202207 tokio:: select! {
203208 // We opt for a biased execution of arms to make sure we always check if a
204- // shutdown signal was received or the certificate needs rotation based on the
205- // interval. This ensures, we always use a valid certificate for the TLS connection .
209+ // shutdown signal was received or the certificate needs rotation before
210+ // accepting new connections .
206211 biased;
207212
208213 // Once a shutdown signal is received (this future becomes `Poll::Ready`), break out
@@ -213,27 +218,15 @@ impl TlsServer {
213218 break ;
214219 }
215220
216- // This is cancellation-safe. If this branch is cancelled, the tick is NOT consumed.
217- // As such, we will not miss rotating the certificate.
218- _ = interval. tick( ) => {
219- cert_resolver
220- . rotate_certificate( )
221- . await
222- . context( RotateCertificateSnafu ) ?
223- }
224-
225- // Wall-clock check: detect if the certificate is near expiry
226- // even though the monotonic rotation interval hasn't fired yet.
227- // This handles clock drift from hibernation, VM migration, etc.
228- _ = wall_clock_check. tick( ) => {
229- if cert_resolver. needs_rotation( * WALL_CLOCK_EXPIRY_BUFFER ) {
230- tracing:: info!( "wall-clock check detected certificate near expiry, rotating early" ) ;
221+ // Check wall-clock time to decide if the certificate needs rotation.
222+ // This is cancellation-safe: if cancelled, the tick is NOT consumed.
223+ _ = rotation_check. tick( ) => {
224+ if cert_resolver. needs_rotation( * CERTIFICATE_EXPIRY_BUFFER ) {
225+ tracing:: info!( "certificate approaching expiry, rotating" ) ;
231226 cert_resolver
232227 . rotate_certificate( )
233228 . await
234229 . context( RotateCertificateSnafu ) ?;
235- // Reset the monotonic interval so we don't double-rotate
236- interval. reset( ) ;
237230 }
238231 }
239232
0 commit comments