From 538955916b498c1459c551d7136dbc3cb17d54fb Mon Sep 17 00:00:00 2001 From: way zheng Date: Mon, 13 Oct 2025 16:11:30 -0700 Subject: [PATCH 01/22] Implement keyset key fail-fast feature with 7-day timeout - Update Main.java to wire keysetkey verticle callback to shutdown handler - Add Consumer import and overloaded createAndDeployRotatingStoreVerticle method - Update KeyManager to accept keyAvailabilityHandler callback - Call handler on successful/failed key retrieval in getMasterKey/getRefreshKey - Add 7-day timeout parameter to OperatorShutdownHandler constructor - Maintains backward compatibility with existing constructors This enables the operator to shut down after 7 days of consecutive keyset key sync failures, allowing Kubernetes to restart and potentially recover. --- src/main/java/com/uid2/operator/Main.java | 17 +++++-- .../com/uid2/operator/model/KeyManager.java | 45 +++++++++++++++---- 2 files changed, 50 insertions(+), 12 deletions(-) diff --git a/src/main/java/com/uid2/operator/Main.java b/src/main/java/com/uid2/operator/Main.java index c4fe54c7e..19855826c 100644 --- a/src/main/java/com/uid2/operator/Main.java +++ b/src/main/java/com/uid2/operator/Main.java @@ -61,6 +61,7 @@ import java.time.Duration; import java.time.Instant; import java.util.*; +import java.util.function.Consumer; import java.util.function.Supplier; import static com.uid2.operator.Const.Config.EnableRemoteConfigProp; @@ -114,7 +115,9 @@ public Main(Vertx vertx, JsonObject config) throws Exception { this.clientSideTokenGenerate = config.getBoolean(Const.Config.EnableClientSideTokenGenerate, false); this.validateServiceLinks = config.getBoolean(Const.Config.ValidateServiceLinks, false); this.encryptedCloudFilesEnabled = config.getBoolean(Const.Config.EncryptedFiles, false); - this.shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), Duration.ofHours(config.getInteger(Const.Config.SaltsExpiredShutdownHours, 12)), Clock.systemUTC(), new ShutdownService()); + this.shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), + Duration.ofHours(config.getInteger(Const.Config.SaltsExpiredShutdownHours, 12)), Duration.ofDays(7), + Clock.systemUTC(), new ShutdownService()); this.uidInstanceIdProvider = new UidInstanceIdProvider(config); String coreAttestUrl = this.config.getString(Const.Config.CoreAttestUrlProp); @@ -243,7 +246,8 @@ public Main(Vertx vertx, JsonObject config) throws Exception { } private KeyManager getKeyManager() { - return new KeyManager(this.keysetKeyStore, this.keysetProvider); + return new KeyManager(this.keysetKeyStore, this.keysetProvider, + hasKeys -> shutdownHandler.handleKeysetKeyRefreshResponse(hasKeys)); } public static void recordStartupComplete() { @@ -420,7 +424,8 @@ private Future createStoreVerticles() throws Exception { } fs.add(createAndDeployRotatingStoreVerticle("auth", clientKeyProvider, "auth_refresh_ms")); fs.add(createAndDeployRotatingStoreVerticle("keyset", keysetProvider, "keyset_refresh_ms")); - fs.add(createAndDeployRotatingStoreVerticle("keysetkey", keysetKeyStore, "keysetkey_refresh_ms")); + fs.add(createAndDeployRotatingStoreVerticle("keysetkey", keysetKeyStore, "keysetkey_refresh_ms", + this.shutdownHandler::handleKeysetKeyRefreshResponse)); fs.add(createAndDeployRotatingStoreVerticle("salt", saltProvider, "salt_refresh_ms")); fs.add(createAndDeployCloudSyncStoreVerticle("optout", fsOptOut, optOutCloudSync)); CompositeFuture.all(fs).onComplete(ar -> { @@ -433,9 +438,13 @@ private Future createStoreVerticles() throws Exception { } private Future createAndDeployRotatingStoreVerticle(String name, IMetadataVersionedStore store, String storeRefreshConfigMs) { + return createAndDeployRotatingStoreVerticle(name, store, storeRefreshConfigMs, null); + } + + private Future createAndDeployRotatingStoreVerticle(String name, IMetadataVersionedStore store, String storeRefreshConfigMs, Consumer refreshCallback) { final int intervalMs = config.getInteger(storeRefreshConfigMs, 10000); - RotatingStoreVerticle rotatingStoreVerticle = new RotatingStoreVerticle(name, intervalMs, store); + RotatingStoreVerticle rotatingStoreVerticle = new RotatingStoreVerticle(name, intervalMs, store, refreshCallback); return vertx.deployVerticle(rotatingStoreVerticle); } diff --git a/src/main/java/com/uid2/operator/model/KeyManager.java b/src/main/java/com/uid2/operator/model/KeyManager.java index 19bae8d07..b03c1b4f4 100644 --- a/src/main/java/com/uid2/operator/model/KeyManager.java +++ b/src/main/java/com/uid2/operator/model/KeyManager.java @@ -13,16 +13,23 @@ import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.function.Consumer; import java.util.stream.Collectors; public class KeyManager { private static final Logger LOGGER = LoggerFactory.getLogger(UIDOperatorVerticle.class); private final IKeysetKeyStore keysetKeyStore; private final RotatingKeysetProvider keysetProvider; + private final Consumer keyAvailabilityHandler; public KeyManager(IKeysetKeyStore keysetKeyStore, RotatingKeysetProvider keysetProvider) { + this(keysetKeyStore, keysetProvider, null); + } + + public KeyManager(IKeysetKeyStore keysetKeyStore, RotatingKeysetProvider keysetProvider, Consumer keyAvailabilityHandler) { this.keysetKeyStore = keysetKeyStore; this.keysetProvider = keysetProvider; + this.keyAvailabilityHandler = keyAvailabilityHandler; } public KeyManagerSnapshot getKeyManagerSnapshot(int siteId) { @@ -105,11 +112,22 @@ public KeysetKey getMasterKey() { } public KeysetKey getMasterKey(Instant asOf) { - KeysetKey key = this.keysetKeyStore.getSnapshot().getActiveKey(Const.Data.MasterKeysetId, asOf); - if (key == null) { - throw new NoActiveKeyException(String.format("Cannot get a master key with keyset ID %d.", Const.Data.MasterKeysetId)); + try { + KeysetKey key = this.keysetKeyStore.getSnapshot().getActiveKey(Const.Data.MasterKeysetId, asOf); + if (key == null) { + throw new NoActiveKeyException(String.format("Cannot get a master key with keyset ID %d.", Const.Data.MasterKeysetId)); + } + // Reset timer on successful key retrieval + if (keyAvailabilityHandler != null) { + keyAvailabilityHandler.accept(true); + } + return key; + } catch (NoActiveKeyException e) { + if (keyAvailabilityHandler != null) { + keyAvailabilityHandler.accept(false); + } + throw e; } - return key; } public KeysetKey getRefreshKey() { @@ -117,11 +135,22 @@ public KeysetKey getRefreshKey() { } public KeysetKey getRefreshKey(Instant asOf) { - KeysetKey key = this.keysetKeyStore.getSnapshot().getActiveKey(Const.Data.RefreshKeysetId, asOf); - if (key == null) { - throw new NoActiveKeyException(String.format("Cannot get a refresh key with keyset ID %d.", Const.Data.RefreshKeysetId)); + try { + KeysetKey key = this.keysetKeyStore.getSnapshot().getActiveKey(Const.Data.RefreshKeysetId, asOf); + if (key == null) { + throw new NoActiveKeyException(String.format("Cannot get a refresh key with keyset ID %d.", Const.Data.RefreshKeysetId)); + } + // Reset timer on successful key retrieval + if (keyAvailabilityHandler != null) { + keyAvailabilityHandler.accept(true); + } + return key; + } catch (NoActiveKeyException e) { + if (keyAvailabilityHandler != null) { + keyAvailabilityHandler.accept(false); + } + throw e; } - return key; } public static class NoActiveKeyException extends RuntimeException { From 6729f3cabedb48395b08703cebe02f4528647b18 Mon Sep 17 00:00:00 2001 From: way zheng Date: Mon, 13 Oct 2025 22:15:51 -0700 Subject: [PATCH 02/22] Add keysetkey fail-fast logic and handleKeysetKeyRefreshResponse --- .../vertx/OperatorShutdownHandler.java | 46 ++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index 84075fb03..8f6bdb5f5 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -16,17 +16,28 @@ public class OperatorShutdownHandler { private static final Logger LOGGER = LoggerFactory.getLogger(OperatorShutdownHandler.class); private static final int SALT_FAILURE_LOG_INTERVAL_MINUTES = 10; + private static final int KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES = 10; private final Duration attestShutdownWaitTime; private final Duration saltShutdownWaitTime; + private final Duration keysetKeyShutdownWaitTime; private final AtomicReference attestFailureStartTime = new AtomicReference<>(null); private final AtomicReference saltFailureStartTime = new AtomicReference<>(null); + private final AtomicReference keysetKeyFailureStartTime = new AtomicReference<>(null); private final AtomicReference lastSaltFailureLogTime = new AtomicReference<>(null); + private final AtomicReference lastKeysetKeyFailureLogTime = new AtomicReference<>(null); private final Clock clock; private final ShutdownService shutdownService; - public OperatorShutdownHandler(Duration attestShutdownWaitTime, Duration saltShutdownWaitTime, Clock clock, ShutdownService shutdownService) { + public OperatorShutdownHandler(Duration attestShutdownWaitTime, Duration saltShutdownWaitTime, Clock clock, + ShutdownService shutdownService) { + this(attestShutdownWaitTime, saltShutdownWaitTime, Duration.ofDays(7), clock, shutdownService); + } + + public OperatorShutdownHandler(Duration attestShutdownWaitTime, Duration saltShutdownWaitTime, + Duration keysetKeyShutdownWaitTime, Clock clock, ShutdownService shutdownService) { this.attestShutdownWaitTime = attestShutdownWaitTime; this.saltShutdownWaitTime = saltShutdownWaitTime; + this.keysetKeyShutdownWaitTime = keysetKeyShutdownWaitTime; this.clock = clock; this.shutdownService = shutdownService; } @@ -54,6 +65,39 @@ public void logSaltFailureAtInterval() { } } + public void handleKeysetKeyRefreshResponse(Boolean success) { + if (success) { + Instant previousFailureTime = keysetKeyFailureStartTime.getAndSet(null); + if (previousFailureTime != null) { + Duration failureDuration = Duration.between(previousFailureTime, clock.instant()); + LOGGER.info("keyset keys sync recovered after {} ({}d {}h {}m). shutdown timer reset.", + failureDuration, + failureDuration.toDays(), + failureDuration.toHoursPart(), + failureDuration.toMinutesPart()); + } + } else { + logKeysetKeyFailureAtInterval(); + Instant t = keysetKeyFailureStartTime.get(); + if (t == null) { + keysetKeyFailureStartTime.set(clock.instant()); + LOGGER.warn( + "keyset keys sync started failing. shutdown timer started (will shutdown in 7 days if not recovered)"); + } else if (Duration.between(t, clock.instant()).compareTo(this.keysetKeyShutdownWaitTime) > 0) { + LOGGER.error("keyset keys have been failing to sync for too long. shutting down operator"); + this.shutdownService.Shutdown(1); + } + } + } + + public void logKeysetKeyFailureAtInterval() { + Instant t = lastKeysetKeyFailureLogTime.get(); + if (t == null || clock.instant().isAfter(t.plus(KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES, ChronoUnit.MINUTES))) { + LOGGER.error("keyset keys sync failing"); + lastKeysetKeyFailureLogTime.set(Instant.now()); + } + } + public void handleAttestResponse(Pair response) { if (response.left() == AttestationResponseCode.AttestationFailure) { LOGGER.error("core attestation failed with AttestationFailure, shutting down operator, core response: {}", response.right()); From 651898f4fa4af7b3a77882073aaac46a54e7bd20 Mon Sep 17 00:00:00 2001 From: way zheng Date: Mon, 13 Oct 2025 23:26:22 -0700 Subject: [PATCH 03/22] Remove unnecessary KeyManager callback - RotatingStoreVerticle handles monitoring --- src/main/java/com/uid2/operator/Main.java | 3 +- .../com/uid2/operator/model/KeyManager.java | 45 ++++--------------- 2 files changed, 9 insertions(+), 39 deletions(-) diff --git a/src/main/java/com/uid2/operator/Main.java b/src/main/java/com/uid2/operator/Main.java index 19855826c..3bcbd5fea 100644 --- a/src/main/java/com/uid2/operator/Main.java +++ b/src/main/java/com/uid2/operator/Main.java @@ -246,8 +246,7 @@ public Main(Vertx vertx, JsonObject config) throws Exception { } private KeyManager getKeyManager() { - return new KeyManager(this.keysetKeyStore, this.keysetProvider, - hasKeys -> shutdownHandler.handleKeysetKeyRefreshResponse(hasKeys)); + return new KeyManager(this.keysetKeyStore, this.keysetProvider); } public static void recordStartupComplete() { diff --git a/src/main/java/com/uid2/operator/model/KeyManager.java b/src/main/java/com/uid2/operator/model/KeyManager.java index b03c1b4f4..19bae8d07 100644 --- a/src/main/java/com/uid2/operator/model/KeyManager.java +++ b/src/main/java/com/uid2/operator/model/KeyManager.java @@ -13,23 +13,16 @@ import java.util.Comparator; import java.util.List; import java.util.Map; -import java.util.function.Consumer; import java.util.stream.Collectors; public class KeyManager { private static final Logger LOGGER = LoggerFactory.getLogger(UIDOperatorVerticle.class); private final IKeysetKeyStore keysetKeyStore; private final RotatingKeysetProvider keysetProvider; - private final Consumer keyAvailabilityHandler; public KeyManager(IKeysetKeyStore keysetKeyStore, RotatingKeysetProvider keysetProvider) { - this(keysetKeyStore, keysetProvider, null); - } - - public KeyManager(IKeysetKeyStore keysetKeyStore, RotatingKeysetProvider keysetProvider, Consumer keyAvailabilityHandler) { this.keysetKeyStore = keysetKeyStore; this.keysetProvider = keysetProvider; - this.keyAvailabilityHandler = keyAvailabilityHandler; } public KeyManagerSnapshot getKeyManagerSnapshot(int siteId) { @@ -112,22 +105,11 @@ public KeysetKey getMasterKey() { } public KeysetKey getMasterKey(Instant asOf) { - try { - KeysetKey key = this.keysetKeyStore.getSnapshot().getActiveKey(Const.Data.MasterKeysetId, asOf); - if (key == null) { - throw new NoActiveKeyException(String.format("Cannot get a master key with keyset ID %d.", Const.Data.MasterKeysetId)); - } - // Reset timer on successful key retrieval - if (keyAvailabilityHandler != null) { - keyAvailabilityHandler.accept(true); - } - return key; - } catch (NoActiveKeyException e) { - if (keyAvailabilityHandler != null) { - keyAvailabilityHandler.accept(false); - } - throw e; + KeysetKey key = this.keysetKeyStore.getSnapshot().getActiveKey(Const.Data.MasterKeysetId, asOf); + if (key == null) { + throw new NoActiveKeyException(String.format("Cannot get a master key with keyset ID %d.", Const.Data.MasterKeysetId)); } + return key; } public KeysetKey getRefreshKey() { @@ -135,22 +117,11 @@ public KeysetKey getRefreshKey() { } public KeysetKey getRefreshKey(Instant asOf) { - try { - KeysetKey key = this.keysetKeyStore.getSnapshot().getActiveKey(Const.Data.RefreshKeysetId, asOf); - if (key == null) { - throw new NoActiveKeyException(String.format("Cannot get a refresh key with keyset ID %d.", Const.Data.RefreshKeysetId)); - } - // Reset timer on successful key retrieval - if (keyAvailabilityHandler != null) { - keyAvailabilityHandler.accept(true); - } - return key; - } catch (NoActiveKeyException e) { - if (keyAvailabilityHandler != null) { - keyAvailabilityHandler.accept(false); - } - throw e; + KeysetKey key = this.keysetKeyStore.getSnapshot().getActiveKey(Const.Data.RefreshKeysetId, asOf); + if (key == null) { + throw new NoActiveKeyException(String.format("Cannot get a refresh key with keyset ID %d.", Const.Data.RefreshKeysetId)); } + return key; } public static class NoActiveKeyException extends RuntimeException { From 4ad136e80516a79ba82a9cb86f48657414d2d13d Mon Sep 17 00:00:00 2001 From: way zheng Date: Mon, 13 Oct 2025 23:28:33 -0700 Subject: [PATCH 04/22] Update uid2-shared version reference --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 40af0e576..25e46f27f 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ 2.1.0 2.1.13 2.1.0 - 11.1.69 + 11.1.70-alpha-314-SNAPSHOT ${project.version} 21 21 From ef241d0af51d3eee880cedc4e3a5ad8d98079ae1 Mon Sep 17 00:00:00 2001 From: way zheng Date: Mon, 13 Oct 2025 23:40:11 -0700 Subject: [PATCH 05/22] Enable DEBUG logging for OperatorShutdownHandler --- conf/logback.xml | 2 ++ .../java/com/uid2/operator/vertx/OperatorShutdownHandler.java | 2 ++ 2 files changed, 4 insertions(+) diff --git a/conf/logback.xml b/conf/logback.xml index bdd1f5726..25aa78c9c 100644 --- a/conf/logback.xml +++ b/conf/logback.xml @@ -9,6 +9,8 @@ + + diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index 8f6bdb5f5..6d55fc834 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -75,6 +75,8 @@ public void handleKeysetKeyRefreshResponse(Boolean success) { failureDuration.toDays(), failureDuration.toHoursPart(), failureDuration.toMinutesPart()); + } else { + LOGGER.debug("keyset keys sync successful - timer remains null"); } } else { logKeysetKeyFailureAtInterval(); From 246f71615ce6e364de021c34d824d4abdaf5ac2c Mon Sep 17 00:00:00 2001 From: way zheng Date: Mon, 13 Oct 2025 23:44:01 -0700 Subject: [PATCH 06/22] Add timer accumulation logging and keyset key unit tests --- .../vertx/OperatorShutdownHandler.java | 14 +++- .../operator/OperatorShutdownHandlerTest.java | 77 +++++++++++++++++++ 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index 6d55fc834..83e3b092f 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -85,9 +85,17 @@ public void handleKeysetKeyRefreshResponse(Boolean success) { keysetKeyFailureStartTime.set(clock.instant()); LOGGER.warn( "keyset keys sync started failing. shutdown timer started (will shutdown in 7 days if not recovered)"); - } else if (Duration.between(t, clock.instant()).compareTo(this.keysetKeyShutdownWaitTime) > 0) { - LOGGER.error("keyset keys have been failing to sync for too long. shutting down operator"); - this.shutdownService.Shutdown(1); + } else { + Duration elapsed = Duration.between(t, clock.instant()); + LOGGER.debug("keyset keys sync still failing - timer at {} ({}d {}h {}m) / 7 days", + elapsed, + elapsed.toDays(), + elapsed.toHoursPart(), + elapsed.toMinutesPart()); + if (elapsed.compareTo(this.keysetKeyShutdownWaitTime) > 0) { + LOGGER.error("keyset keys have been failing to sync for too long. shutting down operator"); + this.shutdownService.Shutdown(1); + } } } } diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index 10a00b813..a49d48669 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -166,4 +166,81 @@ void saltsLogErrorAtInterval(VertxTestContext testContext) { testContext.completeNow(); } + + @Test + void shutdownOnKeysetKeyFailedTooLong(VertxTestContext testContext) { + ListAppender logWatcher = new ListAppender<>(); + logWatcher.start(); + ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); + + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + Assertions.assertTrue(logWatcher.list.get(0).getFormattedMessage().contains("keyset keys sync failing")); + Assertions.assertTrue(logWatcher.list.get(1).getFormattedMessage().contains("keyset keys sync started failing")); + + when(clock.instant()).thenAnswer(i -> Instant.now().plus(7, ChronoUnit.DAYS).plusSeconds(60)); + try { + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + } catch (RuntimeException e) { + verify(shutdownService).Shutdown(1); + Assertions.assertTrue(logWatcher.list.stream().anyMatch(log -> + log.getFormattedMessage().contains("keyset keys have been failing to sync for too long"))); + testContext.completeNow(); + } + } + + @Test + void keysetKeyRecoverOnSuccess(VertxTestContext testContext) { + ListAppender logWatcher = new ListAppender<>(); + logWatcher.start(); + ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); + + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + when(clock.instant()).thenAnswer(i -> Instant.now().plus(3, ChronoUnit.DAYS)); + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true); + + Assertions.assertTrue(logWatcher.list.stream().anyMatch(log -> + log.getFormattedMessage().contains("keyset keys sync recovered"))); + + when(clock.instant()).thenAnswer(i -> Instant.now().plus(7, ChronoUnit.DAYS)); + assertDoesNotThrow(() -> { + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + }); + verify(shutdownService, never()).Shutdown(anyInt()); + testContext.completeNow(); + } + + @Test + void keysetKeyLogErrorAtInterval(VertxTestContext testContext) { + ListAppender logWatcher = new ListAppender<>(); + logWatcher.start(); + ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); + + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + long errorLogCount1 = logWatcher.list.stream().filter(log -> + log.getFormattedMessage().contains("keyset keys sync failing")).count(); + + when(clock.instant()).thenAnswer(i -> Instant.now().plus(9, ChronoUnit.MINUTES)); + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + long errorLogCount2 = logWatcher.list.stream().filter(log -> + log.getFormattedMessage().contains("keyset keys sync failing")).count(); + Assertions.assertEquals(errorLogCount1, errorLogCount2); + + when(clock.instant()).thenAnswer(i -> Instant.now().plus(11, ChronoUnit.MINUTES)); + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + long errorLogCount3 = logWatcher.list.stream().filter(log -> + log.getFormattedMessage().contains("keyset keys sync failing")).count(); + Assertions.assertTrue(errorLogCount3 > errorLogCount2); + + testContext.completeNow(); + } + + @Test + void keysetKeyNoShutdownWhenAlwaysSuccessful(VertxTestContext testContext) { + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true); + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true); + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true); + + verify(shutdownService, never()).Shutdown(anyInt()); + testContext.completeNow(); + } } From 7609538e62a35dbedb8df71301526dccd79cee6f Mon Sep 17 00:00:00 2001 From: Release Workflow Date: Tue, 14 Oct 2025 06:49:48 +0000 Subject: [PATCH 07/22] [CI Pipeline] Released Snapshot version: 5.58.63-alpha-245-SNAPSHOT --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 25e46f27f..577fc8323 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.uid2 uid2-operator - 5.58.62 + 5.58.63-alpha-245-SNAPSHOT UTF-8 From da09d577379438a8e936a745e6c6a45454a88125 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 14 Oct 2025 11:15:44 -0700 Subject: [PATCH 08/22] make KeysetKeysFailedShutdownHours configurable, simplify reconvery logic --- conf/default-config.json | 2 ++ conf/docker-config.json | 1 + conf/integ-config.json | 1 + conf/local-config.json | 1 + conf/local-e2e-docker-private-config.json | 1 + conf/local-e2e-docker-public-config.json | 1 + conf/local-e2e-private-config.json | 1 + conf/local-e2e-public-config.json | 1 + src/main/java/com/uid2/operator/Main.java | 3 ++- .../vertx/OperatorShutdownHandler.java | 19 ++++--------------- 10 files changed, 15 insertions(+), 16 deletions(-) diff --git a/conf/default-config.json b/conf/default-config.json index c5de0d87b..36bd2a286 100644 --- a/conf/default-config.json +++ b/conf/default-config.json @@ -37,6 +37,8 @@ "optout_inmem_cache": false, "enclave_platform": null, "failure_shutdown_wait_hours": 120, + "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "sharing_token_expiry_seconds": 2592000, "operator_type": "public", "enable_remote_config": true, diff --git a/conf/docker-config.json b/conf/docker-config.json index 0d841ba65..f346045bc 100644 --- a/conf/docker-config.json +++ b/conf/docker-config.json @@ -40,6 +40,7 @@ "enclave_platform": null, "failure_shutdown_wait_hours": 120, "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "operator_type": "public", "disable_optout_token": true, "enable_remote_config": true, diff --git a/conf/integ-config.json b/conf/integ-config.json index af8243946..a9b0b9049 100644 --- a/conf/integ-config.json +++ b/conf/integ-config.json @@ -16,6 +16,7 @@ "cloud_encryption_keys_metadata_path": "http://localhost:8088/cloud_encryption_keys/retrieve", "runtime_config_metadata_path": "http://localhost:8088/operator/config", "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "operator_type": "public", "disable_optout_token": true, "enable_remote_config": false, diff --git a/conf/local-config.json b/conf/local-config.json index 061072ee6..52697a1c0 100644 --- a/conf/local-config.json +++ b/conf/local-config.json @@ -38,6 +38,7 @@ "key_sharing_endpoint_provide_app_names": true, "client_side_token_generate_log_invalid_http_origins": true, "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "operator_type": "public", "encrypted_files": false, "disable_optout_token": true, diff --git a/conf/local-e2e-docker-private-config.json b/conf/local-e2e-docker-private-config.json index a6a58bbac..bdf259c84 100644 --- a/conf/local-e2e-docker-private-config.json +++ b/conf/local-e2e-docker-private-config.json @@ -30,6 +30,7 @@ "optout_delta_rotate_interval": 60, "cloud_refresh_interval": 30, "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "operator_type": "private", "enable_remote_config": true, "uid_instance_id_prefix": "local-private-operator" diff --git a/conf/local-e2e-docker-public-config.json b/conf/local-e2e-docker-public-config.json index d20fb52f0..c4c899a63 100644 --- a/conf/local-e2e-docker-public-config.json +++ b/conf/local-e2e-docker-public-config.json @@ -36,6 +36,7 @@ "optout_status_api_enabled": true, "cloud_refresh_interval": 30, "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "operator_type": "public", "disable_optout_token": true, "enable_remote_config": true, diff --git a/conf/local-e2e-private-config.json b/conf/local-e2e-private-config.json index 55866d9a2..a69994d47 100644 --- a/conf/local-e2e-private-config.json +++ b/conf/local-e2e-private-config.json @@ -41,6 +41,7 @@ "client_side_token_generate_domain_name_check_enabled": false, "client_side_token_generate_log_invalid_http_origins": true, "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "operator_type": "private", "enable_remote_config": true, "uid_instance_id_prefix": "local-private-operator" diff --git a/conf/local-e2e-public-config.json b/conf/local-e2e-public-config.json index 60a832abc..075481496 100644 --- a/conf/local-e2e-public-config.json +++ b/conf/local-e2e-public-config.json @@ -42,6 +42,7 @@ "key_sharing_endpoint_provide_app_names": true, "client_side_token_generate_log_invalid_http_origins": true, "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "operator_type": "public", "disable_optout_token": true, "enable_remote_config": true, diff --git a/src/main/java/com/uid2/operator/Main.java b/src/main/java/com/uid2/operator/Main.java index 3bcbd5fea..fe0ebaedf 100644 --- a/src/main/java/com/uid2/operator/Main.java +++ b/src/main/java/com/uid2/operator/Main.java @@ -116,7 +116,8 @@ public Main(Vertx vertx, JsonObject config) throws Exception { this.validateServiceLinks = config.getBoolean(Const.Config.ValidateServiceLinks, false); this.encryptedCloudFilesEnabled = config.getBoolean(Const.Config.EncryptedFiles, false); this.shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), - Duration.ofHours(config.getInteger(Const.Config.SaltsExpiredShutdownHours, 12)), Duration.ofDays(7), + Duration.ofHours(config.getInteger(Const.Config.SaltsExpiredShutdownHours, 12)), + Duration.ofHours(config.getInteger(Const.Config.KeysetKeysFailedShutdownHours, 168)), Clock.systemUTC(), new ShutdownService()); this.uidInstanceIdProvider = new UidInstanceIdProvider(config); diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index 83e3b092f..c4381d990 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -67,28 +67,17 @@ public void logSaltFailureAtInterval() { public void handleKeysetKeyRefreshResponse(Boolean success) { if (success) { - Instant previousFailureTime = keysetKeyFailureStartTime.getAndSet(null); - if (previousFailureTime != null) { - Duration failureDuration = Duration.between(previousFailureTime, clock.instant()); - LOGGER.info("keyset keys sync recovered after {} ({}d {}h {}m). shutdown timer reset.", - failureDuration, - failureDuration.toDays(), - failureDuration.toHoursPart(), - failureDuration.toMinutesPart()); - } else { - LOGGER.debug("keyset keys sync successful - timer remains null"); - } + keysetKeyFailureStartTime.set(null); + LOGGER.debug("keyset keys sync successful"); } else { logKeysetKeyFailureAtInterval(); Instant t = keysetKeyFailureStartTime.get(); if (t == null) { keysetKeyFailureStartTime.set(clock.instant()); - LOGGER.warn( - "keyset keys sync started failing. shutdown timer started (will shutdown in 7 days if not recovered)"); + LOGGER.warn("keyset keys sync started failing. shutdown timer started"); } else { Duration elapsed = Duration.between(t, clock.instant()); - LOGGER.debug("keyset keys sync still failing - timer at {} ({}d {}h {}m) / 7 days", - elapsed, + LOGGER.debug("keyset keys sync still failing - elapsed time: {}d {}h {}m", elapsed.toDays(), elapsed.toHoursPart(), elapsed.toMinutesPart()); From d4ffde030f77162728504483de3ecff579c6dc11 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 14 Oct 2025 11:16:14 -0700 Subject: [PATCH 09/22] make KeysetKeysFailedShutdownHours configurable, simplify reconvery logic --- conf/default-config.json | 2 -- 1 file changed, 2 deletions(-) diff --git a/conf/default-config.json b/conf/default-config.json index 36bd2a286..c5de0d87b 100644 --- a/conf/default-config.json +++ b/conf/default-config.json @@ -37,8 +37,6 @@ "optout_inmem_cache": false, "enclave_platform": null, "failure_shutdown_wait_hours": 120, - "salts_expired_shutdown_hours": 12, - "keysetkeys_failed_shutdown_hours": 168, "sharing_token_expiry_seconds": 2592000, "operator_type": "public", "enable_remote_config": true, From bb18e29a40b11e3e84c3f2796d58fa301d03ab00 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 14 Oct 2025 11:31:29 -0700 Subject: [PATCH 10/22] remove fall back constuctor --- .../com/uid2/operator/vertx/OperatorShutdownHandler.java | 5 ----- .../java/com/uid2/operator/OperatorShutdownHandlerTest.java | 2 +- .../java/com/uid2/operator/benchmark/BenchmarkCommon.java | 2 +- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index c4381d990..3cf512c57 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -28,11 +28,6 @@ public class OperatorShutdownHandler { private final Clock clock; private final ShutdownService shutdownService; - public OperatorShutdownHandler(Duration attestShutdownWaitTime, Duration saltShutdownWaitTime, Clock clock, - ShutdownService shutdownService) { - this(attestShutdownWaitTime, saltShutdownWaitTime, Duration.ofDays(7), clock, shutdownService); - } - public OperatorShutdownHandler(Duration attestShutdownWaitTime, Duration saltShutdownWaitTime, Duration keysetKeyShutdownWaitTime, Clock clock, ShutdownService shutdownService) { this.attestShutdownWaitTime = attestShutdownWaitTime; diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index a49d48669..160c33d6b 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -43,7 +43,7 @@ void beforeEach() { mocks = MockitoAnnotations.openMocks(this); when(clock.instant()).thenAnswer(i -> Instant.now()); doThrow(new RuntimeException()).when(shutdownService).Shutdown(1); - this.operatorShutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), Duration.ofHours(12), clock, shutdownService); + this.operatorShutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), Duration.ofHours(12), Duration.ofHours(168), clock, shutdownService); } @AfterEach diff --git a/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java b/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java index 10565009b..92cf7743f 100644 --- a/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java +++ b/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java @@ -73,7 +73,7 @@ static IUIDOperatorService createUidOperatorService() throws Exception { saltProvider.getSnapshot(Instant.now()).getFirstLevelSalt(), /* out */ optOutPartitionFiles); final IOptOutStore optOutStore = new StaticOptOutStore(optOutLocalStorage, make1mOptOutEntryConfig(), optOutPartitionFiles); - final OperatorShutdownHandler shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(1), Duration.ofHours(1), Clock.systemUTC(), new ShutdownService()); + final OperatorShutdownHandler shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(1), Duration.ofHours(1), Duration.ofHours(1), Clock.systemUTC(), new ShutdownService()); return new UIDOperatorService( optOutStore, saltProvider, From d35aed9337394f65d35d0127aa2a5fbf52c9ffb0 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 14 Oct 2025 11:42:09 -0700 Subject: [PATCH 11/22] bump version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 577fc8323..87c8c0f29 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ 2.1.0 2.1.13 2.1.0 - 11.1.70-alpha-314-SNAPSHOT + 11.1.71-alpha-315-SNAPSHOT ${project.version} 21 21 From 51357a55fb0c79a61124e68488184ef545ace702 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 14 Oct 2025 11:47:17 -0700 Subject: [PATCH 12/22] shut down behavior changes --- .../com/uid2/operator/OperatorShutdownHandlerTest.java | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index 160c33d6b..5c8914353 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -190,17 +190,11 @@ void shutdownOnKeysetKeyFailedTooLong(VertxTestContext testContext) { @Test void keysetKeyRecoverOnSuccess(VertxTestContext testContext) { - ListAppender logWatcher = new ListAppender<>(); - logWatcher.start(); - ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); when(clock.instant()).thenAnswer(i -> Instant.now().plus(3, ChronoUnit.DAYS)); + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true); - Assertions.assertTrue(logWatcher.list.stream().anyMatch(log -> - log.getFormattedMessage().contains("keyset keys sync recovered"))); - when(clock.instant()).thenAnswer(i -> Instant.now().plus(7, ChronoUnit.DAYS)); assertDoesNotThrow(() -> { this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); From 619bf84cb6bac2c0e2c77cf1042d7339af688fb6 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 14 Oct 2025 11:54:18 -0700 Subject: [PATCH 13/22] remove redundant logging --- .../vertx/OperatorShutdownHandler.java | 11 -------- .../operator/OperatorShutdownHandlerTest.java | 28 +------------------ 2 files changed, 1 insertion(+), 38 deletions(-) diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index 3cf512c57..64f6ac191 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -16,7 +16,6 @@ public class OperatorShutdownHandler { private static final Logger LOGGER = LoggerFactory.getLogger(OperatorShutdownHandler.class); private static final int SALT_FAILURE_LOG_INTERVAL_MINUTES = 10; - private static final int KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES = 10; private final Duration attestShutdownWaitTime; private final Duration saltShutdownWaitTime; private final Duration keysetKeyShutdownWaitTime; @@ -24,7 +23,6 @@ public class OperatorShutdownHandler { private final AtomicReference saltFailureStartTime = new AtomicReference<>(null); private final AtomicReference keysetKeyFailureStartTime = new AtomicReference<>(null); private final AtomicReference lastSaltFailureLogTime = new AtomicReference<>(null); - private final AtomicReference lastKeysetKeyFailureLogTime = new AtomicReference<>(null); private final Clock clock; private final ShutdownService shutdownService; @@ -65,7 +63,6 @@ public void handleKeysetKeyRefreshResponse(Boolean success) { keysetKeyFailureStartTime.set(null); LOGGER.debug("keyset keys sync successful"); } else { - logKeysetKeyFailureAtInterval(); Instant t = keysetKeyFailureStartTime.get(); if (t == null) { keysetKeyFailureStartTime.set(clock.instant()); @@ -84,14 +81,6 @@ public void handleKeysetKeyRefreshResponse(Boolean success) { } } - public void logKeysetKeyFailureAtInterval() { - Instant t = lastKeysetKeyFailureLogTime.get(); - if (t == null || clock.instant().isAfter(t.plus(KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES, ChronoUnit.MINUTES))) { - LOGGER.error("keyset keys sync failing"); - lastKeysetKeyFailureLogTime.set(Instant.now()); - } - } - public void handleAttestResponse(Pair response) { if (response.left() == AttestationResponseCode.AttestationFailure) { LOGGER.error("core attestation failed with AttestationFailure, shutting down operator, core response: {}", response.right()); diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index 5c8914353..00825e59d 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -174,8 +174,7 @@ void shutdownOnKeysetKeyFailedTooLong(VertxTestContext testContext) { ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); - Assertions.assertTrue(logWatcher.list.get(0).getFormattedMessage().contains("keyset keys sync failing")); - Assertions.assertTrue(logWatcher.list.get(1).getFormattedMessage().contains("keyset keys sync started failing")); + Assertions.assertTrue(logWatcher.list.get(0).getFormattedMessage().contains("keyset keys sync started failing")); when(clock.instant()).thenAnswer(i -> Instant.now().plus(7, ChronoUnit.DAYS).plusSeconds(60)); try { @@ -203,31 +202,6 @@ void keysetKeyRecoverOnSuccess(VertxTestContext testContext) { testContext.completeNow(); } - @Test - void keysetKeyLogErrorAtInterval(VertxTestContext testContext) { - ListAppender logWatcher = new ListAppender<>(); - logWatcher.start(); - ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); - - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); - long errorLogCount1 = logWatcher.list.stream().filter(log -> - log.getFormattedMessage().contains("keyset keys sync failing")).count(); - - when(clock.instant()).thenAnswer(i -> Instant.now().plus(9, ChronoUnit.MINUTES)); - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); - long errorLogCount2 = logWatcher.list.stream().filter(log -> - log.getFormattedMessage().contains("keyset keys sync failing")).count(); - Assertions.assertEquals(errorLogCount1, errorLogCount2); - - when(clock.instant()).thenAnswer(i -> Instant.now().plus(11, ChronoUnit.MINUTES)); - this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); - long errorLogCount3 = logWatcher.list.stream().filter(log -> - log.getFormattedMessage().contains("keyset keys sync failing")).count(); - Assertions.assertTrue(errorLogCount3 > errorLogCount2); - - testContext.completeNow(); - } - @Test void keysetKeyNoShutdownWhenAlwaysSuccessful(VertxTestContext testContext) { this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true); From 112d7c38ec2e5164f7fe30faa8cd1863907f802a Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 14 Oct 2025 13:10:44 -0700 Subject: [PATCH 14/22] log failure at an interval --- .../vertx/OperatorShutdownHandler.java | 21 +++++++++++++--- .../operator/OperatorShutdownHandlerTest.java | 25 +++++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index 64f6ac191..f99a6c376 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -16,6 +16,7 @@ public class OperatorShutdownHandler { private static final Logger LOGGER = LoggerFactory.getLogger(OperatorShutdownHandler.class); private static final int SALT_FAILURE_LOG_INTERVAL_MINUTES = 10; + private static final int KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES = 10; private final Duration attestShutdownWaitTime; private final Duration saltShutdownWaitTime; private final Duration keysetKeyShutdownWaitTime; @@ -23,6 +24,7 @@ public class OperatorShutdownHandler { private final AtomicReference saltFailureStartTime = new AtomicReference<>(null); private final AtomicReference keysetKeyFailureStartTime = new AtomicReference<>(null); private final AtomicReference lastSaltFailureLogTime = new AtomicReference<>(null); + private final AtomicReference lastKeysetKeyFailureLogTime = new AtomicReference<>(null); private final Clock clock; private final ShutdownService shutdownService; @@ -61,18 +63,17 @@ public void logSaltFailureAtInterval() { public void handleKeysetKeyRefreshResponse(Boolean success) { if (success) { keysetKeyFailureStartTime.set(null); + lastKeysetKeyFailureLogTime.set(null); LOGGER.debug("keyset keys sync successful"); } else { Instant t = keysetKeyFailureStartTime.get(); if (t == null) { keysetKeyFailureStartTime.set(clock.instant()); + lastKeysetKeyFailureLogTime.set(clock.instant()); LOGGER.warn("keyset keys sync started failing. shutdown timer started"); } else { + logKeysetKeyFailureProgressAtInterval(t); Duration elapsed = Duration.between(t, clock.instant()); - LOGGER.debug("keyset keys sync still failing - elapsed time: {}d {}h {}m", - elapsed.toDays(), - elapsed.toHoursPart(), - elapsed.toMinutesPart()); if (elapsed.compareTo(this.keysetKeyShutdownWaitTime) > 0) { LOGGER.error("keyset keys have been failing to sync for too long. shutting down operator"); this.shutdownService.Shutdown(1); @@ -81,6 +82,18 @@ public void handleKeysetKeyRefreshResponse(Boolean success) { } } + private void logKeysetKeyFailureProgressAtInterval(Instant failureStartTime) { + Instant lastLogTime = lastKeysetKeyFailureLogTime.get(); + if (lastLogTime == null || clock.instant().isAfter(lastLogTime.plus(KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES, ChronoUnit.MINUTES))) { + Duration elapsed = Duration.between(failureStartTime, clock.instant()); + LOGGER.warn("keyset keys sync still failing - elapsed time: {}d {}h {}m", + elapsed.toDays(), + elapsed.toHoursPart(), + elapsed.toMinutesPart()); + lastKeysetKeyFailureLogTime.set(clock.instant()); + } + } + public void handleAttestResponse(Pair response) { if (response.left() == AttestationResponseCode.AttestationFailure) { LOGGER.error("core attestation failed with AttestationFailure, shutting down operator, core response: {}", response.right()); diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index 00825e59d..ac9ac4c85 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -211,4 +211,29 @@ void keysetKeyNoShutdownWhenAlwaysSuccessful(VertxTestContext testContext) { verify(shutdownService, never()).Shutdown(anyInt()); testContext.completeNow(); } + + @Test + void keysetKeyLogProgressAtInterval(VertxTestContext testContext) { + ListAppender logWatcher = new ListAppender<>(); + logWatcher.start(); + ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); + + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + long warnLogCount1 = logWatcher.list.stream().filter(log -> + log.getFormattedMessage().contains("keyset keys sync still failing")).count(); + + when(clock.instant()).thenAnswer(i -> Instant.now().plus(30, ChronoUnit.MINUTES)); + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + long warnLogCount2 = logWatcher.list.stream().filter(log -> + log.getFormattedMessage().contains("keyset keys sync still failing")).count(); + Assertions.assertEquals(warnLogCount1, warnLogCount2); + + when(clock.instant()).thenAnswer(i -> Instant.now().plus(61, ChronoUnit.MINUTES)); + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + long warnLogCount3 = logWatcher.list.stream().filter(log -> + log.getFormattedMessage().contains("keyset keys sync still failing")).count(); + Assertions.assertTrue(warnLogCount3 > warnLogCount2); + + testContext.completeNow(); + } } From c4de167080a097cbbbf1bbcf3228fac51c3ecc57 Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 14 Oct 2025 13:14:05 -0700 Subject: [PATCH 15/22] log failure at an interval --- .../java/com/uid2/operator/OperatorShutdownHandlerTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index ac9ac4c85..e719fc8e1 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -222,13 +222,13 @@ void keysetKeyLogProgressAtInterval(VertxTestContext testContext) { long warnLogCount1 = logWatcher.list.stream().filter(log -> log.getFormattedMessage().contains("keyset keys sync still failing")).count(); - when(clock.instant()).thenAnswer(i -> Instant.now().plus(30, ChronoUnit.MINUTES)); + when(clock.instant()).thenAnswer(i -> Instant.now().plus(5, ChronoUnit.MINUTES)); this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); long warnLogCount2 = logWatcher.list.stream().filter(log -> log.getFormattedMessage().contains("keyset keys sync still failing")).count(); Assertions.assertEquals(warnLogCount1, warnLogCount2); - when(clock.instant()).thenAnswer(i -> Instant.now().plus(61, ChronoUnit.MINUTES)); + when(clock.instant()).thenAnswer(i -> Instant.now().plus(11, ChronoUnit.MINUTES)); this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); long warnLogCount3 = logWatcher.list.stream().filter(log -> log.getFormattedMessage().contains("keyset keys sync still failing")).count(); From a71cbb8dba21e596b121ddc37355d3ae54f497d7 Mon Sep 17 00:00:00 2001 From: Release Workflow Date: Tue, 14 Oct 2025 20:18:38 +0000 Subject: [PATCH 16/22] [CI Pipeline] Released Snapshot version: 5.58.64-alpha-246-SNAPSHOT --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 87c8c0f29..1dda9600f 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.uid2 uid2-operator - 5.58.63-alpha-245-SNAPSHOT + 5.58.64-alpha-246-SNAPSHOT UTF-8 From fecce7958b8e31b10378a0f3c1d99cc3fb9c3b3d Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 14 Oct 2025 14:33:18 -0700 Subject: [PATCH 17/22] simplify delayed timer, since exact logging schedule is not that important --- .../java/com/uid2/operator/vertx/OperatorShutdownHandler.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index f99a6c376..0ee1a14ec 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -63,13 +63,11 @@ public void logSaltFailureAtInterval() { public void handleKeysetKeyRefreshResponse(Boolean success) { if (success) { keysetKeyFailureStartTime.set(null); - lastKeysetKeyFailureLogTime.set(null); LOGGER.debug("keyset keys sync successful"); } else { Instant t = keysetKeyFailureStartTime.get(); if (t == null) { keysetKeyFailureStartTime.set(clock.instant()); - lastKeysetKeyFailureLogTime.set(clock.instant()); LOGGER.warn("keyset keys sync started failing. shutdown timer started"); } else { logKeysetKeyFailureProgressAtInterval(t); From 639ad3ad25d61135410b371868327e7679bf83d8 Mon Sep 17 00:00:00 2001 From: Weihe Zheng Date: Tue, 14 Oct 2025 14:35:09 -0700 Subject: [PATCH 18/22] Remove DEBUG logger for OperatorShutdownHandler --- conf/logback.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/conf/logback.xml b/conf/logback.xml index 25aa78c9c..bdd1f5726 100644 --- a/conf/logback.xml +++ b/conf/logback.xml @@ -9,8 +9,6 @@ - - From aa6447cff058c1ddfb9a29473fbcf57228ed0fda Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 14 Oct 2025 14:36:35 -0700 Subject: [PATCH 19/22] beautify --- .../com/uid2/operator/vertx/OperatorShutdownHandler.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index 0ee1a14ec..f575ac17d 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -84,10 +84,7 @@ private void logKeysetKeyFailureProgressAtInterval(Instant failureStartTime) { Instant lastLogTime = lastKeysetKeyFailureLogTime.get(); if (lastLogTime == null || clock.instant().isAfter(lastLogTime.plus(KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES, ChronoUnit.MINUTES))) { Duration elapsed = Duration.between(failureStartTime, clock.instant()); - LOGGER.warn("keyset keys sync still failing - elapsed time: {}d {}h {}m", - elapsed.toDays(), - elapsed.toHoursPart(), - elapsed.toMinutesPart()); + LOGGER.warn("keyset keys sync still failing - elapsed time: {}d {}h {}m", elapsed.toDays(), elapsed.toHoursPart(), elapsed.toMinutesPart()); lastKeysetKeyFailureLogTime.set(clock.instant()); } } From c42844614de73c07e6127bd54e239eff800782ba Mon Sep 17 00:00:00 2001 From: way zheng Date: Tue, 14 Oct 2025 14:40:48 -0700 Subject: [PATCH 20/22] simplify calcualtion logics for delayed logging --- .../com/uid2/operator/vertx/OperatorShutdownHandler.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index f575ac17d..bba8f8540 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -63,27 +63,29 @@ public void logSaltFailureAtInterval() { public void handleKeysetKeyRefreshResponse(Boolean success) { if (success) { keysetKeyFailureStartTime.set(null); + lastKeysetKeyFailureLogTime.set(null); LOGGER.debug("keyset keys sync successful"); } else { Instant t = keysetKeyFailureStartTime.get(); if (t == null) { keysetKeyFailureStartTime.set(clock.instant()); + lastKeysetKeyFailureLogTime.set(clock.instant()); LOGGER.warn("keyset keys sync started failing. shutdown timer started"); } else { - logKeysetKeyFailureProgressAtInterval(t); Duration elapsed = Duration.between(t, clock.instant()); if (elapsed.compareTo(this.keysetKeyShutdownWaitTime) > 0) { LOGGER.error("keyset keys have been failing to sync for too long. shutting down operator"); this.shutdownService.Shutdown(1); + } else { + logKeysetKeyFailureProgressAtInterval(t, elapsed); } } } } - private void logKeysetKeyFailureProgressAtInterval(Instant failureStartTime) { + private void logKeysetKeyFailureProgressAtInterval(Instant failureStartTime, Duration elapsed) { Instant lastLogTime = lastKeysetKeyFailureLogTime.get(); if (lastLogTime == null || clock.instant().isAfter(lastLogTime.plus(KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES, ChronoUnit.MINUTES))) { - Duration elapsed = Duration.between(failureStartTime, clock.instant()); LOGGER.warn("keyset keys sync still failing - elapsed time: {}d {}h {}m", elapsed.toDays(), elapsed.toHoursPart(), elapsed.toMinutesPart()); lastKeysetKeyFailureLogTime.set(clock.instant()); } From ddd10f7d774d92bc5bd08f2f6a956706a92c04d1 Mon Sep 17 00:00:00 2001 From: Weihe Zheng Date: Tue, 14 Oct 2025 14:54:04 -0700 Subject: [PATCH 21/22] Update uid2-shared.version to 11.1.80 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 1dda9600f..47bee6355 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ 2.1.0 2.1.13 2.1.0 - 11.1.71-alpha-315-SNAPSHOT + 11.1.80 ${project.version} 21 21 From dadcb3efd792699007e40e44b794a534a3bb2c21 Mon Sep 17 00:00:00 2001 From: Weihe Zheng Date: Tue, 14 Oct 2025 14:54:28 -0700 Subject: [PATCH 22/22] Update version from 5.58.64-alpha-246-SNAPSHOT to 5.58.62 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 47bee6355..84ef87565 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ com.uid2 uid2-operator - 5.58.64-alpha-246-SNAPSHOT + 5.58.62 UTF-8