diff --git a/conf/docker-config.json b/conf/docker-config.json index 0d841ba65..f346045bc 100644 --- a/conf/docker-config.json +++ b/conf/docker-config.json @@ -40,6 +40,7 @@ "enclave_platform": null, "failure_shutdown_wait_hours": 120, "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "operator_type": "public", "disable_optout_token": true, "enable_remote_config": true, diff --git a/conf/integ-config.json b/conf/integ-config.json index af8243946..a9b0b9049 100644 --- a/conf/integ-config.json +++ b/conf/integ-config.json @@ -16,6 +16,7 @@ "cloud_encryption_keys_metadata_path": "http://localhost:8088/cloud_encryption_keys/retrieve", "runtime_config_metadata_path": "http://localhost:8088/operator/config", "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "operator_type": "public", "disable_optout_token": true, "enable_remote_config": false, diff --git a/conf/local-config.json b/conf/local-config.json index 061072ee6..52697a1c0 100644 --- a/conf/local-config.json +++ b/conf/local-config.json @@ -38,6 +38,7 @@ "key_sharing_endpoint_provide_app_names": true, "client_side_token_generate_log_invalid_http_origins": true, "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "operator_type": "public", "encrypted_files": false, "disable_optout_token": true, diff --git a/conf/local-e2e-docker-private-config.json b/conf/local-e2e-docker-private-config.json index a6a58bbac..bdf259c84 100644 --- a/conf/local-e2e-docker-private-config.json +++ b/conf/local-e2e-docker-private-config.json @@ -30,6 +30,7 @@ "optout_delta_rotate_interval": 60, "cloud_refresh_interval": 30, "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "operator_type": "private", "enable_remote_config": true, "uid_instance_id_prefix": "local-private-operator" diff --git a/conf/local-e2e-docker-public-config.json b/conf/local-e2e-docker-public-config.json index d20fb52f0..c4c899a63 100644 --- a/conf/local-e2e-docker-public-config.json +++ b/conf/local-e2e-docker-public-config.json @@ -36,6 +36,7 @@ "optout_status_api_enabled": true, "cloud_refresh_interval": 30, "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "operator_type": "public", "disable_optout_token": true, "enable_remote_config": true, diff --git a/conf/local-e2e-private-config.json b/conf/local-e2e-private-config.json index 55866d9a2..a69994d47 100644 --- a/conf/local-e2e-private-config.json +++ b/conf/local-e2e-private-config.json @@ -41,6 +41,7 @@ "client_side_token_generate_domain_name_check_enabled": false, "client_side_token_generate_log_invalid_http_origins": true, "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "operator_type": "private", "enable_remote_config": true, "uid_instance_id_prefix": "local-private-operator" diff --git a/conf/local-e2e-public-config.json b/conf/local-e2e-public-config.json index 60a832abc..075481496 100644 --- a/conf/local-e2e-public-config.json +++ b/conf/local-e2e-public-config.json @@ -42,6 +42,7 @@ "key_sharing_endpoint_provide_app_names": true, "client_side_token_generate_log_invalid_http_origins": true, "salts_expired_shutdown_hours": 12, + "keysetkeys_failed_shutdown_hours": 168, "operator_type": "public", "disable_optout_token": true, "enable_remote_config": true, diff --git a/pom.xml b/pom.xml index 40af0e576..84ef87565 100644 --- a/pom.xml +++ b/pom.xml @@ -22,7 +22,7 @@ 2.1.0 2.1.13 2.1.0 - 11.1.69 + 11.1.80 ${project.version} 21 21 diff --git a/src/main/java/com/uid2/operator/Main.java b/src/main/java/com/uid2/operator/Main.java index c4fe54c7e..fe0ebaedf 100644 --- a/src/main/java/com/uid2/operator/Main.java +++ b/src/main/java/com/uid2/operator/Main.java @@ -61,6 +61,7 @@ import java.time.Duration; import java.time.Instant; import java.util.*; +import java.util.function.Consumer; import java.util.function.Supplier; import static com.uid2.operator.Const.Config.EnableRemoteConfigProp; @@ -114,7 +115,10 @@ public Main(Vertx vertx, JsonObject config) throws Exception { this.clientSideTokenGenerate = config.getBoolean(Const.Config.EnableClientSideTokenGenerate, false); this.validateServiceLinks = config.getBoolean(Const.Config.ValidateServiceLinks, false); this.encryptedCloudFilesEnabled = config.getBoolean(Const.Config.EncryptedFiles, false); - this.shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), Duration.ofHours(config.getInteger(Const.Config.SaltsExpiredShutdownHours, 12)), Clock.systemUTC(), new ShutdownService()); + this.shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), + Duration.ofHours(config.getInteger(Const.Config.SaltsExpiredShutdownHours, 12)), + Duration.ofHours(config.getInteger(Const.Config.KeysetKeysFailedShutdownHours, 168)), + Clock.systemUTC(), new ShutdownService()); this.uidInstanceIdProvider = new UidInstanceIdProvider(config); String coreAttestUrl = this.config.getString(Const.Config.CoreAttestUrlProp); @@ -420,7 +424,8 @@ private Future createStoreVerticles() throws Exception { } fs.add(createAndDeployRotatingStoreVerticle("auth", clientKeyProvider, "auth_refresh_ms")); fs.add(createAndDeployRotatingStoreVerticle("keyset", keysetProvider, "keyset_refresh_ms")); - fs.add(createAndDeployRotatingStoreVerticle("keysetkey", keysetKeyStore, "keysetkey_refresh_ms")); + fs.add(createAndDeployRotatingStoreVerticle("keysetkey", keysetKeyStore, "keysetkey_refresh_ms", + this.shutdownHandler::handleKeysetKeyRefreshResponse)); fs.add(createAndDeployRotatingStoreVerticle("salt", saltProvider, "salt_refresh_ms")); fs.add(createAndDeployCloudSyncStoreVerticle("optout", fsOptOut, optOutCloudSync)); CompositeFuture.all(fs).onComplete(ar -> { @@ -433,9 +438,13 @@ private Future createStoreVerticles() throws Exception { } private Future createAndDeployRotatingStoreVerticle(String name, IMetadataVersionedStore store, String storeRefreshConfigMs) { + return createAndDeployRotatingStoreVerticle(name, store, storeRefreshConfigMs, null); + } + + private Future createAndDeployRotatingStoreVerticle(String name, IMetadataVersionedStore store, String storeRefreshConfigMs, Consumer refreshCallback) { final int intervalMs = config.getInteger(storeRefreshConfigMs, 10000); - RotatingStoreVerticle rotatingStoreVerticle = new RotatingStoreVerticle(name, intervalMs, store); + RotatingStoreVerticle rotatingStoreVerticle = new RotatingStoreVerticle(name, intervalMs, store, refreshCallback); return vertx.deployVerticle(rotatingStoreVerticle); } diff --git a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java index 84075fb03..bba8f8540 100644 --- a/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java +++ b/src/main/java/com/uid2/operator/vertx/OperatorShutdownHandler.java @@ -16,17 +16,23 @@ public class OperatorShutdownHandler { private static final Logger LOGGER = LoggerFactory.getLogger(OperatorShutdownHandler.class); private static final int SALT_FAILURE_LOG_INTERVAL_MINUTES = 10; + private static final int KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES = 10; private final Duration attestShutdownWaitTime; private final Duration saltShutdownWaitTime; + private final Duration keysetKeyShutdownWaitTime; private final AtomicReference attestFailureStartTime = new AtomicReference<>(null); private final AtomicReference saltFailureStartTime = new AtomicReference<>(null); + private final AtomicReference keysetKeyFailureStartTime = new AtomicReference<>(null); private final AtomicReference lastSaltFailureLogTime = new AtomicReference<>(null); + private final AtomicReference lastKeysetKeyFailureLogTime = new AtomicReference<>(null); private final Clock clock; private final ShutdownService shutdownService; - public OperatorShutdownHandler(Duration attestShutdownWaitTime, Duration saltShutdownWaitTime, Clock clock, ShutdownService shutdownService) { + public OperatorShutdownHandler(Duration attestShutdownWaitTime, Duration saltShutdownWaitTime, + Duration keysetKeyShutdownWaitTime, Clock clock, ShutdownService shutdownService) { this.attestShutdownWaitTime = attestShutdownWaitTime; this.saltShutdownWaitTime = saltShutdownWaitTime; + this.keysetKeyShutdownWaitTime = keysetKeyShutdownWaitTime; this.clock = clock; this.shutdownService = shutdownService; } @@ -54,6 +60,37 @@ public void logSaltFailureAtInterval() { } } + public void handleKeysetKeyRefreshResponse(Boolean success) { + if (success) { + keysetKeyFailureStartTime.set(null); + lastKeysetKeyFailureLogTime.set(null); + LOGGER.debug("keyset keys sync successful"); + } else { + Instant t = keysetKeyFailureStartTime.get(); + if (t == null) { + keysetKeyFailureStartTime.set(clock.instant()); + lastKeysetKeyFailureLogTime.set(clock.instant()); + LOGGER.warn("keyset keys sync started failing. shutdown timer started"); + } else { + Duration elapsed = Duration.between(t, clock.instant()); + if (elapsed.compareTo(this.keysetKeyShutdownWaitTime) > 0) { + LOGGER.error("keyset keys have been failing to sync for too long. shutting down operator"); + this.shutdownService.Shutdown(1); + } else { + logKeysetKeyFailureProgressAtInterval(t, elapsed); + } + } + } + } + + private void logKeysetKeyFailureProgressAtInterval(Instant failureStartTime, Duration elapsed) { + Instant lastLogTime = lastKeysetKeyFailureLogTime.get(); + if (lastLogTime == null || clock.instant().isAfter(lastLogTime.plus(KEYSET_KEY_FAILURE_LOG_INTERVAL_MINUTES, ChronoUnit.MINUTES))) { + LOGGER.warn("keyset keys sync still failing - elapsed time: {}d {}h {}m", elapsed.toDays(), elapsed.toHoursPart(), elapsed.toMinutesPart()); + lastKeysetKeyFailureLogTime.set(clock.instant()); + } + } + public void handleAttestResponse(Pair response) { if (response.left() == AttestationResponseCode.AttestationFailure) { LOGGER.error("core attestation failed with AttestationFailure, shutting down operator, core response: {}", response.right()); diff --git a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java index 10a00b813..e719fc8e1 100644 --- a/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java +++ b/src/test/java/com/uid2/operator/OperatorShutdownHandlerTest.java @@ -43,7 +43,7 @@ void beforeEach() { mocks = MockitoAnnotations.openMocks(this); when(clock.instant()).thenAnswer(i -> Instant.now()); doThrow(new RuntimeException()).when(shutdownService).Shutdown(1); - this.operatorShutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), Duration.ofHours(12), clock, shutdownService); + this.operatorShutdownHandler = new OperatorShutdownHandler(Duration.ofHours(12), Duration.ofHours(12), Duration.ofHours(168), clock, shutdownService); } @AfterEach @@ -166,4 +166,74 @@ void saltsLogErrorAtInterval(VertxTestContext testContext) { testContext.completeNow(); } + + @Test + void shutdownOnKeysetKeyFailedTooLong(VertxTestContext testContext) { + ListAppender logWatcher = new ListAppender<>(); + logWatcher.start(); + ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); + + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + Assertions.assertTrue(logWatcher.list.get(0).getFormattedMessage().contains("keyset keys sync started failing")); + + when(clock.instant()).thenAnswer(i -> Instant.now().plus(7, ChronoUnit.DAYS).plusSeconds(60)); + try { + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + } catch (RuntimeException e) { + verify(shutdownService).Shutdown(1); + Assertions.assertTrue(logWatcher.list.stream().anyMatch(log -> + log.getFormattedMessage().contains("keyset keys have been failing to sync for too long"))); + testContext.completeNow(); + } + } + + @Test + void keysetKeyRecoverOnSuccess(VertxTestContext testContext) { + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + when(clock.instant()).thenAnswer(i -> Instant.now().plus(3, ChronoUnit.DAYS)); + + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true); + + when(clock.instant()).thenAnswer(i -> Instant.now().plus(7, ChronoUnit.DAYS)); + assertDoesNotThrow(() -> { + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + }); + verify(shutdownService, never()).Shutdown(anyInt()); + testContext.completeNow(); + } + + @Test + void keysetKeyNoShutdownWhenAlwaysSuccessful(VertxTestContext testContext) { + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true); + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true); + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(true); + + verify(shutdownService, never()).Shutdown(anyInt()); + testContext.completeNow(); + } + + @Test + void keysetKeyLogProgressAtInterval(VertxTestContext testContext) { + ListAppender logWatcher = new ListAppender<>(); + logWatcher.start(); + ((Logger) LoggerFactory.getLogger(OperatorShutdownHandler.class)).addAppender(logWatcher); + + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + long warnLogCount1 = logWatcher.list.stream().filter(log -> + log.getFormattedMessage().contains("keyset keys sync still failing")).count(); + + when(clock.instant()).thenAnswer(i -> Instant.now().plus(5, ChronoUnit.MINUTES)); + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + long warnLogCount2 = logWatcher.list.stream().filter(log -> + log.getFormattedMessage().contains("keyset keys sync still failing")).count(); + Assertions.assertEquals(warnLogCount1, warnLogCount2); + + when(clock.instant()).thenAnswer(i -> Instant.now().plus(11, ChronoUnit.MINUTES)); + this.operatorShutdownHandler.handleKeysetKeyRefreshResponse(false); + long warnLogCount3 = logWatcher.list.stream().filter(log -> + log.getFormattedMessage().contains("keyset keys sync still failing")).count(); + Assertions.assertTrue(warnLogCount3 > warnLogCount2); + + testContext.completeNow(); + } } diff --git a/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java b/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java index 10565009b..92cf7743f 100644 --- a/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java +++ b/src/test/java/com/uid2/operator/benchmark/BenchmarkCommon.java @@ -73,7 +73,7 @@ static IUIDOperatorService createUidOperatorService() throws Exception { saltProvider.getSnapshot(Instant.now()).getFirstLevelSalt(), /* out */ optOutPartitionFiles); final IOptOutStore optOutStore = new StaticOptOutStore(optOutLocalStorage, make1mOptOutEntryConfig(), optOutPartitionFiles); - final OperatorShutdownHandler shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(1), Duration.ofHours(1), Clock.systemUTC(), new ShutdownService()); + final OperatorShutdownHandler shutdownHandler = new OperatorShutdownHandler(Duration.ofHours(1), Duration.ofHours(1), Duration.ofHours(1), Clock.systemUTC(), new ShutdownService()); return new UIDOperatorService( optOutStore, saltProvider,