109109import org .apache .accumulo .core .trace .TraceUtil ;
110110import org .apache .accumulo .core .util .HostAndPort ;
111111import org .apache .accumulo .core .util .Retry ;
112+ import org .apache .accumulo .core .util .Timer ;
112113import org .apache .accumulo .core .util .threads .ThreadPools ;
113114import org .apache .accumulo .core .util .threads .Threads ;
114115import org .apache .accumulo .manager .metrics .BalancerMetrics ;
@@ -195,7 +196,7 @@ public class Manager extends AbstractServer implements LiveTServerSet.Listener,
195196 final AuditedSecurityOperation security ;
196197 final Map <TServerInstance ,AtomicInteger > badServers =
197198 Collections .synchronizedMap (new HashMap <>());
198- final Map <TServerInstance ,AtomicInteger > tserverHaltRpcAttempts =
199+ final Map <TServerInstance ,GracefulHaltTimer > tserverHaltRpcAttempts =
199200 Collections .synchronizedMap (new HashMap <>());
200201 final Set <TServerInstance > serversToShutdown = Collections .synchronizedSet (new HashSet <>());
201202 final Migrations migrations = new Migrations ();
@@ -1143,6 +1144,30 @@ private List<TabletMigration> checkMigrationSanity(Set<TabletServerId> current,
11431144
11441145 }
11451146
1147+ /**
1148+ * This class tracks details about the haltRPCs used
1149+ */
1150+ private static class GracefulHaltTimer {
1151+
1152+ Duration maxHaltGraceDuration ;
1153+ Timer timer ;
1154+
1155+ public GracefulHaltTimer (AccumuloConfiguration config ) {
1156+ timer = null ;
1157+ maxHaltGraceDuration =
1158+ Duration .ofMillis (config .getTimeInMillis (Property .MANAGER_TSERVER_HALT_DURATION ));
1159+ }
1160+
1161+ public void startTimer () {
1162+ timer = Timer .startNew ();
1163+ }
1164+
1165+ public boolean shouldForceHalt () {
1166+ return maxHaltGraceDuration .toMillis () != 0 && timer != null
1167+ && timer .hasElapsed (maxHaltGraceDuration );
1168+ }
1169+ }
1170+
11461171 private SortedMap <TServerInstance ,TabletServerStatus >
11471172 gatherTableInformation (Set <TServerInstance > currentServers ) {
11481173 final long rpcTimeout = getConfiguration ().getTimeInMillis (Property .GENERAL_RPC_TIMEOUT );
@@ -1153,7 +1178,7 @@ private List<TabletMigration> checkMigrationSanity(Set<TabletServerId> current,
11531178 final SortedMap <TServerInstance ,TabletServerStatus > result = new ConcurrentSkipListMap <>();
11541179 final RateLimiter shutdownServerRateLimiter = RateLimiter .create (MAX_SHUTDOWNS_PER_SEC );
11551180 final int maxTserverRpcHaltAttempts =
1156- getConfiguration ().getCount (Property .MANAGER_TSERVER_HALT_ATTEMPTS );
1181+ getConfiguration ().getCount (Property .MANAGER_TSERVER_HALT_DURATION );
11571182 final boolean forceHaltingEnabled = maxTserverRpcHaltAttempts != 0 ;
11581183 for (TServerInstance serverInstance : currentServers ) {
11591184 final TServerInstance server = serverInstance ;
@@ -1195,9 +1220,9 @@ private List<TabletMigration> checkMigrationSanity(Set<TabletServerId> current,
11951220 > MAX_BAD_STATUS_COUNT ) {
11961221 if (shutdownServerRateLimiter .tryAcquire ()) {
11971222 log .warn ("attempting to stop {}" , server );
1198- if ( forceHaltingEnabled
1199- && ( tserverHaltRpcAttempts . computeIfAbsent ( server , s -> new AtomicInteger ( 0 ))
1200- . incrementAndGet () > maxTserverRpcHaltAttempts )) {
1223+ var gracefulHaltTimer = tserverHaltRpcAttempts . computeIfAbsent ( server ,
1224+ s -> new GracefulHaltTimer ( getConfiguration ()));
1225+ if ( gracefulHaltTimer . shouldForceHalt ( )) {
12011226 log .warn ("tserver {} is not responding to halt requests, deleting zlock" , server );
12021227 var zk = getContext ().getZooReaderWriter ();
12031228 var iid = getContext ().getInstanceID ();
@@ -1221,6 +1246,8 @@ private List<TabletMigration> checkMigrationSanity(Set<TabletServerId> current,
12211246 log .trace ("error attempting to halt tablet server {}" , server , e1 );
12221247 } catch (Exception e2 ) {
12231248 log .info ("error talking to troublesome tablet server {}" , server , e2 );
1249+ } finally {
1250+ gracefulHaltTimer .startTimer ();
12241251 }
12251252 }
12261253 } else {
0 commit comments