@@ -297,6 +297,9 @@ public void handleIO() throws IOException {
297297 }
298298 }
299299
300+ // Deal with the memcached nodes that were removed from ZK but still have operations in queue.
301+ handleDelayedClosingNodes ();
302+
300303 // Deal with the memcached server group that's been added by CacheManager.
301304 handleCacheNodesChange ();
302305
@@ -323,12 +326,18 @@ private void handleNodesToRemove(final List<MemcachedNode> nodesToRemove) {
323326 }
324327 /* ENABLE_MIGRATION end */
325328
329+ if (node .isActive ()) {
330+ // if a memcached node is removed from ZK but can still serve operations, do NOT cancel it.
331+ // operations that remain in operation queue will be processed until connection is lost.
332+ // once all remaining operations are processed, client will close connection.
333+ // if connection is lost before remaining operations are processed,
334+ // all of them will be canceled after connection is lost.
335+ continue ;
336+ }
337+
326338 // removing node is not related to failure mode.
327339 // so, cancel operations regardless of failure mode.
328- String cause = "node removed." ;
329- cancelOperations (node .destroyReadQueue (false ), cause );
330- cancelOperations (node .destroyWriteQueue (false ), cause );
331- cancelOperations (node .destroyInputQueue (), cause );
340+ cancelAllOperations (node , "node removed." );
332341 }
333342 }
334343
@@ -706,6 +715,38 @@ public void complete() {
706715 getLogger ().debug ("Added %s to writeQ of %s" , op , node );
707716 }
708717
718+ // Handle the memcached nodes that removed from ZK but has operation in queue.
719+ void handleDelayedClosingNodes () {
720+ Collection <MemcachedNode > closingNodes = locator .getDelayedClosingNodes ();
721+ if (closingNodes .isEmpty ()) {
722+ return ;
723+ }
724+
725+ Collection <MemcachedNode > closedNodes = new HashSet <>();
726+ for (MemcachedNode node : closingNodes ) {
727+ boolean isConnected = node .isConnected ();
728+ boolean hasOp = node .hasOp ();
729+
730+ if (isConnected && !hasOp ) {
731+ try {
732+ node .closeChannel ();
733+ } catch (IOException e ) {
734+ getLogger ().error ("Failed to closeChannel the node : " + node );
735+ }
736+ } else if (!isConnected && hasOp ) {
737+ cancelAllOperations (node , "connection lost after node removed." );
738+ } else {
739+ continue ;
740+ }
741+
742+ closedNodes .add (node );
743+ }
744+
745+ if (!closedNodes .isEmpty ()) {
746+ locator .updateDelayedClosingNodes (closedNodes );
747+ }
748+ }
749+
709750 // Handle the memcached server group that's been added by CacheManager.
710751 void handleCacheNodesChange () throws IOException {
711752 /* ENABLE_MIGRATION if */
@@ -1279,6 +1320,12 @@ private void cancelOperations(Collection<Operation> ops, String cause) {
12791320 }
12801321 }
12811322
1323+ private void cancelAllOperations (MemcachedNode node , String cause ) {
1324+ cancelOperations (node .destroyReadQueue (false ), cause );
1325+ cancelOperations (node .destroyWriteQueue (false ), cause );
1326+ cancelOperations (node .destroyInputQueue (), cause );
1327+ }
1328+
12821329 private void redistributeOperations (Collection <Operation > ops , String cause ) {
12831330 for (Operation op : ops ) {
12841331 if (op instanceof KeyedOperation ) {
0 commit comments