4646import java .io .File ;
4747import java .io .IOException ;
4848import java .nio .charset .StandardCharsets ;
49+ import java .util .ArrayList ;
50+ import java .util .List ;
4951import java .util .Map ;
5052import java .util .concurrent .CountDownLatch ;
5153
@@ -55,13 +57,27 @@ public class ZkHighAvailabilityContainer implements HighAvailabilityContainer, L
5557
5658 private final LeaderLatch leaderLatch ;
5759 private final CuratorFramework zkClient ;
60+
61+ // Package-private accessors for testing
62+ CuratorFramework getZkClient () {
63+ return zkClient ;
64+ }
65+
66+ LeaderLatch getLeaderLatch () {
67+ return leaderLatch ;
68+ }
69+
5870 private final String tableServiceMasterPath ;
5971 private final String optimizingServiceMasterPath ;
72+ private final String nodesPath ;
6073 private final AmsServerInfo tableServiceServerInfo ;
6174 private final AmsServerInfo optimizingServiceServerInfo ;
75+ private final boolean isMasterSlaveMode ;
6276 private volatile CountDownLatch followerLatch ;
77+ private String registeredNodePath ;
6378
6479 public ZkHighAvailabilityContainer (Configurations serviceConfig ) throws Exception {
80+ this .isMasterSlaveMode = serviceConfig .getBoolean (AmoroManagementConf .USE_MASTER_SLAVE_MODE );
6581 if (serviceConfig .getBoolean (AmoroManagementConf .HA_ENABLE )) {
6682 String zkServerAddress = serviceConfig .getString (AmoroManagementConf .HA_ZOOKEEPER_ADDRESS );
6783 int zkSessionTimeout =
@@ -71,6 +87,7 @@ public ZkHighAvailabilityContainer(Configurations serviceConfig) throws Exceptio
7187 String haClusterName = serviceConfig .getString (AmoroManagementConf .HA_CLUSTER_NAME );
7288 tableServiceMasterPath = AmsHAProperties .getTableServiceMasterPath (haClusterName );
7389 optimizingServiceMasterPath = AmsHAProperties .getOptimizingServiceMasterPath (haClusterName );
90+ nodesPath = AmsHAProperties .getNodesPath (haClusterName );
7491 ExponentialBackoffRetry retryPolicy = new ExponentialBackoffRetry (1000 , 3 , 5000 );
7592 setupZookeeperAuth (serviceConfig );
7693 this .zkClient =
@@ -83,6 +100,7 @@ public ZkHighAvailabilityContainer(Configurations serviceConfig) throws Exceptio
83100 zkClient .start ();
84101 createPathIfNeeded (tableServiceMasterPath );
85102 createPathIfNeeded (optimizingServiceMasterPath );
103+ createPathIfNeeded (nodesPath );
86104 String leaderPath = AmsHAProperties .getLeaderPath (haClusterName );
87105 createPathIfNeeded (leaderPath );
88106 leaderLatch = new LeaderLatch (zkClient , leaderPath );
@@ -103,8 +121,10 @@ public ZkHighAvailabilityContainer(Configurations serviceConfig) throws Exceptio
103121 zkClient = null ;
104122 tableServiceMasterPath = null ;
105123 optimizingServiceMasterPath = null ;
124+ nodesPath = null ;
106125 tableServiceServerInfo = null ;
107126 optimizingServiceServerInfo = null ;
127+ registeredNodePath = null ;
108128 // block follower latch forever when ha is disabled
109129 followerLatch = new CountDownLatch (1 );
110130 }
@@ -141,8 +161,25 @@ public void waitLeaderShip() throws Exception {
141161 }
142162
143163 @ Override
144- public void registAndElect () throws Exception {
145- // TODO Here you can register for AMS and participate in the election.
164+ public void registerAndElect () throws Exception {
165+ if (!isMasterSlaveMode ) {
166+ LOG .debug ("Master-slave mode is not enabled, skip node registration" );
167+ return ;
168+ }
169+ if (zkClient == null || nodesPath == null ) {
170+ LOG .warn ("HA is not enabled, skip node registration" );
171+ return ;
172+ }
173+ // Register node to ZK using ephemeral node
174+ // The node will be automatically deleted when the session expires
175+ String nodeInfo = JacksonUtil .toJSONString (optimizingServiceServerInfo );
176+ registeredNodePath =
177+ zkClient
178+ .create ()
179+ .creatingParentsIfNeeded ()
180+ .withMode (CreateMode .EPHEMERAL_SEQUENTIAL )
181+ .forPath (nodesPath + "/node-" , nodeInfo .getBytes (StandardCharsets .UTF_8 ));
182+ LOG .info ("Registered AMS node to ZK: {}" , registeredNodePath );
146183 }
147184
148185 @ Override
@@ -158,6 +195,18 @@ public void waitFollowerShip() throws Exception {
158195 public void close () {
159196 if (leaderLatch != null ) {
160197 try {
198+ // Unregister node from ZK
199+ if (registeredNodePath != null ) {
200+ try {
201+ zkClient .delete ().forPath (registeredNodePath );
202+ LOG .info ("Unregistered AMS node from ZK: {}" , registeredNodePath );
203+ } catch (KeeperException .NoNodeException e ) {
204+ // Node already deleted, ignore
205+ LOG .debug ("Node {} already deleted" , registeredNodePath );
206+ } catch (Exception e ) {
207+ LOG .warn ("Failed to unregister node from ZK: {}" , registeredNodePath , e );
208+ }
209+ }
161210 this .leaderLatch .close ();
162211 this .zkClient .close ();
163212 } catch (IOException e ) {
@@ -192,6 +241,60 @@ private AmsServerInfo buildServerInfo(String host, int thriftBindPort, int restB
192241 return amsServerInfo ;
193242 }
194243
244+ /**
245+ * Get list of alive nodes. Only the leader node can call this method.
246+ *
247+ * @return List of alive node information
248+ */
249+ public List <AmsServerInfo > getAliveNodes () {
250+ List <AmsServerInfo > aliveNodes = new ArrayList <>();
251+ if (!isMasterSlaveMode ) {
252+ LOG .debug ("Master-slave mode is not enabled, return empty node list" );
253+ return aliveNodes ;
254+ }
255+ if (zkClient == null || nodesPath == null ) {
256+ LOG .warn ("HA is not enabled, return empty node list" );
257+ return aliveNodes ;
258+ }
259+ if (!leaderLatch .hasLeadership ()) {
260+ LOG .warn ("Only leader node can get alive nodes list" );
261+ return aliveNodes ;
262+ }
263+ try {
264+ List <String > nodePaths = zkClient .getChildren ().forPath (nodesPath );
265+ for (String nodePath : nodePaths ) {
266+ try {
267+ String fullPath = nodesPath + "/" + nodePath ;
268+ byte [] data = zkClient .getData ().forPath (fullPath );
269+ if (data != null && data .length > 0 ) {
270+ String nodeInfoJson = new String (data , StandardCharsets .UTF_8 );
271+ AmsServerInfo nodeInfo = JacksonUtil .parseObject (nodeInfoJson , AmsServerInfo .class );
272+ aliveNodes .add (nodeInfo );
273+ }
274+ } catch (Exception e ) {
275+ LOG .warn ("Failed to get node info for path: {}" , nodePath , e );
276+ }
277+ }
278+ } catch (KeeperException .NoNodeException e ) {
279+ LOG .debug ("Nodes path {} does not exist" , nodesPath );
280+ } catch (Exception e ) {
281+ throw new RuntimeException (e );
282+ }
283+ return aliveNodes ;
284+ }
285+
286+ /**
287+ * Check if current node is the leader.
288+ *
289+ * @return true if current node is the leader, false otherwise
290+ */
291+ public boolean hasLeadership () {
292+ if (leaderLatch == null ) {
293+ return false ;
294+ }
295+ return leaderLatch .hasLeadership ();
296+ }
297+
195298 private void createPathIfNeeded (String path ) throws Exception {
196299 try {
197300 zkClient .create ().creatingParentsIfNeeded ().withMode (CreateMode .PERSISTENT ).forPath (path );
0 commit comments