Skip to content

Commit 636b088

Browse files
committed
Merge remote-tracking branch 'origin/unstable' with Split-Brain fixes
2 parents 59e6606 + 43766bd commit 636b088

2 files changed

Lines changed: 19 additions & 11 deletions

File tree

store/cluster.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -138,7 +138,7 @@ func (cluster *Cluster) PromoteNewMaster(ctx context.Context,
138138
if err != nil {
139139
return "", err
140140
}
141-
newMasterNodeID, err := shard.promoteNewMaster(ctx, masterNodeID, preferredNodeID)
141+
newMasterNodeID, err := shard.PromoteNewMaster(ctx, masterNodeID, preferredNodeID)
142142
if err != nil {
143143
return "", err
144144
}

store/cluster_shard.go

Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -151,12 +151,21 @@ func (shard *Shard) removeNode(nodeID string) error {
151151
func (shard *Shard) getNewMasterNodeIndex(ctx context.Context, masterNodeIndex int, preferredNodeID string) int {
152152
newMasterNodeIndex := -1
153153
var newestOffset uint64
154+
// Get master sequence to handle empty shard
155+
var masterSequence uint64
156+
if masterNodeIndex >= 0 && masterNodeIndex < len(shard.Nodes) {
157+
masterNode := shard.Nodes[masterNodeIndex]
158+
if _, err := masterNode.GetClusterInfo(ctx); err == nil {
159+
if masterInfo, err := masterNode.GetClusterNodeInfo(ctx); err == nil {
160+
masterSequence = masterInfo.Sequence
161+
}
162+
}
163+
}
154164
for i, node := range shard.Nodes {
155-
// don't promote the current master node
165+
// Don't promote the current master
156166
if i == masterNodeIndex {
157167
continue
158168
}
159-
160169
_, err := node.GetClusterInfo(ctx)
161170
if err != nil {
162171
logger.Get().With(
@@ -166,7 +175,6 @@ func (shard *Shard) getNewMasterNodeIndex(ctx context.Context, masterNodeIndex i
166175
).Warn("Skip the node due to failed to get cluster info")
167176
continue
168177
}
169-
170178
clusterNodeInfo, err := node.GetClusterNodeInfo(ctx)
171179
if err != nil {
172180
logger.Get().With(
@@ -176,24 +184,24 @@ func (shard *Shard) getNewMasterNodeIndex(ctx context.Context, masterNodeIndex i
176184
).Warn("Skip the node due to failed to get info of node")
177185
continue
178186
}
179-
if clusterNodeInfo.Role != RoleSlave || clusterNodeInfo.Sequence == 0 {
187+
// FIX: allow sequence == 0 only when master sequence is also 0
188+
if clusterNodeInfo.Role != RoleSlave || (clusterNodeInfo.Sequence == 0 && masterSequence != 0) {
180189
logger.Get().With(
181190
zap.String("id", node.ID()),
182191
zap.String("addr", node.Addr()),
183192
zap.String("role", clusterNodeInfo.Role),
184193
zap.Uint64("sequence", clusterNodeInfo.Sequence),
185-
).Warn("Skip the node due to role or sequence invalid")
194+
zap.Uint64("master_sequence", masterSequence),
195+
).Warn("Skip the node due to invalid role or unsafe sequence")
186196
continue
187197
}
188-
189198
logger.Get().With(
190199
zap.String("id", node.ID()),
191200
zap.String("addr", node.Addr()),
192201
zap.String("role", clusterNodeInfo.Role),
193202
zap.Uint64("sequence", clusterNodeInfo.Sequence),
194203
).Info("Get slave node info successfully")
195-
196-
// If the preferredNodeID is not empty, we will use it as the new master node.
204+
// Preferred node takes priority
197205
if preferredNodeID != "" && node.ID() == preferredNodeID {
198206
newMasterNodeIndex = i
199207
break
@@ -212,7 +220,7 @@ func (shard *Shard) getNewMasterNodeIndex(ctx context.Context, masterNodeIndex i
212220
// The masterNodeID is used to check if the node is the current master node if it's not empty.
213221
// The preferredNodeID is used to specify the preferred node to be promoted as the new master node,
214222
// it will choose the node with the highest sequence number if the preferredNodeID is empty.
215-
func (shard *Shard) promoteNewMaster(ctx context.Context, masterNodeID, preferredNodeID string) (string, error) {
223+
func (shard *Shard) PromoteNewMaster(ctx context.Context, masterNodeID, preferredNodeID string) (string, error) {
216224
if len(shard.Nodes) <= 1 {
217225
return "", consts.ErrShardNoReplica
218226
}
@@ -320,4 +328,4 @@ func (shard *Shard) UnmarshalJSON(bytes []byte) error {
320328
shard.Nodes[i] = node
321329
}
322330
return nil
323-
}
331+
}

0 commit comments

Comments
 (0)