From 397bf8455929d944ba9a063551f28f4f5047f4cc Mon Sep 17 00:00:00 2001 From: dhingu Date: Mon, 22 Jun 2026 16:14:37 +0530 Subject: [PATCH 1/2] HBASE-30160: Prevent region creation if the encoded region names are the same --- .../hbase/master/assignment/RegionStates.java | 11 +- .../assignment/SplitTableRegionProcedure.java | 3 + .../procedure/CreateTableProcedure.java | 8 ++ .../hadoop/hbase/util/ModifyRegionUtils.java | 32 ++++++ .../TestEncodedNameCollisionDetection.java | 108 ++++++++++++++++++ 5 files changed, 160 insertions(+), 2 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEncodedNameCollisionDetection.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java index b33c6c49fdd5..731fba3b4428 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java @@ -330,6 +330,14 @@ public List getRegionsOfTableForEnabling(TableName table) { regionNode -> !regionNode.isInState(State.SPLIT) && !regionNode.getRegionInfo().isSplit()); } + /** + * Returns all regions of the table, irrespective of assignment or split/offline state. + */ + public List getAllRegionsOfTable(TableName table) { + return getTableRegionStateNodes(table).stream().map(RegionStateNode::getRegionInfo) + .collect(Collectors.toList()); + } + /** * Get the regions for deleting a table. *

@@ -338,8 +346,7 @@ public List getRegionsOfTableForEnabling(TableName table) { * references to the regions, we will lose the data of the regions. */ public List getRegionsOfTableForDeleting(TableName table) { - return getTableRegionStateNodes(table).stream().map(RegionStateNode::getRegionInfo) - .collect(Collectors.toList()); + return getAllRegionsOfTable(table); } /** Returns Return the regions of the table and filter them. */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index 690a4e1d5d65..4aad02afe045 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -71,6 +71,7 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.CommonFSUtils; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.ModifyRegionUtils; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.wal.WALSplitUtil; @@ -141,6 +142,8 @@ public SplitTableRegionProcedure(final MasterProcedureEnv env, final RegionInfo .setEndKey(bestSplitRow).setSplit(false).setRegionId(rid).build(); this.daughterTwoRI = RegionInfoBuilder.newBuilder(table).setStartKey(bestSplitRow) .setEndKey(regionToSplit.getEndKey()).setSplit(false).setRegionId(rid).build(); + ModifyRegionUtils.checkForEncodedNameCollisions(Arrays.asList(daughterOneRI, daughterTwoRI), + env.getAssignmentManager().getRegionStates().getAllRegionsOfTable(getTableName())); if (tableDescriptor.getRegionSplitPolicyClassName() != null) { // Since we don't have region reference here, creating the split policy instance without it. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java index 6f7ab6b82104..ca347eba7cc6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java @@ -296,6 +296,14 @@ private boolean prepareCreate(final MasterProcedureEnv env) throws IOException { return false; } + try { + ModifyRegionUtils.checkForEncodedNameCollisions(newRegions, + env.getAssignmentManager().getRegionStates().getAllRegionsOfTable(getTableName())); + } catch (DoNotRetryIOException e) { + setFailure("master-create-table", e); + return false; + } + if (!tableName.isSystemTable()) { // do not check rs group for system tables as we may block the bootstrap. Supplier forWhom = () -> "table " + tableName; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java index d91cd9b78615..0e91ce47fd90 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java @@ -21,7 +21,11 @@ import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.CompletionService; import java.util.concurrent.ExecutionException; @@ -30,6 +34,7 @@ import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; @@ -82,6 +87,33 @@ public static RegionInfo[] createRegionInfos(TableDescriptor tableDescriptor, return hRegionInfos; } + /** + * Checks a set of candidate regions against each other and against a set of already-existing + * regions for encoded-name collisions. + */ + public static void checkForEncodedNameCollisions(final Collection candidates, + final Collection existing) throws IOException { + if (candidates == null || candidates.isEmpty()) { + return; + } + Map seen = new HashMap<>(); + if (existing != null) { + for (RegionInfo ri : existing) { + seen.putIfAbsent(ri.getEncodedName(), ri); + } + } + Set candidateNames = new HashSet<>(); + for (RegionInfo ri : candidates) { + String encoded = ri.getEncodedName(); + RegionInfo conflict = seen.get(encoded); + if (conflict != null || !candidateNames.add(encoded)) { + throw new DoNotRetryIOException("Encoded region name collision detected: '" + encoded + + "' for table " + ri.getTable() + ". Refusing to proceed."); + } + seen.put(encoded, ri); + } + } + /** * Create new set of regions on the specified file-system. NOTE: that you should add the regions * to hbase:meta after this operation. diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEncodedNameCollisionDetection.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEncodedNameCollisionDetection.java new file mode 100644 index 000000000000..2f3a6dd4ba1a --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEncodedNameCollisionDetection.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.procedure; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import org.apache.hadoop.hbase.DoNotRetryIOException; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.ModifyRegionUtils; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +/** + * Tests for encoded region-name collision detection (HBASE-30160). If two regions end up with the + * same encoded name, we should fail fast instead of allowing subtle metadata corruption later. + */ + +@Tag(SmallTests.TAG) +public class TestEncodedNameCollisionDetection { + + /** + * Happy-path check: distinct candidate regions should pass without throwing. + */ + @Test + public void testAcceptsDistinctCandidates() { + TableName tableName = TableName.valueOf("test_table"); + long regionId = System.currentTimeMillis(); + + RegionInfo ri1 = RegionInfoBuilder.newBuilder(tableName).setStartKey(new byte[] { 0, 0 }) + .setEndKey(new byte[] { 1, 0 }).setSplit(false).setRegionId(regionId).build(); + + RegionInfo ri2 = RegionInfoBuilder.newBuilder(tableName).setStartKey(new byte[] { 1, 0 }) + .setEndKey(new byte[] { 2, 0 }).setSplit(false).setRegionId(regionId + 1).build(); + + assertDoesNotThrow( + () -> ModifyRegionUtils.checkForEncodedNameCollisions(Arrays.asList(ri1, ri2), null)); + } + + @Test + public void testDetectsDuplicatesInCandidates() { + TableName tableName = TableName.valueOf("test_table"); + RegionInfo ri1 = mockRegionInfo(tableName, "same-encoded-name"); + RegionInfo ri2 = mockRegionInfo(tableName, "same-encoded-name"); + + DoNotRetryIOException exception = assertThrows(DoNotRetryIOException.class, + () -> ModifyRegionUtils.checkForEncodedNameCollisions(Arrays.asList(ri1, ri2), null)); + assertTrue(exception.getMessage().contains("Encoded region name collision detected")); + } + + /** + * A candidate region should be rejected if its encoded name already exists. + */ + @Test + public void testDetectsCollisionWithExistingRegions() { + TableName tableName = TableName.valueOf("test_table"); + RegionInfo existingRegion = mockRegionInfo(tableName, "same-encoded-name"); + RegionInfo candidateRegion = mockRegionInfo(tableName, "same-encoded-name"); + + DoNotRetryIOException exception = assertThrows(DoNotRetryIOException.class, + () -> ModifyRegionUtils.checkForEncodedNameCollisions(Arrays.asList(candidateRegion), + Arrays.asList(existingRegion))); + assertTrue(exception.getMessage().contains("Encoded region name collision detected")); + } + + /** + * Test that checkForEncodedNameCollisions properly handles empty/null inputs. + */ + @Test + public void testHandlesEmptyInputs() throws IOException { + ModifyRegionUtils.checkForEncodedNameCollisions(null, null); + ModifyRegionUtils.checkForEncodedNameCollisions(Collections.emptyList(), null); + ModifyRegionUtils.checkForEncodedNameCollisions(null, Collections.emptyList()); + ModifyRegionUtils.checkForEncodedNameCollisions(Collections.emptyList(), + Collections.emptyList()); + } + + private RegionInfo mockRegionInfo(TableName tableName, String encodedName) { + RegionInfo regionInfo = mock(RegionInfo.class); + when(regionInfo.getEncodedName()).thenReturn(encodedName); + when(regionInfo.getTable()).thenReturn(tableName); + return regionInfo; + } +} From 82745d1785638d86bf2c390dbbd2ea8b782aeff7 Mon Sep 17 00:00:00 2001 From: dhingu Date: Thu, 25 Jun 2026 14:20:10 +0530 Subject: [PATCH 2/2] HBASE-30160: Address review comments --- .../assignment/SplitTableRegionProcedure.java | 2 +- .../procedure/CreateTableProcedure.java | 2 +- .../hadoop/hbase/util/ModifyRegionUtils.java | 24 +++++----- .../TestEncodedNameCollisionDetection.java | 45 +++++++++++++------ 4 files changed, 43 insertions(+), 30 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index 4aad02afe045..e601006c43a7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -143,7 +143,7 @@ public SplitTableRegionProcedure(final MasterProcedureEnv env, final RegionInfo this.daughterTwoRI = RegionInfoBuilder.newBuilder(table).setStartKey(bestSplitRow) .setEndKey(regionToSplit.getEndKey()).setSplit(false).setRegionId(rid).build(); ModifyRegionUtils.checkForEncodedNameCollisions(Arrays.asList(daughterOneRI, daughterTwoRI), - env.getAssignmentManager().getRegionStates().getAllRegionsOfTable(getTableName())); + env.getAssignmentManager().getRegionStates()); if (tableDescriptor.getRegionSplitPolicyClassName() != null) { // Since we don't have region reference here, creating the split policy instance without it. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java index ca347eba7cc6..26357c3b3730 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java @@ -298,7 +298,7 @@ private boolean prepareCreate(final MasterProcedureEnv env) throws IOException { try { ModifyRegionUtils.checkForEncodedNameCollisions(newRegions, - env.getAssignmentManager().getRegionStates().getAllRegionsOfTable(getTableName())); + env.getAssignmentManager().getRegionStates()); } catch (DoNotRetryIOException e) { setFailure("master-create-table", e); return false; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java index 0e91ce47fd90..72c13a8b7b39 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java @@ -21,10 +21,9 @@ import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.CompletionService; @@ -39,6 +38,7 @@ import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.master.assignment.RegionStates; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.yetus.audience.InterfaceAudience; @@ -88,29 +88,25 @@ public static RegionInfo[] createRegionInfos(TableDescriptor tableDescriptor, } /** - * Checks a set of candidate regions against each other and against a set of already-existing - * regions for encoded-name collisions. + * Checks candidate regions for encoded\-name collisions. Ensures there are no duplicates in the + * input and no conflicts with existing region states. */ public static void checkForEncodedNameCollisions(final Collection candidates, - final Collection existing) throws IOException { + final RegionStates regionStates) throws IOException { if (candidates == null || candidates.isEmpty()) { return; } - Map seen = new HashMap<>(); - if (existing != null) { - for (RegionInfo ri : existing) { - seen.putIfAbsent(ri.getEncodedName(), ri); - } - } + Objects.requireNonNull(regionStates, "regionStates is null"); Set candidateNames = new HashSet<>(); for (RegionInfo ri : candidates) { String encoded = ri.getEncodedName(); - RegionInfo conflict = seen.get(encoded); - if (conflict != null || !candidateNames.add(encoded)) { + if ( + !candidateNames.add(encoded) + || regionStates.getRegionStateNodeFromEncodedRegionName(encoded) != null + ) { throw new DoNotRetryIOException("Encoded region name collision detected: '" + encoded + "' for table " + ri.getTable() + ". Refusing to proceed."); } - seen.put(encoded, ri); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEncodedNameCollisionDetection.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEncodedNameCollisionDetection.java index 2f3a6dd4ba1a..2a2b8cd224c8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEncodedNameCollisionDetection.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestEncodedNameCollisionDetection.java @@ -23,13 +23,14 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import java.io.IOException; import java.util.Arrays; import java.util.Collections; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.master.assignment.RegionStateNode; +import org.apache.hadoop.hbase.master.assignment.RegionStates; import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.util.ModifyRegionUtils; import org.junit.jupiter.api.Tag; @@ -57,18 +58,25 @@ public void testAcceptsDistinctCandidates() { RegionInfo ri2 = RegionInfoBuilder.newBuilder(tableName).setStartKey(new byte[] { 1, 0 }) .setEndKey(new byte[] { 2, 0 }).setSplit(false).setRegionId(regionId + 1).build(); + RegionStates regionStates = mock(RegionStates.class); + assertDoesNotThrow( - () -> ModifyRegionUtils.checkForEncodedNameCollisions(Arrays.asList(ri1, ri2), null)); + () -> ModifyRegionUtils.checkForEncodedNameCollisions(Arrays.asList(ri1, ri2), regionStates)); } + /** + * Verifies that duplicate encoded region names within candidate regions are rejected. + */ @Test public void testDetectsDuplicatesInCandidates() { TableName tableName = TableName.valueOf("test_table"); RegionInfo ri1 = mockRegionInfo(tableName, "same-encoded-name"); RegionInfo ri2 = mockRegionInfo(tableName, "same-encoded-name"); + RegionStates regionStates = mock(RegionStates.class); + DoNotRetryIOException exception = assertThrows(DoNotRetryIOException.class, - () -> ModifyRegionUtils.checkForEncodedNameCollisions(Arrays.asList(ri1, ri2), null)); + () -> ModifyRegionUtils.checkForEncodedNameCollisions(Arrays.asList(ri1, ri2), regionStates)); assertTrue(exception.getMessage().contains("Encoded region name collision detected")); } @@ -78,25 +86,34 @@ public void testDetectsDuplicatesInCandidates() { @Test public void testDetectsCollisionWithExistingRegions() { TableName tableName = TableName.valueOf("test_table"); - RegionInfo existingRegion = mockRegionInfo(tableName, "same-encoded-name"); RegionInfo candidateRegion = mockRegionInfo(tableName, "same-encoded-name"); + RegionStates regionStates = mock(RegionStates.class); + when(regionStates.getRegionStateNodeFromEncodedRegionName("same-encoded-name")) + .thenReturn(mock(RegionStateNode.class)); - DoNotRetryIOException exception = assertThrows(DoNotRetryIOException.class, - () -> ModifyRegionUtils.checkForEncodedNameCollisions(Arrays.asList(candidateRegion), - Arrays.asList(existingRegion))); + DoNotRetryIOException exception = + assertThrows(DoNotRetryIOException.class, () -> ModifyRegionUtils + .checkForEncodedNameCollisions(Arrays.asList(candidateRegion), regionStates)); assertTrue(exception.getMessage().contains("Encoded region name collision detected")); } /** - * Test that checkForEncodedNameCollisions properly handles empty/null inputs. + * Test that checkForEncodedNameCollisions handles empty/null inputs and rejects null RegionStates + * when candidates are present. */ @Test - public void testHandlesEmptyInputs() throws IOException { - ModifyRegionUtils.checkForEncodedNameCollisions(null, null); - ModifyRegionUtils.checkForEncodedNameCollisions(Collections.emptyList(), null); - ModifyRegionUtils.checkForEncodedNameCollisions(null, Collections.emptyList()); - ModifyRegionUtils.checkForEncodedNameCollisions(Collections.emptyList(), - Collections.emptyList()); + public void testInputValidationAndNullRegionStatesBehavior() { + assertDoesNotThrow(() -> ModifyRegionUtils.checkForEncodedNameCollisions(null, null)); + assertDoesNotThrow( + () -> ModifyRegionUtils.checkForEncodedNameCollisions(Collections.emptyList(), null)); + assertDoesNotThrow( + () -> ModifyRegionUtils.checkForEncodedNameCollisions(null, mock(RegionStates.class))); + assertDoesNotThrow(() -> ModifyRegionUtils + .checkForEncodedNameCollisions(Collections.emptyList(), mock(RegionStates.class))); + + RegionInfo candidateRegion = mockRegionInfo(TableName.valueOf("test_table"), "candidate"); + assertThrows(NullPointerException.class, + () -> ModifyRegionUtils.checkForEncodedNameCollisions(Arrays.asList(candidateRegion), null)); } private RegionInfo mockRegionInfo(TableName tableName, String encodedName) {